Search code examples
python-2.7, tensorflow, tensorflow-slim

Input to reshape is a tensor with 89401 values, but the requested shape has 268203


When retraining the inception_v3 network, I create a TFRecord file from images (all JPEG-encoded) of shape [299, 299] as training data. I only get the result for step=0, and then I get the following error.

tensorflow.python.framework.errors_impl.InvalidArgumentError: Input to reshape is a tensor with 89401 values, but the requested shape has 268203

[[Node: Reshape = Reshape[T=DT_UINT8, Tshape=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"](DecodeRaw, Reshape/shape)]]

89401 = 299x299, 268203 = 299x299x3. My code for creating the TFRecord file is:

import os
import tensorflow as tf
from PIL import Image 
import matplotlib.pyplot as plt
import numpy as np

# Image root directory; presumably passed to create_tfrecord() as
# ``file_path`` -- TODO confirm against the caller.
cwd = '/home/xzy/input_data/testnet/images/'

# Directory in which create_tfrecord() writes 'train.tfrecords'.
tfrecord_dir = '/home/xzy/input_data/testnet/'

# Size (in pixels) every image is resized to before being serialized.
width = 299
height = 299

def create_tfrecord(file_path):
    """Serialize every class image under ``file_path`` into one TFRecord file.

    For each class name a sub-directory ``file_path/<name>/`` is listed and
    every file in it is opened as an image, converted to RGB, resized to
    ``(width, height)`` and written to ``tfrecord_dir/train.tfrecords`` as a
    ``tf.train.Example`` with two features:

    * ``label``   -- int64, the index of the class in ``classes``.
    * ``img_raw`` -- the raw RGB pixel bytes (``width * height * 3`` bytes).

    Args:
        file_path: Directory that contains one sub-directory per class.

    NOTE: labels follow the fixed order of ``classes`` below. Records written
    by an earlier version that iterated a ``set`` may use a different
    class-to-index mapping and should be regenerated.
    """
    # An ordered tuple rather than a set: set iteration order is arbitrary,
    # so enumerate() over a set does not give a stable class -> label mapping.
    classes = ('boat', 'junk', 'carrier', 'warship', 'raft', 'speedboat')
    writer = tf.python_io.TFRecordWriter(
        os.path.join(tfrecord_dir, 'train.tfrecords'))

    try:
        for index, name in enumerate(classes):
            class_path = os.path.join(file_path, name)
            for img_name in os.listdir(class_path):
                img_path = os.path.join(class_path, img_name)

                img = Image.open(img_path)
                # Force three channels. A grayscale ('L') or palette image
                # would otherwise yield width*height bytes instead of
                # width*height*3, and the reader's reshape to [299, 299, 3]
                # fails with "Input to reshape is a tensor with 89401 values,
                # but the requested shape has 268203".
                img = img.convert('RGB').resize((width, height))
                img_raw = img.tobytes()
                example = tf.train.Example(features=tf.train.Features(feature={
                    "label": tf.train.Feature(
                        int64_list=tf.train.Int64List(value=[index])),
                    'img_raw': tf.train.Feature(
                        bytes_list=tf.train.BytesList(value=[img_raw]))
                }))
                writer.write(example.SerializeToString())
    finally:
        # Always flush and close the record file, even if an image fails.
        writer.close()

def read_record(path):
    """Read one batch of 32 images and one-hot labels from a TFRecord file.

    Builds a queue-based input pipeline on the default graph that parses the
    ``label`` (int64) and ``img_raw`` (raw bytes) features, decodes the bytes
    as uint8, reshapes them to ``[299, 299, 3]`` and batches 32 examples.
    One batch is fetched in a temporary session, and the flattened length of
    every image in it is printed.

    Args:
        path: Path of the TFRecord file to read.

    Returns:
        A tuple ``(imgs, labs)``: ``imgs`` is a float32 ``tf.Tensor`` built
        from the fetched batch (shape ``[32, 299, 299, 3]``) and ``labs`` is
        an int32 NumPy array of one-hot labels (shape ``[32, 6]``).

    Raises:
        tf.errors.OutOfRangeError: If the input queue is exhausted before a
            full batch could be read.

    NOTE(review): each call adds new ops to the default graph; calling this
    once per training step will keep growing the graph -- confirm how the
    caller uses it.
    """
    filename_queue = tf.train.string_input_producer([path])
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(serialized_example,
                                       features={
                                           'label': tf.FixedLenFeature([], tf.int64),
                                           'img_raw': tf.FixedLenFeature([], tf.string),
                                       })
    image = tf.decode_raw(features['img_raw'], tf.uint8)
    # Fails at run time if a record does not hold exactly 299*299*3 bytes.
    image = tf.reshape(image, [299, 299, 3])
    label = tf.cast(features['label'], tf.int32)
    image_batch, label_batch = tf.train.batch([image, label],
                                              batch_size=32, num_threads=4, capacity=300)
    label_batch = tf.one_hot(label_batch, depth=6)
    label_batch = tf.cast(label_batch, dtype=tf.int32)
    label_batch = tf.reshape(label_batch, [32, 6])

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            # Fetch concrete NumPy values; keep them as NumPy while in here.
            img_array, labs = sess.run([image_batch, label_batch])
        except tf.errors.OutOfRangeError:
            print('Done training -- epoch limit reached')
            # No batch was produced, so there is nothing valid to return.
            raise
        finally:
            # Stop and join the queue threads on success and on failure.
            coord.request_stop()
            coord.join(threads)

    print("##########################################")
    for i in range(32):
        # Number of values in one image; same as len(array[i].flatten()).
        print(img_array[i].size)
    print('#######################################')

    # Callers receive the images as a float32 tensor, as before.
    imgs = tf.to_float(img_array)
    return imgs, labs

Besides, I have 6 classes with 32 images per class, and batch_size = 32. Before training I print the size of each image matrix, which is 268203. The output log is:

##########################################
i=0, len(ar)=268203
i=1, len(ar)=268203
i=2, len(ar)=268203
i=3, len(ar)=268203
i=4, len(ar)=268203
i=5, len(ar)=268203
i=6, len(ar)=268203
i=7, len(ar)=268203
i=8, len(ar)=268203
i=9, len(ar)=268203
i=10, len(ar)=268203
i=11, len(ar)=268203
i=12, len(ar)=268203
i=13, len(ar)=268203
i=14, len(ar)=268203
i=15, len(ar)=268203
i=16, len(ar)=268203
i=17, len(ar)=268203
i=18, len(ar)=268203
i=19, len(ar)=268203
i=20, len(ar)=268203
i=21, len(ar)=268203
i=22, len(ar)=268203
i=23, len(ar)=268203
i=24, len(ar)=268203
i=25, len(ar)=268203
i=26, len(ar)=268203
i=27, len(ar)=268203
i=28, len(ar)=268203
i=29, len(ar)=268203
i=30, len(ar)=268203
i=31, len(ar)=268203
#######################################

Why do I only get the step=0 result before the error occurs? The stack trace is:

Step: 0, loss: 4.9976
2018-01-12 11:11:12.502597: W tensorflow/core/kernels/queue_base.cc:277] _3_input_producer: Skipping cancelled enqueue attempt with queue not closed
2018-01-12 11:11:12.502703: W tensorflow/core/kernels/queue_base.cc:277] _4_batch/fifo_queue: Skipping cancelled enqueue attempt with queue not closed
2018-01-12 11:11:12.502721: W tensorflow/core/kernels/queue_base.cc:277] _4_batch/fifo_queue: Skipping cancelled enqueue attempt with queue not closed
2018-01-12 11:11:12.502729: W tensorflow/core/kernels/queue_base.cc:277] _4_batch/fifo_queue: Skipping cancelled enqueue attempt with queue not closed
2018-01-12 11:11:12.502736: W tensorflow/core/kernels/queue_base.cc:277] _4_batch/fifo_queue: Skipping cancelled enqueue attempt with queue not closed
Traceback (most recent call last):
  File "/home/xzy/PycharmProjects/bishe/inceptionv3_train.py", line 112, in <module>
    train()
  File "/home/xzy/PycharmProjects/bishe/inceptionv3_train.py", line 87, in train
    logits = inference(imgs, num_classes=classes)
  File "/home/xzy/PycharmProjects/bishe/inceptionv3_train.py", line 48, in inference
    logits, end_points = v3.inception_v3(image, num_classes, is_training=True)
  File "/home/xzy/PycharmProjects/bishe/inception_v3_module.py", line 434, in inception_v3
    depth_multiplier=depth_multiplier)
  File "/home/xzy/PycharmProjects/bishe/inception_v3_module.py", line 91, in inception_v3_base
    net = slim.conv2d(inputs, depth(32), [3, 3], stride=2, scope=end_point)  
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 182, in func_with_args
    return func(*args, **current_args)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/layers/python/layers/layers.py", line 1057, in convolution
    outputs = layer.apply(inputs)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/layers/base.py", line 762, in apply
    return self.__call__(inputs, *args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/layers/base.py", line 636, in __call__
    self.build(input_shapes)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/layers/convolutional.py", line 143, in build
    dtype=self.dtype)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/layers/base.py", line 504, in add_variable
    partitioner=partitioner)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 1262, in get_variable
    constraint=constraint)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 1097, in get_variable
    constraint=constraint)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 427, in get_variable
    return custom_getter(**custom_getter_kwargs)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/layers/python/layers/layers.py", line 1567, in layer_variable_getter
    return _model_variable_getter(getter, *args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/layers/python/layers/layers.py", line 1559, in _model_variable_getter
    custom_getter=getter, use_resource=use_resource)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 182, in func_with_args
    return func(*args, **current_args)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/framework/python/ops/variables.py", line 291, in model_variable
    use_resource=use_resource)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 182, in func_with_args
    return func(*args, **current_args)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/framework/python/ops/variables.py", line 246, in variable
    use_resource=use_resource)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 404, in _true_getter
    use_resource=use_resource, constraint=constraint)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 743, in _get_single_variable
    name, "".join(traceback.format_list(tb))))
ValueError: Variable InceptionV3/Conv2d_1a_3x3/weights already exists, disallowed. Did you mean to set reuse=True or reuse=tf.AUTO_REUSE in VarScope? Originally defined at:

  File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/framework/python/ops/variables.py", line 246, in variable
    use_resource=use_resource)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 182, in func_with_args
    return func(*args, **current_args)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/framework/python/ops/variables.py", line 291, in model_variable
    use_resource=use_resource)

Why? Could you please give me some ideas, thanks?


Solution

  • It's my fault. When I pasted the trace log, I pasted the log of a different error. Luckily, I fixed both of the errors above.

    Input to reshape is a tensor with 89401 values, but the requested shape has 268203

    1. Delete the black images from the training data.
    2. Look for places where a tensor is misused as a NumPy array (or a NumPy array as a tensor) and correct them.

    Did you mean to set reuse=True or reuse=tf.AUTO_REUSE in VarScope?

    If reuse=False, you get the step=0 result. At the next step the weights and biases already exist, so you get the error "Did you mean to set reuse=True or reuse=tf.AUTO_REUSE in VarScope?"

    If reuse=True, you will get an uninitialized weight & bias error, because the weights and biases do not exist at the first step.

    When I set reuse=tf.AUTO_REUSE, it succeeds:

    logits, end_points = v3.inception_v3(image, num_classes, 
                                             is_training=True, reuse=tf.AUTO_REUSE)