When retraining the inception_v3 network, I create a TFRecord file
from images (all JPEG-encoded) resized to [299, 299]
as training data. I only get the result for step=0,
and then I get the following error.
tensorflow.python.framework.errors_impl.InvalidArgumentError: Input to reshape is a tensor with 89401 values, but the requested shape has 268203
[[Node: Reshape = Reshape[T=DT_UINT8, Tshape=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"](DecodeRaw, Reshape/shape)]]
89401 = 299x299, 268203 = 299x299x3. My code for creating the TFRecord file is:
import os
import tensorflow as tf
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
# Image root directory; presumably the `file_path` handed to create_tfrecord,
# which expects one sub-directory per class name -- confirm against the caller.
cwd = '/home/xzy/input_data/testnet/images/'

# Directory in which create_tfrecord writes 'train.tfrecords'.
tfrecord_dir = '/home/xzy/input_data/testnet/'

# Size every image is resized to before it is serialized.
width = 299
height = 299
def create_tfrecord(file_path):
    """Serialize every image below ``file_path`` into 'train.tfrecords'.

    ``file_path`` must contain one sub-directory per class name listed in
    ``classes``. Each image is converted to 3-channel RGB, resized to
    ``(width, height)`` and stored as raw bytes under 'img_raw', together
    with the integer index of its class under 'label'. The output file is
    written to ``tfrecord_dir``.

    Args:
        file_path: Directory holding the per-class image sub-directories.
    """
    # A tuple, not a set: enumerate() over a set yields the names in an
    # arbitrary order, so the name -> label mapping would not be stable.
    classes = ('boat', 'junk', 'carrier', 'warship', 'raft', 'speedboat')
    writer = tf.python_io.TFRecordWriter(
        os.path.join(tfrecord_dir, 'train.tfrecords'))
    try:
        for index, name in enumerate(classes):
            class_path = os.path.join(file_path, name)
            for img_name in sorted(os.listdir(class_path)):
                img_path = os.path.join(class_path, img_name)
                # Force three channels. A grayscale JPEG would otherwise
                # serialize width*height bytes instead of width*height*3 and
                # the reader's reshape to [299, 299, 3] would fail.
                img = Image.open(img_path).convert('RGB')
                img = img.resize((width, height))
                img_raw = img.tobytes()
                example = tf.train.Example(features=tf.train.Features(feature={
                    "label": tf.train.Feature(
                        int64_list=tf.train.Int64List(value=[index])),
                    'img_raw': tf.train.Feature(
                        bytes_list=tf.train.BytesList(value=[img_raw])),
                }))
                writer.write(example.SerializeToString())
    finally:
        # Close the file even when an image cannot be read.
        writer.close()
def read_record(path, batch_size=32):
    """Fetch one batch of images and one-hot labels from a TFRecord file.

    The records are expected to carry an int64 'label' and a raw uint8
    'img_raw' of exactly height*width*3 bytes. A temporary session is opened,
    one batch is dequeued, and the number of values in each image of the
    batch is printed between two marker lines.

    Args:
        path: Path of the TFRecord file to read.
        batch_size: Number of examples in the batch (default 32).

    Returns:
        A tuple ``(imgs, labs)``: ``imgs`` is a float tensor created from the
        fetched image batch with ``tf.to_float``; ``labs`` is the array of
        int32 one-hot labels of shape ``[batch_size, 6]`` returned by
        ``sess.run``.

    Raises:
        tf.errors.OutOfRangeError: If the input queue is exhausted before a
            full batch could be read.
    """
    filename_queue = tf.train.string_input_producer([path])
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        features={
            'label': tf.FixedLenFeature([], tf.int64),
            'img_raw': tf.FixedLenFeature([], tf.string),
        })
    image = tf.decode_raw(features['img_raw'], tf.uint8)
    # Rows first: the bytes were written row by row, so the leading axis is
    # the image height.
    image = tf.reshape(image, [height, width, 3])
    label = tf.cast(features['label'], tf.int32)
    image_batch, label_batch = tf.train.batch(
        [image, label], batch_size=batch_size, num_threads=4, capacity=300)
    label_batch = tf.one_hot(label_batch, depth=6)
    label_batch = tf.cast(label_batch, dtype=tf.int32)
    label_batch = tf.reshape(label_batch, [batch_size, 6])

    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        try:
            img_array, labs = sess.run([image_batch, label_batch])
        except tf.errors.OutOfRangeError:
            print('Done training -- epoch limit reached')
            # No batch was produced, so there is nothing to return.
            raise
        finally:
            # Always stop the queue-runner threads, also on failure.
            coord.request_stop()
            coord.join(threads)

    # sess.run already returned a NumPy array, so the sizes can be read
    # directly instead of evaluating a tensor.
    print("##########################################")
    for img in img_array:
        print(img.size)
    print('#######################################')

    imgs = tf.to_float(img_array)
    return imgs, labs
Besides, I have 6 classes with 32 images per class, and batch_size = 32. Before training I print the size of each image matrix, which is 268203. The output log is:
##########################################
i=0, len(ar)=268203
i=1, len(ar)=268203
i=2, len(ar)=268203
i=3, len(ar)=268203
i=4, len(ar)=268203
i=5, len(ar)=268203
i=6, len(ar)=268203
i=7, len(ar)=268203
i=8, len(ar)=268203
i=9, len(ar)=268203
i=10, len(ar)=268203
i=11, len(ar)=268203
i=12, len(ar)=268203
i=13, len(ar)=268203
i=14, len(ar)=268203
i=15, len(ar)=268203
i=16, len(ar)=268203
i=17, len(ar)=268203
i=18, len(ar)=268203
i=19, len(ar)=268203
i=20, len(ar)=268203
i=21, len(ar)=268203
i=22, len(ar)=268203
i=23, len(ar)=268203
i=24, len(ar)=268203
i=25, len(ar)=268203
i=26, len(ar)=268203
i=27, len(ar)=268203
i=28, len(ar)=268203
i=29, len(ar)=268203
i=30, len(ar)=268203
i=31, len(ar)=268203
#######################################
Why do I only get the result for step=0
and then get the error? The stack trace is:
Step: 0, loss: 4.9976
2018-01-12 11:11:12.502597: W tensorflow/core/kernels/queue_base.cc:277] _3_input_producer: Skipping cancelled enqueue attempt with queue not closed
2018-01-12 11:11:12.502703: W tensorflow/core/kernels/queue_base.cc:277] _4_batch/fifo_queue: Skipping cancelled enqueue attempt with queue not closed
2018-01-12 11:11:12.502721: W tensorflow/core/kernels/queue_base.cc:277] _4_batch/fifo_queue: Skipping cancelled enqueue attempt with queue not closed
2018-01-12 11:11:12.502729: W tensorflow/core/kernels/queue_base.cc:277] _4_batch/fifo_queue: Skipping cancelled enqueue attempt with queue not closed
2018-01-12 11:11:12.502736: W tensorflow/core/kernels/queue_base.cc:277] _4_batch/fifo_queue: Skipping cancelled enqueue attempt with queue not closed
Traceback (most recent call last):
File "/home/xzy/PycharmProjects/bishe/inceptionv3_train.py", line 112, in <module>
train()
File "/home/xzy/PycharmProjects/bishe/inceptionv3_train.py", line 87, in train
logits = inference(imgs, num_classes=classes)
File "/home/xzy/PycharmProjects/bishe/inceptionv3_train.py", line 48, in inference
logits, end_points = v3.inception_v3(image, num_classes, is_training=True)
File "/home/xzy/PycharmProjects/bishe/inception_v3_module.py", line 434, in inception_v3
depth_multiplier=depth_multiplier)
File "/home/xzy/PycharmProjects/bishe/inception_v3_module.py", line 91, in inception_v3_base
net = slim.conv2d(inputs, depth(32), [3, 3], stride=2, scope=end_point)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 182, in func_with_args
return func(*args, **current_args)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/layers/python/layers/layers.py", line 1057, in convolution
outputs = layer.apply(inputs)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/layers/base.py", line 762, in apply
return self.__call__(inputs, *args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/layers/base.py", line 636, in __call__
self.build(input_shapes)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/layers/convolutional.py", line 143, in build
dtype=self.dtype)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/layers/base.py", line 504, in add_variable
partitioner=partitioner)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 1262, in get_variable
constraint=constraint)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 1097, in get_variable
constraint=constraint)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 427, in get_variable
return custom_getter(**custom_getter_kwargs)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/layers/python/layers/layers.py", line 1567, in layer_variable_getter
return _model_variable_getter(getter, *args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/layers/python/layers/layers.py", line 1559, in _model_variable_getter
custom_getter=getter, use_resource=use_resource)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 182, in func_with_args
return func(*args, **current_args)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/framework/python/ops/variables.py", line 291, in model_variable
use_resource=use_resource)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 182, in func_with_args
return func(*args, **current_args)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/framework/python/ops/variables.py", line 246, in variable
use_resource=use_resource)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 404, in _true_getter
use_resource=use_resource, constraint=constraint)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 743, in _get_single_variable
name, "".join(traceback.format_list(tb))))
ValueError: Variable InceptionV3/Conv2d_1a_3x3/weights already exists, disallowed. Did you mean to set reuse=True or reuse=tf.AUTO_REUSE in VarScope? Originally defined at:
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/framework/python/ops/variables.py", line 246, in variable
use_resource=use_resource)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 182, in func_with_args
return func(*args, **current_args)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/framework/python/ops/variables.py", line 291, in model_variable
use_resource=use_resource)
Why? Could you please give me some ideas? Thanks.
My mistake: when I pasted the trace log, I pasted the log of a different error. Luckily, I have fixed both of the errors above.
1. `Input to reshape is a tensor with 89401 values, but the requested shape has 268203`
Check whether you misuse a `tensor` as `numpy`, or misuse `numpy` as a `tensor`; find it and correct it.
2. `Did you mean to set reuse=True or reuse=tf.AUTO_REUSE in VarScope?`
If `reuse=False`, you get the result for step=0. At the next step the weights and biases already exist, so you get the error `Did you mean to set reuse=True or reuse=tf.AUTO_REUSE in VarScope?`
If `reuse=True`, you get an "uninitialized weights and biases" error, because the weights and biases do not exist yet at the first step.
When I set `reuse=tf.AUTO_REUSE`, it succeeds:
# reuse=tf.AUTO_REUSE: the InceptionV3 variables are created on the first call
# and the existing ones are reused on every later call, which avoids the
# "Variable ... already exists" error raised at the second step.
logits, end_points = v3.inception_v3(image, num_classes,
is_training=True, reuse=tf.AUTO_REUSE)