I am trying to train the model defined here using the function `train` defined in the same script. I attempted to use a TFRecord file provided by the authors in this link. To do that, the only modifications I made were setting `init_path=None` and writing the path to the TFRecord file in the text file referenced by `train_list=train_tfs.txt`; both settings are defined in `shift_v1` in `shift_params.py`.
However, even trying this simple test, I get the following error:
OutOfRangeError (see above for traceback): RandomShuffleQueue '_1_shuffle_batch_join/random_shuffle_queue' is closed and has insufficient elements (requested 15, current size 0)
[[Node: shuffle_batch_join = QueueDequeueManyV2[component_types=[DT_UINT8, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_INT64, DT_STRING], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/device:CPU:0"](shuffle_batch_join/random_shuffle_queue, shuffle_batch_join/n)]]
As far as I understand, this error means that no data is being loaded. My guess is therefore that the problem is in the function `read_example` in `shift_dset.py`, shown below. (I have kept only the part of the code that loads the images `ims` and the audio `sound`. For the complete code, please check here.)
def read_example(rec_queue, pr, input_types):
    """Build the graph ops that read, decode and crop one TFRecord example.

    One serialized record is read from ``rec_queue`` and parsed into two JPEG
    strings (``im_0``, ``im_1``) and one raw audio string (``sound``).  Each
    JPEG is reshaped into ``total_frames // 2`` frames of ``full x full x 3``;
    the audio is decoded as int16, reshaped to ``(pr.full_samples_len, 2)``
    and scaled by the int16 maximum.  A ground-truth start frame and a
    "shifted" start frame are then chosen, and a window of
    ``pr.sampled_frames`` frames (plus the matching audio) is sliced out.

    NOTE(review): this is an excerpt.  The ``_`` entries in the return
    statement stand for values whose computation is not part of the excerpt,
    so ``_`` is unbound here and the function cannot run as shown.

    Args:
        rec_queue: Queue handed to ``tf.TFRecordReader().read`` (presumably a
            filename queue of TFRecord paths -- confirm against the caller).
        pr: Parameter object.  Attributes read here: ``full_im_dim``,
            ``variable_frame_count``, ``total_frames``, ``full_samples_len``,
            ``sampled_frames``, ``samples_per_frame``, ``do_shift``,
            ``fix_frame``, ``max_intersection``, ``skip_notfound``,
            ``augment_ims``, ``crop_im_dim`` and, optionally, ``resize_dims``
            and ``use_first_frame``.
        input_types: Container of input names; audio is sliced only when it
            contains ``'samples'``.

    Returns:
        A 6-tuple whose first element is the cropped frame tensor of shape
        ``(pr.sampled_frames, crop, crop, 3)`` and whose third element stacks
        the shifted and the ground-truth audio slices along a new leading
        axis (shifted first).  The remaining entries are excerpt placeholders.

    Raises:
        AssertionError: If ``pr.variable_frame_count`` is set, or if no valid
            shifted window exists for some start frame while
            ``pr.skip_notfound`` is false.
    """
    reader = tf.TFRecordReader()
    k, serialized_example = reader.read(rec_queue)
    full = pr.full_im_dim

    feats = {
        'im_0': tf.FixedLenFeature([], dtype=tf.string),
        'im_1': tf.FixedLenFeature([], dtype=tf.string),
        'sound': tf.FixedLenFeature([], dtype=tf.string),
    }
    if pr.variable_frame_count:
        feats['num_frames'] = tf.FixedLenFeature([1], dtype=tf.int64)
    example = tf.parse_single_example(serialized_example, features=feats)

    total_frames = (pr.total_frames if not pr.variable_frame_count
                    else tf.cast(example['num_frames'][0], tf.int32))
    # Everything below uses total_frames as a plain Python integer (static
    # shapes, range() bounds), so a per-record frame count is rejected.
    assert not pr.variable_frame_count

    def f(x):
        # Each decoded JPEG is a vertical stack of half of the clip's frames.
        x.set_shape((full * total_frames // 2, full, 3))
        return tf.reshape(x, (total_frames // 2, full, full, 3))

    decoded_parts = [
        tf.image.decode_jpeg(example['im_0'], channels=3, name='decode_im1'),
        tf.image.decode_jpeg(example['im_1'], channels=3, name='decode_im2'),
    ]
    # A real list (not a lazy map object) because len() and indexing follow.
    im_parts = [f(part) for part in decoded_parts]

    samples = tf.decode_raw(example['sound'], tf.int16)
    samples.set_shape([pr.full_samples_len * 2])
    samples = tf.reshape(samples, (pr.full_samples_len, 2))
    samples = tf.cast(samples, 'float32') / np.iinfo(np.dtype(np.int16)).max

    num_slice_frames = pr.sampled_frames
    num_samples = int(pr.samples_per_frame * float(num_slice_frames))

    if pr.do_shift:
        # Enumerate (ground-truth start, shifted start) pairs whose frame
        # windows share at most pr.max_intersection frames.
        choices = []
        max_frame = total_frames - num_slice_frames
        frames1 = [0] if pr.fix_frame else range(max_frame)
        for frame1 in frames1:
            found = False
            window1 = set(range(frame1, frame1 + num_slice_frames))
            for frame2 in reversed(range(max_frame)):
                window2 = range(frame2, frame2 + num_slice_frames)
                if len(window1.intersection(window2)) <= pr.max_intersection:
                    found = True
                    choices.append([frame1, frame2])
                    if pr.fix_frame:
                        break
            if not pr.skip_notfound:
                assert found
        print('Number of frame choices: %d' % len(choices))
        choices = tf.constant(np.array(choices), dtype=tf.int32)
        idx = tf.random_uniform([1], 0, shape(choices, 0), dtype=tf.int64)[0]
        start_frame_gt = choices[idx, 0]
        shift_frame = choices[idx, 1]
    elif ut.hastrue(pr, 'use_first_frame'):
        shift_frame = start_frame_gt = tf.constant(0, dtype=tf.int32)
    else:
        shift_frame = start_frame_gt = tf.random_uniform(
            [1], 0, total_frames - num_slice_frames, dtype=tf.int32)[0]

    if pr.augment_ims:
        print('Augment: %s' % (pr.augment_ims,))
        # Random top-left corner for the spatial crop.
        r = tf.random_uniform(
            [2], 0, pr.full_im_dim - pr.crop_im_dim, dtype=tf.int32)
        x, y = r[0], r[1]
    elif hasattr(pr, 'resize_dims'):
        y = pr.resize_dims[0] // 2 - pr.crop_im_dim // 2
        x = pr.resize_dims[1] // 2 - pr.crop_im_dim // 2
    else:
        y = x = pr.full_im_dim // 2 - pr.crop_im_dim // 2

    # The requested window may straddle the two decoded parts, so each part
    # contributes its (possibly empty) overlap and the pieces are concatenated.
    d = pr.crop_im_dim
    num_frames_in_part = total_frames // len(im_parts)
    slice_parts = []
    for j in range(len(im_parts)):
        part_start = j * num_frames_in_part
        frame_offset = tf.maximum(
            0, tf.minimum(start_frame_gt - part_start, num_frames_in_part))
        end_offset = tf.maximum(
            0, tf.minimum(start_frame_gt + num_slice_frames - part_start,
                          num_frames_in_part))
        num_frames_in_part_slice = tf.maximum(0, end_offset - frame_offset)
        offset = [frame_offset, y, x, 0]
        size_im = [num_frames_in_part_slice, d, d, 3]
        slice_parts.append(tf.slice(im_parts[j], offset, size_im))
    ims_slice = tf.concat(slice_parts, 0)
    ims_slice.set_shape([num_slice_frames, pr.crop_im_dim, pr.crop_im_dim, 3])
    ims = ims_slice

    if pr.augment_ims:
        # Flip every frame left-right with probability one half.
        ims = tf.cond(
            tf.cast(tf.random_uniform([1], 0, 2, dtype=tf.int64)[0], tf.bool),
            lambda: tf.map_fn(tf.image.flip_left_right, ims),
            lambda: ims)

    def slice_samples(frame):
        # Audio window of num_samples rows starting at the given video frame.
        start = round_int(pr.samples_per_frame * cast_float(frame))
        offset = [start, 0]
        size = [num_samples, 2]
        r = tf.slice(samples, offset, size, name='slice_sample')
        r.set_shape([num_samples] + list(shape(r)[1:]))
        return r

    if 'samples' in input_types:
        samples_gt = slice_samples(start_frame_gt)
        samples_shift = slice_samples(shift_frame)
    else:
        samples_gt = samples_shift = tf.zeros((1, 1), dtype=tf.int16)
    samples_exs = tf.concat([ed(samples_shift, 0), ed(samples_gt, 0)], 0)

    # NOTE(review): `_` marks return values elided from this excerpt.
    return ims, _, samples_exs, _, _, _
I also tried loading my own TFRecords by changing this function, and I got a similar error. Moreover, when no TFRecord paths are provided (i.e., the file given by `train_list=train_tfs.txt` does not contain a path to a TFRecord, so nothing can be loaded), I get the same error, which suggests that something is wrong with the way the data is being loaded.
Thank you in advance for any help.
How to reproduce the same test
In `shift_v1` of `shift_params.py`, set `init_path=None` and edit the content of `train_tfs.txt` (it should contain the path to the downloaded TFRecord file).
From the folder `src`, run the following command:
python -c "import shift_params, shift_net; shift_net.train(shift_params.shift_v1(num_gpus=3), [0, 1, 2], restore = False)"
Environment details
I am using the following packages in an anaconda environment (Linux):
I have solved the problem, so I am posting the solution I found here in case somebody else faces the same issue.
I did not manage to run the data loading code with the provided TFRecord (as presented in the question), so I wrote my own data loading function to replace `read_example`, which I post here for reference:
def new_read_example(rec_queue, pr, input_types, total_frames=100):
    """Build the graph ops that read one example from a custom TFRecord.

    Each record is parsed with the keys ``video`` (a JPEG string),
    ``sync_audio`` and ``shift_audio`` (raw float32 bytes), ``label`` and
    ``num_frames``.  The JPEG is reshaped into ``total_frames`` frames of
    ``full x full x 3`` and each audio track into a single-channel column of
    ``pr.full_samples_len`` samples.

    Args:
        rec_queue: Queue handed to ``tf.TFRecordReader().read``.
        pr: Parameter object; ``full_im_dim`` and ``full_samples_len`` are
            read here.
        input_types: Not used by this reader; kept so the signature stays
            interchangeable with ``read_example``.
        total_frames: Number of frames stored in every record.  It must be a
            Python integer because it fixes the static tensor shapes; the
            per-record ``num_frames`` feature is a tensor and cannot serve
            that purpose.  Defaults to 100.

    Returns:
        ``(ims, samples_exs)`` where ``ims`` has shape
        ``(total_frames, full, full, 3)`` and ``samples_exs`` stacks the
        shifted and the synchronized audio along a new leading axis (shifted
        first).
    """
    reader = tf.TFRecordReader()
    k, serialized_example = reader.read(rec_queue)
    full = pr.full_im_dim  # full frame dimension

    # 'label' and 'num_frames' are parsed but not used by this reader.
    feature_description = {
        'video': tf.FixedLenFeature([], tf.string),
        'sync_audio': tf.FixedLenFeature([], tf.string),
        'shift_audio': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([1], tf.int64),
        'num_frames': tf.FixedLenFeature([1], tf.int64),
    }
    example = tf.parse_single_example(serialized_example, feature_description)

    ims = tf.image.decode_jpeg(example['video'], channels=3)
    ims.set_shape((full * total_frames, full, 3))
    ims = tf.reshape(ims, (total_frames, full, full, 3))

    def _as_mono_column(audio):
        # The stored audio is mono, so it becomes a (samples, 1) column.
        audio.set_shape([pr.full_samples_len])
        return tf.reshape(audio, (pr.full_samples_len, 1))

    # The audio in the records is a float array serialized with
    # ``audio.tostring()``, hence the float32 raw decode.
    sync_audio = tf.decode_raw(example['sync_audio'], tf.float32)
    shift_audio = tf.decode_raw(example['shift_audio'], tf.float32)
    sync_audio = _as_mono_column(sync_audio)
    shift_audio = _as_mono_column(shift_audio)

    samples_exs = tf.concat([ed(shift_audio, 0), ed(sync_audio, 0)], 0)
    return ims, samples_exs
My conclusion is that it is almost impossible to read a TFRecord without knowing exactly how it was created (ideally you have the code that generated it). In my case, it was easier to write my own matching pair of TFRecord writer and reader.