Search code examples
tensorflow tfrecord

TensorFlow TFRecord: "Can't parse serialized Example" when looping through the dataset


I am trying to read a TFRecord file, but I keep getting this error: `InvalidArgumentError: Key: sign. Can't parse serialized Example.`

Here's what I did:

def decode_tfrec(record_bytes):
    """Parse one serialized ``tf.train.Example`` into a dict of tensors.

    Args:
        record_bytes: A single serialized Example (this function is applied
            to each record via ``Dataset.map``).

    Returns:
        A dict with two entries: 'coordinates', the raw bytes decoded as
        float32 and reshaped to ``(-1, ROWS_PER_FRAME, 3)``, and 'sign',
        the parsed int64 feature.
    """
    features = tf.io.parse_single_example(record_bytes, {
        # The coordinates are stored as a single byte string (scalar shape []).
        'coordinates': tf.io.FixedLenFeature([], tf.string),
        # NOTE(review): with shape [59] and allow_missing=True the parser
        # appears to require the number of stored int64 values to be a
        # multiple of 59 -- this is the likely source of the reported
        # "Key: sign. Can't parse serialized Example." error; confirm against
        # the lengths actually written.
        'sign': tf.io.FixedLenSequenceFeature([59], tf.int64, allow_missing=True),
    })
    out = {}
    # Reinterpret the byte string as float32 values, then group them into
    # frames of ROWS_PER_FRAME rows with 3 values each; -1 lets the number of
    # frames be inferred.
    out['coordinates']  = tf.reshape(tf.io.decode_raw(features['coordinates'], tf.float32), (-1,ROWS_PER_FRAME,3))
    out['sign'] = features['sign']
    return out


def get_tfrec_dataset(tfrecords):
    """Build a ``tf.data`` pipeline over GZIP-compressed TFRecord files.

    Args:
        tfrecords: TFRecord filename(s), forwarded unchanged to
            ``tf.data.TFRecordDataset``.

    Returns:
        A dataset that yields the output of ``decode_tfrec`` for each record,
        with prefetching enabled.
    """
    autotune = tf.data.AUTOTUNE

    # Read the compressed record files, letting tf.data pick the parallelism.
    raw_records = tf.data.TFRecordDataset(
        tfrecords,
        compression_type='GZIP',
        num_parallel_reads=autotune,
    )

    # Decode every serialized record, then overlap decoding with consumption.
    decoded = raw_records.map(decode_tfrec, num_parallel_calls=autotune)
    return decoded.prefetch(autotune)


ds = get_tfrec_dataset(TRAIN_FILENAMES)

This is where I got the error:

# tf.data pipelines are evaluated lazily: decode_tfrec only runs on a record
# once the dataset is iterated, which is why the parse error surfaces here
# rather than when the dataset is built.
for x in ds:
    # Keep just the first decoded element for inspection, then stop.
    temp_train = x
    break

Update: here is how I created the TFRecords:

# Wrap each value in its typed tf.train.Feature container: the encoded
# coordinates as a single byte string, the phrase as a list of int64 values.
coordinates_feature = tf.train.Feature(
    bytes_list=tf.train.BytesList(value=[coordinates_encoded]))
sign_feature = tf.train.Feature(
    int64_list=tf.train.Int64List(value=phrase))

# Assemble the Example from both features and serialize it to bytes.
example = tf.train.Example(features=tf.train.Features(feature={
    'coordinates': coordinates_feature,
    'sign': sign_feature,
}))
record_bytes = example.SerializeToString()

How can I make this work? I'd really appreciate any help!


Solution

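  • I managed to fix this by using `tf.io.VarLenFeature`, since my data does not have a fixed length. `tf.io.FixedLenSequenceFeature([59], tf.int64, allow_missing=True)` expects the number of stored values to be a multiple of 59, so a record holding any other number of values fails to parse; `VarLenFeature` accepts any length and returns a sparse tensor, which `tf.sparse.to_dense` converts back to a dense one. Note that the feature key passed to the parser must match the key that was used when the record was written.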

    def decode_tfrec(record_bytes):
        """Parse one serialized Example whose int64 feature has variable length.

        Args:
            record_bytes: A single serialized ``tf.train.Example``.

        Returns:
            A dict with 'coordinates' (float32, reshaped to ``(-1, 1, 3)``)
            and 'phrase' (the variable-length int64 feature as a dense tensor).
        """
        feature_spec = {
            'coordinates': tf.io.FixedLenFeature([], tf.string),
            # VarLenFeature parses into a SparseTensor, so it places no
            # constraint on how many int64 values the record holds.
            # NOTE(review): the key must match the one used when writing the
            # record; the writer shown in the question stores this feature
            # under 'sign' -- confirm which key your records use.
            'phrase': tf.io.VarLenFeature(tf.int64),
        }
        parsed = tf.io.parse_single_example(record_bytes, feature_spec)

        # Reinterpret the stored byte string as a flat float32 vector.
        flat_coordinates = tf.io.decode_raw(parsed['coordinates'], tf.float32)

        # NOTE(review): the question reshapes to (-1, ROWS_PER_FRAME, 3); the
        # middle dimension here is 1 -- confirm this matches the data layout.
        return {
            'coordinates': tf.reshape(flat_coordinates, (-1, 1, 3)),
            'phrase': tf.sparse.to_dense(parsed['phrase']),
        }