I am currently trying to use Kaggle TPUs with the CIFAR-10 dataset. The following code shows how I encode the data as TFRecords, but I have no idea how to store them in a file afterwards.
def _bytes_feature(value):
    """Wrap a string / bytes value in a ``tf.train.Feature`` holding a bytes_list.

    Args:
        value: The payload to store. If it is an instance of the tensor type
            that ``tf.constant`` produces, it is first converted with
            ``.numpy()``.

    Returns:
        A ``tf.train.Feature`` whose ``bytes_list`` contains ``value`` as its
        single element.
    """
    tensor_type = type(tf.constant(0))
    if isinstance(value, tensor_type):
        # BytesList won't unpack a string from an EagerTensor, so pull the
        # underlying value out of the tensor before wrapping it.
        value = value.numpy()
    wrapped = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=wrapped)
def _float_feature(value):
    """Wrap a float / double in a ``tf.train.Feature`` holding a float_list.

    Args:
        value: The single number to store.

    Returns:
        A ``tf.train.Feature`` whose ``float_list`` contains ``value`` as its
        single element.
    """
    wrapped = tf.train.FloatList(value=[value])
    return tf.train.Feature(float_list=wrapped)
def _int64_feature(value):
    """Wrap a bool / enum / int / uint in a ``tf.train.Feature`` holding an int64_list.

    Args:
        value: The single integer-like value to store.

    Returns:
        A ``tf.train.Feature`` whose ``int64_list`` contains ``value`` as its
        single element.
    """
    wrapped = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=wrapped)
def image_example(image, label, dimension):
    """Build a ``tf.train.Example`` describing one labelled image.

    Args:
        image: The image data; must provide a ``tobytes()`` method
            (presumably a NumPy array -- confirm against the caller).
        label: The class label, stored as an int64 feature.
        dimension: The image dimension, stored as an int64 feature.

    Returns:
        A ``tf.train.Example`` with the features ``'dimension'``, ``'label'``
        and ``'image_raw'`` (the raw bytes of ``image``).
    """
    features = tf.train.Features(feature={
        'dimension': _int64_feature(dimension),
        'label': _int64_feature(label),
        # Only the raw buffer is stored; the array's shape and dtype are not
        # part of the bytes and must be known when decoding.
        'image_raw': _bytes_feature(image.tobytes()),
    })
    return tf.train.Example(features=features)
And to write the data in TFRecords:
# Output path of the TFRecord file, relative to the working directory.
record_file = './cifar10.tfrecords'

# Axis sizes read from the training array's shape (axes 0, 1 and 3).
n_samples = x_train.shape[0]
dimension = x_train.shape[1]
depth = x_train.shape[3]

# Serialize every (image, label) pair and append it to the record file.
with tf.io.TFRecordWriter(record_file) as writer:
    for i in range(n_samples):
        image, label = x_train[i], y_train[i]
        tf_example = image_example(image, label, dimension)
        # TFRecordWriter stores opaque byte strings, so the Example proto is
        # serialized before being written.
        serialized = tf_example.SerializeToString()
        writer.write(serialized)
Now I suppose I only have to run this to get my data:
# Open the written TFRecord file as a dataset of serialized records.
# NOTE: when this dataset is consumed on a Cloud TPU, a local path fails with
# "File system scheme '[local]' not implemented"; the file then has to live in
# a Google Cloud Storage bucket (a gs:// path).
data = tf.data.TFRecordDataset(filenames=record_file)
And if I try to parse the records I get the following error:
UnimplementedError: File system scheme '[local]' not implemented (file: './cifar10.tfrecords')
But instead it just does nothing (and actually reinitializes the Kaggle session, as if I had not run anything before). Do you have any idea what mistake I am making?
Thank you very much for any help you can come up with!!
UnimplementedError: File system scheme '[local]' not implemented (file: './cifar10.tfrecords').
This is because a Cloud TPU cannot read from or write to files on the local Kaggle/Colab file system; the files need to be placed in a Google Cloud Storage bucket.
https://cloud.google.com/tpu/docs/troubleshooting#cannot_use_local_filesystem