I am getting `AttributeError: '_NumpyIterator' object has no attribute 'shard'` when executing the code below. My dataset consists of images and labels, which I want to convert to TFRecords.
ds_train = tf.keras.utils.image_dataset_from_directory(some parameters)
ds_train = (
ds_train
.unbatch()
)
def encode_image(image, label):
    """Encode one (image, label) pair for serialization.

    Args:
        image: Image tensor for a single example. Float images are treated
            as holding pixel values in [0, 255], which is what
            ``tf.keras.utils.image_dataset_from_directory`` yields.
        label: Label tensor, reduced with ``tf.argmax``. This assumes a
            one-hot / per-class score vector -- TODO confirm the
            ``label_mode`` used when building the dataset.

    Returns:
        A tuple ``(jpeg_bytes, class_index)``.
    """
    if image.dtype.is_floating:
        # convert_image_dtype expects float images in [0, 1] and multiplies
        # them by ~255 before casting; applied to [0, 255] pixels the values
        # overflow uint8 and the encoded JPEG is corrupted. Round and
        # saturate instead so the pixel values are preserved.
        image_converted = tf.saturate_cast(tf.round(image), tf.uint8)
    else:
        # Integer images are rescaled between integer ranges as before.
        image_converted = tf.image.convert_image_dtype(image, dtype=tf.uint8)
    image = tf.io.encode_jpeg(image_converted)
    label = tf.argmax(label)
    return image, label
# Apply the JPEG/label encoding to every element of the dataset.
encode_ds = ds_train.map(encode_image)

# Number of TFRecord shards to produce.
NUM_SHARD = 10
# Output path; it is passed through str.format with the shard index later on.
PATH = "some path"
# Write one TFRecord file per shard.
# BUG (this is the snippet that raises the reported error): the assignment
# below rebinds encode_ds to the result of .as_numpy_iterator(). On the next
# pass of the loop, .shard() is therefore called on that iterator instead of
# on the dataset, which raises
# AttributeError: '_NumpyIterator' object has no attribute 'shard'.
# NOTE(review): PATH is a placeholder here; presumably the real value contains
# a "{}" field so each shard gets its own file -- confirm.
for shard_no in range(NUM_SHARD):
encode_ds = (
encode_ds
.shard(NUM_SHARD, shard_no)
.as_numpy_iterator()
)
with tf.io.TFRecordWriter(PATH.format(shard_no)) as file_writer:
for image, label in encode_ds:
file_writer.write(create_example(image, label))
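This is the correct, working code. The problem was the dataset variable assignment inside the for loop: I was reusing the same variable name (encode_ds) for the shard iterator, so after the first iteration encode_ds was a `_NumpyIterator` rather than a dataset. Using a separate name (shard_ds) for the per-shard iterator fixes it: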
# Write one TFRecord file per shard.
for shard_no in range(NUM_SHARD):
    # Bind the per-shard iterator to its own name so that encode_ds remains a
    # tf.data.Dataset; rebinding encode_ds here would make the next .shard()
    # call fail with "'_NumpyIterator' object has no attribute 'shard'".
    shard_ds = (
        encode_ds
        .shard(NUM_SHARD, shard_no)
        .as_numpy_iterator()
    )
    # The writer is closed automatically when the with-block exits.
    with tf.io.TFRecordWriter(PATH.format(shard_no)) as file_writer:
        for image, label in shard_ds:
            file_writer.write(create_example(image, label))