Search code examples
python · tensorflow · tensorflow2.0 · tensorflow-datasets

Tensorflow AttributeError: '_NumpyIterator' object has no attribute 'shard'


I get AttributeError: '_NumpyIterator' object has no attribute 'shard' when executing the code below. My dataset consists of images and labels, which I want to convert to TFRecords.

ds_train = tf.keras.utils.image_dataset_from_directory(some parameters)

ds_train = (
            ds_train
            .unbatch()
       )

def encode_image(image, label):
    """Encode one (image, label) pair for serialization.

    The image is converted to uint8 and JPEG-encoded; the label is reduced to
    the index of its largest entry with tf.argmax.

    Args:
        image: Image tensor for a single example.
        label: Label tensor; presumably one-hot (hence the argmax) -- TODO
            confirm against the image_dataset_from_directory parameters.

    Returns:
        A tuple (encoded_image, label_index).
    """
    # NOTE(review): convert_image_dtype rescales float inputs on the assumption
    # that they lie in [0, 1] -- confirm the pixels are scaled that way.
    image_converted = tf.image.convert_image_dtype(image, dtype=tf.uint8)
    image = tf.io.encode_jpeg(image_converted)
    label = tf.argmax(label)

    return image, label

# Apply encode_image to every element of the unbatched dataset.
encode_ds = ds_train.map(encode_image)

# Number of shards the dataset is split into when writing.
NUM_SHARD = 10
# PATH is later passed through PATH.format(shard_no), so the real value needs
# a `{}` placeholder if each shard is meant to get its own file name.
PATH = "some path"

# NOTE: this is the loop that triggers the AttributeError. The first pass
# rebinds encode_ds to the result of .as_numpy_iterator(), so on the next pass
# .shard() is called on that iterator instead of on the dataset.
# NOTE(review): the loop body appears to have lost its indentation when the
# snippet was pasted -- confirm against the original code.
for shard_no in range(NUM_SHARD):
encode_ds = (
    encode_ds
    .shard(NUM_SHARD, shard_no)
    .as_numpy_iterator()
)

with tf.io.TFRecordWriter(PATH.format(shard_no)) as file_writer:
  for image, label in encode_ds:
      file_writer.write(create_example(image, label))

Solution

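  • Here is the corrected, working code. The problem was the variable assignment inside the for loop: I was assigning the result back to the same name (encode_ds), so after the first iteration encode_ds no longer referred to the dataset but to a _NumpyIterator, which has no shard method. Using a separate variable (shard_ds) for each shard fixes it.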

    # Write the dataset out as NUM_SHARD TFRecord shards. The per-shard
    # iterator gets its own name (shard_ds) so that encode_ds keeps referring
    # to the dataset and .shard() can be called on it again on the next pass.
    for shard_no in range(NUM_SHARD):
        shard_ds = (
            encode_ds
            .shard(NUM_SHARD, shard_no)
            .as_numpy_iterator()
        )

        # PATH is run through str.format(shard_no); it needs a `{}`
        # placeholder for the shards to end up in separate files.
        with tf.io.TFRecordWriter(PATH.format(shard_no)) as file_writer:
            for image, label in shard_ds:
                file_writer.write(create_example(image, label))