I have a folder structure where every subfolder represents a class, and for every class there is exactly one example picture. I want to load the data into a Keras dataset as described here: https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image_dataset_from_directory I expected to get the tuple "(images, labels)" returned, but when I assign the function's output to a tuple, I get an error.
Here is my code:
import pathlib
# Root folder of the data set: per the description above, each subfolder is
# one class and holds a single example picture.
data_path = "./patterns"
data_dir = pathlib.Path(data_path)
# Batch size and target image size handed to the loader below.
batch_size = 32
img_height = 120
img_width = 30
# NOTE(review): `tf` is not imported in this snippet; presumably
# `import tensorflow as tf` was executed earlier in the session -- confirm.
# NOTE(review): unpacking the result into two names iterates over the returned
# object. The traceback below shows the iterator's __next__ being invoked from
# this very statement, which is where the image-decoding error surfaces.
# Assigning the result to a single variable does not raise the error.
train_ds, train_labels = tf.keras.preprocessing.image_dataset_from_directory(
data_dir,
labels='inferred',
label_mode='categorical', #int
validation_split=0.2,
subset="training",
seed=123,
image_size=(img_height, img_width),
batch_size=batch_size)
And this is the error I receive:
Found 2160 files belonging to 2160 classes.
Using 1728 files for training.
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
/usr/local/lib/python3.7/site-packages/tensorflow/python/eager/context.py in execution_mode(mode)
2101 ctx.executor = executor_new
-> 2102 yield
2103 finally:
/usr/local/lib/python3.7/site-packages/tensorflow/python/data/ops/iterator_ops.py in _next_internal(self)
757 output_types=self._flat_output_types,
--> 758 output_shapes=self._flat_output_shapes)
759
/usr/local/lib/python3.7/site-packages/tensorflow/python/ops/gen_dataset_ops.py in iterator_get_next(iterator, output_types, output_shapes, name)
2609 except _core._NotOkStatusException as e:
-> 2610 _ops.raise_from_not_ok_status(e, name)
2611 except _core._FallbackException:
/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/ops.py in raise_from_not_ok_status(e, name)
6842 # pylint: disable=protected-access
-> 6843 six.raise_from(core._status_to_exception(e.code, message), None)
6844 # pylint: enable=protected-access
/usr/local/lib/python3.7/site-packages/six.py in raise_from(value, from_value)
InvalidArgumentError: assertion failed: [Unable to decode bytes as JPEG, PNG, GIF, or BMP]
[[{{node decode_image/cond_jpeg/else/_1/decode_image/cond_jpeg/cond_png/else/_20/decode_image/cond_jpeg/cond_png/cond_gif/else/_39/decode_image/cond_jpeg/cond_png/cond_gif/Assert/Assert}}]] [Op:IteratorGetNext]
During handling of the above exception, another exception occurred:
InvalidArgumentError Traceback (most recent call last)
<ipython-input-74-f878a6f234dd> in <module>
7 seed=123,
8 image_size=(img_height, img_width),
----> 9 batch_size=batch_size)
/usr/local/lib/python3.7/site-packages/tensorflow/python/data/ops/iterator_ops.py in __next__(self)
734
735 def __next__(self): # For Python 3 compatibility
--> 736 return self.next()
737
738 def _next_internal(self):
/usr/local/lib/python3.7/site-packages/tensorflow/python/data/ops/iterator_ops.py in next(self)
770 def next(self):
771 try:
--> 772 return self._next_internal()
773 except errors.OutOfRangeError:
774 raise StopIteration
/usr/local/lib/python3.7/site-packages/tensorflow/python/data/ops/iterator_ops.py in _next_internal(self)
762 return self._element_spec._from_compatible_tensor_list(ret) # pylint: disable=protected-access
763 except AttributeError:
--> 764 return structure.from_compatible_tensor_list(self._element_spec, ret)
765
766 @property
/usr/local/lib/python3.7/contextlib.py in __exit__(self, type, value, traceback)
128 value = type()
129 try:
--> 130 self.gen.throw(type, value, traceback)
131 except StopIteration as exc:
132 # Suppress StopIteration *unless* it's the same exception that
/usr/local/lib/python3.7/site-packages/tensorflow/python/eager/context.py in execution_mode(mode)
2103 finally:
2104 ctx.executor = executor_old
-> 2105 executor_new.wait()
2106
2107
/usr/local/lib/python3.7/site-packages/tensorflow/python/eager/executor.py in wait(self)
65 def wait(self):
66 """Waits for ops dispatched in this executor to finish."""
---> 67 pywrap_tfe.TFE_ExecutorWaitForAllPendingNodes(self._handle)
68
69 def clear_error(self):
InvalidArgumentError: assertion failed: [Unable to decode bytes as JPEG, PNG, GIF, or BMP]
[[{{node decode_image/cond_jpeg/else/_1/decode_image/cond_jpeg/cond_png/else/_20/decode_image/cond_jpeg/cond_png/cond_gif/else/_39/decode_image/cond_jpeg/cond_png/cond_gif/Assert/Assert}}]]
Anyway, it is interesting that I still get this output:
Found 2160 files belonging to 2160 classes.
Using 1728 files for training.
When I assign the function output just to a single variable (train_ds), I do not receive an error.
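I think one of your images is corrupted. The error only appears with the tuple assignment because unpacking iterates over the dataset, which is the first point at which the files are actually read and decoded. Use this function and see if it crashes. It prints each filename before reading the file, so you'll see which picture is corrupted.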
Modify the os.listdir() part so that it includes all your images in the different folders.
import tensorflow as tf
import os
def validate_image(file_name):
    """Print *file_name*, then read that file and decode it as an image.

    The name is printed before the file is touched, so when decoding fails
    the last name printed identifies the offending file.

    Args:
        file_name: Path of the image file to check.

    Returns:
        The image decoded with three channels.
    """
    # Emit the path first so a decode failure can be traced back to its file.
    tf.py_function(tf.print, inp=[file_name], Tout=[])
    raw_bytes = tf.io.read_file(file_name)
    return tf.io.decode_image(raw_bytes, channels=3)
# Work from the image folder so the bare names returned by os.listdir()
# resolve as relative paths.
os.chdir(r'path\to\images')
# Dotted suffixes so that only real file extensions match; '.jpeg' is included
# because the Keras directory loader accepts it alongside '.jpg'.
accepted_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.gif')
files = [name for name in os.listdir() if name.lower().endswith(accepted_extensions)]
# Pin the dtype to string: an empty Python list would otherwise be converted
# to a float tensor, which tf.io.read_file rejects.
ds = tf.data.Dataset.from_tensor_slices(tf.constant(files, dtype=tf.string)).map(validate_image)
# Iterating forces every file to be read and decoded; a corrupted image raises
# here, right after its name has been printed.
for _ in ds:
    pass