**Update After implementing @jdehesa answer: My code looks like this:
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import os
import PIL as pil
from tensorflow import feature_column
from tensorflow_core.python.platform.flags import FLAGS
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
def print_type(name , x):
print(" {} type = {}".format(name, type(x)))
def _bytes_feature(value):
"""Returns a bytes_list from a string / byte."""
if isinstance(value, type(tf.constant(0))):
value = value.numpy() # BytesList won't unpack a string from an EagerTensor.
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def _float_feature(value):
if not isinstance(value, np.ndarray):
value = [value]
"""Returns a float_list from a float / double."""
return tf.train.Feature(float_list=tf.train.FloatList(value=value))
def _int64_feature(value):
"""Returns an int64_list from a bool / enum / int / uint."""
if not isinstance(value, np.ndarray):
value = [value]
return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
def encode_example(arr, label):
shape = arr.shape
feature = {
'height': _int64_feature(shape[0]),
'width': _int64_feature(shape[1]),
'label': _int64_feature(label),
'image_raw': _bytes_feature(arr.flatten().tobytes()),
}
# print("Encoding type {}".format(type(feature['image_raw'])))
return tf.train.Example(features=tf.train.Features(feature=feature))
def decode_example(serialized_example):
# Create a dictionary describing the features.
image_feature_description = {
'height': tf.io.FixedLenFeature([], tf.int64),
'width': tf.io.FixedLenFeature([], tf.int64),
'label': tf.io.FixedLenFeature([], tf.int64),
'image_raw': tf.io.FixedLenFeature([], tf.string),
}
example = tf.io.parse_single_example(serialized_example, image_feature_description)
return example
def map_example(height, width, image_raw, label):
# Assumes little endian decoding, pass little_endian=False for big endian
image_data = tf.io.decode_raw(image_raw, tf.uint8)
image_data = tf.reshape(image_data, [height, width])
return image_data, label
def make_dataset(partition):
files = tf.data.Dataset.list_files("images_" + partition + "*.tfrecord")
dataset = tf.data.TFRecordDataset(files)
# dataset = dataset.shuffle(buffer_size=FLAGS.shuffle_buffer_size)
dataset = dataset.map(decode_example)
dataset = dataset.map(
lambda x: map_example(x['height'], x['width'], x['image_raw'], x['label']))
# dataset = dataset.batch(batch_size=FLAGS.batch_size)
return dataset
def write_examples_to_record_file(file_name, x , y):
with tf.io.TFRecordWriter(file_name) as writer:
for i in range(len(x)):
tf_example = encode_example(x[i], y[i])
writer.write(tf_example.SerializeToString())
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, y_train, x_test, y_test = x_train[:100], y_train[:100], x_test[:100], y_test[:100]
# x_train, x_test = x_train.astype(np.int8), x_test.astype(np.int8)
write_examples_to_record_file('images_train.tfrecord', x_train, y_train)
# write_examples_to_record_file('images_test.tfrecord', x_test, y_test)
train_dataset = make_dataset("train")
# test_dataset = make_dataset("test")
it = iter(train_dataset)
r = next(it)
print_type("r",r)
(x,y) = r
print_type("X", x)
print_type("Y", y)
print("x is" , x)
print("y is" , y)
print("x shape is" , x.shape())
print("y shape is" , y.shape())
# print(next(it))
# for r,label in next(it):
# print(repr(r))
# print("feature shape = {}".format(r.shape.as_list()))
# print("label shape = {}".format(label.shape.as_list()))
# feature_column = [tf.feature_column.numeric_column(key='image', shape=(28,28))]
# feature_layer = tf.keras.layers.DenseFeatures(feature_column)
#
# it = iter(train_dataset)
model = tf.keras.models.Sequential([
tf.keras.layers.Flatten(input_shape=[28,28]),
tf.keras.layers.Dense(128, activation='relu'),
tf.keras.layers.Dropout(0.2),
tf.keras.layers.Dense(10, activation='softmax')
])
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(train_dataset, epochs=5)
# model.evaluate(test_dataset, verbose=2)
But now the error is different and I think the is the reason for the original question error, that there is a problem getting the shape of the x,y components of the next() tuple:
2019-11-19 11:11:54.540221: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'cudart64_100.dll'; dlerror: cudart64_100.dll not found
2019-11-19 11:11:56.763955: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'nvcuda.dll'; dlerror: nvcuda.dll not found
2019-11-19 11:11:56.764410: E tensorflow/stream_executor/cuda/cuda_driver.cc:318] failed call to cuInit: UNKNOWN ERROR (303)
2019-11-19 11:11:56.767167: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: szclu-dvcasa027
2019-11-19 11:11:56.767572: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: szclu-dvcasa027
2019-11-19 11:11:56.768026: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2
WARNING:tensorflow:Entity <function decode_example at 0x000002A67F5EE438> could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: No module named 'tensorflow_core.estimator'
WARNING:tensorflow:Entity <function make_dataset.<locals>.<lambda> at 0x000002A67F61C0D8> could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: No module named 'tensorflow_core.estimator'
r type = <class 'tuple'>
X type = <class 'tensorflow.python.framework.ops.EagerTensor'>
Y type = <class 'tensorflow.python.framework.ops.EagerTensor'>
x is tf.Tensor(
[[ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0]
[ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0]
[ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0]
[ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0]
[ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0]
[ 0 0 0 0 0 0 0 0 0 0 0 0 3 18 18 18 126 136
175 26 166 255 247 127 0 0 0 0]
[ 0 0 0 0 0 0 0 0 30 36 94 154 170 253 253 253 253 253
225 172 253 242 195 64 0 0 0 0]
[ 0 0 0 0 0 0 0 49 238 253 253 253 253 253 253 253 253 251
93 82 82 56 39 0 0 0 0 0]
[ 0 0 0 0 0 0 0 18 219 253 253 253 253 253 198 182 247 241
0 0 0 0 0 0 0 0 0 0]
[ 0 0 0 0 0 0 0 0 80 156 107 253 253 205 11 0 43 154
0 0 0 0 0 0 0 0 0 0]
[ 0 0 0 0 0 0 0 0 0 14 1 154 253 90 0 0 0 0
0 0 0 0 0 0 0 0 0 0]
[ 0 0 0 0 0 0 0 0 0 0 0 139 253 190 2 0 0 0
0 0 0 0 0 0 0 0 0 0]
[ 0 0 0 0 0 0 0 0 0 0 0 11 190 253 70 0 0 0
0 0 0 0 0 0 0 0 0 0]
[ 0 0 0 0 0 0 0 0 0 0 0 0 35 241 225 160 108 1
0 0 0 0 0 0 0 0 0 0]
[ 0 0 0 0 0 0 0 0 0 0 0 0 0 81 240 253 253 119
25 0 0 0 0 0 0 0 0 0]
[ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 45 186 253 253
150 27 0 0 0 0 0 0 0 0]
[ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 16 93 252
253 187 0 0 0 0 0 0 0 0]
[ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 249
253 249 64 0 0 0 0 0 0 0]
[ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 46 130 183 253
253 207 2 0 0 0 0 0 0 0]
[ 0 0 0 0 0 0 0 0 0 0 0 0 39 148 229 253 253 253
250 182 0 0 0 0 0 0 0 0]
[ 0 0 0 0 0 0 0 0 0 0 24 114 221 253 253 253 253 201
78 0 0 0 0 0 0 0 0 0]
[ 0 0 0 0 0 0 0 0 23 66 213 253 253 253 253 198 81 2
0 0 0 0 0 0 0 0 0 0]
[ 0 0 0 0 0 0 18 171 219 253 253 253 253 195 80 9 0 0
0 0 0 0 0 0 0 0 0 0]
[ 0 0 0 0 55 172 226 253 253 253 253 244 133 11 0 0 0 0
0 0 0 0 0 0 0 0 0 0]
[ 0 0 0 0 136 253 253 253 212 135 132 16 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0]
[ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0]
[ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0]
[ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0]], shape=(28, 28), dtype=uint8)
y is tf.Tensor(5, shape=(), dtype=int64)
Traceback (most recent call last):
File "<input>", line 1, in <module>
File "C:\Users\me\.IntelliJIdea2019.3\config\plugins\python\helpers\pydev\_pydev_bundle\pydev_umd.py", line 197, in runfile
pydev_imports.execfile(filename, global_vars, local_vars) # execute the script
File "C:\Users\me\.IntelliJIdea2019.3\config\plugins\python\helpers\pydev\_pydev_imps\_pydev_execfile.py", line 18, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "C:/data/projects/cludeeplearning/train_model_so.py", line 108, in <module>
print("x shape is" , x.shape())
TypeError: 'TensorShape' object is not callable
============================================================================= I am trying to experiment with tfrecords because eventually we will have lots of data stored in multiple files that may not fit in memory.
The basic idea is for this POC is to load data from mnist dataset, save it in tfrecrod files as tf.example, load it from tfrecord using tfrecorddataset and train a model.
Here's my code sample:
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import os
import PIL as pil
from tensorflow import feature_column
from tensorflow_core.python.platform.flags import FLAGS
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
def _bytes_feature(value):
"""Returns a bytes_list from a string / byte."""
if isinstance(value, type(tf.constant(0))):
value = value.numpy() # BytesList won't unpack a string from an EagerTensor.
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def _float_feature(value):
if not isinstance(value, np.ndarray):
value = [value]
"""Returns a float_list from a float / double."""
return tf.train.Feature(float_list=tf.train.FloatList(value=value))
def _int64_feature(value):
"""Returns an int64_list from a bool / enum / int / uint."""
if not isinstance(value, np.ndarray):
value = [value]
return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
def encode_example(arr, label):
shape = arr.shape
feature = {
'height': _int64_feature(shape[0]),
'width': _int64_feature(shape[1]),
'label': _int64_feature(label),
'image_raw': _bytes_feature(arr.flatten().tobytes()),
}
# print("Encoding type {}".format(type(feature['image_raw'])))
return tf.train.Example(features=tf.train.Features(feature=feature))
def decode_example(serialized_example):
# Create a dictionary describing the features.
image_feature_description = {
'height': tf.io.FixedLenFeature([], tf.int64),
'width': tf.io.FixedLenFeature([], tf.int64),
'label': tf.io.FixedLenFeature([], tf.int64),
'image_raw': tf.io.FixedLenFeature([], tf.string),
}
example = tf.io.parse_single_example(serialized_example, image_feature_description)
return example
def map_example(height, width, image_raw, label):
image_data = np.frombuffer(image_raw.numpy(), dtype=np.dtype('int64'))
image_data = tf.reshape(image_data, [height.numpy(), width.numpy()])
# image_data.set_shape([28,28])
label = tf.constant([label.numpy()], tf.int64)
return image_data, label
def make_dataset(partition):
files = tf.data.Dataset.list_files("images_" + partition + "*.tfrecord")
dataset = tf.data.TFRecordDataset(files)
# dataset = dataset.shuffle(buffer_size=FLAGS.shuffle_buffer_size)
dataset = dataset.map(decode_example)
dataset = dataset.map(lambda x: tf.py_function(func=map_example, inp=[x['height'], x['width'], x['image_raw'], x['label']], Tout=(tf.int64, tf.int64)))
# dataset = dataset.batch(batch_size=FLAGS.batch_size)
return dataset
def write_examples_to_record_file(file_name, x , y):
with tf.io.TFRecordWriter(file_name) as writer:
for i in range(len(x)):
tf_example = encode_example(x[i], y[i])
writer.write(tf_example.SerializeToString())
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, y_train, x_test, y_test = x_train[:100], y_train[:100], x_test[:100], y_test[:100]
x_train, x_test = x_train / 255.0, x_test / 255.0
write_examples_to_record_file('images_train.tfrecord', x_train, y_train)
# write_examples_to_record_file('images_test.tfrecord', x_test, y_test)
train_dataset = make_dataset("train")
# test_dataset = make_dataset("test")
for r in train_dataset.take(1):
print(r[0].shape.as_list())
print(r[1])
# feature_column = [tf.feature_column.numeric_column(key='image', shape=(28,28))]
# feature_layer = tf.keras.layers.DenseFeatures(feature_column)
#
model = tf.keras.models.Sequential([
tf.keras.layers.Flatten(input_shape=[28,28,1]),
tf.keras.layers.Dense(128, activation='relu'),
tf.keras.layers.Dropout(0.2),
tf.keras.layers.Dense(10, activation='softmax')
])
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(train_dataset, epochs=5)
# model.evaluate(test_dataset, verbose=2)
I get the following error just before it starts training, any idea why it does not work?
2019-11-19 06:31:21.067987: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'cudart64_100.dll'; dlerror: cudart64_100.dll not found
2019-11-19 06:31:23.315270: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'nvcuda.dll'; dlerror: nvcuda.dll not found
2019-11-19 06:31:23.315617: E tensorflow/stream_executor/cuda/cuda_driver.cc:318] failed call to cuInit: UNKNOWN ERROR (303)
2019-11-19 06:31:23.320751: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: szclu-dvcasa027
2019-11-19 06:31:23.321132: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: szclu-dvcasa027
2019-11-19 06:31:23.321927: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2
WARNING:tensorflow:Entity <function decode_example at 0x00000250EE94A1F8> could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: No module named 'tensorflow_core.estimator'
WARNING:tensorflow:Entity <function make_dataset.<locals>.<lambda> at 0x00000250EE94A438> could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: No module named 'tensorflow_core.estimator'
feature shape = [28, 28]
label shape = [1]
Epoch 1/5
Traceback (most recent call last):
File "C:/data/projects/cludeeplearning/train_model.py", line 110, in <module>
model.fit(train_dataset, epochs=5)
File "C:\Users\me\AppData\Local\Continuum\anaconda3\envs\deeplearning\lib\site-packages\tensorflow_core\python\keras\engine\training.py", line 728, in fit
1/Unknown - 0s 13ms/step
1/Unknown - 0s 13ms/step use_multiprocessing=use_multiprocessing)
File "C:\Users\me\AppData\Local\Continuum\anaconda3\envs\deeplearning\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py", line 324, in fit
total_epochs=epochs)
File "C:\Users\me\AppData\Local\Continuum\anaconda3\envs\deeplearning\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py", line 123, in run_one_epoch
batch_outs = execution_function(iterator)
File "C:\Users\me\AppData\Local\Continuum\anaconda3\envs\deeplearning\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py", line 86, in execution_function
distributed_function(input_fn))
File "C:\Users\me\AppData\Local\Continuum\anaconda3\envs\deeplearning\lib\site-packages\tensorflow_core\python\eager\def_function.py", line 457, in __call__
result = self._call(*args, **kwds)
File "C:\Users\me\AppData\Local\Continuum\anaconda3\envs\deeplearning\lib\site-packages\tensorflow_core\python\eager\def_function.py", line 503, in _call
self._initialize(args, kwds, add_initializers_to=initializer_map)
File "C:\Users\me\AppData\Local\Continuum\anaconda3\envs\deeplearning\lib\site-packages\tensorflow_core\python\eager\def_function.py", line 408, in _initialize
*args, **kwds))
File "C:\Users\me\AppData\Local\Continuum\anaconda3\envs\deeplearning\lib\site-packages\tensorflow_core\python\eager\function.py", line 1848, in _get_concrete_function_internal_garbage_collected
graph_function, _, _ = self._maybe_define_function(args, kwargs)
File "C:\Users\me\AppData\Local\Continuum\anaconda3\envs\deeplearning\lib\site-packages\tensorflow_core\python\eager\function.py", line 2150, in _maybe_define_function
graph_function = self._create_graph_function(args, kwargs)
File "C:\Users\me\AppData\Local\Continuum\anaconda3\envs\deeplearning\lib\site-packages\tensorflow_core\python\eager\function.py", line 2041, in _create_graph_function
capture_by_value=self._capture_by_value),
File "C:\Users\me\AppData\Local\Continuum\anaconda3\envs\deeplearning\lib\site-packages\tensorflow_core\python\framework\func_graph.py", line 915, in func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
File "C:\Users\me\AppData\Local\Continuum\anaconda3\envs\deeplearning\lib\site-packages\tensorflow_core\python\eager\def_function.py", line 358, in wrapped_fn
return weak_wrapped_fn().__wrapped__(*args, **kwds)
File "C:\Users\me\AppData\Local\Continuum\anaconda3\envs\deeplearning\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py", line 66, in distributed_function
model, input_iterator, mode)
File "C:\Users\me\AppData\Local\Continuum\anaconda3\envs\deeplearning\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py", line 112, in _prepare_feed_values
inputs, targets, sample_weights = _get_input_from_iterator(inputs)
File "C:\Users\me\AppData\Local\Continuum\anaconda3\envs\deeplearning\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py", line 149, in _get_input_from_iterator
distribution_strategy_context.get_strategy(), x, y, sample_weights)
File "C:\Users\me\AppData\Local\Continuum\anaconda3\envs\deeplearning\lib\site-packages\tensorflow_core\python\keras\distribute\distributed_training_utils.py", line 308, in validate_distributed_dataset_inputs
x_values_list = validate_per_replica_inputs(distribution_strategy, x)
File "C:\Users\me\AppData\Local\Continuum\anaconda3\envs\deeplearning\lib\site-packages\tensorflow_core\python\keras\distribute\distributed_training_utils.py", line 356, in validate_per_replica_inputs
validate_all_tensor_shapes(x, x_values)
File "C:\Users\me\AppData\Local\Continuum\anaconda3\envs\deeplearning\lib\site-packages\tensorflow_core\python\keras\distribute\distributed_training_utils.py", line 373, in validate_all_tensor_shapes
x_shape = x_values[0].shape.as_list()
File "C:\Users\me\AppData\Local\Continuum\anaconda3\envs\deeplearning\lib\site-packages\tensorflow_core\python\framework\tensor_shape.py", line 1171, in as_list
raise ValueError("as_list() is not defined on an unknown TensorShape.")
ValueError: as_list() is not defined on an unknown TensorShape.
Process finished with exit code 1
Any help is appreciated. Thank you
It is difficult to say for sure without a self contained example to test, but I think the problem is likely to come from the use of tf.py_func
, which may be missing the shape information of the tensors. You do not need it here though, I think what you can define map_example
and make_dataset
equivalently as this:
def map_example(height, width, image_raw, label):
image_data = tf.io.decode_raw(image_raw, tf.uint8)
image_data = tf.reshape(image_data, [1, height, width])
return image_data, label
def make_dataset(partition):
files = tf.data.Dataset.list_files("images_" + partition + "*.tfrecord")
dataset = tf.data.TFRecordDataset(files)
# dataset = dataset.shuffle(buffer_size=FLAGS.shuffle_buffer_size)
dataset = dataset.map(decode_example)
dataset = dataset.map(
lambda x: map_example(x['height'], x['width'], x['image_raw'], x['label']))
# dataset = dataset.batch(batch_size=FLAGS.batch_size)
return dataset