Search code examples
python tensorflow machine-learning tensorflow-datasets tfrecord

as_list() is not defined on an unknown TensorShape


**Update:** After implementing @jdehesa's answer, my code looks like this:

from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf

import numpy as np
import matplotlib.pyplot as plt
import os
import PIL as pil
from tensorflow import feature_column
from tensorflow_core.python.platform.flags import FLAGS

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'


def print_type(name, x):
    """Print the Python type of ``x`` to stdout, labelled with ``name``."""
    message = " {} type = {}".format(name, type(x))
    print(message)


def _bytes_feature(value):
    """Wrap a single string / bytes value in a ``tf.train.Feature`` bytes_list.

    A value whose type matches that of ``tf.constant(0)`` is converted with
    ``.numpy()`` first, because BytesList won't unpack a string from an
    EagerTensor.
    """
    tensor_type = type(tf.constant(0))
    raw = value.numpy() if isinstance(value, tensor_type) else value
    bytes_list = tf.train.BytesList(value=[raw])
    return tf.train.Feature(bytes_list=bytes_list)


def _float_feature(value):
    """Returns a float_list from a float / double.

    A NumPy array is passed through as the list of values; any other input
    is treated as a single value and wrapped in a one-element list.
    """
    # The docstring must be the first statement of the body; placed after the
    # ``if`` it was only a discarded string expression.
    if not isinstance(value, np.ndarray):
        value = [value]
    return tf.train.Feature(float_list=tf.train.FloatList(value=value))


def _int64_feature(value):
    """Build a ``tf.train.Feature`` holding an int64_list.

    A NumPy array is used as the value list directly; anything else
    (bool / enum / int / uint) is treated as a single value.
    """
    values = value if isinstance(value, np.ndarray) else [value]
    int64_list = tf.train.Int64List(value=values)
    return tf.train.Feature(int64_list=int64_list)


def encode_example(arr, label):
    """Pack one image array and its label into a ``tf.train.Example``.

    The first two entries of ``arr.shape`` are stored as 'height' and
    'width', the label as 'label', and the raw bytes of the flattened array
    as 'image_raw'.
    """
    height, width = arr.shape[0], arr.shape[1]
    features = tf.train.Features(feature={
        'height': _int64_feature(height),
        'width': _int64_feature(width),
        'label': _int64_feature(label),
        'image_raw': _bytes_feature(arr.flatten().tobytes()),
    })
    return tf.train.Example(features=features)


def decode_example(serialized_example):
    """Parse one serialized example into a dict of feature tensors.

    The result has scalar int64 entries 'height', 'width' and 'label', and a
    scalar string entry 'image_raw'.
    """
    int_scalar = tf.io.FixedLenFeature([], tf.int64)
    bytes_scalar = tf.io.FixedLenFeature([], tf.string)
    feature_spec = {
        'height': int_scalar,
        'width': int_scalar,
        'label': int_scalar,
        'image_raw': bytes_scalar,
    }
    return tf.io.parse_single_example(serialized_example, feature_spec)


def map_example(height, width, image_raw, label):
    """Decode the raw image bytes as uint8 and reshape them to [height, width].

    Returns the image tensor together with the unchanged label.
    """
    # decode_raw assumes little-endian input; pass little_endian=False for
    # big-endian data.
    pixels = tf.io.decode_raw(image_raw, tf.uint8)
    image = tf.reshape(pixels, shape=[height, width])
    return image, label



def make_dataset(partition):
    """Build a dataset of (image, label) pairs for one partition.

    Files matching ``"images_<partition>*.tfrecord"`` are read as TFRecords,
    each record is parsed by ``decode_example`` and then converted by
    ``map_example``. Shuffling and batching are intentionally left out here
    (they were disabled in the original pipeline).
    """
    def _to_image_and_label(example):
        return map_example(
            example['height'], example['width'],
            example['image_raw'], example['label'])

    file_pattern = "images_" + partition + "*.tfrecord"
    matching_files = tf.data.Dataset.list_files(file_pattern)
    records = tf.data.TFRecordDataset(matching_files)
    parsed = records.map(decode_example)
    return parsed.map(_to_image_and_label)



def write_examples_to_record_file(file_name, x, y):
    """Write every image in ``x`` with its label from ``y`` to a TFRecord file.

    Each pair is encoded with ``encode_example`` and appended, serialized, to
    the file at ``file_name``.
    """
    with tf.io.TFRecordWriter(file_name) as record_writer:
        for index, image in enumerate(x):
            serialized = encode_example(image, y[index]).SerializeToString()
            record_writer.write(serialized)


# Proof-of-concept driver: write the first 100 MNIST training samples to a
# TFRecord file, read them back through make_dataset, inspect one element,
# then fit a small dense classifier on the dataset.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Keep only the first 100 samples of every array to keep the experiment small.
x_train, y_train, x_test, y_test = x_train[:100], y_train[:100], x_test[:100], y_test[:100]
# x_train, x_test = x_train.astype(np.int8), x_test.astype(np.int8)

write_examples_to_record_file('images_train.tfrecord', x_train, y_train)
# write_examples_to_record_file('images_test.tfrecord', x_test, y_test)
train_dataset = make_dataset("train")
# test_dataset = make_dataset("test")

# Pull a single element out of the dataset and inspect its components.
it = iter(train_dataset)
r = next(it)
print_type("r",r)
(x,y) = r
print_type("X", x)
print_type("Y", y)
print("x is" , x)
print("y is" , y)
# NOTE(review): `shape` is an attribute holding a TensorShape, not a method.
# Calling it raises the "'TensorShape' object is not callable" TypeError shown
# in the traceback below; use `x.shape` / `y.shape` instead.
print("x shape is" , x.shape())
print("y shape is" , y.shape())
# print(next(it))

# for r,label in next(it):
#     print(repr(r))
#     print("feature shape = {}".format(r.shape.as_list()))
#     print("label shape = {}".format(label.shape.as_list()))


# feature_column = [tf.feature_column.numeric_column(key='image', shape=(28,28))]
# feature_layer = tf.keras.layers.DenseFeatures(feature_column)
#

# it = iter(train_dataset)
# Small fully connected classifier: flatten, one hidden layer, 10-way softmax.
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=[28,28]),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10, activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# NOTE(review): train_dataset is not batched (the batch call in make_dataset
# is commented out), so each element is a single (28, 28) image. Presumably
# Keras expects a leading batch dimension -- confirm, and add .batch(...) if so.
model.fit(train_dataset,  epochs=5)
# model.evaluate(test_dataset, verbose=2)

But now the error is different, and I think this is the reason for the error in the original question: there is a problem getting the shape of the x, y components of the tuple returned by next():

2019-11-19 11:11:54.540221: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'cudart64_100.dll'; dlerror: cudart64_100.dll not found
2019-11-19 11:11:56.763955: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'nvcuda.dll'; dlerror: nvcuda.dll not found
2019-11-19 11:11:56.764410: E tensorflow/stream_executor/cuda/cuda_driver.cc:318] failed call to cuInit: UNKNOWN ERROR (303)
2019-11-19 11:11:56.767167: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: szclu-dvcasa027
2019-11-19 11:11:56.767572: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: szclu-dvcasa027
2019-11-19 11:11:56.768026: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2
WARNING:tensorflow:Entity <function decode_example at 0x000002A67F5EE438> could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: No module named 'tensorflow_core.estimator'
WARNING:tensorflow:Entity <function make_dataset.<locals>.<lambda> at 0x000002A67F61C0D8> could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: No module named 'tensorflow_core.estimator'
 r type = <class 'tuple'>
 X type = <class 'tensorflow.python.framework.ops.EagerTensor'>
 Y type = <class 'tensorflow.python.framework.ops.EagerTensor'>
x is tf.Tensor(
[[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   3  18  18  18 126 136
  175  26 166 255 247 127   0   0   0   0]
 [  0   0   0   0   0   0   0   0  30  36  94 154 170 253 253 253 253 253
  225 172 253 242 195  64   0   0   0   0]
 [  0   0   0   0   0   0   0  49 238 253 253 253 253 253 253 253 253 251
   93  82  82  56  39   0   0   0   0   0]
 [  0   0   0   0   0   0   0  18 219 253 253 253 253 253 198 182 247 241
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0  80 156 107 253 253 205  11   0  43 154
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0  14   1 154 253  90   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0 139 253 190   2   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0  11 190 253  70   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0  35 241 225 160 108   1
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0  81 240 253 253 119
   25   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0  45 186 253 253
  150  27   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0  16  93 252
  253 187   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0 249
  253 249  64   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0  46 130 183 253
  253 207   2   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0  39 148 229 253 253 253
  250 182   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0  24 114 221 253 253 253 253 201
   78   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0  23  66 213 253 253 253 253 198  81   2
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0  18 171 219 253 253 253 253 195  80   9   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0  55 172 226 253 253 253 253 244 133  11   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0 136 253 253 253 212 135 132  16   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]], shape=(28, 28), dtype=uint8)
y is tf.Tensor(5, shape=(), dtype=int64)
Traceback (most recent call last):
  File "<input>", line 1, in <module>
  File "C:\Users\me\.IntelliJIdea2019.3\config\plugins\python\helpers\pydev\_pydev_bundle\pydev_umd.py", line 197, in runfile
    pydev_imports.execfile(filename, global_vars, local_vars)  # execute the script
  File "C:\Users\me\.IntelliJIdea2019.3\config\plugins\python\helpers\pydev\_pydev_imps\_pydev_execfile.py", line 18, in execfile
    exec(compile(contents+"\n", file, 'exec'), glob, loc)
  File "C:/data/projects/cludeeplearning/train_model_so.py", line 108, in <module>
    print("x shape is" , x.shape())
TypeError: 'TensorShape' object is not callable

============================================================================= I am trying to experiment with tfrecords because eventually we will have lots of data stored in multiple files that may not fit in memory.

The basic idea of this POC is to load data from the MNIST dataset, save it in TFRecord files as tf.train.Example records, load it back from the TFRecord files using TFRecordDataset, and train a model.

Here's my code sample:

from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf

import numpy as np
import matplotlib.pyplot as plt
import os
import PIL as pil
from tensorflow import feature_column
from tensorflow_core.python.platform.flags import FLAGS

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'


def _bytes_feature(value):
    """Wrap a single string / bytes value in a ``tf.train.Feature`` bytes_list.

    A value whose type matches that of ``tf.constant(0)`` is converted with
    ``.numpy()`` first, because BytesList won't unpack a string from an
    EagerTensor.
    """
    tensor_type = type(tf.constant(0))
    raw = value.numpy() if isinstance(value, tensor_type) else value
    bytes_list = tf.train.BytesList(value=[raw])
    return tf.train.Feature(bytes_list=bytes_list)


def _float_feature(value):
    """Returns a float_list from a float / double.

    A NumPy array is passed through as the list of values; any other input
    is treated as a single value and wrapped in a one-element list.
    """
    # The docstring must be the first statement of the body; placed after the
    # ``if`` it was only a discarded string expression.
    if not isinstance(value, np.ndarray):
        value = [value]
    return tf.train.Feature(float_list=tf.train.FloatList(value=value))


def _int64_feature(value):
    """Build a ``tf.train.Feature`` holding an int64_list.

    A NumPy array is used as the value list directly; anything else
    (bool / enum / int / uint) is treated as a single value.
    """
    values = value if isinstance(value, np.ndarray) else [value]
    int64_list = tf.train.Int64List(value=values)
    return tf.train.Feature(int64_list=int64_list)


def encode_example(arr, label):
    """Pack one image array and its label into a ``tf.train.Example``.

    The first two entries of ``arr.shape`` are stored as 'height' and
    'width', the label as 'label', and the raw bytes of the flattened array
    as 'image_raw'.
    """
    height, width = arr.shape[0], arr.shape[1]
    features = tf.train.Features(feature={
        'height': _int64_feature(height),
        'width': _int64_feature(width),
        'label': _int64_feature(label),
        'image_raw': _bytes_feature(arr.flatten().tobytes()),
    })
    return tf.train.Example(features=features)


def decode_example(serialized_example):
    """Parse one serialized example into a dict of feature tensors.

    The result has scalar int64 entries 'height', 'width' and 'label', and a
    scalar string entry 'image_raw'.
    """
    int_scalar = tf.io.FixedLenFeature([], tf.int64)
    bytes_scalar = tf.io.FixedLenFeature([], tf.string)
    feature_spec = {
        'height': int_scalar,
        'width': int_scalar,
        'label': int_scalar,
        'image_raw': bytes_scalar,
    }
    return tf.io.parse_single_example(serialized_example, feature_spec)


def map_example(height, width, image_raw, label):
    """Rebuild an image tensor and a one-element label tensor from parsed features.

    Every argument is read through ``.numpy()``, so the function needs eager
    tensors; ``make_dataset`` calls it through ``tf.py_function``.

    Returns:
        A tuple ``(image_data, label)``: the byte buffer interpreted as int64
        and reshaped to ``[height, width]``, and the label wrapped in a
        length-1 int64 tensor.
    """
    # NOTE(review): the script writes these bytes from ``x_train / 255.0``,
    # presumably a float64 array, so reading them as int64 would reinterpret
    # the bit patterns instead of recovering pixel values -- confirm the dtype.
    image_data = np.frombuffer(image_raw.numpy(), dtype=np.dtype('int64'))
    image_data = tf.reshape(image_data, [height.numpy(), width.numpy()])
    # image_data.set_shape([28,28])
    # The label is wrapped in a list, so the returned label tensor has shape [1].
    label = tf.constant([label.numpy()], tf.int64)
    return image_data, label


def make_dataset(partition):
    """Build a dataset of (image, label) pairs for one partition.

    Files matching ``"images_<partition>*.tfrecord"`` are read as TFRecords,
    parsed with ``decode_example`` and converted by ``map_example``, which is
    run through ``tf.py_function``. Shuffling and batching are commented out.
    """
    files = tf.data.Dataset.list_files("images_" + partition + "*.tfrecord")
    dataset = tf.data.TFRecordDataset(files)
    # dataset = dataset.shuffle(buffer_size=FLAGS.shuffle_buffer_size)
    dataset = dataset.map(decode_example)
    # NOTE(review): as the answer below suggests, tensors coming out of
    # tf.py_function may carry no static shape information, which is the likely
    # source of the "as_list() is not defined on an unknown TensorShape" error
    # raised by model.fit -- confirm; using native TF ops (or set_shape on the
    # outputs) would avoid it.
    dataset = dataset.map(lambda x: tf.py_function(func=map_example, inp=[x['height'], x['width'], x['image_raw'], x['label']], Tout=(tf.int64, tf.int64)))
    # dataset = dataset.batch(batch_size=FLAGS.batch_size)
    return dataset


def write_examples_to_record_file(file_name, x, y):
    """Write every image in ``x`` with its label from ``y`` to a TFRecord file.

    Each pair is encoded with ``encode_example`` and appended, serialized, to
    the file at ``file_name``.
    """
    with tf.io.TFRecordWriter(file_name) as record_writer:
        for index, image in enumerate(x):
            serialized = encode_example(image, y[index]).SerializeToString()
            record_writer.write(serialized)


# Proof-of-concept driver: write the first 100 MNIST training samples to a
# TFRecord file, read them back through make_dataset, print one element, then
# fit a small dense classifier on the dataset.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Keep only the first 100 samples of every array to keep the experiment small.
x_train, y_train, x_test, y_test = x_train[:100], y_train[:100], x_test[:100], y_test[:100]
# NOTE(review): dividing by 255.0 turns the images into floating-point arrays
# (presumably float64), so the bytes stored by encode_example are float bytes;
# map_example reads them back as int64 -- confirm the intended dtype.
x_train, x_test = x_train / 255.0, x_test / 255.0
write_examples_to_record_file('images_train.tfrecord', x_train, y_train)
# write_examples_to_record_file('images_test.tfrecord', x_test, y_test)
train_dataset = make_dataset("train")
# test_dataset = make_dataset("test")

# Print the shape of the first image and its label as a sanity check.
for r in train_dataset.take(1):
    print(r[0].shape.as_list())
    print(r[1])


# feature_column = [tf.feature_column.numeric_column(key='image', shape=(28,28))]
# feature_layer = tf.keras.layers.DenseFeatures(feature_column)
#
# Small fully connected classifier: flatten, one hidden layer, 10-way softmax.
# NOTE(review): the declared input shape has a trailing channel axis
# ([28,28,1]) while map_example reshapes images to [height, width] -- confirm
# these are meant to match.
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=[28,28,1]),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10, activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# This is the call that fails with "as_list() is not defined on an unknown
# TensorShape" in the traceback below.
model.fit(train_dataset,  epochs=5)
# model.evaluate(test_dataset, verbose=2)

I get the following error just before it starts training, any idea why it does not work?

2019-11-19 06:31:21.067987: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'cudart64_100.dll'; dlerror: cudart64_100.dll not found
2019-11-19 06:31:23.315270: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'nvcuda.dll'; dlerror: nvcuda.dll not found
2019-11-19 06:31:23.315617: E tensorflow/stream_executor/cuda/cuda_driver.cc:318] failed call to cuInit: UNKNOWN ERROR (303)
2019-11-19 06:31:23.320751: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: szclu-dvcasa027
2019-11-19 06:31:23.321132: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: szclu-dvcasa027
2019-11-19 06:31:23.321927: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2
WARNING:tensorflow:Entity <function decode_example at 0x00000250EE94A1F8> could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: No module named 'tensorflow_core.estimator'
WARNING:tensorflow:Entity <function make_dataset.<locals>.<lambda> at 0x00000250EE94A438> could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: No module named 'tensorflow_core.estimator'
feature shape = [28, 28]
label shape = [1]
Epoch 1/5
Traceback (most recent call last):
  File "C:/data/projects/cludeeplearning/train_model.py", line 110, in <module>
    model.fit(train_dataset,  epochs=5)
  File "C:\Users\me\AppData\Local\Continuum\anaconda3\envs\deeplearning\lib\site-packages\tensorflow_core\python\keras\engine\training.py", line 728, in fit

      1/Unknown - 0s 13ms/step
      1/Unknown - 0s 13ms/step    use_multiprocessing=use_multiprocessing)
  File "C:\Users\me\AppData\Local\Continuum\anaconda3\envs\deeplearning\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py", line 324, in fit
    total_epochs=epochs)
  File "C:\Users\me\AppData\Local\Continuum\anaconda3\envs\deeplearning\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py", line 123, in run_one_epoch
    batch_outs = execution_function(iterator)
  File "C:\Users\me\AppData\Local\Continuum\anaconda3\envs\deeplearning\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py", line 86, in execution_function
    distributed_function(input_fn))
  File "C:\Users\me\AppData\Local\Continuum\anaconda3\envs\deeplearning\lib\site-packages\tensorflow_core\python\eager\def_function.py", line 457, in __call__
    result = self._call(*args, **kwds)
  File "C:\Users\me\AppData\Local\Continuum\anaconda3\envs\deeplearning\lib\site-packages\tensorflow_core\python\eager\def_function.py", line 503, in _call
    self._initialize(args, kwds, add_initializers_to=initializer_map)
  File "C:\Users\me\AppData\Local\Continuum\anaconda3\envs\deeplearning\lib\site-packages\tensorflow_core\python\eager\def_function.py", line 408, in _initialize
    *args, **kwds))
  File "C:\Users\me\AppData\Local\Continuum\anaconda3\envs\deeplearning\lib\site-packages\tensorflow_core\python\eager\function.py", line 1848, in _get_concrete_function_internal_garbage_collected
    graph_function, _, _ = self._maybe_define_function(args, kwargs)
  File "C:\Users\me\AppData\Local\Continuum\anaconda3\envs\deeplearning\lib\site-packages\tensorflow_core\python\eager\function.py", line 2150, in _maybe_define_function
    graph_function = self._create_graph_function(args, kwargs)
  File "C:\Users\me\AppData\Local\Continuum\anaconda3\envs\deeplearning\lib\site-packages\tensorflow_core\python\eager\function.py", line 2041, in _create_graph_function
    capture_by_value=self._capture_by_value),
  File "C:\Users\me\AppData\Local\Continuum\anaconda3\envs\deeplearning\lib\site-packages\tensorflow_core\python\framework\func_graph.py", line 915, in func_graph_from_py_func
    func_outputs = python_func(*func_args, **func_kwargs)
  File "C:\Users\me\AppData\Local\Continuum\anaconda3\envs\deeplearning\lib\site-packages\tensorflow_core\python\eager\def_function.py", line 358, in wrapped_fn
    return weak_wrapped_fn().__wrapped__(*args, **kwds)
  File "C:\Users\me\AppData\Local\Continuum\anaconda3\envs\deeplearning\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py", line 66, in distributed_function
    model, input_iterator, mode)
  File "C:\Users\me\AppData\Local\Continuum\anaconda3\envs\deeplearning\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py", line 112, in _prepare_feed_values
    inputs, targets, sample_weights = _get_input_from_iterator(inputs)
  File "C:\Users\me\AppData\Local\Continuum\anaconda3\envs\deeplearning\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py", line 149, in _get_input_from_iterator
    distribution_strategy_context.get_strategy(), x, y, sample_weights)
  File "C:\Users\me\AppData\Local\Continuum\anaconda3\envs\deeplearning\lib\site-packages\tensorflow_core\python\keras\distribute\distributed_training_utils.py", line 308, in validate_distributed_dataset_inputs
    x_values_list = validate_per_replica_inputs(distribution_strategy, x)
  File "C:\Users\me\AppData\Local\Continuum\anaconda3\envs\deeplearning\lib\site-packages\tensorflow_core\python\keras\distribute\distributed_training_utils.py", line 356, in validate_per_replica_inputs
    validate_all_tensor_shapes(x, x_values)
  File "C:\Users\me\AppData\Local\Continuum\anaconda3\envs\deeplearning\lib\site-packages\tensorflow_core\python\keras\distribute\distributed_training_utils.py", line 373, in validate_all_tensor_shapes
    x_shape = x_values[0].shape.as_list()
  File "C:\Users\me\AppData\Local\Continuum\anaconda3\envs\deeplearning\lib\site-packages\tensorflow_core\python\framework\tensor_shape.py", line 1171, in as_list
    raise ValueError("as_list() is not defined on an unknown TensorShape.")
ValueError: as_list() is not defined on an unknown TensorShape.

Process finished with exit code 1

Any help is appreciated. Thank you


Solution

  • It is difficult to say for sure without a self-contained example to test, but I think the problem is likely to come from the use of tf.py_function, which may be missing the shape information of the tensors. You do not need it here, though; I think you can define map_example and make_dataset equivalently like this:

    def map_example(height, width, image_raw, label):
        """Decode the raw bytes as uint8 and reshape them to [1, height, width].

        Only TensorFlow ops are used, so no ``.numpy()`` calls are needed.
        Returns the image tensor together with the unchanged label.
        """
        pixels = tf.io.decode_raw(image_raw, tf.uint8)
        image = tf.reshape(pixels, shape=[1, height, width])
        return image, label
    
    def make_dataset(partition):
        """Build a dataset of (image, label) pairs for one partition.

        Files matching ``"images_<partition>*.tfrecord"`` are read as
        TFRecords, each record is parsed by ``decode_example`` and then
        converted by ``map_example``. Shuffling and batching are left out
        here (they were disabled in the original pipeline).
        """
        def _to_image_and_label(example):
            return map_example(
                example['height'], example['width'],
                example['image_raw'], example['label'])

        file_pattern = "images_" + partition + "*.tfrecord"
        matching_files = tf.data.Dataset.list_files(file_pattern)
        records = tf.data.TFRecordDataset(matching_files)
        parsed = records.map(decode_example)
        return parsed.map(_to_image_and_label)