Search code examples
python tensorflow keras unsupervised-learning supervised-learning

Batch size 512 in TFRecordDataset does not work with an unsupervised model


I'm using the MNIST dataset to test an unsupervised model.

The dataset and output:

# Build the evaluation pipeline: read the TFRecord file(s), apply map_func to
# every record, then group the results into batches of 512.
test_dataset = (
    tf.data.TFRecordDataset([test_filenames])
    .map(map_func)
    .batch(512)
)
print(f"test_dataset.map.element_spec: {test_dataset.element_spec}")

test_dataset.map.element_spec: (TensorSpec(shape=(None, 28, 28), dtype=tf.float32, name=None), TensorSpec(shape=(None, 1), dtype=tf.uint8, name=None))

The first model (a supervised classifier) is copied from here: https://www.tensorflow.org/tutorials/keras/classification

# Supervised baseline: flatten each 28x28 image, one hidden layer of 128
# units, and a 10-unit output layer that produces raw logits.
classifier_layers = [
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(10),
]
model = tf.keras.Sequential(classifier_layers)

# The output layer has no activation, so the loss is told to expect logits.
sparse_ce_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer='adam', loss=sparse_ce_loss, metrics=['accuracy'])

model.summary()

print(f"\n test_dataset: {test_dataset}\n")

# evaluate() returns [loss, accuracy] for this model; keep only the loss.
loss_and_metrics = model.evaluate(test_dataset)
evaluate_value = loss_and_metrics[0]
print(f"evaluate_value: {evaluate_value}\n")

Here is the output:

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 128)               100480    
                                                                 
 dense_1 (Dense)             (None, 10)                1290      
                                                                 
=================================================================
Total params: 101,770
Trainable params: 101,770
Non-trainable params: 0
_________________________________________________________________

 test_dataset: <BatchDataset element_spec=(TensorSpec(shape=(None, 28, 28), dtype=tf.float32, name=None), TensorSpec(shape=(None, 1), dtype=tf.uint8, name=None))>

20/20 [==============================] - 1s 31ms/step - loss: 2.5328 - accuracy: 0.0844
evaluate_value: 2.5328330993652344

The second model (unsupervised) is copied from this question: "How to build an unsupervised CNN model with keras/tensorflow?"

keras_layers = tf.keras.layers

# Encoder: add a channel axis to the 28x28 input, then apply three
# convolution + 2x2 max-pooling stages (16, 32 and 64 filters).
encoder_layers = [
    keras_layers.Reshape([28, 28, 1], input_shape=[28, 28]),
    keras_layers.Conv2D(16, kernel_size=3, padding="SAME", activation="selu"),
    keras_layers.MaxPool2D(pool_size=2),
    keras_layers.Conv2D(32, kernel_size=3, padding="SAME", activation="selu"),
    keras_layers.MaxPool2D(pool_size=2),
    keras_layers.Conv2D(64, kernel_size=3, padding="SAME", activation="selu"),
    keras_layers.MaxPool2D(pool_size=2),
]
conv_encoder = tf.keras.models.Sequential(encoder_layers)

# Decoder: three stride-2 transposed convolutions starting from the
# [3, 3, 64] code, then drop the channel axis to return a 28x28 image.
decoder_layers = [
    keras_layers.Conv2DTranspose(
        32, kernel_size=3, strides=2, padding="VALID", activation="selu",
        input_shape=[3, 3, 64],
    ),
    keras_layers.Conv2DTranspose(16, kernel_size=3, strides=2, padding="SAME", activation="selu"),
    keras_layers.Conv2DTranspose(1, kernel_size=3, strides=2, padding="SAME", activation="sigmoid"),
    keras_layers.Reshape([28, 28]),
]
conv_decoder = tf.keras.models.Sequential(decoder_layers)

# Autoencoder: the encoder followed by the decoder.
autoencoder_stages = [conv_encoder, conv_decoder]
model = tf.keras.models.Sequential(autoencoder_stages)

def rounded_accuracy(y_true, y_pred):
    """Return the binary accuracy of the rounded targets and predictions.

    Both tensors are rounded with ``tf.round`` and then handed to
    ``tf.keras.metrics.binary_accuracy``, which compares them element-wise.
    ``y_true`` and ``y_pred`` must therefore have broadcast-compatible
    shapes.
    """
    return tf.keras.metrics.binary_accuracy(tf.round(y_true), tf.round(y_pred))

# NOTE: `lr` is a deprecated alias; Keras warns to use `learning_rate` instead.
model.compile(loss="binary_crossentropy", optimizer=tf.keras.optimizers.SGD(lr=0.1),
        metrics=[rounded_accuracy])

model.summary()

print("\n test_dataset: {}\n".format(test_dataset))

# test_dataset yields (image, label) pairs, so the loss and the metric compare
# the model's (None, 28, 28) output against the (None, 1) labels.
# evaluate() returns [loss, rounded_accuracy]; index 0 is the loss.
evaluate_value = model.evaluate(test_dataset)[0]
print("evaluate_value: {}\n".format(evaluate_value))

Here is the output:

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 sequential_1 (Sequential)   (None, 3, 3, 64)          23296     
                                                                 
 sequential_2 (Sequential)   (None, 28, 28)            23233     
                                                                 
=================================================================
Total params: 46,529
Trainable params: 46,529
Non-trainable params: 0
_________________________________________________________________

 test_dataset: <BatchDataset element_spec=(TensorSpec(shape=(None, 28, 28), dtype=tf.float32, name=None), TensorSpec(shape=(None, 1), dtype=tf.uint8, name=None))>

/usr/local/lib/python3.8/site-packages/keras/optimizer_v2/gradient_descent.py:102: UserWarning: The `lr` argument is deprecated, use `learning_rate` instead.
  super(SGD, self).__init__(name, **kwargs)

---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
/tmp/ipykernel_3872/4007225056.py in <cell line: 31>()
     29 print("\n test_dataset: {}\n".format(test_dataset))
     30 
---> 31 evaluate_value = model.evaluate(test_dataset)[0]
     32 print("evaluate_value: {}\n".format(evaluate_value))
     33 

/usr/local/lib/python3.8/site-packages/keras/utils/traceback_utils.py in error_handler(*args, **kwargs)
     65     except Exception as e:  # pylint: disable=broad-except
     66       filtered_tb = _process_traceback_frames(e.__traceback__)
---> 67       raise e.with_traceback(filtered_tb) from None
     68     finally:
     69       del filtered_tb

/usr/local/lib/python3.8/site-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
     52   try:
     53     ctx.ensure_initialized()
---> 54     tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
     55                                         inputs, attrs, num_outputs)
     56   except core._NotOkStatusException as e:

InvalidArgumentError: Graph execution error:

Detected at node 'Equal' defined at (most recent call last):
    File "/usr/local/lib/python3.8/runpy.py", line 194, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/usr/local/lib/python3.8/runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "/usr/local/lib/python3.8/site-packages/ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "/usr/local/lib/python3.8/site-packages/traitlets/config/application.py", line 976, in launch_instance
      app.start()
    File "/usr/local/lib/python3.8/site-packages/ipykernel/kernelapp.py", line 712, in start
      self.io_loop.start()
    File "/usr/local/lib/python3.8/site-packages/tornado/platform/asyncio.py", line 215, in start
      self.asyncio_loop.run_forever()
    File "/usr/local/lib/python3.8/asyncio/base_events.py", line 570, in run_forever
      self._run_once()
    File "/usr/local/lib/python3.8/asyncio/base_events.py", line 1859, in _run_once
      handle._run()
    File "/usr/local/lib/python3.8/asyncio/events.py", line 81, in _run
      self._context.run(self._callback, *self._args)
    File "/usr/local/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 510, in dispatch_queue
      await self.process_one()
    File "/usr/local/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 499, in process_one
      await dispatch(*args)
    File "/usr/local/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 406, in dispatch_shell
      await result
    File "/usr/local/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 730, in execute_request
      reply_content = await reply_content
    File "/usr/local/lib/python3.8/site-packages/ipykernel/ipkernel.py", line 383, in do_execute
      res = shell.run_cell(
    File "/usr/local/lib/python3.8/site-packages/ipykernel/zmqshell.py", line 528, in run_cell
      return super().run_cell(*args, **kwargs)
    File "/usr/local/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2975, in run_cell
      result = self._run_cell(
    File "/usr/local/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3030, in _run_cell
      return runner(coro)
    File "/usr/local/lib/python3.8/site-packages/IPython/core/async_helpers.py", line 78, in _pseudo_sync_runner
      coro.send(None)
    File "/usr/local/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3257, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/usr/local/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3473, in run_ast_nodes
      if (await self.run_code(code, result,  async_=asy)):
    File "/usr/local/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3553, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/tmp/ipykernel_3872/4007225056.py", line 31, in <cell line: 31>
      evaluate_value = model.evaluate(test_dataset)[0]
    File "/usr/local/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/usr/local/lib/python3.8/site-packages/keras/engine/training.py", line 1716, in evaluate
      tmp_logs = self.test_function(iterator)
    File "/usr/local/lib/python3.8/site-packages/keras/engine/training.py", line 1525, in test_function
      return step_function(self, iterator)
    File "/usr/local/lib/python3.8/site-packages/keras/engine/training.py", line 1514, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/usr/local/lib/python3.8/site-packages/keras/engine/training.py", line 1507, in run_step
      outputs = model.test_step(data)
    File "/usr/local/lib/python3.8/site-packages/keras/engine/training.py", line 1474, in test_step
      return self.compute_metrics(x, y, y_pred, sample_weight)
    File "/usr/local/lib/python3.8/site-packages/keras/engine/training.py", line 957, in compute_metrics
      self.compiled_metrics.update_state(y, y_pred, sample_weight)
    File "/usr/local/lib/python3.8/site-packages/keras/engine/compile_utils.py", line 459, in update_state
      metric_obj.update_state(y_t, y_p, sample_weight=mask)
    File "/usr/local/lib/python3.8/site-packages/keras/utils/metrics_utils.py", line 70, in decorated
      update_op = update_state_fn(*args, **kwargs)
    File "/usr/local/lib/python3.8/site-packages/keras/metrics.py", line 178, in update_state_fn
      return ag_update_state(*args, **kwargs)
    File "/usr/local/lib/python3.8/site-packages/keras/metrics.py", line 729, in update_state
      matches = ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "/tmp/ipykernel_3872/4007225056.py", line 22, in rounded_accuracy
      return tf.keras.metrics.binary_accuracy(tf.round(y_true), tf.round(y_pred))
    File "/usr/local/lib/python3.8/site-packages/keras/metrics.py", line 4016, in binary_accuracy
      return backend.mean(tf.equal(y_true, y_pred), axis=-1)
Node: 'Equal'
Incompatible shapes: [512,28,28] vs. [512,1]
     [[{{node Equal}}]] [Op:__inference_test_function_1318]

Then I changed the batch size from 512 to 1:

test_dataset = test_dataset.batch(1)

With that change the unsupervised model runs; here is the output for a batch size of 1:

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 sequential_1 (Sequential)   (None, 3, 3, 64)          23296     
                                                                 
 sequential_2 (Sequential)   (None, 28, 28)            23233     
                                                                 
=================================================================
Total params: 46,529
Trainable params: 46,529
Non-trainable params: 0
_________________________________________________________________

 test_dataset: <BatchDataset element_spec=(TensorSpec(shape=(None, 28, 28), dtype=tf.float32, name=None), TensorSpec(shape=(None, 1), dtype=tf.uint8, name=None))>

/usr/local/lib/python3.8/site-packages/keras/optimizer_v2/gradient_descent.py:102: UserWarning: The `lr` argument is deprecated, use `learning_rate` instead.
  super(SGD, self).__init__(name, **kwargs)

10000/10000 [==============================] - 17s 2ms/step - loss: 0.6837 - rounded_accuracy: 0.1055
evaluate_value: 0.6837235689163208

Is it possible to set the batch size to 512 instead of 1 for the unsupervised model above?

How can I do that?

What is wrong with the second model above?

Based on @AloneTogether's answer, using evaluate_value = model.evaluate(test_dataset.map(lambda x, y: (x, x)))[0] solves the model.evaluate problem, but it does not solve the model.fit problem, as shown here:

# Train for one epoch of 100 steps on test_dataset and validate on the same
# dataset. NOTE: test_dataset itself is passed here, so it still yields
# (image, label) pairs rather than (image, image) pairs.
history = model.fit(
    test_dataset,
    steps_per_epoch=100,
    validation_data=test_dataset,
    epochs=1,
    verbose=1,
    callbacks=[]
)

Here is the error output for model.fit for the unsupervised model:

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 sequential_4 (Sequential)   (None, 3, 3, 64)          23296     
                                                                 
 sequential_5 (Sequential)   (None, 28, 28)            23233     
                                                                 
=================================================================
Total params: 46,529
Trainable params: 46,529
Non-trainable params: 0
_________________________________________________________________

 test_dataset: <BatchDataset element_spec=(TensorSpec(shape=(None, 28, 28), dtype=tf.float32, name=None), TensorSpec(shape=(None, 1), dtype=tf.uint8, name=None))>

20/20 [==============================] - 1s 49ms/step - loss: 0.6951 - rounded_accuracy: 0.4664
evaluate_value: 0.6951028108596802

---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
/tmp/ipykernel_4342/180821372.py in <cell line: 34>()
     32 print("evaluate_value: {}\n".format(evaluate_value))
     33 
---> 34 history = model.fit(
     35     test_dataset,
     36     steps_per_epoch=100,

/usr/local/lib/python3.8/site-packages/keras/utils/traceback_utils.py in error_handler(*args, **kwargs)
     65     except Exception as e:  # pylint: disable=broad-except
     66       filtered_tb = _process_traceback_frames(e.__traceback__)
---> 67       raise e.with_traceback(filtered_tb) from None
     68     finally:
     69       del filtered_tb

/usr/local/lib/python3.8/site-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
     52   try:
     53     ctx.ensure_initialized()
---> 54     tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
     55                                         inputs, attrs, num_outputs)
     56   except core._NotOkStatusException as e:

InvalidArgumentError: Graph execution error:

Detected at node 'gradient_tape/binary_crossentropy/mul/BroadcastGradientArgs' defined at (most recent call last):
    File "/usr/local/lib/python3.8/runpy.py", line 194, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/usr/local/lib/python3.8/runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "/usr/local/lib/python3.8/site-packages/ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "/usr/local/lib/python3.8/site-packages/traitlets/config/application.py", line 976, in launch_instance
      app.start()
    File "/usr/local/lib/python3.8/site-packages/ipykernel/kernelapp.py", line 712, in start
      self.io_loop.start()
    File "/usr/local/lib/python3.8/site-packages/tornado/platform/asyncio.py", line 215, in start
      self.asyncio_loop.run_forever()
    File "/usr/local/lib/python3.8/asyncio/base_events.py", line 570, in run_forever
      self._run_once()
    File "/usr/local/lib/python3.8/asyncio/base_events.py", line 1859, in _run_once
      handle._run()
    File "/usr/local/lib/python3.8/asyncio/events.py", line 81, in _run
      self._context.run(self._callback, *self._args)
    File "/usr/local/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 510, in dispatch_queue
      await self.process_one()
    File "/usr/local/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 499, in process_one
      await dispatch(*args)
    File "/usr/local/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 406, in dispatch_shell
      await result
    File "/usr/local/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 730, in execute_request
      reply_content = await reply_content
    File "/usr/local/lib/python3.8/site-packages/ipykernel/ipkernel.py", line 383, in do_execute
      res = shell.run_cell(
    File "/usr/local/lib/python3.8/site-packages/ipykernel/zmqshell.py", line 528, in run_cell
      return super().run_cell(*args, **kwargs)
    File "/usr/local/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2975, in run_cell
      result = self._run_cell(
    File "/usr/local/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3030, in _run_cell
      return runner(coro)
    File "/usr/local/lib/python3.8/site-packages/IPython/core/async_helpers.py", line 78, in _pseudo_sync_runner
      coro.send(None)
    File "/usr/local/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3257, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/usr/local/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3473, in run_ast_nodes
      if (await self.run_code(code, result,  async_=asy)):
    File "/usr/local/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3553, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/tmp/ipykernel_4342/180821372.py", line 34, in <cell line: 34>
      history = model.fit(
    File "/usr/local/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/usr/local/lib/python3.8/site-packages/keras/engine/training.py", line 1384, in fit
      tmp_logs = self.train_function(iterator)
    File "/usr/local/lib/python3.8/site-packages/keras/engine/training.py", line 1021, in train_function
      return step_function(self, iterator)
    File "/usr/local/lib/python3.8/site-packages/keras/engine/training.py", line 1010, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/usr/local/lib/python3.8/site-packages/keras/engine/training.py", line 1000, in run_step
      outputs = model.train_step(data)
    File "/usr/local/lib/python3.8/site-packages/keras/engine/training.py", line 863, in train_step
      self.optimizer.minimize(loss, self.trainable_variables, tape=tape)
    File "/usr/local/lib/python3.8/site-packages/keras/optimizer_v2/optimizer_v2.py", line 530, in minimize
      grads_and_vars = self._compute_gradients(
    File "/usr/local/lib/python3.8/site-packages/keras/optimizer_v2/optimizer_v2.py", line 583, in _compute_gradients
      grads_and_vars = self._get_gradients(tape, loss, var_list, grad_loss)
    File "/usr/local/lib/python3.8/site-packages/keras/optimizer_v2/optimizer_v2.py", line 464, in _get_gradients
      grads = tape.gradient(loss, var_list, grad_loss)
Node: 'gradient_tape/binary_crossentropy/mul/BroadcastGradientArgs'
Incompatible shapes: [512,1] vs. [512,28,28]
     [[{{node gradient_tape/binary_crossentropy/mul/BroadcastGradientArgs}}]] [Op:__inference_train_function_3043]

Solution

  • Your second model is a kind of autoencoder, so its target should be the input image itself rather than the class label. Try mapping the dataset so that each element is (image, image):

    # Replace each (image, label) pair with (image, image) so the autoencoder's
    # (None, 28, 28) output is scored against the input image, not the label.
    evaluate_value = model.evaluate(test_dataset.map(lambda x, y: (x, x)))[0]
    

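    Also, the error actually comes from your metric: the labels have shape (batch, 1) while the predictions have shape (batch, 28, 28). Shapes are aligned from the trailing dimension when broadcasting, so with a batch size of 1, (1, 1) broadcasts against (1, 28, 28), but with a batch size of 512, (512, 1) does not broadcast against (512, 28, 28) because 512 is paired with 28. The batch-size-1 run therefore only works by accident: it compares pixels with class labels, so the numbers it reports are not meaningful.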

    This works:

    import tensorflow as tf
    # Label-shaped (1, 1) vs. prediction-shaped (1, 28, 28): aligned from the
    # right, every dimension pair is either equal or 1, so the shapes broadcast.
    y_true = tf.random.normal((1, 1))
    y_pred = tf.random.normal((1, 28, 28))
    tf.keras.metrics.binary_accuracy(tf.round(y_true), tf.round(y_pred))
    

    This does not:

    import tensorflow as tf
    # Aligned from the right, (512, 1) vs. (512, 28, 28) pairs 512 with 28,
    # which are neither equal nor 1, so the comparison cannot broadcast.
    y_true = tf.random.normal((512, 1))
    y_pred = tf.random.normal((512, 28, 28))
    tf.keras.metrics.binary_accuracy(tf.round(y_true), tf.round(y_pred))
    

    Update 1

    For model.fit, try:

    # An autoencoder learns to reproduce its input, so use the image as both
    # the input and the target instead of the (batch, 1) class label.
    test_dataset = test_dataset.map(lambda x, y: (x, x))
    history = model.fit(
        # The batched dataset holds only 20 batches (evaluate reports 20/20
        # steps), which is fewer than steps_per_epoch. Repeat it for training
        # so that Keras does not run out of data after 20 steps.
        test_dataset.repeat(),
        steps_per_epoch=100,
        # Validation uses the finite (non-repeated) dataset so that it stops
        # after a single pass without needing validation_steps.
        validation_data=test_dataset,
        epochs=1,
        verbose=1,
        callbacks=[]
    )