I'm using the MNIST dataset to test an unsupervised model.
Here is the dataset pipeline and the element spec it prints:
# Evaluation input pipeline: read the TFRecord file(s), decode each record with
# map_func (defined elsewhere, not shown here), and group the results into batches of 512.
test_dataset = tf.data.TFRecordDataset([test_filenames])
test_dataset = test_dataset.map(map_func)
test_dataset = test_dataset.batch(512)
# Per the printed spec, each element is an (image, label) pair:
# float32 images of shape (None, 28, 28) and uint8 labels of shape (None, 1).
print("test_dataset.map.element_spec: {}".format(test_dataset.element_spec))
test_dataset.map.element_spec: (TensorSpec(shape=(None, 28, 28), dtype=tf.float32, name=None), TensorSpec(shape=(None, 1), dtype=tf.uint8, name=None))
The first model, which is supervised rather than unsupervised, is copied from the TensorFlow classification tutorial: https://www.tensorflow.org/tutorials/keras/classification
# Dense classifier: flatten each 28x28 image, one hidden layer, then 10 outputs.
model = tf.keras.Sequential([
tf.keras.layers.Flatten(input_shape=(28, 28)),
tf.keras.layers.Dense(128, activation='relu'),
tf.keras.layers.Dense(10)
])
# The last Dense layer has no activation, hence from_logits=True in the loss.
model.compile(optimizer='adam',
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
metrics=['accuracy'])
model.summary()
print("\n test_dataset: {}\n".format(test_dataset))
# evaluate() returns the loss followed by the compiled metrics; index 0 keeps the loss.
# NOTE(review): no fit() call is shown before this, so the weights appear to be untrained.
evaluate_value = model.evaluate(test_dataset)[0]
print("evaluate_value: {}\n".format(evaluate_value))
Here is the output:
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
flatten (Flatten) (None, 784) 0
dense (Dense) (None, 128) 100480
dense_1 (Dense) (None, 10) 1290
=================================================================
Total params: 101,770
Trainable params: 101,770
Non-trainable params: 0
_________________________________________________________________
test_dataset: <BatchDataset element_spec=(TensorSpec(shape=(None, 28, 28), dtype=tf.float32, name=None), TensorSpec(shape=(None, 1), dtype=tf.uint8, name=None))>
20/20 [==============================] - 1s 31ms/step - loss: 2.5328 - accuracy: 0.0844
evaluate_value: 2.5328330993652344
The second model, which is unsupervised, is copied from the question "How to build an unsupervised CNN model with keras/tensorflow?":
# Encoder: add a channel axis to the 28x28 input, then three Conv2D + MaxPool2D stages.
# Its output shape is (None, 3, 3, 64) according to the model summary.
conv_encoder = tf.keras.models.Sequential([
tf.keras.layers.Reshape([28, 28, 1], input_shape=[28, 28]),
tf.keras.layers.Conv2D(16, kernel_size=3, padding="SAME", activation="selu"),
tf.keras.layers.MaxPool2D(pool_size=2),
tf.keras.layers.Conv2D(32, kernel_size=3, padding="SAME", activation="selu"),
tf.keras.layers.MaxPool2D(pool_size=2),
tf.keras.layers.Conv2D(64, kernel_size=3, padding="SAME", activation="selu"),
tf.keras.layers.MaxPool2D(pool_size=2)
])
# Decoder: three stride-2 transposed convolutions starting from the encoder's
# (3, 3, 64) output; the final Reshape drops the channel axis to give (None, 28, 28).
conv_decoder = tf.keras.models.Sequential([
tf.keras.layers.Conv2DTranspose(32, kernel_size=3, strides=2, padding="VALID", activation="selu",
input_shape=[3, 3, 64]),
tf.keras.layers.Conv2DTranspose(16, kernel_size=3, strides=2, padding="SAME", activation="selu"),
tf.keras.layers.Conv2DTranspose(1, kernel_size=3, strides=2, padding="SAME", activation="sigmoid"),
tf.keras.layers.Reshape([28, 28])
])
# Autoencoder: encoder followed by decoder, so the output has the same shape as the input image.
model = tf.keras.models.Sequential([conv_encoder, conv_decoder])
def rounded_accuracy(y_true, y_pred):
    """Return the binary accuracy of the rounded targets and predictions.

    Both tensors are rounded to the nearest integer with ``tf.round`` before
    being passed to ``tf.keras.metrics.binary_accuracy``.

    Args:
        y_true: Target tensor.
        y_pred: Prediction tensor. Its shape must be broadcast-compatible
            with ``y_true``; otherwise the element-wise comparison inside
            ``binary_accuracy`` fails with an "Incompatible shapes" error.

    Returns:
        The tensor returned by ``tf.keras.metrics.binary_accuracy``.
    """
    return tf.keras.metrics.binary_accuracy(tf.round(y_true), tf.round(y_pred))
# NOTE(review): `lr` is deprecated in favour of `learning_rate`; this call is what
# triggers the UserWarning shown in the recorded output.
model.compile(loss="binary_crossentropy", optimizer=tf.keras.optimizers.SGD(lr=0.1),
metrics=[rounded_accuracy])
model.summary()
print("\n test_dataset: {}\n".format(test_dataset))
# test_dataset yields (image, label) pairs, so the (batch, 28, 28) reconstruction is
# compared with (batch, 1) labels; this is the call that raises the shape error.
evaluate_value = model.evaluate(test_dataset)[0]
print("evaluate_value: {}\n".format(evaluate_value))
Here is the output:
Model: "sequential_3"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
sequential_1 (Sequential) (None, 3, 3, 64) 23296
sequential_2 (Sequential) (None, 28, 28) 23233
=================================================================
Total params: 46,529
Trainable params: 46,529
Non-trainable params: 0
_________________________________________________________________
test_dataset: <BatchDataset element_spec=(TensorSpec(shape=(None, 28, 28), dtype=tf.float32, name=None), TensorSpec(shape=(None, 1), dtype=tf.uint8, name=None))>
/usr/local/lib/python3.8/site-packages/keras/optimizer_v2/gradient_descent.py:102: UserWarning: The `lr` argument is deprecated, use `learning_rate` instead.
super(SGD, self).__init__(name, **kwargs)
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
/tmp/ipykernel_3872/4007225056.py in <cell line: 31>()
29 print("\n test_dataset: {}\n".format(test_dataset))
30
---> 31 evaluate_value = model.evaluate(test_dataset)[0]
32 print("evaluate_value: {}\n".format(evaluate_value))
33
/usr/local/lib/python3.8/site-packages/keras/utils/traceback_utils.py in error_handler(*args, **kwargs)
65 except Exception as e: # pylint: disable=broad-except
66 filtered_tb = _process_traceback_frames(e.__traceback__)
---> 67 raise e.with_traceback(filtered_tb) from None
68 finally:
69 del filtered_tb
/usr/local/lib/python3.8/site-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
52 try:
53 ctx.ensure_initialized()
---> 54 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
55 inputs, attrs, num_outputs)
56 except core._NotOkStatusException as e:
InvalidArgumentError: Graph execution error:
Detected at node 'Equal' defined at (most recent call last):
File "/usr/local/lib/python3.8/runpy.py", line 194, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/usr/local/lib/python3.8/runpy.py", line 87, in _run_code
exec(code, run_globals)
File "/usr/local/lib/python3.8/site-packages/ipykernel_launcher.py", line 17, in <module>
app.launch_new_instance()
File "/usr/local/lib/python3.8/site-packages/traitlets/config/application.py", line 976, in launch_instance
app.start()
File "/usr/local/lib/python3.8/site-packages/ipykernel/kernelapp.py", line 712, in start
self.io_loop.start()
File "/usr/local/lib/python3.8/site-packages/tornado/platform/asyncio.py", line 215, in start
self.asyncio_loop.run_forever()
File "/usr/local/lib/python3.8/asyncio/base_events.py", line 570, in run_forever
self._run_once()
File "/usr/local/lib/python3.8/asyncio/base_events.py", line 1859, in _run_once
handle._run()
File "/usr/local/lib/python3.8/asyncio/events.py", line 81, in _run
self._context.run(self._callback, *self._args)
File "/usr/local/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 510, in dispatch_queue
await self.process_one()
File "/usr/local/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 499, in process_one
await dispatch(*args)
File "/usr/local/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 406, in dispatch_shell
await result
File "/usr/local/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 730, in execute_request
reply_content = await reply_content
File "/usr/local/lib/python3.8/site-packages/ipykernel/ipkernel.py", line 383, in do_execute
res = shell.run_cell(
File "/usr/local/lib/python3.8/site-packages/ipykernel/zmqshell.py", line 528, in run_cell
return super().run_cell(*args, **kwargs)
File "/usr/local/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2975, in run_cell
result = self._run_cell(
File "/usr/local/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3030, in _run_cell
return runner(coro)
File "/usr/local/lib/python3.8/site-packages/IPython/core/async_helpers.py", line 78, in _pseudo_sync_runner
coro.send(None)
File "/usr/local/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3257, in run_cell_async
has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
File "/usr/local/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3473, in run_ast_nodes
if (await self.run_code(code, result, async_=asy)):
File "/usr/local/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3553, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "/tmp/ipykernel_3872/4007225056.py", line 31, in <cell line: 31>
evaluate_value = model.evaluate(test_dataset)[0]
File "/usr/local/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
return fn(*args, **kwargs)
File "/usr/local/lib/python3.8/site-packages/keras/engine/training.py", line 1716, in evaluate
tmp_logs = self.test_function(iterator)
File "/usr/local/lib/python3.8/site-packages/keras/engine/training.py", line 1525, in test_function
return step_function(self, iterator)
File "/usr/local/lib/python3.8/site-packages/keras/engine/training.py", line 1514, in step_function
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/usr/local/lib/python3.8/site-packages/keras/engine/training.py", line 1507, in run_step
outputs = model.test_step(data)
File "/usr/local/lib/python3.8/site-packages/keras/engine/training.py", line 1474, in test_step
return self.compute_metrics(x, y, y_pred, sample_weight)
File "/usr/local/lib/python3.8/site-packages/keras/engine/training.py", line 957, in compute_metrics
self.compiled_metrics.update_state(y, y_pred, sample_weight)
File "/usr/local/lib/python3.8/site-packages/keras/engine/compile_utils.py", line 459, in update_state
metric_obj.update_state(y_t, y_p, sample_weight=mask)
File "/usr/local/lib/python3.8/site-packages/keras/utils/metrics_utils.py", line 70, in decorated
update_op = update_state_fn(*args, **kwargs)
File "/usr/local/lib/python3.8/site-packages/keras/metrics.py", line 178, in update_state_fn
return ag_update_state(*args, **kwargs)
File "/usr/local/lib/python3.8/site-packages/keras/metrics.py", line 729, in update_state
matches = ag_fn(y_true, y_pred, **self._fn_kwargs)
File "/tmp/ipykernel_3872/4007225056.py", line 22, in rounded_accuracy
return tf.keras.metrics.binary_accuracy(tf.round(y_true), tf.round(y_pred))
File "/usr/local/lib/python3.8/site-packages/keras/metrics.py", line 4016, in binary_accuracy
return backend.mean(tf.equal(y_true, y_pred), axis=-1)
Node: 'Equal'
Incompatible shapes: [512,28,28] vs. [512,1]
[[{{node Equal}}]] [Op:__inference_test_function_1318]
Then I changed the batch size from 512 to 1:
# Presumably used in place of the earlier .batch(512) call, not in addition to it.
test_dataset = test_dataset.batch(1)
With this change the unsupervised model runs without error; here is the output for a batch size of 1:
Model: "sequential_3"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
sequential_1 (Sequential) (None, 3, 3, 64) 23296
sequential_2 (Sequential) (None, 28, 28) 23233
=================================================================
Total params: 46,529
Trainable params: 46,529
Non-trainable params: 0
_________________________________________________________________
test_dataset: <BatchDataset element_spec=(TensorSpec(shape=(None, 28, 28), dtype=tf.float32, name=None), TensorSpec(shape=(None, 1), dtype=tf.uint8, name=None))>
/usr/local/lib/python3.8/site-packages/keras/optimizer_v2/gradient_descent.py:102: UserWarning: The `lr` argument is deprecated, use `learning_rate` instead.
super(SGD, self).__init__(name, **kwargs)
10000/10000 [==============================] - 17s 2ms/step - loss: 0.6837 - rounded_accuracy: 0.1055
evaluate_value: 0.6837235689163208
Is it possible to set the batch size to 512 instead of 1 for the unsupervised model above?
If so, how?
What is wrong with the second model?
Based on @AloneTogether's answer, changing the evaluation call to evaluate_value = model.evaluate(test_dataset.map(lambda x, y: (x, x)))[0]
solves the model.evaluate problem, but it does not solve the same problem for model.fit, as shown here:
# NOTE(review): test_dataset still yields (image, label) pairs here. The .map() inside
# the evaluate call built a new dataset and did not modify test_dataset, so the loss
# again compares (batch, 28, 28) outputs with (batch, 1) labels.
# NOTE(review): the evaluate log shows 20 batches per pass, fewer than
# steps_per_epoch=100 - confirm whether the dataset needs .repeat().
history = model.fit(
test_dataset,
steps_per_epoch=100,
validation_data=test_dataset,
epochs=1,
verbose=1,
callbacks=[]
)
Here is the error output for model.fit for the unsupervised model:
Model: "sequential_6"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
sequential_4 (Sequential) (None, 3, 3, 64) 23296
sequential_5 (Sequential) (None, 28, 28) 23233
=================================================================
Total params: 46,529
Trainable params: 46,529
Non-trainable params: 0
_________________________________________________________________
test_dataset: <BatchDataset element_spec=(TensorSpec(shape=(None, 28, 28), dtype=tf.float32, name=None), TensorSpec(shape=(None, 1), dtype=tf.uint8, name=None))>
20/20 [==============================] - 1s 49ms/step - loss: 0.6951 - rounded_accuracy: 0.4664
evaluate_value: 0.6951028108596802
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
/tmp/ipykernel_4342/180821372.py in <cell line: 34>()
32 print("evaluate_value: {}\n".format(evaluate_value))
33
---> 34 history = model.fit(
35 test_dataset,
36 steps_per_epoch=100,
/usr/local/lib/python3.8/site-packages/keras/utils/traceback_utils.py in error_handler(*args, **kwargs)
65 except Exception as e: # pylint: disable=broad-except
66 filtered_tb = _process_traceback_frames(e.__traceback__)
---> 67 raise e.with_traceback(filtered_tb) from None
68 finally:
69 del filtered_tb
/usr/local/lib/python3.8/site-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
52 try:
53 ctx.ensure_initialized()
---> 54 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
55 inputs, attrs, num_outputs)
56 except core._NotOkStatusException as e:
InvalidArgumentError: Graph execution error:
Detected at node 'gradient_tape/binary_crossentropy/mul/BroadcastGradientArgs' defined at (most recent call last):
File "/usr/local/lib/python3.8/runpy.py", line 194, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/usr/local/lib/python3.8/runpy.py", line 87, in _run_code
exec(code, run_globals)
File "/usr/local/lib/python3.8/site-packages/ipykernel_launcher.py", line 17, in <module>
app.launch_new_instance()
File "/usr/local/lib/python3.8/site-packages/traitlets/config/application.py", line 976, in launch_instance
app.start()
File "/usr/local/lib/python3.8/site-packages/ipykernel/kernelapp.py", line 712, in start
self.io_loop.start()
File "/usr/local/lib/python3.8/site-packages/tornado/platform/asyncio.py", line 215, in start
self.asyncio_loop.run_forever()
File "/usr/local/lib/python3.8/asyncio/base_events.py", line 570, in run_forever
self._run_once()
File "/usr/local/lib/python3.8/asyncio/base_events.py", line 1859, in _run_once
handle._run()
File "/usr/local/lib/python3.8/asyncio/events.py", line 81, in _run
self._context.run(self._callback, *self._args)
File "/usr/local/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 510, in dispatch_queue
await self.process_one()
File "/usr/local/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 499, in process_one
await dispatch(*args)
File "/usr/local/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 406, in dispatch_shell
await result
File "/usr/local/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 730, in execute_request
reply_content = await reply_content
File "/usr/local/lib/python3.8/site-packages/ipykernel/ipkernel.py", line 383, in do_execute
res = shell.run_cell(
File "/usr/local/lib/python3.8/site-packages/ipykernel/zmqshell.py", line 528, in run_cell
return super().run_cell(*args, **kwargs)
File "/usr/local/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2975, in run_cell
result = self._run_cell(
File "/usr/local/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3030, in _run_cell
return runner(coro)
File "/usr/local/lib/python3.8/site-packages/IPython/core/async_helpers.py", line 78, in _pseudo_sync_runner
coro.send(None)
File "/usr/local/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3257, in run_cell_async
has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
File "/usr/local/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3473, in run_ast_nodes
if (await self.run_code(code, result, async_=asy)):
File "/usr/local/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3553, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "/tmp/ipykernel_4342/180821372.py", line 34, in <cell line: 34>
history = model.fit(
File "/usr/local/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
return fn(*args, **kwargs)
File "/usr/local/lib/python3.8/site-packages/keras/engine/training.py", line 1384, in fit
tmp_logs = self.train_function(iterator)
File "/usr/local/lib/python3.8/site-packages/keras/engine/training.py", line 1021, in train_function
return step_function(self, iterator)
File "/usr/local/lib/python3.8/site-packages/keras/engine/training.py", line 1010, in step_function
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/usr/local/lib/python3.8/site-packages/keras/engine/training.py", line 1000, in run_step
outputs = model.train_step(data)
File "/usr/local/lib/python3.8/site-packages/keras/engine/training.py", line 863, in train_step
self.optimizer.minimize(loss, self.trainable_variables, tape=tape)
File "/usr/local/lib/python3.8/site-packages/keras/optimizer_v2/optimizer_v2.py", line 530, in minimize
grads_and_vars = self._compute_gradients(
File "/usr/local/lib/python3.8/site-packages/keras/optimizer_v2/optimizer_v2.py", line 583, in _compute_gradients
grads_and_vars = self._get_gradients(tape, loss, var_list, grad_loss)
File "/usr/local/lib/python3.8/site-packages/keras/optimizer_v2/optimizer_v2.py", line 464, in _get_gradients
grads = tape.gradient(loss, var_list, grad_loss)
Node: 'gradient_tape/binary_crossentropy/mul/BroadcastGradientArgs'
Incompatible shapes: [512,1] vs. [512,28,28]
[[{{node gradient_tape/binary_crossentropy/mul/BroadcastGradientArgs}}]] [Op:__inference_train_function_3043]
Since your second model is a kind of autoencoder, its target should be the input image itself rather than the class label. Try something like this:
# Replace each (image, label) pair with (image, image) so the target is the input itself.
evaluate_value = model.evaluate(test_dataset.map(lambda x, y: (x, x)))[0]
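Also, the error in model.evaluate is actually raised inside your metric. Labels of shape (1, 1) can be broadcast against predictions of shape (1, 28, 28), but labels of shape (512, 1) cannot be broadcast against (512, 28, 28): broadcasting aligns the trailing dimensions, so the 512 is compared with 28. That is why a batch size of 1 runs (while silently comparing images with labels) and a batch size of 512 fails.
This works: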
import tensorflow as tf
# Batch size 1: (1, 1) broadcasts against (1, 28, 28), so the comparison succeeds.
y_true = tf.random.normal((1, 1))
y_pred = tf.random.normal((1, 28, 28))
tf.keras.metrics.binary_accuracy(tf.round(y_true), tf.round(y_pred))
This does not:
import tensorflow as tf
# Batch size 512: (512, 1) cannot be broadcast against (512, 28, 28), so this
# call fails with an "Incompatible shapes" error, as in the traceback.
y_true = tf.random.normal((512, 1))
y_pred = tf.random.normal((512, 28, 28))
tf.keras.metrics.binary_accuracy(tf.round(y_true), tf.round(y_pred))
Update 1
For model.fit, apply the same mapping to the dataset itself before training:
# Autoencoder targets: replace each (image, label) pair with (image, image) so the
# loss and metric compare the reconstruction with the input instead of the label.
test_dataset = test_dataset.map(lambda x, y: (x, x))
history = model.fit(
    # The evaluate logs show only 20 batches per pass at batch size 512, fewer than
    # steps_per_epoch=100. repeat() keeps the training iterator from running out
    # of data before the epoch is complete.
    test_dataset.repeat(),
    steps_per_epoch=100,
    # Validation uses the finite dataset, so it runs exactly one pass.
    validation_data=test_dataset,
    epochs=1,
    verbose=1,
    callbacks=[]
)