I built a multi-output, multi-class classification model and, to save memory, used sparse categorical cross-entropy as the loss. However, I got an error saying that 'labels.shape' must equal 'logits.shape'.
Here is a simple example I built.
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import OneHotEncoder
# Load the MNIST train/test splits bundled with Keras.
mnist = tf.keras.datasets.mnist
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

# Flatten every image into a 784-element row and divide by 255.0
# (presumably mapping 8-bit pixel intensities into [0, 1]).
x_train = x_train.reshape(-1, 784) / 255.0
x_test = x_test.reshape(-1, 784) / 255.0
def get_gcd(i):
    """Map an integer label to an auxiliary class index in ``0..5``.

    Despite the name, this does not compute a greatest common divisor.
    The mapping is:

    * ``0`` -> ``0``
    * ``1`` -> ``1``
    * otherwise ``2 + position`` of the first of ``2, 3, 5, 7`` that
      divides ``i`` (so 2 -> 2, 3 -> 3, 5 -> 4, 7 -> 5, 4 -> 2, 9 -> 3).

    Args:
        i: Integer label (the script passes MNIST digit labels).

    Returns:
        The class index as an ``int``.

    Raises:
        ValueError: If ``i`` is neither 0 nor 1 and is not divisible by
            2, 3, 5 or 7. Without this, the function would fall off the
            end and return ``None``, which would silently end up inside
            the label array.
    """
    if i == 0:
        return 0
    if i == 1:
        return 1
    for j, k in enumerate([2, 3, 5, 7]):
        if i % k == 0:
            return j + 2
    raise ValueError(f"get_gcd() has no class for input {i!r}")
# Derive the auxiliary integer label for every sample via get_gcd.
y_train_gcd = np.array(list(map(get_gcd, y_train)))
y_test_gcd = np.array(list(map(get_gcd, y_test)))

# Alternative kept for reference: with one-hot encoded targets (together
# with the CategoricalCrossentropy losses shown further down) no error is
# raised.
# enc = OneHotEncoder()
# enc.fit(np.arange(10).reshape(-1, 1))
# y_train = enc.transform(y_train.reshape(-1, 1)).toarray()
# y_test = enc.transform(y_test.reshape(-1, 1)).toarray()
# enc.fit(np.arange(6).reshape(-1, 1))
# y_train_gcd = enc.transform(y_train_gcd.reshape(-1, 1)).toarray()
# y_test_gcd = enc.transform(y_test_gcd.reshape(-1, 1)).toarray()

# One target array per model output, in the same order as the outputs.
y_train = [y_train, y_train_gcd]
y_test = [y_test, y_test_gcd]

# Shared 128-unit hidden layer feeding two heads that emit raw logits
# (no activation): 10 units for 'y' and 6 units for 'gcd'.
inp = tf.keras.Input(shape=(784,))
hidden_layer = tf.keras.layers.Dense(128, activation='relu')
x = hidden_layer(inp)
output_y = tf.keras.layers.Dense(10, name='y')(x)
output_gcd = tf.keras.layers.Dense(6, name='gcd')(x)
model = tf.keras.Model(inputs=inp, outputs=[output_y, output_gcd])

# Loss used with the one-hot alternative above:
# loss={
#     'y': tf.keras.losses.CategoricalCrossentropy(from_logits=True),
#     'gcd': tf.keras.losses.CategoricalCrossentropy(from_logits=True)
# },

# Integer (sparse) targets: one independent loss instance per head.
head_losses = {
    head: tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    for head in ('y', 'gcd')
}

# Per-output metrics. The reported traceback originates in the F-score
# metric's update_state, i.e. the metric attached to the 'gcd' head.
head_metrics = {
    'y': 'accuracy',
    'gcd': 'F1Score',
}

model.compile(optimizer='adam', loss=head_losses, metrics=head_metrics)

model.fit(
    x_train,
    y_train,
    epochs=10,
    batch_size=32,
    validation_data=(x_test, y_test),
)
And this is the error I get.
Traceback (most recent call last):
File "c:\Users\VivoBook\Desktop\aigame\ab.py", line 57, in <module>
model.fit(x_train, y_train, epochs=10, batch_size=32, validation_data=(x_test, y_test))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\VivoBook\anaconda3\envs\tensorflow\Lib\site-packages\keras\src\utils\traceback_utils.py", line 70, in error_handler
raise e.with_traceback(filtered_tb) from None
File "C:\Users\VivoBook\AppData\Local\Temp\__autograph_generated_filei9vwg64q.py", line 15, in tf__train_function
retval_ = ag__.converted_call(ag__.ld(step_function), (ag__.ld(self), ag__.ld(iterator)), None, fscope)
^^^^^
ValueError: in user code:
File "C:\Users\VivoBook\anaconda3\envs\tensorflow\Lib\site-packages\keras\src\engine\training.py", line 1377, in train_function *
return step_function(self, iterator)
File "C:\Users\VivoBook\anaconda3\envs\tensorflow\Lib\site-packages\keras\src\engine\training.py", line 1360, in step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "C:\Users\VivoBook\anaconda3\envs\tensorflow\Lib\site-packages\keras\src\engine\training.py", line 1349, in run_step **
outputs = model.train_step(data)
File "C:\Users\VivoBook\anaconda3\envs\tensorflow\Lib\site-packages\keras\src\engine\training.py", line 1131, in train_step
return self.compute_metrics(x, y, y_pred, sample_weight)
File "C:\Users\VivoBook\anaconda3\envs\tensorflow\Lib\site-packages\keras\src\engine\training.py", line 1225, in compute_metrics
self.compiled_metrics.update_state(y, y_pred, sample_weight)
File "C:\Users\VivoBook\anaconda3\envs\tensorflow\Lib\site-packages\keras\src\engine\compile_utils.py", line 620, in update_state
metric_obj.update_state(y_t, y_p, sample_weight=mask)
File "C:\Users\VivoBook\anaconda3\envs\tensorflow\Lib\site-packages\keras\src\utils\metrics_utils.py", line 77, in decorated
result = update_state_fn(*args, **kwargs)
File "C:\Users\VivoBook\anaconda3\envs\tensorflow\Lib\site-packages\keras\src\metrics\base_metric.py", line 140, in update_state_fn
return ag_update_state(*args, **kwargs)
File "C:\Users\VivoBook\anaconda3\envs\tensorflow\Lib\site-packages\keras\src\metrics\f_score_metrics.py", line 176, in update_state **
y_true = tf.convert_to_tensor(y_true, dtype=self.dtype)
ValueError: Tensor conversion requested dtype float32 for Tensor with dtype int32: <tf.Tensor 'ExpandDims_3:0' shape=(32, 1) dtype=int32>
I have found that no error occurs when I one-hot encode the labels, but to save memory I have to use sparse categorical cross-entropy.
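You're getting this error because your `get_gcd` function returns integers, and the labels must be floats here (the last frame of the traceback is the F-score metric's `update_state` requesting dtype float32 for an int32 tensor). Just put a `.` after each number returned by your function and it will return floats.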
def get_gcd(i):
    """Map an integer label to an auxiliary class index, returned as a float.

    Despite the name, this does not compute a greatest common divisor.
    The mapping is:

    * ``0`` -> ``0.0``
    * ``1`` -> ``1.0``
    * otherwise ``2.0 + position`` of the first of ``2, 3, 5, 7`` that
      divides ``i`` (so 2 -> 2.0, 3 -> 3.0, 5 -> 4.0, 7 -> 5.0).

    Args:
        i: Integer label.

    Returns:
        The class index as a ``float`` in ``0.0..5.0``.

    Raises:
        ValueError: If ``i`` is neither 0 nor 1 and is not divisible by
            2, 3, 5 or 7. Without this, the function would fall off the
            end and return ``None``.
    """
    if i == 0:
        return 0.
    if i == 1:
        return 1.
    for j, k in enumerate([2, 3, 5, 7]):
        if i % k == 0:
            # Adding the float literal keeps the result a float.
            return j + 2.
    raise ValueError(f"get_gcd() has no class for input {i!r}")
I wouldn't recommend using `F1Score` as a training metric; the example below uses `accuracy` instead.
Full, reduced working example:
import numpy as np
import tensorflow as tf
# Load the MNIST train/test splits bundled with Keras.
mnist = tf.keras.datasets.mnist
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

# Flatten every image into a 784-element row and divide by 255.0
# (presumably mapping 8-bit pixel intensities into [0, 1]).
x_train = x_train.reshape(-1, 784) / 255.0
x_test = x_test.reshape(-1, 784) / 255.0
def get_gcd(i):
    """Map an integer label to an auxiliary class index, returned as a float.

    Despite the name, this does not compute a greatest common divisor.
    The mapping is:

    * ``0`` -> ``0.0``
    * ``1`` -> ``1.0``
    * otherwise ``2.0 + position`` of the first of ``2, 3, 5, 7`` that
      divides ``i`` (so 2 -> 2.0, 3 -> 3.0, 5 -> 4.0, 7 -> 5.0).

    Args:
        i: Integer label (the script passes MNIST digit labels).

    Returns:
        The class index as a ``float`` in ``0.0..5.0``.

    Raises:
        ValueError: If ``i`` is neither 0 nor 1 and is not divisible by
            2, 3, 5 or 7. Without this, the function would fall off the
            end and return ``None``, which would silently end up inside
            the label array.
    """
    if i == 0:
        return 0.
    if i == 1:
        return 1.
    for j, k in enumerate([2, 3, 5, 7]):
        if i % k == 0:
            # Adding the float literal keeps the result a float.
            return j + 2.
    raise ValueError(f"get_gcd() has no class for input {i!r}")
# Derive the auxiliary label for every sample via get_gcd.
y_train_gcd = np.array(list(map(get_gcd, y_train)))
y_test_gcd = np.array(list(map(get_gcd, y_test)))

# One target array per model output, in the same order as the outputs.
y_train = [y_train, y_train_gcd]
y_test = [y_test, y_test_gcd]

# Shared 128-unit hidden layer feeding two heads that emit raw logits
# (no activation): 10 units for 'y' and 6 units for 'gcd'.
inp = tf.keras.Input(shape=(784,))
hidden_layer = tf.keras.layers.Dense(128, activation='relu')
x = hidden_layer(inp)
output_y = tf.keras.layers.Dense(10, name='y')(x)
output_gcd = tf.keras.layers.Dense(6, name='gcd')(x)
model = tf.keras.Model(inputs=inp, outputs=[output_y, output_gcd])

# Sparse targets: one independent loss instance per head.
head_losses = {
    head: tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    for head in ('y', 'gcd')
}

# A single metric list, passed as-is for the whole model.
model.compile(optimizer='adam', loss=head_losses, metrics=['accuracy'])

model.fit(x_train, y_train, epochs=1, batch_size=32)