Search code examples
python tensorflow neural-network

How to build a multi-output and multi-category classification model in Tensorflow and use Sparse Categorical Crossentropy


I built a multi-output, multi-class classification model and, to save memory, used Sparse Categorical Crossentropy. However, I got an error saying that 'labels.shape' must equal 'logits.shape'. Here is a simple example that I built.

import numpy as np
import tensorflow as tf
from sklearn.preprocessing import OneHotEncoder

# Load MNIST as (images, digit labels) pairs for the train and test splits.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Divide every pixel value by 255 (presumably mapping 0-255 intensities
# into [0, 1] -- confirm against the dataset's dtype/range).
x_train = x_train / 255.0
x_test = x_test / 255.0
# Flatten each sample into a row of 784 features, the width expected by the
# model's Input(shape=(784,)).
x_train = x_train.reshape(-1, 784)
x_test = x_test.reshape(-1, 784)

def get_gcd(i):
    """Return the integer class label derived from ``i``.

    0 and 1 map to themselves. Any other value is tested against the primes
    2, 3, 5 and 7 in that order, and the first one that divides ``i`` selects
    the label 2, 3, 4 or 5 respectively. When none of them divides ``i`` the
    function returns None.
    """
    if i == 0:
        return 0
    if i == 1:
        return 1
    # Labels start at 2 because 0 and 1 are taken by the special cases above.
    for label, prime in enumerate((2, 3, 5, 7), start=2):
        if i % prime == 0:
            return label
    return None
# Derive the second target from the digit labels, one get_gcd() label per sample.
y_train_gcd = np.array([get_gcd(i) for i in y_train])
y_test_gcd = np.array([get_gcd(i) for i in y_test])

# Alternative: one-hot encode both label sets (10 categories for the digits,
# 6 for the get_gcd labels). There will be no errors when using this code.
# enc = OneHotEncoder()
# enc.fit(np.arange(10).reshape(-1, 1))
# y_train = enc.transform(y_train.reshape(-1, 1)).toarray()
# y_test = enc.transform(y_test.reshape(-1, 1)).toarray()
# enc.fit(np.arange(6).reshape(-1, 1))
# y_train_gcd = enc.transform(y_train_gcd.reshape(-1, 1)).toarray()
# y_test_gcd = enc.transform(y_test_gcd.reshape(-1, 1)).toarray()

# Bundle both targets in a list, in the same order as the model's outputs
# ([output_y, output_gcd]).
y_train = [y_train, y_train_gcd]
y_test = [y_test, y_test_gcd]

# Shared trunk: 784 input features feeding one 128-unit ReLU hidden layer.
inp = tf.keras.Input(shape=(784,))
x = tf.keras.layers.Dense(128, activation='relu')(inp)
# Two output heads on top of the shared layer. Neither has an activation, so
# both emit raw logits (hence from_logits=True in the losses below).
output_y = tf.keras.layers.Dense(10, name='y')(x)
output_gcd = tf.keras.layers.Dense(6, name='gcd')(x)
model = tf.keras.Model(inputs=inp, outputs=[output_y, output_gcd])
# Losses and metrics are keyed by the output layer names 'y' and 'gcd'.
model.compile(
    optimizer='adam',
    # # When using one-hot encoding, use this loss function
    # loss={
    #     'y': tf.keras.losses.CategoricalCrossentropy(from_logits=True),
    #     'gcd': tf.keras.losses.CategoricalCrossentropy(from_logits=True)
    # },
    # Sparse variant, used here with integer class labels instead of one-hot rows.
    loss={
        'y': tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        'gcd': tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    },
    metrics={
        'y': 'accuracy',
        # NOTE(review): the reported traceback ends inside this metric's
        # update_state (f_score_metrics.py), where the int32 labels are
        # converted to float32.
        'gcd': 'F1Score'
    }
)
# Train on both targets and validate on the test split after each epoch.
model.fit(x_train, y_train, epochs=10, batch_size=32, validation_data=(x_test, y_test)) 

And this is the error I get.

Traceback (most recent call last):
  File "c:\Users\VivoBook\Desktop\aigame\ab.py", line 57, in <module>
    model.fit(x_train, y_train, epochs=10, batch_size=32, validation_data=(x_test, y_test)) 
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\VivoBook\anaconda3\envs\tensorflow\Lib\site-packages\keras\src\utils\traceback_utils.py", line 70, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "C:\Users\VivoBook\AppData\Local\Temp\__autograph_generated_filei9vwg64q.py", line 15, in tf__train_function
    retval_ = ag__.converted_call(ag__.ld(step_function), (ag__.ld(self), ag__.ld(iterator)), None, fscope)
    ^^^^^
ValueError: in user code:

    File "C:\Users\VivoBook\anaconda3\envs\tensorflow\Lib\site-packages\keras\src\engine\training.py", line 1377, in train_function  *
        return step_function(self, iterator)
    File "C:\Users\VivoBook\anaconda3\envs\tensorflow\Lib\site-packages\keras\src\engine\training.py", line 1360, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\VivoBook\anaconda3\envs\tensorflow\Lib\site-packages\keras\src\engine\training.py", line 1349, in run_step  **
        outputs = model.train_step(data)
    File "C:\Users\VivoBook\anaconda3\envs\tensorflow\Lib\site-packages\keras\src\engine\training.py", line 1131, in train_step
        return self.compute_metrics(x, y, y_pred, sample_weight)
    File "C:\Users\VivoBook\anaconda3\envs\tensorflow\Lib\site-packages\keras\src\engine\training.py", line 1225, in compute_metrics
        self.compiled_metrics.update_state(y, y_pred, sample_weight)
    File "C:\Users\VivoBook\anaconda3\envs\tensorflow\Lib\site-packages\keras\src\engine\compile_utils.py", line 620, in update_state
        metric_obj.update_state(y_t, y_p, sample_weight=mask)
    File "C:\Users\VivoBook\anaconda3\envs\tensorflow\Lib\site-packages\keras\src\utils\metrics_utils.py", line 77, in decorated
        result = update_state_fn(*args, **kwargs)
    File "C:\Users\VivoBook\anaconda3\envs\tensorflow\Lib\site-packages\keras\src\metrics\base_metric.py", line 140, in update_state_fn
        return ag_update_state(*args, **kwargs)
    File "C:\Users\VivoBook\anaconda3\envs\tensorflow\Lib\site-packages\keras\src\metrics\f_score_metrics.py", line 176, in update_state  **
        y_true = tf.convert_to_tensor(y_true, dtype=self.dtype)

    ValueError: Tensor conversion requested dtype float32 for Tensor with dtype int32: <tf.Tensor 'ExpandDims_3:0' shape=(32, 1) dtype=int32>

I have found that no error occurs when I one-hot encode the labels, but to save memory I have to use Sparse Categorical Crossentropy.


Solution

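  • You're getting this error because your get_gcd function returns integers, while the F1Score metric (the traceback ends in f_score_metrics.py) requests float32 labels and receives int32. Just put a . after the numbers returned by your function and it will return floats.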

    def get_gcd(i):
        """Return the float class label derived from ``i``.

        0 and 1 map to 0. and 1. respectively. Any other value is tested
        against the primes 2, 3, 5 and 7 in that order, and the first one that
        divides ``i`` selects the label 2., 3., 4. or 5. respectively. When
        none of them divides ``i`` the function returns None.
        """
        if i == 0:
            return 0.
        if i == 1:
            return 1.
        # Each prime is paired with its float label; note the trailing dots.
        for prime, label in zip((2, 3, 5, 7), (2., 3., 4., 5.)):
            if i % prime == 0:
                return label
        return None
    

    I wouldn't recommend you use F1Score as a training metric; the working example below uses accuracy instead.

    Full reduced working example:

    import numpy as np
    import tensorflow as tf
    
    # Load MNIST as (images, digit labels) pairs for the train and test splits.
    mnist = tf.keras.datasets.mnist
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    # Divide every pixel value by 255 (presumably mapping 0-255 intensities
    # into [0, 1] -- confirm against the dataset's dtype/range).
    x_train = x_train / 255.0
    x_test = x_test / 255.0
    # Flatten each sample into a row of 784 features, the width expected by
    # the model's Input(shape=(784,)).
    x_train = x_train.reshape(-1, 784)
    x_test = x_test.reshape(-1, 784)
    
    
    def get_gcd(i):
        """Return the float class label derived from ``i``.

        0 and 1 map to 0. and 1. respectively. Any other value is tested
        against the primes 2, 3, 5 and 7 in that order, and the first one that
        divides ``i`` selects the label 2., 3., 4. or 5. respectively. When
        none of them divides ``i`` the function returns None.
        """
        if i == 0:
            return 0.
        if i == 1:
            return 1.
        # Each prime is paired with its float label; note the trailing dots.
        for prime, label in zip((2, 3, 5, 7), (2., 3., 4., 5.)):
            if i % prime == 0:
                return label
        return None
    
    
    # Derive the second target from the digit labels; get_gcd() returns floats
    # here, so these arrays hold float labels.
    y_train_gcd = np.array([get_gcd(i) for i in y_train])
    y_test_gcd = np.array([get_gcd(i) for i in y_test])
    
    
    # Bundle both targets in a list, in the same order as the model's outputs
    # ([output_y, output_gcd]).
    y_train = [y_train, y_train_gcd]
    y_test = [y_test, y_test_gcd]
    
    # Shared trunk: 784 input features feeding one 128-unit ReLU hidden layer.
    inp = tf.keras.Input(shape=(784,))
    x = tf.keras.layers.Dense(128, activation='relu')(inp)
    # Two output heads on top of the shared layer. Neither has an activation,
    # so both emit raw logits (hence from_logits=True in the losses below).
    output_y = tf.keras.layers.Dense(10, name='y')(x)
    output_gcd = tf.keras.layers.Dense(6, name='gcd')(x)
    model = tf.keras.Model(inputs=inp, outputs=[output_y, output_gcd])
    
    # Losses are keyed by the output layer names 'y' and 'gcd'.
    model.compile(
        optimizer='adam',
        loss={
            'y': tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
            'gcd': tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
        },
        # F1Score is dropped; only accuracy is tracked.
        # NOTE(review): presumably a plain list applies to both outputs -- confirm
        # for the Keras version in use.
        metrics=['accuracy']
    )
    # Reduced run: a single epoch and no validation data.
    model.fit(x_train, y_train, epochs=1, batch_size=32)