
Custom loss function that uses a dictionary gives an error


I'm trying to create a custom loss function that works as follows:

  • I have a classification problem (50 classes)
  • Each class belongs to one fam class (3 fam classes)
  • I have a dictionary that maps each class to its fam class.
  • I want to create a model that predicts one of the 50 classes.
  • When calculating the loss, a wrong class prediction should be punished more heavily when the fam class of the prediction differs from the true fam class

A simple code example:

import tensorflow as tf
import numpy as np
import random


# Generate synthetic dataset
num_samples     = 1000
num_classes     = 50
num_fam_classes = 3

# Create random data
X_train       = np.random.rand(num_samples, 10)
y_train_class = np.random.randint(0, num_classes, size=num_samples)

X_val       = np.random.rand(num_samples, 10)
y_val_class = np.random.randint(0, num_classes, size=num_samples)


# convert to one-hot categorical
y_train_one_hot = tf.keras.utils.to_categorical(y_train_class, num_classes=num_classes)
y_val_one_hot   = tf.keras.utils.to_categorical(y_val_class, num_classes=num_classes)


# map between class and fam class
class_to_fam_dict = {}
for i in range(num_classes):
    class_to_fam_dict[i] = random.randint(0, 2)
    

def map_class_to_fam(class_index):
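    # class_index arrives here as a symbolic tensor during graph tracing,
    # and a tensor cannot be used as a Python dict key -- this lookup is
    # what raises the TypeError reported below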
    return tf.cast(class_to_fam_dict[class_index], dtype=tf.int64)


# define loss:

def custom_loss(y_true, y_pred):

    #
    # --- step 1 : loss between classes
    #
    # loss between classes
    ce_loss_class = tf.keras.losses.categorical_crossentropy(y_true, y_pred, from_logits=False)

    #
    # --- step 2 : loss between fam class
    #
    
    # Extract predicted class indices
    predicted_classes        = tf.argmax(y_pred, axis=1)
    predicted_fam_classes    = tf.map_fn(map_class_to_fam, predicted_classes, dtype=tf.int64)
    true_fam_classes         = tf.map_fn(map_class_to_fam, tf.argmax(y_true, axis=1), dtype=tf.int64)
    true_fam_classes_one_hot = tf.one_hot(true_fam_classes, depth=num_fam_classes)

    # Compute penalty term for different family classes
    penalty = tf.where(tf.not_equal(predicted_fam_classes, true_fam_classes),
                       tf.keras.losses.categorical_crossentropy(true_fam_classes_one_hot,
                                                                tf.one_hot(predicted_fam_classes, depth=num_fam_classes),
                                                                from_logits=False),
                       0.0)

    # Sum the penalty along with the class prediction loss    
    total_loss = ce_loss_class + penalty

    return total_loss



# model
model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu', input_shape=(10,)),
    tf.keras.layers.Dense(50, activation='softmax')
])




model.compile(optimizer='adam', loss=custom_loss, metrics=['accuracy'])
model.fit(X_train, y_train_one_hot, epochs=10, batch_size=32, validation_data=[X_val, y_val_one_hot])

This gives the error:

TypeError: Tensor is unhashable. Instead, use tensor.ref() as the key.

I have tried to add:

tf.compat.v1.disable_v2_behavior()

but it gives a different error:

KeyError: <tf.Tensor 'loss/dense_3_loss/map/while/TensorArrayReadV3:0' shape=() dtype=int64>

Versions:

tensorflow==2.10.0
Python 3.10.0

How can I fix the code?


Solution

  • I think tf.map_fn will not work in graph execution with a "very customized function"; this post suggests as much. If you print class_index inside your map_class_to_fam function, you will see that the code fails during model.compile while trying to look up the following placeholder tensor in class_to_fam_dict:

    Tensor("custom_loss/map/while/TensorArrayV2Read/TensorListGetItem:0", shape=(), dtype=int64)
    

    An alternative (based on this post) is to use tf.lookup, whose tables can be queried with symbolic tensors inside a graph. Here is the code:

    import tensorflow as tf
    import numpy as np
    import random
    
    # Generate synthetic dataset
    num_samples     = 1000
    num_classes     = 50
    num_fam_classes = 3
    
    # Create random data
    X_train       = np.random.rand(num_samples, 10)
    y_train_class = np.random.randint(0, num_classes, size=num_samples)
    
    X_val       = np.random.rand(num_samples, 10)
    y_val_class = np.random.randint(0, num_classes, size=num_samples)
    
    # convert to one-hot categorical
    y_train_one_hot = tf.keras.utils.to_categorical(y_train_class, num_classes=num_classes)
    y_val_one_hot   = tf.keras.utils.to_categorical(y_val_class, num_classes=num_classes)
    
    # map between class and fam class
    class_to_fam_dict = {}
    for i in range(num_classes):
        class_to_fam_dict[i] = random.randint(0, 2)
    
    table = tf.lookup.StaticVocabularyTable(
        tf.lookup.KeyValueTensorInitializer(
            list(class_to_fam_dict.keys()),
            list(class_to_fam_dict.values()),
            key_dtype=tf.int64,
            value_dtype=tf.int64,
        ),
        num_oov_buckets=1,
    )
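    # note: with num_oov_buckets=1, any key that is not in the table would be
    # hashed into one extra bucket instead of raising an error; tf.argmax over
    # 50 softmax outputs can only yield 0..49, so that bucket is never used here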
    
    # define loss:
    def custom_loss(y_true, y_pred):
    
        #
        # --- step 1 : loss between classes
        #
        # loss between classes
        ce_loss_class = tf.keras.losses.categorical_crossentropy(y_true, y_pred, from_logits=False)
    
        #
        # --- step 2 : loss between fam class
        #
        # Extract predicted class indices
        predicted_classes        = tf.argmax(y_pred, axis=1)
        predicted_fam_classes    = table.lookup(predicted_classes)
        true_fam_classes         = table.lookup(tf.argmax(y_true, axis=1))
        true_fam_classes_one_hot = tf.one_hot(true_fam_classes, depth=num_fam_classes)
    
        # Compute penalty term for different family classes
        penalty = tf.where(tf.not_equal(predicted_fam_classes, true_fam_classes),
                           tf.keras.losses.categorical_crossentropy(true_fam_classes_one_hot,
                                                                    tf.one_hot(predicted_fam_classes, depth=num_fam_classes),
                                                                    from_logits=False),
                           0.0)
    
        # Sum the penalty along with the class prediction loss    
        total_loss = ce_loss_class + penalty
    
        return total_loss
    
    
    # model
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(128, activation='relu', input_shape=(10,)),
        tf.keras.layers.Dense(50, activation='softmax')
    ])
    
    model.compile(optimizer='adam', loss=custom_loss, metrics=['accuracy'])
    model.fit(X_train, y_train_one_hot, epochs=10, batch_size=32, validation_data=[X_val, y_val_one_hot])
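
  • As an aside, since the class indices are the contiguous integers 0..num_classes-1, the lookup table could also be replaced by plain tensor indexing with tf.gather. A minimal sketch of that alternative, assuming the keys stay contiguous:

    fam_lookup = tf.constant(
        [class_to_fam_dict[i] for i in range(num_classes)], dtype=tf.int64
    )

    def map_to_fam(class_indices):
        # map each class id to its fam class by direct indexing
        return tf.gather(fam_lookup, class_indices)

    Inside custom_loss, each table.lookup(...) call would then become map_to_fam(...); tf.gather works on symbolic tensors in graph mode, so it sidesteps the unhashable-tensor problem in the same way.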