I am trying to create a model that returns two outputs: the first is the predictions and the second is an attention map. I am using the CIFAR-10 dataset for this.
The problem is that the fit method doesn't accept a model with two outputs under the default loss handling, so I have to create a custom loss function, and I am not able to figure out how. I have tried the following:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Input, Conv2D, Dense, GlobalAveragePooling2D

def teacher_model_generator(input_shape, num_classes):
    inputs = Input(shape=input_shape)
    x = Conv2D(64, (3, 3), padding='same', activation='relu')(inputs)
    cbam_output = cbam(x)  # cbam is my own attention module
    print(cbam_output.shape)
    x = GlobalAveragePooling2D()(cbam_output)
    x = Dense(num_classes, activation='softmax', name='out_predictions')(x)
    print(x.shape)
    return tf.keras.Model(inputs=inputs, outputs=[x, cbam_output], name='teacher_model')

input_shape = (32, 32, 3)
num_classes = 10
teacher_model = teacher_model_generator(input_shape, num_classes)
teacher_model.compile(
    loss = {
        "out_predictions": tf.keras.losses.CategoricalCrossentropy(),
    },
    metrics = 'acc',
    optimizer = keras.optimizers.Adam(learning_rate=0.001)
)
teacher_model.fit(xtrain, y_out_a, epochs=3)
My teacher model's architecture is given below:
Model: "teacher_model"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_1 (InputLayer) [(None, 32, 32, 3)] 0 []
conv2d (Conv2D) (None, 32, 32, 64) 1792 ['input_1[0][0]']
global_average_pooling2d ( (None, 64) 0 ['conv2d[0][0]']
GlobalAveragePooling2D)
global_max_pooling2d (Glob (None, 64) 0 ['conv2d[0][0]']
alMaxPooling2D)
dense (Dense) (None, 8) 512 ['global_average_pooling2d[0][
0]',
'global_max_pooling2d[0][0]']
dense_1 (Dense) (None, 64) 512 ['dense[0][0]',
'dense[1][0]']
tf.__operators__.add (TFOp (None, 64) 0 ['dense_1[0][0]',
Lambda) 'dense_1[1][0]']
activation (Activation) (None, 64) 0 ['tf.__operators__.add[0][0]']
multiply (Multiply) (None, 32, 32, 64) 0 ['conv2d[0][0]',
'activation[0][0]']
tf.math.reduce_mean (TFOpL (None, 32, 64) 0 ['multiply[0][0]']
ambda)
tf.math.reduce_mean_1 (TFO (None, 32, 64) 0 ['multiply[0][0]']
pLambda)
tf.expand_dims (TFOpLambda (None, 1, 32, 64) 0 ['tf.math.reduce_mean[0][0]']
)
tf.expand_dims_1 (TFOpLamb (None, 1, 32, 64) 0 ['tf.math.reduce_mean_1[0][0]'
da) ]
concatenate (Concatenate) (None, 1, 32, 128) 0 ['tf.expand_dims[0][0]',
'tf.expand_dims_1[0][0]']
conv2d_1 (Conv2D) (None, 1, 32, 1) 6273 ['concatenate[0][0]']
multiply_1 (Multiply) (None, 32, 32, 64) 0 ['multiply[0][0]',
'conv2d_1[0][0]']
tf.__operators__.add_1 (TF (None, 32, 32, 64) 0 ['conv2d[0][0]',
OpLambda) 'multiply_1[0][0]']
global_average_pooling2d_1 (None, 64) 0 ['tf.__operators__.add_1[0][0]
(GlobalAveragePooling2D) ']
out_predictions (Dense) (None, 10) 650 ['global_average_pooling2d_1[0
][0]']
==================================================================================================
Total params: 9739 (38.04 KB)
Trainable params: 9739 (38.04 KB)
Non-trainable params: 0 (0.00 Byte)
__________________________________________________________________________________________________
Running this prints the two output shapes and then fails with the following error:
(None, 32, 32, 64) --> cbam_output.shape --> output 2 of my model
(None, 10) --> x.shape --> output 1 of my model
Epoch 1/3
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[11], line 65
57 teacher_model = teacher_model_generator(input_shape, num_classes)
58 teacher_model.compile(
59 loss = {
60 "out_predictions": tf.keras.losses.CategoricalCrossentropy(),
(...)
63 optimizer = keras.optimizers.Adam(learning_rate=0.001)
64 )
---> 65 teacher_model.fit(xtrain, y_out_a, epochs=3)
File ~\AppData\Roaming\Python\Python311\site-packages\keras\src\utils\traceback_utils.py:70, in filter_traceback.<locals>.error_handler(*args, **kwargs)
67 filtered_tb = _process_traceback_frames(e.__traceback__)
68 # To get the full stack trace, call:
69 # `tf.debugging.disable_traceback_filtering()`
---> 70 raise e.with_traceback(filtered_tb) from None
71 finally:
72 del filtered_tb
File ~\AppData\Local\Temp\__autograph_generated_file5swoecr9.py:15, in outer_factory.<locals>.inner_factory.<locals>.tf__train_function(iterator)
13 try:
14 do_return = True
---> 15 retval_ = ag__.converted_call(ag__.ld(step_function), (ag__.ld(self), ag__.ld(iterator)), None, fscope)
16 except:
17 do_return = False
ValueError: in user code:
File "C:\Users\21112\AppData\Roaming\Python\Python311\site-packages\keras\src\engine\training.py", line 1401, in train_function *
return step_function(self, iterator)
File "C:\Users\21112\AppData\Roaming\Python\Python311\site-packages\keras\src\engine\training.py", line 1384, in step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "C:\Users\21112\AppData\Roaming\Python\Python311\site-packages\keras\src\engine\training.py", line 1373, in run_step **
outputs = model.train_step(data)
File "C:\Users\21112\AppData\Roaming\Python\Python311\site-packages\keras\src\engine\training.py", line 1155, in train_step
return self.compute_metrics(x, y, y_pred, sample_weight)
File "C:\Users\21112\AppData\Roaming\Python\Python311\site-packages\keras\src\engine\training.py", line 1249, in compute_metrics
self.compiled_metrics.update_state(y, y_pred, sample_weight)
File "C:\Users\21112\AppData\Roaming\Python\Python311\site-packages\keras\src\engine\compile_utils.py", line 620, in update_state
metric_obj.update_state(y_t, y_p, sample_weight=mask)
File "C:\Users\21112\AppData\Roaming\Python\Python311\site-packages\keras\src\utils\metrics_utils.py", line 77, in decorated
result = update_state_fn(*args, **kwargs)
File "C:\Users\21112\AppData\Roaming\Python\Python311\site-packages\keras\src\metrics\base_metric.py", line 140, in update_state_fn
return ag_update_state(*args, **kwargs)
File "C:\Users\21112\AppData\Roaming\Python\Python311\site-packages\keras\src\metrics\base_metric.py", line 723, in update_state **
matches = ag_fn(y_true, y_pred, **self._fn_kwargs)
File "C:\Users\21112\AppData\Roaming\Python\Python311\site-packages\keras\src\metrics\accuracy_metrics.py", line 459, in sparse_categorical_accuracy
matches = metrics_utils.sparse_categorical_matches(y_true, y_pred)
File "C:\Users\21112\AppData\Roaming\Python\Python311\site-packages\keras\src\utils\metrics_utils.py", line 969, in sparse_categorical_matches
matches = tf.cast(tf.equal(y_true, y_pred), backend.floatx())
ValueError: Dimensions must be equal, but are 10 and 32 for '{{node Equal_1}} = Equal[T=DT_FLOAT, incompatible_shape_error=true](IteratorGetNext:1, Cast_4)' with input shapes: [?,10], [?,32,32].
Generally, with a multi-output model you may want to compute the loss on specific outputs only. One way to do this is to map each target output layer to its desired loss method in the compile call. For example:
loss = {
    "out_a": keras.losses.CategoricalCrossentropy(),
    "out_b": keras.losses.SparseCategoricalCrossentropy(),
    "out_c": keras.losses.MeanSquaredError()
}
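You can scope metrics the same way, and that is actually what your traceback is failing on: with metrics = 'acc', Keras applies accuracy to every output, including the attention map, which is why it ends up comparing shapes [?,10] and [?,32,32]. A minimal fix for your compile call would be to key the metric to the prediction head only:

metrics = {
    "out_predictions": "acc",
}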
Here is a toy example.
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Input, Conv2D, GlobalAveragePooling2D

(xtrain, ytrain), (xtest, _) = keras.datasets.mnist.load_data()
xtrain = xtrain[:10000]
ytrain = ytrain[:10000]
xtrain = xtrain[..., None]  # add the channel axis expected by Conv2D
y_out_a = keras.utils.to_categorical(ytrain, num_classes=10)
y_out_b = keras.utils.to_categorical((ytrain % 2 == 0).astype(int), num_classes=2)

def teacher_model_generator(input_shape, num_classes):
    inputs = Input(shape=input_shape)
    x = Conv2D(64, (3, 3), padding='same', activation='relu')(inputs)
    x = GlobalAveragePooling2D()(x)
    out_a = keras.layers.Dense(10, activation='softmax', name='out_a')(x)
    out_b = keras.layers.Dense(2, activation='softmax', name='out_b')(x)
    return keras.Model(
        inputs=inputs, outputs=[out_a, out_b], name='teacher_model'
    )

input_shape = (28, 28, 1)
num_classes = 10
teacher_model = teacher_model_generator(input_shape, num_classes)
teacher_model.compile(
    loss = {
        "out_a": tf.keras.losses.CategoricalCrossentropy(),
    },
    metrics = 'acc',
    optimizer = keras.optimizers.Adam(learning_rate=0.001)
)
teacher_model.fit(xtrain, [y_out_a, y_out_b], epochs=3)
Epoch 1/3
loss: 2.2846 - out_a_loss: 2.2846 - out_a_acc: 0.2191 - out_b_acc: 0.4930
Epoch 2/3
loss: 1.7500 - out_a_loss: 1.7500 - out_a_acc: 0.3483 - out_b_acc: 0.4930
Epoch 3/3
loss: 1.6229 - out_a_loss: 1.6229 - out_a_acc: 0.3922 - out_b_acc: 0.4930
This uses Keras's high-level API. Note in the logs above that only out_a contributes to the total loss, since no loss was specified for out_b. If you want more control, you can drop to a slightly lower-level API and override train_step, as sketched below.
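Here is a minimal sketch of that pattern (my own suggestion, using the standard TF 2.x override-train_step recipe, not something your code requires) that computes the loss only on the prediction head and simply ignores the attention-map output during training:

class TeacherModel(keras.Model):
    # Custom training step: only the first output (predictions) gets a loss;
    # the second output (attention map) is returned but never trained against.
    def train_step(self, data):
        x, y = data
        with tf.GradientTape() as tape:
            y_pred, _attention_map = self(x, training=True)
            loss = self.compiled_loss(y, y_pred, regularization_losses=self.losses)
        grads = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self.trainable_variables))
        self.compiled_metrics.update_state(y, y_pred)
        return {m.name: m.result() for m in self.metrics}

# Build it from the same functional graph, then compile with a single loss:
# teacher_model = TeacherModel(inputs=inputs, outputs=[out_a, out_b])
# teacher_model.compile(loss=keras.losses.CategoricalCrossentropy(),
#                       metrics='acc',
#                       optimizer=keras.optimizers.Adam(learning_rate=0.001))
# teacher_model.fit(xtrain, y_out_a, epochs=3)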
Another way to achieve this is to first train the model as usual, with a single input and a single output, and after training is finished, create a feature extractor from it.
def teacher_model_generator(input_shape, num_classes):
    inputs = Input(shape=input_shape)
    x = Conv2D(64, (3, 3), padding='same', activation='relu')(inputs)
    x = GlobalAveragePooling2D()(x)
    out_a = keras.layers.Dense(10, activation='softmax', name='out_a')(x)
    return keras.Model(
        inputs=inputs, outputs=out_a, name='teacher_model'
    )

input_shape = (28, 28, 1)
num_classes = 10
teacher_model = teacher_model_generator(input_shape, num_classes)
teacher_model.compile(
    loss = keras.losses.CategoricalCrossentropy(),
    metrics = 'acc',
    optimizer = keras.optimizers.Adam(learning_rate=0.001)
)
teacher_model.fit(xtrain, y_out_a, epochs=3)
After training finishes, first inspect the layers as follows:
for layer in teacher_model.layers:
    print(layer.name, layer.output_shape)
input_7 [(None, 28, 28, 1)]
conv2d_6 (None, 28, 28, 64)
global_average_pooling2d_6 (None, 64)
out_a (None, 10)
Here conv2d_6 is the target layer. Now, build a feature extractor:
feature_extractor = keras.Model(
    teacher_model.inputs,
    [
        teacher_model.get_layer(name='conv2d_6').output,
        teacher_model.output
    ]
)
Now you can use it as a multi-output model for inference.
conv2D_output, pred_output = feature_extractor(xtrain[0][None, ...])
conv2D_output.shape, pred_output.shape
(TensorShape([1, 28, 28, 64]), TensorShape([1, 10]))
Note that in your case you're using a cbam module, so you need to pass the name of its last layer to teacher_model.get_layer(name=[HERE]).output; in the summary you posted, that is the tf.__operators__.add_1 op.
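For example, assuming the layer names from your summary (verify them with the layer inspection shown earlier), after training your model with outputs=x only:

feature_extractor = keras.Model(
    teacher_model.inputs,
    [
        # 'tf.__operators__.add_1' is the last op of the cbam block in your summary
        teacher_model.get_layer(name='tf.__operators__.add_1').output,
        teacher_model.output
    ]
)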