import tensorflow as tf
from ..models.encoder import encoder_build
from ..models.decoder import decoder_build
def compute_attention_maps(inputs, name, upsample=False):
    attMap = tf.reduce_sum(tf.square(inputs), axis=-1, keepdims=True,
                           name=str(name) + "reducSum")
    if upsample:
        attMap = tf.keras.layers.UpSampling2D(size=(2, 2),
                                              interpolation='bilinear',
                                              name=str(name) + "bilinear")(attMap)
    attMap = tf.squeeze(attMap, axis=-1, name=str(name) + "squeeze")
    attMap = tf.reshape(attMap,
                        (tf.shape(attMap)[0], tf.shape(attMap)[1] * tf.shape(attMap)[2]),
                        name=str(name) + "reshape")
    attMap = tf.nn.softmax(attMap, axis=-1, name=str(name) + "spatialSoftmax")
    return attMap
def compute_mse(x, y, name):
    diff = tf.math.squared_difference(x, y, name=str(name) + "squError")
    diff = tf.reduce_mean(diff, axis=0, name=str(name) + "mean")
    diff = tf.reduce_sum(diff, name=str(name) + "sum")
    return diff
def compute_distillation(attention_inputs):
    inp1, inp2, inp3, inp4 = attention_inputs
    attMap1 = compute_attention_maps(inp1, "attmap1_")
    attMap2_upsample = compute_attention_maps(inp2, "attmap2UP_", upsample=True)
    attMap2 = compute_attention_maps(inp2, "attmap2_")
    attMap3_upsample = compute_attention_maps(inp3, "attmap3UP_", upsample=True)
    attMap3 = compute_attention_maps(inp3, "attmap3_")
    attMap4 = compute_attention_maps(inp4, "attmap4_")
    distillation1 = compute_mse(attMap1, attMap2_upsample, "distil1_")
    distillation2 = compute_mse(attMap2, attMap3_upsample, "distil2_")
    distillation3 = compute_mse(attMap3, attMap4, "distil3_")
    return tf.math.add_n([distillation1, distillation2, distillation3],
                         name="distill_loss")
if __name__ == '__main__':
    inputs = tf.keras.layers.Input(shape=(None, None, 3), name='image')
    encoderTuple = encoder_build(inputs)   # imported from encoder.py
    attention_inputs = encoderTuple[1]
    outputs = decoder_build(encoderTuple)  # imported from decoder.py
    model = tf.keras.models.Model(inputs=inputs, outputs=outputs)
    model.add_loss(compute_distillation(attention_inputs))
    model.summary()
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.00001, clipnorm=0.001),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.fit(x=train_generator,
              epochs=epochs,
              verbose=1,
              callbacks=callbacks,
              validation_data=validation_generator,
              shuffle=True)
I have created a Keras segmentation model for lane detection (https://arxiv.org/pdf/1908.00821.pdf). I am able to compile it, start training, and save the model after each epoch without any errors. But if I add my custom loss with model.add_loss(compute_distillation(attention_inputs)), the model trains for 1 epoch, after which it fails to save and displays the error below. How can I resolve this error?
374/375 [============================>.] - ETA: 0s - loss: 4.4717 - acc: 0.9781Epoch 1/50
78/78[============================>.] - ETA: 37:38 - val_loss: 4.5855 - val_acc: 0.9758
Epoch 00001: saving model to /workspace/work/enet_sad_naiveresize/snapshot/enetNRSAD_Tusimple_L_4.4718_VL_4.5855.h5
Traceback (most recent call last):
File "/workspace/work/enet_sad_naiveresize/bin/train.py", line 82, in <module>
shuffle=True)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training.py", line 727, in fit
use_multiprocessing=use_multiprocessing)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training_generator.py", line 603, in fit
steps_name='steps_per_epoch')
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training_generator.py", line 332, in model_iteration
callbacks.on_epoch_end(epoch, epoch_logs)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/callbacks.py", line 299, in on_epoch_end
callback.on_epoch_end(epoch, logs)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/callbacks.py", line 968, in on_epoch_end
self._save_model(epoch=epoch, logs=logs)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/callbacks.py", line 1015, in _save_model
self.model.save(filepath, overwrite=True)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/network.py", line 1171, in save
signatures)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/saving/save.py", line 109, in save_model
model, filepath, overwrite, include_optimizer)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/saving/hdf5_format.py", line 103, in save_model_to_hdf5
save_weights_to_hdf5_group(model_weights_group, model_layers)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/saving/hdf5_format.py", line 619, in save_weights_to_hdf5_group
g = f.create_group(layer.name)
File "/usr/local/lib/python3.6/dist-packages/h5py/_hl/group.py", line 68, in create_group
gid = h5g.create(self.id, name, lcpl=lcpl, gcpl=gcpl)
File "h5py/_objects.pyx", line 54, in h5py._objects.with_phil.wrapper
File "h5py/_objects.pyx", line 55, in h5py._objects.with_phil.wrapper
File "h5py/h5g.pyx", line 161, in h5py.h5g.create
ValueError: Unable to create group (name already exists)
The issue is that compute_distillation stacks raw TensorFlow ops into the model (and names them in a way Keras does not expect) by calling compute_attention_maps and compute_mse. You would have hit a similar error even without naming them, and the reason the error persists after naming them is that the h5 format expects layer names in a certain format, as explained in https://github.com/keras-team/keras/issues/12195. A good solution is to use Keras Lambda layers in compute_distillation to create attMap1, attMap2, etc., or to define your own custom AttentionMaps layer, as shown below.
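For illustration, the Lambda route could look roughly like this (a minimal sketch that simply wraps the existing compute_attention_maps call so Keras tracks it as one properly named layer; the layer name here is my own example):

# Hypothetical Lambda-layer variant for one attention map.
attMap1 = tf.keras.layers.Lambda(
    lambda t: compute_attention_maps(t, "attmap1_"),
    name="attmap1")(inp1)

The custom-layer version follows: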
class AttentionMaps(tf.keras.layers.Layer):
    def __init__(self, upsample=False, **kwargs):
        super(AttentionMaps, self).__init__(**kwargs)
        self.upsample = upsample
        if self.upsample:
            # Create the sub-layer once here instead of on every call.
            self.upsampler = tf.keras.layers.UpSampling2D(
                size=(2, 2), interpolation='bilinear')

    def call(self, inputs):
        # Channel-wise sum of squared activations, flattened per image
        # and normalised with a spatial softmax.
        attMap = tf.reduce_sum(tf.square(inputs), axis=-1, keepdims=True)
        if self.upsample:
            attMap = self.upsampler(attMap)
        attMap = tf.squeeze(attMap, axis=-1)
        attMap = tf.reshape(
            attMap,
            (tf.shape(attMap)[0], tf.shape(attMap)[1] * tf.shape(attMap)[2]))
        attMap = tf.nn.softmax(attMap, axis=-1)
        return attMap
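
    def get_config(self):
        # Added on my side: returning 'upsample' in the config lets Keras
        # serialise and reload this custom layer when saving the model.
        config = super(AttentionMaps, self).get_config()
        config.update({'upsample': self.upsample})
        return config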
This custom layer can then be added to your model as in the example below. The per-op names are no longer required, so I removed them.
def compute_distillation(attention_inputs):
    inp1, inp2, inp3, inp4 = attention_inputs
    attention_layer_1 = AttentionMaps()
    attMap1 = attention_layer_1(inp1)
    attention_layer_2 = AttentionMaps(upsample=True)
    attMap2_upsample = attention_layer_2(inp2)
    attention_layer_3 = AttentionMaps()
    attMap2 = attention_layer_3(inp2)
    attention_layer_4 = AttentionMaps(upsample=True)
    attMap3_upsample = attention_layer_4(inp3)
    attention_layer_5 = AttentionMaps()
    attMap3 = attention_layer_5(inp3)
    attention_layer_6 = AttentionMaps()
    attMap4 = attention_layer_6(inp4)
    distillation1 = compute_mse(attMap1, attMap2_upsample)
    distillation2 = compute_mse(attMap2, attMap3_upsample)
    distillation3 = compute_mse(attMap3, attMap4)
    return tf.math.add_n([distillation1, distillation2, distillation3],
                         name="distill_loss")
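Since these calls no longer pass a name, compute_mse needs a matching name-free signature; a minimal version mirroring your original:

def compute_mse(x, y):
    # Squared error, averaged over the batch and summed over positions.
    diff = tf.math.squared_difference(x, y)
    diff = tf.reduce_mean(diff, axis=0)
    return tf.reduce_sum(diff)

One more note: if you later reload a saved model, the custom layer has to be supplied via custom_objects, e.g. tf.keras.models.load_model(path, custom_objects={'AttentionMaps': AttentionMaps}).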