import tensorflow as tf
from ..models.encoder import encoder_build
from ..models.decoder import decoder_build
def compute_attention_maps(inputs, name, upsample=False):
    attMap = tf.reduce_sum(tf.square(inputs), axis=-1, keepdims=True,
                           name=str(name) + "reducSum")
    if upsample:
        attMap = tf.keras.layers.UpSampling2D(size=(2, 2),
                                              interpolation='bilinear',
                                              name=str(name) + "bilinear")(attMap)
    attMap = tf.squeeze(attMap, axis=-1, name=str(name) + "squeeze")
    attMap = tf.reshape(attMap,
                        (tf.shape(attMap)[0], tf.shape(attMap)[1] * tf.shape(attMap)[2]),
                        name=str(name) + "reshape")
    attMap = tf.nn.softmax(attMap, axis=-1, name=str(name) + "spatialSoftmax")
    return attMap
def compute_mse(x, y, name):
    diff = tf.math.squared_difference(x, y, name=str(name) + "squError")
    diff = tf.reduce_mean(diff, axis=0, name=str(name) + "mean")
    diff = tf.reduce_sum(diff, name=str(name) + "sum")
    return diff
def compute_distillation(attention_inputs):
    inp1, inp2, inp3, inp4 = attention_inputs
    attMap1 = compute_attention_maps(inp1, "attmap1_")
    attMap2_upsample = compute_attention_maps(inp2, "attmap2UP_", upsample=True)
    attMap2 = compute_attention_maps(inp2, "attmap2_")
    attMap3_upsample = compute_attention_maps(inp3, "attmap3UP_", upsample=True)
    attMap3 = compute_attention_maps(inp3, "attmap3_")
    attMap4 = compute_attention_maps(inp4, "attmap4_")
    distillation1 = compute_mse(attMap1, attMap2_upsample, "distil1_")
    distillation2 = compute_mse(attMap2, attMap3_upsample, "distil2_")
    distillation3 = compute_mse(attMap3, attMap4, "distil3_")
    return tf.math.add_n([distillation1, distillation2, distillation3],
                         name="distill_loss")
if __name__ == '__main__':
    inputs = tf.keras.layers.Input(shape=(None, None, 3), name='image')
    encoderTuple = encoder_build(inputs)   # imported from encoder.py
    attention_inputs = encoderTuple[1]
    outputs = decoder_build(encoderTuple)  # imported from decoder.py
    model = tf.keras.models.Model(inputs=inputs, outputs=outputs)
    model.add_loss(compute_distillation(attention_inputs))
    model.summary()
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.00001, clipnorm=0.001),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.fit(x=train_generator,
              epochs=epochs,
              verbose=1,
              callbacks=callbacks,
              validation_data=validation_generator,
              shuffle=True)
I have created a Keras segmentation model for lane detection (https://arxiv.org/pdf/1908.00821.pdf). I am able to compile it, start training, and save the model after each epoch without any errors. But if I add my custom loss with model.add_loss(compute_distillation(attention_inputs)), the model trains for 1 epoch, after which it fails to save and displays the error below. How can I resolve this error?
374/375 [============================>.] - ETA: 0s - loss: 4.4717 - acc: 0.9781Epoch 1/50
78/78[============================>.] - ETA: 37:38 - val_loss: 4.5855 - val_acc: 0.9758
Epoch 00001: saving model to /workspace/work/enet_sad_naiveresize/snapshot/enetNRSAD_Tusimple_L_4.4718_VL_4.5855.h5
Traceback (most recent call last):
File "/workspace/work/enet_sad_naiveresize/bin/train.py", line 82, in <module>
shuffle=True)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training.py", line 727, in fit
use_multiprocessing=use_multiprocessing)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training_generator.py", line 603, in fit
steps_name='steps_per_epoch')
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training_generator.py", line 332, in model_iteration
callbacks.on_epoch_end(epoch, epoch_logs)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/callbacks.py", line 299, in on_epoch_end
callback.on_epoch_end(epoch, logs)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/callbacks.py", line 968, in on_epoch_end
self._save_model(epoch=epoch, logs=logs)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/callbacks.py", line 1015, in _save_model
self.model.save(filepath, overwrite=True)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/network.py", line 1171, in save
signatures)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/saving/save.py", line 109, in save_model
model, filepath, overwrite, include_optimizer)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/saving/hdf5_format.py", line 103, in save_model_to_hdf5
save_weights_to_hdf5_group(model_weights_group, model_layers)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/saving/hdf5_format.py", line 619, in save_weights_to_hdf5_group
g = f.create_group(layer.name)
File "/usr/local/lib/python3.6/dist-packages/h5py/_hl/group.py", line 68, in create_group
gid = h5g.create(self.id, name, lcpl=lcpl, gcpl=gcpl)
File "h5py/_objects.pyx", line 54, in h5py._objects.with_phil.wrapper
File "h5py/_objects.pyx", line 55, in h5py._objects.with_phil.wrapper
File "h5py/h5g.pyx", line 161, in h5py.h5g.create
ValueError: Unable to create group (name already exists)
The issue is that compute_distillation stacks raw TensorFlow ops into the model (and names them in a way Keras does not expect) by calling compute_attention_maps and compute_mse. You would have hit a similar error even without naming them, and the reason the error persists after naming them is that the h5 format expects layer names in a certain format, as explained in https://github.com/keras-team/keras/issues/12195. A good solution is to use Keras Lambda layers in compute_distillation to create attMap1, attMap2, etc., or to define your own custom AttentionMaps layer, as shown below.
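For illustration, the Lambda route could look roughly like this (a minimal sketch that simply wraps the existing compute_attention_maps call so Keras tracks it as one properly named layer; the layer name here is my own example):

# Hypothetical Lambda-layer variant for one attention map.
attMap1 = tf.keras.layers.Lambda(
    lambda t: compute_attention_maps(t, "attmap1_"),
    name="attmap1")(inp1)

The custom-layer version follows: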
class AttentionMaps(tf.keras.layers.Layer):
    def __init__(self, upsample=False, **kwargs):
        super(AttentionMaps, self).__init__(**kwargs)
        self.upsample = upsample
        if self.upsample:
            # Create the sub-layer once here instead of on every call.
            self.upsampler = tf.keras.layers.UpSampling2D(
                size=(2, 2), interpolation='bilinear')

    def call(self, inputs):
        # Channel-wise sum of squared activations, flattened per image
        # and normalised with a spatial softmax.
        attMap = tf.reduce_sum(tf.square(inputs), axis=-1, keepdims=True)
        if self.upsample:
            attMap = self.upsampler(attMap)
        attMap = tf.squeeze(attMap, axis=-1)
        attMap = tf.reshape(
            attMap,
            (tf.shape(attMap)[0], tf.shape(attMap)[1] * tf.shape(attMap)[2]))
        attMap = tf.nn.softmax(attMap, axis=-1)
        return attMap
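
    def get_config(self):
        # Added on my side: returning 'upsample' in the config lets Keras
        # serialise and reload this custom layer when saving the model.
        config = super(AttentionMaps, self).get_config()
        config.update({'upsample': self.upsample})
        return config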
This custom layer can then be added to your model as in the example below. The per-op names are no longer required, so I removed them.
def compute_distillation(attention_inputs):
    inp1, inp2, inp3, inp4 = attention_inputs
    attention_layer_1 = AttentionMaps()
    attMap1 = attention_layer_1(inp1)
    attention_layer_2 = AttentionMaps(upsample=True)
    attMap2_upsample = attention_layer_2(inp2)
    attention_layer_3 = AttentionMaps()
    attMap2 = attention_layer_3(inp2)
    attention_layer_4 = AttentionMaps(upsample=True)
    attMap3_upsample = attention_layer_4(inp3)
    attention_layer_5 = AttentionMaps()
    attMap3 = attention_layer_5(inp3)
    attention_layer_6 = AttentionMaps()
    attMap4 = attention_layer_6(inp4)
    distillation1 = compute_mse(attMap1, attMap2_upsample)
    distillation2 = compute_mse(attMap2, attMap3_upsample)
    distillation3 = compute_mse(attMap3, attMap4)
    return tf.math.add_n([distillation1, distillation2, distillation3],
                         name="distill_loss")
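Since these calls no longer pass a name, compute_mse needs a matching name-free signature; a minimal version mirroring your original:

def compute_mse(x, y):
    # Squared error, averaged over the batch and summed over positions.
    diff = tf.math.squared_difference(x, y)
    diff = tf.reduce_mean(diff, axis=0)
    return tf.reduce_sum(diff)

One more note: if you later reload a saved model, the custom layer has to be supplied via custom_objects, e.g. tf.keras.models.load_model(path, custom_objects={'AttentionMaps': AttentionMaps}).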