Search code examples
tensorflow machine-learning keras deep-learning

addition of 2 models as input to another (graph disconnected)


I have two models, model_A and model_B. I want to add the outputs of those two models element-wise and use the result as the input to model_C. So, I have this code:

from tensorflow.keras.layers import Conv2D, BatchNormalization, \
    Activation, Input, Add
from tensorflow.keras.models import Model
import numpy as np
import tensorflow as tf

def model_A(inputs):
    """Build the 'model_A' branch directly on top of ``inputs``.

    Layers: Conv2D(32, 3, same) -> BatchNormalization -> ReLU ->
    Conv2D(32, 3, same). Returns a Keras ``Model`` from ``inputs`` to the
    last convolution's output.
    """
    hidden = Conv2D(filters=32, kernel_size=3, padding='same')(inputs)
    hidden = BatchNormalization()(hidden)
    hidden = Activation('relu')(hidden)

    branch_out = Conv2D(filters=32, kernel_size=3, padding='same')(hidden)
    return Model(inputs=inputs, outputs=branch_out, name='model_A')
    

def model_B(inputs):
    """Build the 'model_B' branch directly on top of ``inputs``.

    Layers: Conv2D(32, 3, same) -> BatchNormalization -> ReLU ->
    Conv2D(32, 3, same). Returns a Keras ``Model`` from ``inputs`` to the
    last convolution's output.
    """
    hidden = Conv2D(filters=32, kernel_size=3, padding='same')(inputs)
    hidden = BatchNormalization()(hidden)
    hidden = Activation('relu')(hidden)

    branch_out = Conv2D(filters=32, kernel_size=3, padding='same')(hidden)

    return Model(inputs=inputs, outputs=branch_out, name='model_B')

def model_C(inputs):
    """Build the 'model_C' head directly on top of ``inputs``.

    Two Conv2D -> BatchNormalization -> ReLU stages (32, then 16 filters)
    followed by a final single-filter Conv2D. Returns a Keras ``Model``
    from ``inputs`` to that final convolution's output.
    """
    stage1 = Conv2D(filters=32, kernel_size=3, padding='same')(inputs)
    stage1 = BatchNormalization()(stage1)
    stage1 = Activation('relu')(stage1)

    stage2 = Conv2D(filters=16, kernel_size=3, padding='same')(stage1)
    stage2 = BatchNormalization()(stage2)
    stage2 = Activation('relu')(stage2)

    f3 = Conv2D(filters=1, kernel_size=3, padding='same')(stage2)

    return Model(inputs=inputs, outputs=f3, name='model_C')
    
def model_final(height, width, channels):
    """Assemble model_A + model_B -> Add -> model_C into a single model.

    NOTE: this is the failing version; the "Graph disconnected" error is
    reported from the Model(...) construction inside model_C.
    """
    inputs = Input((height, width, channels))
    
    # Both sub-models are built directly on the same Input tensor.
    modelA = model_A(inputs)
    modelB = model_B(inputs)
    
    # Element-wise sum of the two sub-model outputs.
    addition = Add()([modelA.output, modelB.output])
    
    # `addition` is the output of an Add layer, not an Input; model_C hands
    # it to Model(inputs=...), which is where the error is reported.
    modelC = model_C(addition)
    
    return Model(inputs, modelC.output)
    
# Three random arrays with values in [0, 1) and shape (100, 32, 32, 3);
# they are not used further in this snippet.
a = np.random.uniform(low=0, high=1, size=(100, 32, 32, 3))
b = np.random.uniform(low=0, high=1, size=(100, 32, 32, 3))
c = np.random.uniform(low=0, high=1, size=(100, 32, 32, 3))

# Build the combined network for 32x32 inputs with 3 channels.
model = model_final(height=32, width=32, channels=3)

# Adam with a small learning rate; MAE is both the loss and the metric.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
model.compile(optimizer=optimizer, loss='mae', metrics=['mae'])
    

If I run the code, I receive a "Graph disconnected" error at Model(inputs=inputs, outputs=f3, name='model_C'). So, in order to solve this problem, I am doing:

def model_final(height, width, channels):
    """Combine model_A and model_B by addition and feed the sum to model_C.

    model_C is built on its own 32-channel Input and the resulting model is
    then called on the summed tensor, so the final graph starts at ``inputs``.
    """
    inputs = Input((height, width, channels))

    branch_a = model_A(inputs)
    branch_b = model_B(inputs)
    addition = Add()([branch_a.output, branch_b.output])

    # Separate Input for model_C; the built model is applied to `addition`.
    inputs_C = Input((height, width, 32))
    head = model_C(inputs_C)
    outputs = head(addition)

    return Model(inputs, outputs)

which compiles fine. But I am not sure whether this is right, i.e. whether the logic of doing it this way is correct.


Solution

  • In your solution, you are creating a new input for C, whereas you should be feeding it the outputs of models A and B.

    I'd suggest that you either create fully separate models and then run tensors through them to build an overall model, or skip the submodels and work directly with the tensors instead.


    Using submodels

    This is better if you really want submodels for training them separately, detaching them and using them elsewhere, etc.:

    def model_A(height, width, channels):
        """Stand-alone 'model_A' with its own Input of shape (height, width, channels).

        Layers: Conv2D(32, 3, same) -> BatchNormalization -> ReLU ->
        Conv2D(32, 3, same).
        """
        inputs = Input((height, width, channels))

        hidden = Conv2D(filters=32, kernel_size=3, padding='same')(inputs)
        hidden = BatchNormalization()(hidden)
        hidden = Activation('relu')(hidden)

        branch_out = Conv2D(filters=32, kernel_size=3, padding='same')(hidden)
        return Model(inputs=inputs, outputs=branch_out, name='model_A')
    
    
    def model_B(height, width, channels):
        """Stand-alone 'model_B' with its own Input of shape (height, width, channels).

        Layers: Conv2D(32, 3, same) -> BatchNormalization -> ReLU ->
        Conv2D(32, 3, same).
        """
        inputs = Input((height, width, channels))

        hidden = Conv2D(filters=32, kernel_size=3, padding='same')(inputs)
        hidden = BatchNormalization()(hidden)
        hidden = Activation('relu')(hidden)

        branch_out = Conv2D(filters=32, kernel_size=3, padding='same')(hidden)

        return Model(inputs=inputs, outputs=branch_out, name='model_B')
    
    def model_C(height, width, channels):
        """Stand-alone 'model_C' with its own Input of shape (height, width, channels).

        Two Conv2D -> BatchNormalization -> ReLU stages (32, then 16 filters)
        followed by a final single-filter Conv2D.
        """
        inputs = Input((height, width, channels))

        stage1 = Conv2D(filters=32, kernel_size=3, padding='same')(inputs)
        stage1 = BatchNormalization()(stage1)
        stage1 = Activation('relu')(stage1)

        stage2 = Conv2D(filters=16, kernel_size=3, padding='same')(stage1)
        stage2 = BatchNormalization()(stage2)
        stage2 = Activation('relu')(stage2)

        head_out = Conv2D(filters=1, kernel_size=3, padding='same')(stage2)

        return Model(inputs=inputs, outputs=head_out, name='model_C')
    

    Then you "run a tensor" through the models:

    def model_final(height, width, channels):
        """Chain the stand-alone sub-models into one model: C(A(x) + B(x)).

        Args:
            height, width, channels: shape of the image fed to model_A and
                model_B.

        Returns:
            A Keras ``Model`` mapping the image input to model_C's output.
        """
        inputs = Input((height, width, channels))

        modelA = model_A(height, width, channels)
        modelB = model_B(height, width, channels)

        outputsA = modelA(inputs)
        outputsB = modelB(inputs)

        addition = Add()([outputsA, outputsB])

        # model_C consumes the sum, so its Input must carry the sum's channel
        # count (the filter count of the last Conv2D in model_A/model_B), not
        # the image's `channels`; otherwise calling it on `addition` fails the
        # input-shape check.
        modelC = model_C(height, width, addition.shape[-1])
        outputsC = modelC(addition)

        model = Model(inputs, outputsC)
        return model
    

    Making a single model

    This option can be easier to use when saving/loading and other things, but you won't be able to have separate models for other purposes.

    Here you only return the outputs, not new models, and then make a single model.

    def model_A(inputs):
        """Apply the 'A' branch layers to ``inputs`` and return the output tensor.

        Layers: Conv2D(32, 3, same) -> BatchNormalization -> ReLU ->
        Conv2D(32, 3, same). No ``Model`` is created here.
        """
        hidden = Conv2D(filters=32, kernel_size=3, padding='same')(inputs)
        hidden = BatchNormalization()(hidden)
        hidden = Activation('relu')(hidden)

        return Conv2D(filters=32, kernel_size=3, padding='same')(hidden)
    
    def model_B(inputs):
        """Apply the 'B' branch layers to ``inputs`` and return the output tensor.

        Layers: Conv2D(32, 3, same) -> BatchNormalization -> ReLU ->
        Conv2D(32, 3, same). No ``Model`` is created here.
        """
        hidden = Conv2D(filters=32, kernel_size=3, padding='same')(inputs)
        hidden = BatchNormalization()(hidden)
        hidden = Activation('relu')(hidden)

        return Conv2D(filters=32, kernel_size=3, padding='same')(hidden)
    
    def model_C(inputs):
        """Apply the 'C' head layers to ``inputs`` and return the output tensor.

        Two Conv2D -> BatchNormalization -> ReLU stages (32, then 16 filters)
        followed by a final single-filter Conv2D. No ``Model`` is created here.
        """
        stage1 = Conv2D(filters=32, kernel_size=3, padding='same')(inputs)
        stage1 = BatchNormalization()(stage1)
        stage1 = Activation('relu')(stage1)

        stage2 = Conv2D(filters=16, kernel_size=3, padding='same')(stage1)
        stage2 = BatchNormalization()(stage2)
        stage2 = Activation('relu')(stage2)

        return Conv2D(filters=1, kernel_size=3, padding='same')(stage2)
    

    The final model then uses the tensors to be built:

    def model_final(height, width, channels):
        """Build one Keras ``Model`` computing model_C(model_A(x) + model_B(x)).

        The three helpers are called as plain tensor-to-tensor functions, so
        every layer ends up in this single model.
        """
        inputs = Input((height, width, channels))

        out_a = model_A(inputs)
        out_b = model_B(inputs)

        # Element-wise sum of the two branch outputs feeds the C layers.
        summed = Add()([out_a, out_b])

        return Model(inputs, model_C(summed))