I have two models, model_A and model_B. I want to do element-wise addition of the outputs of those two models and use the result as the input to model_C. So, I have this code:
from tensorflow.keras.layers import Conv2D, BatchNormalization, \
    Activation, Input, Add
from tensorflow.keras.models import Model
import numpy as np
import tensorflow as tf
def model_A(inputs):
    x1 = Conv2D(32, 3, padding='same')(inputs)
    x1 = BatchNormalization()(x1)
    x1 = Activation('relu')(x1)
    x2 = Conv2D(32, 3, padding='same')(x1)
    model = Model(inputs=inputs, outputs=x2, name='model_A')
    return model

def model_B(inputs):
    f1 = Conv2D(32, 3, padding='same')(inputs)
    f1 = BatchNormalization()(f1)
    f1 = Activation('relu')(f1)
    f2 = Conv2D(32, 3, padding='same')(f1)
    model = Model(inputs=inputs, outputs=f2, name='model_B')
    return model

def model_C(inputs):
    f1 = Conv2D(32, 3, padding='same')(inputs)
    f1 = BatchNormalization()(f1)
    f1 = Activation('relu')(f1)
    f2 = Conv2D(16, 3, padding='same')(f1)
    f2 = BatchNormalization()(f2)
    f2 = Activation('relu')(f2)
    f3 = Conv2D(1, 3, padding='same')(f2)
    model = Model(inputs=inputs, outputs=f3, name='model_C')
    return model
def model_final(height, width, channels):
    inputs = Input((height, width, channels))
    modelA = model_A(inputs)
    modelB = model_B(inputs)
    addition = Add()([modelA.output, modelB.output])
    modelC = model_C(addition)
    return Model(inputs, modelC.output)
a = np.random.uniform(0, 1, (100, 32, 32, 3))
b = np.random.uniform(0, 1, (100, 32, 32, 3))
c = np.random.uniform(0, 1, (100, 32, 32, 3))

model = model_final(32, 32, 3)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
model.compile(optimizer=optimizer,
              loss='mae',
              metrics=['mae'])
If I run the code, I receive a "Graph disconnected" error at Model(inputs=inputs, outputs=f3, name='model_C'). So, in order to solve this problem, I am doing:
def model_final(height, width, channels):
    inputs = Input((height, width, channels))
    modelA = model_A(inputs)
    modelB = model_B(inputs)
    addition = Add()([modelA.output, modelB.output])
    inputs_C = Input((height, width, 32))
    modelC = model_C(inputs_C)
    modelC = modelC(addition)
    model = Model(inputs, modelC)
    return model
which builds and compiles fine. But I am not sure if this is right, i.e. whether the logic of doing it this way is correct.
In your solution, you are creating a new input for model_C, while you should be feeding it the outputs of models A and B.
I'd suggest you either create fully separate models, then run tensors through them and build an overall model from that, or skip the submodels entirely and work directly with the tensors.
The first approach is better if you really want submodels, e.g. for training them separately, detaching them and using them elsewhere:
def model_A(height, width, channels):
    inputs = Input((height, width, channels))
    x1 = Conv2D(32, 3, padding='same')(inputs)
    x1 = BatchNormalization()(x1)
    x1 = Activation('relu')(x1)
    x2 = Conv2D(32, 3, padding='same')(x1)
    model = Model(inputs=inputs, outputs=x2, name='model_A')
    return model

def model_B(height, width, channels):
    inputs = Input((height, width, channels))
    f1 = Conv2D(32, 3, padding='same')(inputs)
    f1 = BatchNormalization()(f1)
    f1 = Activation('relu')(f1)
    f2 = Conv2D(32, 3, padding='same')(f1)
    model = Model(inputs=inputs, outputs=f2, name='model_B')
    return model

def model_C(height, width, channels):
    inputs = Input((height, width, channels))
    f1 = Conv2D(32, 3, padding='same')(inputs)
    f1 = BatchNormalization()(f1)
    f1 = Activation('relu')(f1)
    f2 = Conv2D(16, 3, padding='same')(f1)
    f2 = BatchNormalization()(f2)
    f2 = Activation('relu')(f2)
    f3 = Conv2D(1, 3, padding='same')(f2)
    model = Model(inputs=inputs, outputs=f3, name='model_C')
    return model
Then you "run a tensor" through the models:
def model_final(height, width, channels):
    inputs = Input((height, width, channels))
    modelA = model_A(height, width, channels)
    modelB = model_B(height, width, channels)
    modelC = model_C(height, width, 32)  # model_C receives the addition, which has 32 channels
    outputsA = modelA(inputs)
    outputsB = modelB(inputs)
    addition = Add()([outputsA, outputsB])
    outputsC = modelC(addition)
    model = Model(inputs, outputsC)
    return model
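Since A, B and C survive here as real submodels, you can still pull them out of the final model afterwards. A minimal usage sketch (the random data, the single training epoch and the freezing of model_A below are just illustrations, not part of the question):

model = model_final(32, 32, 3)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
              loss='mae', metrics=['mae'])

a = np.random.uniform(0, 1, (100, 32, 32, 3))   # inputs
c = np.random.uniform(0, 1, (100, 32, 32, 1))   # targets: model_C ends in a 1-filter Conv2D
model.fit(a, c, epochs=1, batch_size=10)

# The submodels are layers of the final model and can be used on their own:
sub_A = model.get_layer('model_A')
sub_A.trainable = False          # e.g. freeze A for a later training stage (then recompile)
features_A = sub_A.predict(a)    # or run A separately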
The second approach can be easier to use when saving/loading and such, but you won't have separate submodels for other purposes.
Here you only return the output tensors, not new models, and then make a single model:
def model_A(inputs):
    x1 = Conv2D(32, 3, padding='same')(inputs)
    x1 = BatchNormalization()(x1)
    x1 = Activation('relu')(x1)
    x2 = Conv2D(32, 3, padding='same')(x1)
    return x2

def model_B(inputs):
    f1 = Conv2D(32, 3, padding='same')(inputs)
    f1 = BatchNormalization()(f1)
    f1 = Activation('relu')(f1)
    f2 = Conv2D(32, 3, padding='same')(f1)
    return f2

def model_C(inputs):
    f1 = Conv2D(32, 3, padding='same')(inputs)
    f1 = BatchNormalization()(f1)
    f1 = Activation('relu')(f1)
    f2 = Conv2D(16, 3, padding='same')(f1)
    f2 = BatchNormalization()(f2)
    f2 = Activation('relu')(f2)
    f3 = Conv2D(1, 3, padding='same')(f2)
    return f3
The final model is then built from those tensors:
def model_final(height, width, channels):
    inputs = Input((height, width, channels))
    x2 = model_A(inputs)
    f2 = model_B(inputs)
    addition = Add()([x2, f2])
    outputs = model_C(addition)
    return Model(inputs, outputs)
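As a quick sanity check, this single-model version builds and trains directly (a minimal sketch reusing the random data from the question; note the targets need a single channel to match the final Conv2D):

model = model_final(32, 32, 3)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
              loss='mae', metrics=['mae'])
model.summary()

a = np.random.uniform(0, 1, (100, 32, 32, 3))
c = np.random.uniform(0, 1, (100, 32, 32, 1))  # one channel, matching Conv2D(1, ...)
model.fit(a, c, epochs=1, batch_size=10)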