Tags: python, tensorflow, keras, deep-learning, neural-network

Multi-task learning with ANN?


I am trying to implement a simple multi-task learning model with the following network:

y_train_target1 = Y_train.iloc[:, 0]
y_test_target1 = Y_test.iloc[:, 0]
y_train_target2 = Y_train.iloc[:, 1]
y_test_target2 = Y_test.iloc[:, 1]

input_dim_train=X_train.shape[1]
# shape of X_train is: (30000, 126)
inputs = Input(shape=X_train.shape[1], name='main_input')

main_model = Sequential()
main_model.add(Dense(200, input_dim=input_dim_train, activation='relu'))
main_model.add(Dense(50, input_dim=input_dim_train, activation='relu'))
main_model.add(BatchNormalization())
main_model.add(Dropout(0.4))
main_model.add(Dense(1, activation='softmax'))

model_target1 = Sequential()
model_target2 = Sequential()

model_target1.add(main_model)
model_target2.add(main_model)

model_target1.add(Dense(1, activation='softmax', name='target1_output'))
model_target2.add(Dense(1, activation='softmax', name='target2_output'))

model_share = Model(inputs = inputs,outputs = [model_target1, model_target2])
model_share.summary()

But I face the following error when I run the line Model(inputs=inputs, outputs=[model_target1, model_target2]):

ValueError: Output tensors of a Functional model must be the output of a TensorFlow `Layer` (thus holding past layer metadata). Found: <keras.engine.sequential.Sequential object at 0x00000214980FE5B0>

Any idea how to handle this issue?


Solution

  • If the objective is to share the layers between the two outputs, then you can write your code as shown below; see also this answer.

    import numpy as np
    import tensorflow as tf
    from tensorflow.keras.layers import Input, Dense, Dropout, BatchNormalization
    from tensorflow.keras.models import Model
    np.random.seed(0)
    tf.random.set_seed(0)
    
    # generate the features
    x = np.random.normal(0, 1, (100, 10))
    
    # generate the targets
    y1 = np.mean(x, axis=1)
    y2 = np.median(x, axis=1)
    
    # define the shared layers
    d1 = Dense(200, activation='relu')
    d2 = Dense(50, activation='relu')
    d3 = BatchNormalization()
    d4 = Dropout(0.4)
    
    # create a function for applying the shared layers
    def nn(x, name):
        y = d1(x)
        y = d2(y)
        y = d3(y)
        y = d4(y)
        y = Dense(1, name=name)(y)
        return y
    
    # create the model
    inputs = Input(shape=(x.shape[1],), name='common_input')
    output1 = nn(inputs, name='target_1')
    output2 = nn(inputs, name='target_2')
    
    model = Model(inputs=inputs, outputs=[output1, output2])
    
    model.compile(optimizer='adam', loss='mse')
    
    # train the model
    model.fit(x, [y1, y2], epochs=5)
    # Epoch 1/5
    # 4/4 [==============================] - 1s 1ms/step - loss: 5.3587 - target_1_loss: 2.7805 - target_2_loss: 2.5782
    # Epoch 2/5
    # 4/4 [==============================] - 0s 1ms/step - loss: 3.8924 - target_1_loss: 1.8996 - target_2_loss: 1.9927
    # Epoch 3/5
    # 4/4 [==============================] - 0s 970us/step - loss: 2.8755 - target_1_loss: 1.4582 - target_2_loss: 1.4173
    # Epoch 4/5
    # 4/4 [==============================] - 0s 943us/step - loss: 2.6111 - target_1_loss: 1.2023 - target_2_loss: 1.4088
    # Epoch 5/5
    # 4/4 [==============================] - 0s 910us/step - loss: 2.6412 - target_1_loss: 1.1902 - target_2_loss: 1.4510
    
    # generate the model predictions
    y1_pred, y2_pred = model.predict(x)
    
    print(y1_pred)
    # [[0.3716803 ]
    #  [0.22038066]
    #  [0.2840684 ]
    #  [0.09253158]
    #  [0.21785215]
    #  ...
    
    print(y2_pred)
    # [[ 0.17823327]
    #  [ 0.10360342]
    #  [ 0.12475234]
    #  [-0.04125798]
    #  [-0.25730723]
    #  ...
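
  • For reference, the ValueError in the question arises because Model(inputs=..., outputs=...) expects Keras output tensors, whereas model_target1 and model_target2 are Sequential objects. If you prefer to keep the question's Sequential sub-models, a minimal sketch (assuming the question's imports and variables) is to call them on the input tensor so that actual tensors are passed to Model. Note also that Dense(1, activation='softmax') always outputs 1.0, so a sigmoid (binary targets) or linear (regression) activation would normally be used for the heads.

    # calling the Sequential sub-models on the Input tensor yields output tensors
    out1 = model_target1(inputs)
    out2 = model_target2(inputs)
    model_share = Model(inputs=inputs, outputs=[out1, out2])
    model_share.summary()

  • If the two targets need different losses or loss weights, compile and fit also accept dicts keyed by the output layer names (target_1 and target_2 in the code above). This is a sketch of that usage; the mae loss and the 0.5 weight are chosen only for illustration.

    model.compile(
        optimizer='adam',
        loss={'target_1': 'mse', 'target_2': 'mae'},
        loss_weights={'target_1': 1.0, 'target_2': 0.5},
    )
    model.fit(x, {'target_1': y1, 'target_2': y2}, epochs=5)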