Tags: python, numpy, tensorflow, keras, functional-api

Merge 3 Deep Networks and Train End-to-End


I'm using deep learning concepts but am a beginner in them. I'm trying to build a feature-fusion model using 3 deep neural networks: the idea is to extract features from all three models, do the classification on a final single sigmoid layer, and then get the results. Here is the code that I run.

Code:

from keras.layers import Input, Dense
from keras.models import Model
from sklearn.model_selection import train_test_split
import numpy
# random seed for reproducibility
numpy.random.seed(2)
# load the Pima Indians diabetes dataset (past 5 years of medical history)
dataset = numpy.loadtxt('https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv', delimiter=",")
# split into input (X) and output (Y) variables, splitting csv data
X = dataset[:, 0:8]
Y = dataset[:, 8]
x_train, x_validation, y_train, y_validation = train_test_split(X, Y, test_size=0.20, random_state=5)
#create the input layer
input_layer = Input(shape=(8,))
A2 = Dense(8, activation='relu')(input_layer)
A3 = Dense(30, activation='relu')(A2)
B2 = Dense(40, activation='relu')(A2)
B3 = Dense(30, activation='relu')(B2)
C2 = Dense(50, activation='relu')(B2)
C3 = Dense(5, activation='relu')(C2)
merged = Model(inputs=[input_layer],outputs=[A3,B3,C3])
final_model = Dense(1, activation='sigmoid')(merged)
final_model.compile(loss="binary_crossentropy",
              optimizer="adam", metrics=['accuracy'])
# call the function to fit to the data (training the network)
final_model.fit(x_train, y_train, epochs=2000, batch_size=50,
          validation_data=(x_validation, y_validation))
# evaluate the model
scores = final_model.evaluate(x_validation,y_validation)
print("\n%s: %.2f%%" % (final_model.metrics_names[1], scores[1] * 100))

Here is the error that I'm facing:

if x.shape.ndims is None:

AttributeError: 'Functional' object has no attribute 'shape'

Please help me fix this issue, or if anyone knows what code I should use instead, let me know; I'm willing to change the code, but not the concept. Thank you.


Update

Following @M.Innat's answer, we tried the approach below. The idea is to first build the 3 models and then build a final/combined model that joins them through a single classifier. But I'm facing a discrepancy: when I train each model separately, each reaches about 90% accuracy, but when I combine them, they hardly reach 60 or 70%.

Code for MODEL 1:

from keras.models import Sequential
from keras.layers import Dense

model = Sequential()
# input layer requires input_dim param
model.add(Dense(10, input_dim=8, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(5, activation='relu'))
# sigmoid instead of relu for final probability between 0 and 1
model.add(Dense(1, activation='sigmoid'))

# compile the model, adam gradient descent (optimized)
model.compile(loss="binary_crossentropy",
              optimizer="adam", metrics=['accuracy'])

# call the function to fit to the data (training the network)
model.fit(x_train, y_train, epochs=1000, batch_size=50,
          validation_data=(x_validation, y_validation))

# evaluate the model
scores = model.evaluate(X, Y)
print("\n%s: %.2f%%" % (model.metrics_names[1], scores[1] * 100))
model.save('diabetes_risk_nn.h5')

MODEL 1 Accuracy = 94.14%, and similarly for the other two models:

MODEL 2 Accuracy = 93.62%, MODEL 3 Accuracy = 92.71%

Next, as @M.Innat suggested, we merged the models, using Models 1, 2, and 3 above. But the score is nowhere near ~90%. FINAL Combined Model:

import tensorflow as tf
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model

# Define Model A 
input_layer = Input(shape=(8,))
A2 = Dense(10, activation='relu')(input_layer)
A3 = Dense(50, activation='relu')(A2)
A4 = Dense(50, activation='relu')(A3)
A5 = Dense(50, activation='relu')(A4)
A6 = Dense(50, activation='relu')(A5)
A7 = Dense(50, activation='relu')(A6)
A8 = Dense(5, activation='relu')(A7)
model_a = Model(inputs=input_layer, outputs=A8, name="ModelA")

# Define Model B 
input_layer = Input(shape=(8,))
B2 = Dense(10, activation='relu')(input_layer)
B3 = Dense(50, activation='relu')(B2)
B4 = Dense(40, activation='relu')(B3)
B5 = Dense(60, activation='relu')(B4)
B6 = Dense(30, activation='relu')(B5)
B7 = Dense(50, activation='relu')(B6)
B8 = Dense(50, activation='relu')(B7)
B9 = Dense(5, activation='relu')(B8)
model_b = Model(inputs=input_layer, outputs=B9, name="ModelB")

# Define Model C
input_layer = Input(shape=(8,))
C2 = Dense(10, activation='relu')(input_layer)
C3 = Dense(50, activation='relu')(C2)
C4 = Dense(40, activation='relu')(C3)
C5 = Dense(40, activation='relu')(C4)
C6 = Dense(70, activation='relu')(C5)
C7 = Dense(50, activation='relu')(C6)
C8 = Dense(50, activation='relu')(C7)
C9 = Dense(60, activation='relu')(C8)
C10 = Dense(50, activation='relu')(C9)
C11 = Dense(5, activation='relu')(C10)
model_c = Model(inputs=input_layer, outputs=C11, name="ModelC")
all_three_models = [model_a, model_b, model_c]
all_three_models_input = Input(shape=all_three_models[0].input_shape[1:])

And then combine these three.

models_output = [model(all_three_models_input) for model in all_three_models]
Concat        = tf.keras.layers.concatenate(models_output, name="Concatenate")
final_out     = Dense(1, activation='sigmoid')(Concat)
final_model   = Model(inputs=all_three_models_input, outputs=final_out, name='Ensemble')
#tf.keras.utils.plot_model(final_model, expand_nested=True)
final_model.compile(loss="binary_crossentropy",
              optimizer="adam", metrics=['accuracy'])
# call the function to fit to the data (training the network)
final_model.fit(x_train, y_train, epochs=1000, batch_size=50,
          validation_data=(x_validation, y_validation))

# evaluate the model

scores = final_model.evaluate(x_validation,y_validation)
print("\n%s: %.2f%%" % (final_model.metrics_names[1], scores[1] * 100))
final_model.save('diabetes_risk_nn.h5')

But unlike the individual models, which each reached ~90%, this combined final model gave an accuracy of around 70%.


Solution

  • According to your code, there is only one model (not three). And judging by the output you're trying to produce, I think you're looking for something like this:
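    (For reference, the AttributeError itself comes from calling a Dense layer on a Model object instead of on a tensor. A minimal before/after sketch, using the same concatenate approach as the full fix below:)

    # Wrong -- a layer must be called on tensors, not on a Model;
    # this is what raises: AttributeError: 'Functional' object has no attribute 'shape'
    # final_out = Dense(1, activation='sigmoid')(merged_model)

    # Right -- merge the branch output tensors, then apply the layer to the result
    merged = tf.keras.layers.concatenate([A3, B3, C3])
    final_out = Dense(1, activation='sigmoid')(merged)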

    Dataset

    import tensorflow as tf 
    from tensorflow.keras.layers import Input, Dense
    from tensorflow.keras.models import Model
    from sklearn.model_selection import train_test_split
    import numpy
    
    # random seed for reproducibility
    numpy.random.seed(2)
    # load the Pima Indians diabetes dataset (past 5 years of medical history)
    dataset = numpy.loadtxt('https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv', delimiter=",")
    
    # split into input (X) and output (Y) variables, splitting csv data
    X = dataset[:, 0:8]
    Y = dataset[:, 8]
    
    x_train, x_validation, y_train, y_validation = train_test_split(X, Y, test_size=0.20, random_state=5)
    

    Model

    #create the input layer
    input_layer = Input(shape=(8,))
    
    A2 = Dense(8, activation='relu')(input_layer)
    A3 = Dense(30, activation='relu')(A2)
    
    B2 = Dense(40, activation='relu')(input_layer)
    B3 = Dense(30, activation='relu')(B2)
    
    C2 = Dense(50, activation='relu')(input_layer)
    C3 = Dense(5, activation='relu')(C2)
    
    
    merged = tf.keras.layers.concatenate([A3,B3,C3])
    final_out = Dense(1, activation='sigmoid')(merged)
    
    final_model = Model(inputs=[input_layer], outputs=final_out)
    tf.keras.utils.plot_model(final_model)
    

    [Model plot: three parallel Dense branches share the input layer and are concatenated into a single sigmoid output]

    Train

    final_model.compile(loss="binary_crossentropy",
                  optimizer="adam", metrics=['accuracy'])
    
    # call the function to fit to the data (training the network)
    final_model.fit(x_train, y_train, epochs=5, batch_size=50,
              validation_data=(x_validation, y_validation))
    
    # evaluate the model
    scores = final_model.evaluate(x_validation,y_validation)
    print("\n%s: %.2f%%" % (final_model.metrics_names[1], scores[1] * 100))
    
    Epoch 1/5
    13/13 [==============================] - 1s 15ms/step - loss: 0.7084 - accuracy: 0.6803 - val_loss: 0.6771 - val_accuracy: 0.6883
    Epoch 2/5
    13/13 [==============================] - 0s 5ms/step - loss: 0.6491 - accuracy: 0.6600 - val_loss: 0.5985 - val_accuracy: 0.6623
    Epoch 3/5
    13/13 [==============================] - 0s 5ms/step - loss: 0.6161 - accuracy: 0.6813 - val_loss: 0.6805 - val_accuracy: 0.6883
    Epoch 4/5
    13/13 [==============================] - 0s 5ms/step - loss: 0.6335 - accuracy: 0.7003 - val_loss: 0.6115 - val_accuracy: 0.6623
    Epoch 5/5
    13/13 [==============================] - 0s 5ms/step - loss: 0.5684 - accuracy: 0.7285 - val_loss: 0.6150 - val_accuracy: 0.6883
    5/5 [==============================] - 0s 2ms/step - loss: 0.6150 - accuracy: 0.6883
    
    accuracy: 68.83%
    

    Update

    Based on this comment of yours:

    Let me explain what I'm trying to do: first I create 3 DNN models separately, then I try to combine those models to get the features of all three; after that, I want to classify using all the extracted features and then evaluate the accuracy. That's what I'm actually trying to develop.

    • create 3 models separately - OK, 3 models
    • combine them to get features - OK, feature extractors
    • classify - OK, average the models' output feature maps and pass the result to the classifier - in other words, ensembling.

    Let's do this. First, build three models separately.

    # Define Model A 
    input_layer = Input(shape=(8,))
    A2 = Dense(8, activation='relu')(input_layer)
    A3 = Dense(30, activation='relu')(A2)
    A4 = Dense(5, activation='relu')(A3)
    model_a = Model(inputs=input_layer, outputs=A4, name="ModelA")
    
    # Define Model B 
    input_layer = Input(shape=(8,))
    B2 = Dense(8, activation='relu')(input_layer)
    B3 = Dense(30, activation='relu')(B2)
    B4 = Dense(5, activation='relu')(B3)
    model_b = Model(inputs=input_layer, outputs=B4, name="ModelB")
    
    # Define Model C
    input_layer = Input(shape=(8,))
    C2 = Dense(8, activation='relu')(input_layer)
    C3 = Dense(30, activation='relu')(C2)
    C4 = Dense(5, activation='relu')(C3)
    model_c = Model(inputs=input_layer, outputs=C4, name="ModelC")
    

    I used the same number of parameters in each; change them as you like. Anyway, these three models each act as a feature extractor (not a classifier). Next, we combine their outputs by averaging them and pass the result to the classifier.

    all_three_models = [model_a, model_b, model_c]
    all_three_models_input = Input(shape=all_three_models[0].input_shape[1:])
    
    
    models_output = [model(all_three_models_input) for model in all_three_models]
    Avg           = tf.keras.layers.average(models_output, name="Average")
    final_out     = Dense(1, activation='sigmoid')(Avg)
    final_model   = Model(inputs=all_three_models_input, outputs=final_out, name='Ensemble')
    
    tf.keras.utils.plot_model(final_model, expand_nested=True)
    

    [Model plot: ModelA, ModelB, and ModelC share one input; their outputs are averaged and fed to a single sigmoid classifier]

    Now, you can train the model and evaluate it on the test set. Hope this helps.
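    One caveat: model_a, model_b, and model_c above are built fresh, so they start from newly initialized weights. If you want the ensemble to reuse models you have already trained separately, you can copy the trained weights across before fitting the ensemble head. A minimal sketch, assuming trained_model_1 is a hypothetical already-trained Sequential model whose layer sizes match model_a exactly (its final sigmoid layer is skipped):

    # model_a.layers[0] is the InputLayer, so pair the trained Dense layers
    # (all but the sigmoid head) with model_a's Dense layers one by one
    for src_layer, dst_layer in zip(trained_model_1.layers[:-1], model_a.layers[1:]):
        dst_layer.set_weights(src_layer.get_weights())

    # optionally freeze the copied extractor so only the new classifier head trains
    model_a.trainable = False

    Repeat the same copy for model_b and model_c, then compile and fit final_model as before.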


    More info.

    (1) You can set random seeds for reproducibility.

    from tensorflow.keras.models import Model, Sequential
    from tensorflow.keras.layers import Dense, Dropout
    from sklearn.model_selection import train_test_split
    import tensorflow as tf 
    import os, numpy
    
    # random seed for reproducibility
    numpy.random.seed(101)
    tf.random.set_seed(101)
    os.environ['TF_CUDNN_DETERMINISTIC'] = '1'
    
    dataset = ...  # your data
    
    # split into input (X) and output (Y) variables, splitting csv data
    X = dataset[:, 0:8]
    Y = dataset[:, 8]
    x_train, x_validation, y_train, y_validation = train_test_split(
        X, Y, test_size=0.20, random_state=101)
    

    (2) Try the SGD optimizer. Also, use the ModelCheckpoint callback to save the weights with the highest validation accuracy.

    final_model.compile(loss="binary_crossentropy",
                  optimizer="sgd", metrics=['accuracy'])
    
    model_save = tf.keras.callbacks.ModelCheckpoint(
                    'merge_best.h5',
                    monitor="val_accuracy",
                    verbose=0,
                    save_best_only=True,
                    save_weights_only=True,
                    mode="max",
                    save_freq="epoch"
                )
    
    # call the function to fit to the data (training the network)
    final_model.fit(x_train, y_train, epochs=1000, batch_size=256, callbacks=[model_save],
              validation_data=(x_validation, y_validation))
    

    Evaluate on the test set.

    # evaluate the model
    final_model.load_weights('merge_best.h5')
    scores = final_model.evaluate(x_validation,y_validation)
    print("\n%s: %.2f%%" % (final_model.metrics_names[1], scores[1] * 100))
    
    5/5 [==============================] - 0s 4ms/step - loss: 0.6543 - accuracy: 0.7662
    
    accuracy: 76.62%
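    For completeness, a quick inference sketch with the trained ensemble; the 0.5 decision threshold here is an assumption, pick whatever suits your use case:

    # predict probabilities for a few validation rows and threshold at 0.5
    probs = final_model.predict(x_validation[:5])
    labels = (probs > 0.5).astype(int).ravel()
    print(probs.ravel(), labels)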