I'm working with deep learning but I'm a beginner at it. I'm trying to build a feature-fusion setup using 3 deep neural network models: the idea is to get features from all three models, do classification on a last single sigmoid layer, and then get the results. Here is the code that I run.
Code:
from keras.layers import Input, Dense
from keras.models import Model
from sklearn.model_selection import train_test_split
import numpy
# random seed for reproducibility
numpy.random.seed(2)
# load the Pima Indians diabetes dataset (past 5 years of medical history)
dataset = numpy.loadtxt('https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv', delimiter=",")
# split into input (X) and output (Y) variables, splitting csv data
X = dataset[:, 0:8]
Y = dataset[:, 8]
x_train, x_validation, y_train, y_validation = train_test_split(X, Y, test_size=0.20, random_state=5)
#create the input layer
input_layer = Input(shape=(8,))
A2 = Dense(8, activation='relu')(input_layer)
A3 = Dense(30, activation='relu')(A2)
B2 = Dense(40, activation='relu')(A2)
B3 = Dense(30, activation='relu')(B2)
C2 = Dense(50, activation='relu')(B2)
C3 = Dense(5, activation='relu')(C2)
merged = Model(inputs=[input_layer],outputs=[A3,B3,C3])
final_model = Dense(1, activation='sigmoid')(merged)
final_model.compile(loss="binary_crossentropy",
                    optimizer="adam", metrics=['accuracy'])
# call the function to fit to the data (training the network)
final_model.fit(x_train, y_train, epochs=2000, batch_size=50,
validation_data=(x_validation, y_validation))
# evaluate the model
scores = final_model.evaluate(x_validation,y_validation)
print("\n%s: %.2f%%" % (final_model.metrics_names[1], scores[1] * 100))
Here is the error that I'm facing:
if x.shape.ndims is None:
AttributeError: 'Functional' object has no attribute 'shape'
Please help me fix this issue, or if anyone knows what code I should use instead, let me know. I'm willing to change the code but not the concept. Thank you.
Following @M.Innat's answer, we tried the approach below. The idea is to first build 3 models and then build a final/combined model that joins them with a single classifier. But I am facing a discrepancy: when I train each model individually, they give around 90% accuracy, yet when I combine them, they hardly reach 60 or 70%.
Code MODEL 1:
from keras.models import Sequential

model = Sequential()
# input layer requires input_dim param
model.add(Dense(10, input_dim=8, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(5, activation='relu'))
# sigmoid instead of relu for final probability between 0 and 1
model.add(Dense(1, activation='sigmoid'))
# compile the model, adam gradient descent (optimized)
model.compile(loss="binary_crossentropy",
optimizer="adam", metrics=['accuracy'])
# call the function to fit to the data (training the network)
model.fit(x_train, y_train, epochs=1000, batch_size=50,
validation_data=(x_validation, y_validation))
# evaluate the model (note: this evaluates on the full dataset X, Y, which includes the training data)
scores = model.evaluate(X, Y)
print("\n%s: %.2f%%" % (model.metrics_names[1], scores[1] * 100))
model.save('diabetes_risk_nn.h5')
MODEL 1 Accuracy = 94.14%. The other 2 models are defined the same way.
MODEL 2 Accuracy = 93.62%
MODEL 3 Accuracy = 92.71%
Next, as @M.Innat suggested, we merged the models. Here we have done that using the above Models 1, 2, 3. But the score is nowhere near ~90%. FINAL Combined Model:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model

# Define Model A
input_layer = Input(shape=(8,))
A2 = Dense(10, activation='relu')(input_layer)
A3 = Dense(50, activation='relu')(A2)
A4 = Dense(50, activation='relu')(A3)
A5 = Dense(50, activation='relu')(A4)
A6 = Dense(50, activation='relu')(A5)
A7 = Dense(50, activation='relu')(A6)
A8 = Dense(5, activation='relu')(A7)
model_a = Model(inputs=input_layer, outputs=A8, name="ModelA")
# Define Model B
input_layer = Input(shape=(8,))
B2 = Dense(10, activation='relu')(input_layer)
B3 = Dense(50, activation='relu')(B2)
B4 = Dense(40, activation='relu')(B3)
B5 = Dense(60, activation='relu')(B4)
B6 = Dense(30, activation='relu')(B5)
B7 = Dense(50, activation='relu')(B6)
B8 = Dense(50, activation='relu')(B7)
B9 = Dense(5, activation='relu')(B8)
model_b = Model(inputs=input_layer, outputs=B9, name="ModelB")
# Define Model C
input_layer = Input(shape=(8,))
C2 = Dense(10, activation='relu')(input_layer)
C3 = Dense(50, activation='relu')(C2)
C4 = Dense(40, activation='relu')(C3)
C5 = Dense(40, activation='relu')(C4)
C6 = Dense(70, activation='relu')(C5)
C7 = Dense(50, activation='relu')(C6)
C8 = Dense(50, activation='relu')(C7)
C9 = Dense(60, activation='relu')(C8)
C10 = Dense(50, activation='relu')(C9)
C11 = Dense(5, activation='relu')(C10)
model_c = Model(inputs=input_layer, outputs=C11, name="ModelC")
all_three_models = [model_a, model_b, model_c]
all_three_models_input = Input(shape=all_three_models[0].input_shape[1:])
And then combine these three.
models_output = [model(all_three_models_input) for model in all_three_models]
Concat = tf.keras.layers.concatenate(models_output, name="Concatenate")
final_out = Dense(1, activation='sigmoid')(Concat)
final_model = Model(inputs=all_three_models_input, outputs=final_out, name='Ensemble')
#tf.keras.utils.plot_model(final_model, expand_nested=True)
final_model.compile(loss="binary_crossentropy",
optimizer="adam", metrics=['accuracy'])
# call the function to fit to the data (training the network)
final_model.fit(x_train, y_train, epochs=1000, batch_size=50,
validation_data=(x_validation, y_validation))
# evaluate the model
scores = final_model.evaluate(x_validation,y_validation)
print("\n%s: %.2f%%" % (final_model.metrics_names[1], scores[1] * 100))
final_model.save('diabetes_risk_nn.h5')
But unlike the individual models, which each gave ~90%, this combined final model only reaches an accuracy of around 70%.
According to your code, there is only one model (not three): A2, B2, and C2 are chained off one another, so everything forms a single graph. The AttributeError itself comes from Dense(1, activation='sigmoid')(merged), because merged is a Model object and a layer must be called on tensors, not on a model. Judging by the output you tried to build, I think you're looking for something like this:
Dataset
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split
import numpy
# random seed for reproducibility
numpy.random.seed(2)
# load the Pima Indians diabetes dataset (past 5 years of medical history)
dataset = numpy.loadtxt('https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv', delimiter=",")
# split into input (X) and output (Y) variables, splitting csv data
X = dataset[:, 0:8]
Y = dataset[:, 8]
x_train, x_validation, y_train, y_validation = train_test_split(X, Y, test_size=0.20, random_state=5)
Model
#create the input layer
input_layer = Input(shape=(8,))
A2 = Dense(8, activation='relu')(input_layer)
A3 = Dense(30, activation='relu')(A2)
B2 = Dense(40, activation='relu')(input_layer)
B3 = Dense(30, activation='relu')(B2)
C2 = Dense(50, activation='relu')(input_layer)
C3 = Dense(5, activation='relu')(C2)
merged = tf.keras.layers.concatenate([A3,B3,C3])
final_out = Dense(1, activation='sigmoid')(merged)
final_model = Model(inputs=[input_layer], outputs=final_out)
tf.keras.utils.plot_model(final_model)
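Note: plot_model needs the pydot package and Graphviz installed; it only draws the architecture diagram, so you can skip this line if they are not available.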
Train
final_model.compile(loss="binary_crossentropy",
optimizer="adam", metrics=['accuracy'])
# call the function to fit to the data (training the network)
final_model.fit(x_train, y_train, epochs=5, batch_size=50,
validation_data=(x_validation, y_validation))
# evaluate the model
scores = final_model.evaluate(x_validation,y_validation)
print("\n%s: %.2f%%" % (final_model.metrics_names[1], scores[1] * 100))
Epoch 1/5
13/13 [==============================] - 1s 15ms/step - loss: 0.7084 - accuracy: 0.6803 - val_loss: 0.6771 - val_accuracy: 0.6883
Epoch 2/5
13/13 [==============================] - 0s 5ms/step - loss: 0.6491 - accuracy: 0.6600 - val_loss: 0.5985 - val_accuracy: 0.6623
Epoch 3/5
13/13 [==============================] - 0s 5ms/step - loss: 0.6161 - accuracy: 0.6813 - val_loss: 0.6805 - val_accuracy: 0.6883
Epoch 4/5
13/13 [==============================] - 0s 5ms/step - loss: 0.6335 - accuracy: 0.7003 - val_loss: 0.6115 - val_accuracy: 0.6623
Epoch 5/5
13/13 [==============================] - 0s 5ms/step - loss: 0.5684 - accuracy: 0.7285 - val_loss: 0.6150 - val_accuracy: 0.6883
5/5 [==============================] - 0s 2ms/step - loss: 0.6150 - accuracy: 0.6883
accuracy: 68.83%
Based on this comment of yours:
Let me explain what I'm trying to do: first I create 3 DNN models separately, then I combine those models to get the features of all three; after that I want to classify using all the extracted features and then evaluate the accuracy. That's what I'm actually trying to develop.
Let's do this. First, build three models separately.
# Define Model A
input_layer = Input(shape=(8,))
A2 = Dense(8, activation='relu')(input_layer)
A3 = Dense(30, activation='relu')(A2)
C3 = Dense(5, activation='relu')(A3)
model_a = Model(inputs=input_layer, outputs=C3, name="ModelA")
# Define Model B
input_layer = Input(shape=(8,))
A2 = Dense(8, activation='relu')(input_layer)
A3 = Dense(30, activation='relu')(A2)
C3 = Dense(5, activation='relu')(A3)
model_b = Model(inputs=input_layer, outputs=C3, name="ModelB")
# Define Model C
input_layer = Input(shape=(8,))
A2 = Dense(8, activation='relu')(input_layer)
A3 = Dense(30, activation='relu')(A2)
C3 = Dense(5, activation='relu')(A3)
model_c = Model(inputs=input_layer, outputs=C3, name="ModelC")
I used the same number of parameters for each; change them yourself. Anyway, these three models each act as a feature extractor (not a classifier). Next, we will combine their outputs by averaging them and then pass that to the classifier.
all_three_models = [model_a, model_b, model_c]
all_three_models_input = Input(shape=all_three_models[0].input_shape[1:])
models_output = [model(all_three_models_input) for model in all_three_models]
Avg = tf.keras.layers.average(models_output, name="Average")
final_out = Dense(1, activation='sigmoid')(Avg)
final_model = Model(inputs=all_three_models_input, outputs=final_out, name='Ensemble')
tf.keras.utils.plot_model(final_model, expand_nested=True)
Now, you can train the model and evaluate it on the test set.
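A minimal training and evaluation sketch, reusing the same compile/fit/evaluate pattern from earlier in this post and assuming the x_train / x_validation split defined above; the optimizer, epoch count, and batch size are simply the values used earlier, so tune them for your data:
# compile the ensemble the same way as the individual models
final_model.compile(loss="binary_crossentropy",
                    optimizer="adam", metrics=['accuracy'])
# train on the split created earlier
final_model.fit(x_train, y_train, epochs=1000, batch_size=50,
                validation_data=(x_validation, y_validation))
# evaluate on the held-out validation data
scores = final_model.evaluate(x_validation, y_validation)
print("\n%s: %.2f%%" % (final_model.metrics_names[1], scores[1] * 100))
Hope this helps.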
(1) You can set seeds for reproducibility.
import os, numpy
import tensorflow as tf
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.model_selection import train_test_split
# random seed for reproducibility
numpy.random.seed(101)
tf.random.set_seed(101)
os.environ['TF_CUDNN_DETERMINISTIC'] = '1'
dataset = ...  # load your data here
# split into input (X) and output (Y) variables, splitting csv data
X = dataset[:, 0:8]
Y = dataset[:, 8]
x_train, x_validation, y_train, y_validation = train_test_split(X, Y,
test_size=0.20, random_state=101)
(2) Try the SGD optimizer. Also, use the ModelCheckpoint callback to save the weights of the epoch with the highest validation accuracy.
final_model.compile(loss="binary_crossentropy",
optimizer="sgd", metrics=['accuracy'])
model_save = tf.keras.callbacks.ModelCheckpoint(
'merge_best.h5',
monitor="val_accuracy",
verbose=0,
save_best_only=True,
save_weights_only=True,
mode="max",
save_freq="epoch"
)
# call the function to fit to the data (training the network)
final_model.fit(x_train, y_train, epochs=1000, batch_size=256, callbacks=[model_save],
validation_data=(x_validation, y_validation))
Evaluate on the test set.
# evaluate the model
final_model.load_weights('merge_best.h5')
scores = final_model.evaluate(x_validation,y_validation)
print("\n%s: %.2f%%" % (final_model.metrics_names[1], scores[1] * 100))
5/5 [==============================] - 0s 4ms/step - loss: 0.6543 - accuracy: 0.7662
accuracy: 76.62%