python tensorflow keras neural-network transfer-learning

Unable to use a pre-trained model

I am looking to pre-train a model on one dataset and train the layers on another dataset.

Here is my neural network for the first dataset:

# Binary classifier for the first dataset: 30 inputs -> 100 -> 10 -> 1.
# Activations and dropout are kept as separate layers so that layer indices
# stay the same as when they are appended one by one.
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(100, input_shape=(30,)),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(1),
    tf.keras.layers.Activation('sigmoid'),
])

model.summary()
# NOTE: the author flagged a shape problem related to a "sparse" loss here;
# the loss actually configured is plain binary cross-entropy.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

print('Fitting the data to the model')
batch_size = 20
epochs = 10
# Hold out 20% of the training data for validation during fitting.
history = model.fit(
    X_train_orig_sm,
    Y_train_orig_sm,
    epochs=epochs,
    batch_size=batch_size,
    validation_split=0.2,
    verbose=1,
)
print('Evaluating the test data on the model')

Here is how I save the model:

from tensorflow.keras.models import model_from_yaml
# Dump the network description to "model.yaml" as YAML text.
# NOTE(review): to_yaml() is expected to capture the architecture only, not
# the trained weights -- confirm against the Keras documentation.
model_yaml = model.to_yaml()
with open("model.yaml", mode="w") as out_handle:
    out_handle.write(model_yaml)

Here is how I load the model and use the first 5 layers:

# Read the saved YAML description back. The context manager closes the file
# even if reading raises, which the manual open()/close() pair did not.
with open('model.yaml', 'r') as yaml_file:
    model_1_yaml = yaml_file.read()

# Rebuild the model from its YAML description.
# NOTE(review): no weights file is loaded in this snippet, so confirm whether
# model_1 actually carries the trained weights.
model_1 = model_from_yaml(model_1_yaml)

# Slicing `.layers` yields a plain Python list of layer objects -- it is not
# itself a Layer or a Model.
model_pre = model_1.layers[:5]

However, when I combine these layers with the layers of the second neural network and train the result:

# Second network: intended to reuse the pre-trained layers and add a new head.
transfer_model = tf.keras.models.Sequential()
# model_pre is a list slice of layers (model_1.layers[:5]), not a single
# layer; this is the call that triggers the TypeError shown below.
transfer_model.add(model_pre)
# New layers stacked on top of the reused ones.
transfer_model.add(tf.keras.layers.Dropout(0.2))
transfer_model.add(tf.keras.layers.Dense(10))
transfer_model.add(tf.keras.layers.Activation('relu'))
transfer_model.add(tf.keras.layers.Dropout(0.2))
# Single sigmoid unit as the output.
transfer_model.add(tf.keras.layers.Dense(1))
transfer_model.add(tf.keras.layers.Activation('sigmoid'))

transfer_model.summary()
# need sparse otherwise shape is wrong. check why
# NOTE(review): the remark above mentions "sparse", but the loss configured
# below is binary_crossentropy -- the remark looks stale; confirm.
transfer_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

print('Fitting the data to the model')
batch_size = 20
epochs = 10
# 20% of the training data is held out for validation during fitting.
history = transfer_model.fit(X_train_sm, Y_train_sm, batch_size=batch_size, epochs=epochs, verbose=1, validation_split=0.2)
print('Evaluating the test data on the model')
transfer_model.evaluate(X_test,Y_test)

I get the error below:

TypeError: The added layer must be an instance of class Layer. Found: [<tensorflow.python.keras.layers.core.Dense object at 0x7fc80dadccd0>, <tensorflow.python.keras.layers.core.Activation object at 0x7fc80dadcf50>, <tensorflow.python.keras.layers.core.Dropout object at 0x7fc80dafd350>, <tensorflow.python.keras.layers.core.Dense object at 0x7fc80daf4d50>]

Could anyone point out where I'm wrong here?

Solution

Here is how you can achieve this. I will walk you through a working example. First, we build a model, train it, and save the entire model together with its trained weights. Next, we take the top 5 layers of this model and build a new 5-layer model from them. Finally, we use that model inside the final model.

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import load_model

# Training parameters
batch_size = 256
num_classes = 10  # number of output units of the classifier below
epochs = 5

# The data, split between train and test sets
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Flatten every image to a 784-element float vector and divide by 255.
x_train = x_train.reshape(-1, 784).astype("float32") / 255
x_test = x_test.reshape(-1, 784).astype("float32") / 255

# Build the model: a stack of Dense blocks on top of a 784-feature input.
model = keras.Sequential(
    [
        keras.Input(shape=(784,)),
        layers.Dense(32, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.2),
        layers.Dense(64, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.2),
        layers.Dense(128, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.2),
        layers.Dense(256, activation='relu'),
        layers.Dropout(0.5),
        # Output layer sized by num_classes instead of a repeated literal;
        # it emits raw logits (the loss below is built with from_logits=True).
        layers.Dense(num_classes),
    ]
)

# Compile the model
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.RMSprop(),
    metrics=[keras.metrics.SparseCategoricalAccuracy()],
)

# Train the model. NOTE: the test split doubles as validation data here.
model.fit(x_train, y_train, batch_size=batch_size,
          epochs=epochs, validation_data=(x_test, y_test))

model.save('my_model.h5') # save the entire model with trained weights
new_model = load_model('my_model.h5') # load this model with its weights

Now, we take the top 5 layers from this new_model and build a new one. (This is your learning point).

# Truncate the loaded network after its fifth layer and wrap the result in a
# new Model that shares the loaded layers (and therefore their weights).
# For a Sequential model, `layers` does not list the input layer, so index 4
# is the fifth layer (the second BatchNormalization); index 5 would pull in a
# sixth layer.
top5layerModel = tf.keras.Model(
    inputs=new_model.inputs,
    outputs=new_model.layers[4].output,
)

top5layerModel.summary()

Now, we use this model (top5layerModel) as the first component of the final model, as follows:

# Build the model
final_model = keras.Sequential(
    [
        top5layerModel,
        layers.Dense(256, activation='elu'),
        layers.Dense(10)
    ]
)

final_model.summary()

Now, you can train it as usual.