python keras deep-learning tf.keras gridsearchcv

Varying the number of neurons per hidden layer for dynamically created layers using GridSearchCV

I'm trying to vary the number of neurons in each hidden layer independently when the layers are created dynamically. At the moment, the output of model.summary() shows every hidden layer having either 5 or 10 neurons, but never 5 then 10, or 10 then 5, which is what I'd like.

import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

# Column names applied to the CSV; the last one is the label column.
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
feature_names = names[:-1]

dataset = 'https://raw.githubusercontent.com/plotly/datasets/master/diabetes.csv'
# header=0 makes pandas treat the first row of the file as a header that is
# replaced by `names`. Previously that row was read as data and sliced off
# afterwards, which left every column (including the label) parsed as strings.
df_diabetes = pd.read_csv(dataset, header=0, names=names)
df_diabetes.head()  # notebook preview; the result is discarded in a script
X = df_diabetes[feature_names]
y = df_diabetes['class']

# Normalize the input features so that they are on the same scale. Errors
# back-propagated from similarly scaled features start out smaller, which
# leads to faster convergence of gradient descent when adjusting the weights
# with the chosen cost function.
# feature_range is documented as a tuple, so pass (0, 1) rather than a list.
scaler = MinMaxScaler(feature_range=(0, 1))
X_rescaled = scaler.fit_transform(X)
X = pd.DataFrame(data=X_rescaled, columns=feature_names)
X.head(3)  # notebook preview; the result is discarded in a script

X = X.to_numpy()
y = y.to_numpy()
# Stratified 75/25 split with a fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.75, random_state=1, stratify=y, shuffle=True)

def createmodel(n_layers, neurons, activation_func, optimizer, loss_func):
    """Build and compile a fully connected binary classifier.

    Args:
        n_layers: Layer-count setting. ``n_layers - 1`` hidden layers are
            added, followed by the single-unit output layer.
        neurons: Number of units used for every hidden layer.
        activation_func: Activation applied to the hidden layers.
        optimizer: Optimizer passed to ``model.compile``.
        loss_func: Loss passed to ``model.compile``.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()

    # Only the first layer added to the model declares the input width, which
    # is read from the module-level X_train. Carrying it in a kwargs dict that
    # is cleared after first use also covers the case of zero hidden layers,
    # where the output layer itself is the first layer.
    input_kwargs = {"input_dim": X_train.shape[1]}
    for _ in range(1, n_layers):
        model.add(Dense(neurons, activation=activation_func, **input_kwargs))
        input_kwargs = {}

    # Finally, the output layer has a single node for binary classification.
    # It uses a sigmoid rather than the hidden-layer activation so that the
    # output is a probability in [0, 1], as binary cross-entropy expects.
    model.add(Dense(1, activation="sigmoid", **input_kwargs))
    model.compile(optimizer=optimizer, loss=loss_func, metrics=["accuracy"])  # note: metrics could also be 'mse'

    # Print the summary only once the model is complete, so the output layer
    # is included and the model is guaranteed to have at least one layer.
    model.summary()

    return model

## Wrap the model builder so scikit-learn can drive it as an estimator
model = KerasClassifier(build_fn=createmodel, verbose=False)

# Candidate values for each searched setting. Wider alternatives that were
# tried at some point:
#   activation_funcs = ['sigmoid', 'relu', 'tanh']
#   loss_funcs = ['binary_crossentropy', 'hinge']
#   optimizers = ['rmsprop', 'adam', 'sgd']
activation_funcs = ['relu']
loss_funcs = ['binary_crossentropy']
optimizers = ['adam']

# n_layers, neurons, activation_func, optimizer and loss_func match the
# parameters of createmodel; batch_size and epochs are not createmodel
# parameters (presumably consumed by the KerasClassifier wrapper when fitting).
param_grid = {
    'n_layers': [2, 3],
    'neurons': [5, 10],
    'activation_func': activation_funcs,
    'optimizer': optimizers,
    'loss_func': loss_funcs,
    'batch_size': [100],
    'epochs': [20],
}
grid = GridSearchCV(estimator=model, param_grid=param_grid, verbose=2)
grid.fit(X, y)

print(grid.best_score_)
print(grid.best_params_)

# Persist the per-combination scores alongside the parameters that produced them.
cv_results = pd.DataFrame(grid.cv_results_)
report_columns = ['mean_test_score', 'std_test_score', 'params']
cv_results[report_columns].to_csv('GridOptimization.csv')

Solution

You could try defining n_layers as a list of lists:

n_layers = [[5, 10], [10, 5], [5, 10, 10],[5, 5, 10]]

where the length of each inner list is the number of hidden layers and each element is the number of neurons in the corresponding layer. Here is a working example (just copy and paste it):

import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

# Column names applied to the CSV; the last one is the label column.
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
feature_names = names[:-1]

dataset = 'https://raw.githubusercontent.com/plotly/datasets/master/diabetes.csv'
# header=0 makes pandas treat the first row of the file as a header that is
# replaced by `names`. Previously that row was read as data and sliced off
# afterwards, which left every column (including the label) parsed as strings.
df_diabetes = pd.read_csv(dataset, header=0, names=names)
df_diabetes.head()  # notebook preview; the result is discarded in a script
X = df_diabetes[feature_names]
y = df_diabetes['class']

# Normalize the input features so that they are on the same scale. Errors
# back-propagated from similarly scaled features start out smaller, which
# leads to faster convergence of gradient descent when adjusting the weights
# with the chosen cost function.
# feature_range is documented as a tuple, so pass (0, 1) rather than a list.
scaler = MinMaxScaler(feature_range=(0, 1))
X_rescaled = scaler.fit_transform(X)
X = pd.DataFrame(data=X_rescaled, columns=feature_names)
X.head(3)  # notebook preview; the result is discarded in a script

X = X.to_numpy()
y = y.to_numpy()
# Stratified 75/25 split with a fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.75, random_state=1, stratify=y, shuffle=True)

def createmodel(n_layers, activation_func, optimizer, loss_func):
    """Build and compile a fully connected binary classifier.

    Args:
        n_layers: Sequence of hidden-layer widths. One hidden layer is added
            per element, in order, with that many units (e.g. ``[5, 10]``
            gives a 5-unit layer followed by a 10-unit layer).
        activation_func: Activation applied to the hidden layers.
        optimizer: Optimizer passed to ``model.compile``.
        loss_func: Loss passed to ``model.compile``.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()

    # Only the first layer added to the model declares the input width, which
    # is read from the module-level X_train. Carrying it in a kwargs dict that
    # is cleared after first use also covers an empty n_layers, where the
    # output layer itself is the first layer.
    input_kwargs = {"input_dim": X_train.shape[1]}
    for neurons in n_layers:
        model.add(Dense(neurons, activation=activation_func, **input_kwargs))
        input_kwargs = {}

    # Finally, the output layer has a single node for binary classification.
    # It uses a sigmoid rather than the hidden-layer activation so that the
    # output is a probability in [0, 1], as binary cross-entropy expects.
    model.add(Dense(1, activation="sigmoid", **input_kwargs))
    model.compile(optimizer=optimizer, loss=loss_func, metrics=["accuracy"])  # note: metrics could also be 'mse'

    # Print the summary only once the model is complete, so the output layer
    # is included and the model is guaranteed to have at least one layer.
    model.summary()

    return model

# Wrap the model builder so scikit-learn can drive it as an estimator.
model = KerasClassifier(build_fn=createmodel, verbose=False)

# Single-valued candidates: these settings are held fixed during the search.
activation_funcs = ['relu']
loss_funcs = ['binary_crossentropy']
optimizers = ['adam']

# Each inner list is one architecture: its length is the number of hidden
# layers and its elements are the unit counts of those layers, in order.
n_layers = [
    [5, 10],
    [10, 5],
    [5, 10, 10],
    [5, 5, 10],
]

# n_layers, activation_func, optimizer and loss_func match the parameters of
# createmodel; batch_size and epochs are not createmodel parameters
# (presumably consumed by the KerasClassifier wrapper when fitting).
param_grid = {
    'n_layers': n_layers,
    'activation_func': activation_funcs,
    'optimizer': optimizers,
    'loss_func': loss_funcs,
    'batch_size': [100],
    'epochs': [20],
}
grid = GridSearchCV(estimator=model, param_grid=param_grid, verbose=2)
grid.fit(X, y)

print(grid.best_score_)
print(grid.best_params_)

# Persist the per-combination scores alongside the parameters that produced them.
cv_results = pd.DataFrame(grid.cv_results_)
report_columns = ['mean_test_score', 'std_test_score', 'params']
cv_results[report_columns].to_csv('GridOptimization.csv')