Search code examples
pythonkerasdeep-learningtf.kerasgridsearchcv

Varying the number of neurons per hidden layer for dynamically created layers using GridSearchCV


I'm trying to vary the number of neurons in each hidden layer independently when the layers are created dynamically. Currently, the output of model.summary() shows every hidden layer with the same size (all 5 neurons or all 10 neurons), never a mix such as 5 then 10, or 10 then 5, which is what I'd like.

import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

# Column names applied to the diabetes CSV: 'class' is the target label and
# every other column is an input feature.
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
feature_names = names[:-1]

dataset = 'https://raw.githubusercontent.com/plotly/datasets/master/diabetes.csv'
# header=0 makes pandas consume the first row of the file as a header and
# replace it with `names`. Reading that row as data and slicing it off
# afterwards keeps the same rows but leaves every column as object (string)
# dtype instead of numeric.
df_diabetes = pd.read_csv(dataset, names=names, header=0)
X = df_diabetes[feature_names]
y = df_diabetes['class']

# Normalize the input features so that they are on the same scale. This is so
# that the errors calculated for back-propagation come from features of
# similar magnitude, which gives smaller initial errors than unnormalized data
# and faster convergence of gradient descent.
# feature_range is documented as a (min, max) tuple; newer scikit-learn
# releases validate the type, so a tuple is used rather than a list.
scaler = MinMaxScaler(feature_range=(0, 1))
X_rescaled = scaler.fit_transform(X)

# fit_transform returns a NumPy array by default, so no DataFrame round trip
# is needed before splitting.
X = X_rescaled
y = y.to_numpy()
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.75, random_state=1, stratify=y, shuffle=True)

def createmodel(n_layers, neurons, activation_func, optimizer, loss_func):
    """Build and compile a fully connected binary classifier.

    Args:
        n_layers: Total number of Dense layers including the single-unit
            output layer, so ``n_layers - 1`` hidden layers are created.
        neurons: Number of units used for every hidden layer (all hidden
            layers share this one value).
        activation_func: Activation applied to each hidden layer.
        optimizer: Optimizer passed to ``model.compile``.
        loss_func: Loss passed to ``model.compile``.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()

    for layer_index in range(n_layers - 1):
        if layer_index == 0:
            # Only the first layer is told the width of the input features.
            model.add(Dense(neurons, input_dim=X_train.shape[1], activation=activation_func))
        else:
            model.add(Dense(neurons, activation=activation_func))

    # Finally, the output layer should have a single node in binary
    # classification. It uses a sigmoid rather than the hidden-layer
    # activation so the output is a probability in [0, 1]; an unbounded
    # activation such as 'relu' does not give a valid probability.
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer=optimizer, loss=loss_func, metrics=["accuracy"])  # note: metrics could also be 'mse'

    # Printed after the output layer is added so the summary shows the whole
    # network rather than only the hidden layers.
    model.summary()

    return model

# Expose the Keras model builder through the scikit-learn estimator API.
model = KerasClassifier(build_fn=createmodel, verbose=False)

# Each list below currently holds a single candidate. Wider alternatives:
#   activation_funcs: ['sigmoid', 'relu', 'tanh']
#   loss_funcs:       ['binary_crossentropy', 'hinge']
#   optimizers:       ['rmsprop', 'adam', 'sgd']
activation_funcs = ['relu']
loss_funcs = ['binary_crossentropy']
optimizers = ['adam']

# Hyper-parameter grid: the createmodel arguments plus the fit-time
# batch_size and epochs settings.
param_grid = {
    'n_layers': [2, 3],
    'neurons': [5, 10],
    'activation_func': activation_funcs,
    'optimizer': optimizers,
    'loss_func': loss_funcs,
    'batch_size': [100],
    'epochs': [20],
}
grid = GridSearchCV(estimator=model, param_grid=param_grid, verbose=2)
grid.fit(X, y)

print(grid.best_score_)
print(grid.best_params_)

# Save the per-combination cross-validation scores for later inspection.
report_columns = ['mean_test_score', 'std_test_score', 'params']
cv_report = pd.DataFrame(grid.cv_results_)[report_columns]
cv_report.to_csv('GridOptimization.csv')

Solution

  • You could try defining n_layers as a list of lists:

    n_layers = [[5, 10], [10, 5], [5, 10, 10],[5, 5, 10]]
    

    where the length of each inner list is the number of hidden layers and each element is the number of neurons in the corresponding hidden layer. Here is a working example (just copy and paste it):

    import pandas as pd
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import Dense
    from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
    from sklearn.preprocessing import MinMaxScaler
    from sklearn.model_selection import train_test_split
    from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
    
    # Column names applied to the diabetes CSV: 'class' is the target label and
    # every other column is an input feature.
    names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
    feature_names = names[:-1]
    
    dataset = 'https://raw.githubusercontent.com/plotly/datasets/master/diabetes.csv'
    # header=0 makes pandas consume the first row of the file as a header and
    # replace it with `names`. Reading that row as data and slicing it off
    # afterwards keeps the same rows but leaves every column as object (string)
    # dtype instead of numeric.
    df_diabetes = pd.read_csv(dataset, names=names, header=0)
    X = df_diabetes[feature_names]
    y = df_diabetes['class']
    
    # Normalize the input features so that they are on the same scale. This is so
    # that the errors calculated for back-propagation come from features of
    # similar magnitude, which gives smaller initial errors than unnormalized data
    # and faster convergence of gradient descent.
    # feature_range is documented as a (min, max) tuple; newer scikit-learn
    # releases validate the type, so a tuple is used rather than a list.
    scaler = MinMaxScaler(feature_range=(0, 1))
    X_rescaled = scaler.fit_transform(X)
    
    # fit_transform returns a NumPy array by default, so no DataFrame round trip
    # is needed before splitting.
    X = X_rescaled
    y = y.to_numpy()
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=0.75, random_state=1, stratify=y, shuffle=True)
    
    def createmodel(n_layers, activation_func, optimizer, loss_func):
        """Build and compile a fully connected binary classifier.
    
        Args:
            n_layers: Sequence of hidden-layer sizes. Its length is the number
                of hidden layers and each element is the number of units in
                the corresponding layer, e.g. ``[5, 10]``.
            activation_func: Activation applied to each hidden layer.
            optimizer: Optimizer passed to ``model.compile``.
            loss_func: Loss passed to ``model.compile``.
    
        Returns:
            The compiled ``Sequential`` model.
        """
        model = Sequential()
    
        for layer_index, neurons in enumerate(n_layers):
            if layer_index == 0:
                # Only the first layer is told the width of the input features.
                model.add(Dense(neurons, input_dim=X_train.shape[1], activation=activation_func))
            else:
                model.add(Dense(neurons, activation=activation_func))
    
        # Finally, the output layer should have a single node in binary
        # classification. It uses a sigmoid rather than the hidden-layer
        # activation so the output is a probability in [0, 1]; an unbounded
        # activation such as 'relu' does not give a valid probability.
        model.add(Dense(1, activation='sigmoid'))
        model.compile(optimizer=optimizer, loss=loss_func, metrics=["accuracy"])  # note: metrics could also be 'mse'
    
        # Printed after the output layer is added so the summary shows the whole
        # network rather than only the hidden layers.
        model.summary()
    
        return model
    
    # Expose the Keras model builder through the scikit-learn estimator API.
    model = KerasClassifier(build_fn=createmodel, verbose=False)
    
    # Single-candidate settings for the parts of the grid that are held fixed.
    activation_funcs = ['relu']
    loss_funcs = ['binary_crossentropy']
    optimizers = ['adam']
    
    # Each inner list is one candidate architecture: its length is the number
    # of layers and its elements are the node counts of those layers.
    n_layers = [[5, 10], [10, 5], [5, 10, 10], [5, 5, 10]]
    param_grid = {
        'n_layers': n_layers,
        'activation_func': activation_funcs,
        'optimizer': optimizers,
        'loss_func': loss_funcs,
        'batch_size': [100],
        'epochs': [20],
    }
    grid = GridSearchCV(estimator=model, param_grid=param_grid, verbose=2)
    grid.fit(X, y)
    
    print(grid.best_score_)
    print(grid.best_params_)
    
    # Save the per-combination cross-validation scores for later inspection.
    report_columns = ['mean_test_score', 'std_test_score', 'params']
    cv_report = pd.DataFrame(grid.cv_results_)[report_columns]
    cv_report.to_csv('GridOptimization.csv')