I'm trying to vary the number of neurons in each hidden layer of dynamically created layers. For example, model.summary() shows every hidden layer having either 5 or 10 neurons, but never 5 then 10 or 10 then 5, which is what I'd like.
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataset = 'https://raw.githubusercontent.com/plotly/datasets/master/diabetes.csv'
df_diabetes = pd.read_csv(dataset, names = names)
df_diabetes.head()
df_diabetes = df_diabetes[1:]  # drop the first row (the original header, read in as data because names= was supplied)
X = df_diabetes[['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age']]
y = df_diabetes['class']
### Normalize the input features so they are on the same scale. The errors used for back-propagation are then computed from features of a similar scale, giving smaller initial errors than with non-normalised features, and a smaller scale of errors leads to faster convergence of gradient descent when adjusting the weights under the chosen cost function.
scaler = MinMaxScaler(feature_range=[0, 1])
X_rescaled = scaler.fit_transform(X)
X = pd.DataFrame(data = X_rescaled, columns = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age'])
X.head(3)
X = X.to_numpy()
y = y.to_numpy()
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.75, random_state=1, stratify=y, shuffle=True)
def createmodel(n_layers, neurons, activation_func, optimizer, loss_func):
    model = Sequential()
    for i in range(1, n_layers):
        if i == 1:
            model.add(Dense(neurons, input_dim=X_train.shape[1], activation=activation_func))
        else:
            model.add(Dense(neurons, activation=activation_func))
    # Finally, the output layer should have a single node in binary classification
    model.summary()
    model.add(Dense(1, activation=activation_func))
    model.compile(optimizer=optimizer, loss=loss_func, metrics=["accuracy"])  # note: metrics could also be 'mse'
    return model
## Wrap the Keras model in a scikit-learn-compatible classifier
model = KerasClassifier(build_fn=createmodel, verbose=False)
# activation_funcs = ['sigmoid', 'relu', 'tanh']
activation_funcs = ['relu']
# loss_funcs = ['binary_crossentropy','hinge']
loss_funcs = ['binary_crossentropy']
# optimizers = ['rmsprop', 'adam','sgd']
optimizers = ['adam']
param_grid = dict(n_layers=[2,3], neurons=[5,10], activation_func = activation_funcs,
optimizer=optimizers, loss_func = loss_funcs, batch_size = [100], epochs = [20])
grid = GridSearchCV(estimator = model, param_grid = param_grid, verbose=2)
grid.fit(X,y)
print(grid.best_score_)
print(grid.best_params_)
pd.DataFrame(grid.cv_results_)[['mean_test_score', 'std_test_score', 'params']].to_csv('GridOptimization.csv')
You could try defining n_layers as a list of lists:
n_layers = [[5, 10], [10, 5], [5, 10, 10], [5, 5, 10]]
where the length of each inner list is the number of hidden layers and its elements are the number of nodes in each layer. Here is a working example (just copy and paste it):
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataset = 'https://raw.githubusercontent.com/plotly/datasets/master/diabetes.csv'
df_diabetes = pd.read_csv(dataset, names = names)
df_diabetes.head()
df_diabetes = df_diabetes[1:]  # drop the first row (the original header, read in as data because names= was supplied)
X = df_diabetes[['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age']]
y = df_diabetes['class']
### Normalize the input features so they are on the same scale. The errors used for back-propagation are then computed from features of a similar scale, giving smaller initial errors than with non-normalised features, and a smaller scale of errors leads to faster convergence of gradient descent when adjusting the weights under the chosen cost function.
scaler = MinMaxScaler(feature_range=[0, 1])
X_rescaled = scaler.fit_transform(X)
X = pd.DataFrame(data = X_rescaled, columns = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age'])
X.head(3)
X = X.to_numpy()
y = y.to_numpy()
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.75, random_state=1, stratify=y, shuffle=True)
def createmodel(n_layers, activation_func, optimizer, loss_func):
    model = Sequential()
    for i, neurons in enumerate(n_layers):
        if i == 0:
            model.add(Dense(neurons, input_dim=X_train.shape[1], activation=activation_func))
        else:
            model.add(Dense(neurons, activation=activation_func))
    # Finally, the output layer should have a single node in binary classification
    model.summary()
    model.add(Dense(1, activation='sigmoid'))  # a sigmoid output pairs with binary_crossentropy for binary classification
    model.compile(optimizer=optimizer, loss=loss_func, metrics=["accuracy"])  # note: metrics could also be 'mse'
    return model
model = KerasClassifier(build_fn=createmodel, verbose=False)
activation_funcs = ['relu']
loss_funcs = ['binary_crossentropy']
optimizers = ['adam']
n_layers = [[5, 10], [10, 5], [5, 10, 10],[5, 5, 10]] # The length of each list represents the number of layers and their elements are the nodes for the layers.
param_grid = dict(n_layers = n_layers, activation_func = activation_funcs,
optimizer=optimizers, loss_func = loss_funcs, batch_size = [100], epochs = [20])
grid = GridSearchCV(estimator = model, param_grid = param_grid, verbose=2)
grid.fit(X,y)
print(grid.best_score_)
print(grid.best_params_)
pd.DataFrame(grid.cv_results_)[['mean_test_score', 'std_test_score', 'params']].to_csv('GridOptimization.csv')
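If you want to sanity-check the wiring before running the grid search, here is a minimal sketch (assuming the createmodel function defined above is in scope) that builds one configuration directly and prints the width of each Dense layer; the hidden layers should now show 10 and then 5 units before the single-node output. Note also that GridSearchCV treats each inner list as a single candidate value for n_layers, so the grid above tries exactly four architectures.
# Minimal sanity check (assumes the createmodel defined above): build one
# architecture directly and confirm the hidden layers get different widths.
m = createmodel(n_layers=[10, 5], activation_func='relu',
                optimizer='adam', loss_func='binary_crossentropy')
for layer in m.layers:
    print(layer.name, layer.units)  # expect 10, 5, then 1 for the output layer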