I've been trying to run a grid search on the MNIST dataset with an MLP. Since MNIST is labeled from 0 to 9 and my output layer has 10 neurons, I'm using one-hot encoding. But as soon as I run the grid search, I get the following error: y should be a 1d array, got an array of shape (54000, 10) instead. When I instead convert the labels back with y.argmax(axis=1), I get another error: ValueError: Shapes (10, 1) and (10, 10) are incompatible. How can I overcome this issue?
I should also mention that I'm new to data science and neural networks. Thanks in advance.
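For reference, this is what the two label encodings look like (a minimal illustration; the shapes are the point):

import numpy as np
from tensorflow.keras.utils import to_categorical

y = np.array([3, 0, 9])                       # 1-D integer labels, shape (3,)
y_onehot = to_categorical(y, num_classes=10)  # one-hot matrix, shape (3, 10)
y_back = y_onehot.argmax(axis=1)              # back to 1-D integer labels, shape (3,)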
import tensorflow as tf
from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import HalvingGridSearchCV
from tensorflow.keras import Sequential
from tensorflow.keras import optimizers
from tensorflow.keras.layers import Dense
from scikeras.wrappers import KerasClassifier
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

# flatten the images and scale pixel values to [0, 1]
X_train = X_train.reshape((X_train.shape[0], 28 * 28)) / 255.0
# one-hot encode the labels: shape (60000,) -> (60000, 10)
Y_train = to_categorical(Y_train)
def create_MLP(hlayer_count=1, hlayer_1_size=64, hlayer_2_size=64,
               activation="relu", optimizer="adam", learning_rate=0.01):
    print(hlayer_count)
    model = Sequential()
    # first hidden layer
    model.add(Dense(hlayer_1_size, activation=activation, input_shape=(28 * 28,)))
    if hlayer_count == 2:
        # second hidden layer
        model.add(Dense(hlayer_2_size, activation=activation))
    # output layer
    model.add(Dense(10, activation="softmax"))
    # compile model
    metrics = ["accuracy"]
    if optimizer == "adam":
        o = optimizers.Adam(learning_rate=learning_rate)
    elif optimizer == "sgd":
        o = optimizers.SGD(learning_rate=learning_rate, momentum=0.9)
    else:
        o = optimizers.RMSprop(learning_rate=learning_rate)
    model.compile(loss="categorical_crossentropy",
                  optimizer=o,
                  metrics=metrics)
    return model
grid_model = KerasClassifier(model=create_MLP, epochs=30, verbose=4)
batch_size = [10, 20, 40, 80]
hlayer_size = [32, 64, 96, 128]
activation = ["relu", "tanh", "sigmoid"]
optimizer = ["adam", "sgd", "rmsprop"]
learning_rate = [0.001, 0.01, 0.05, 0.1]
dict1 = dict(batch_size=batch_size,
             model__hlayer_count=[1],
             model__hlayer_1_size=hlayer_size,
             model__activation=activation,
             model__optimizer=optimizer,
             model__learning_rate=learning_rate)
dict2 = dict(batch_size=batch_size,
             model__hlayer_count=[2],
             model__hlayer_1_size=hlayer_size,
             model__hlayer_2_size=hlayer_size,
             model__activation=activation,
             model__optimizer=optimizer,
             model__learning_rate=learning_rate)
grid = HalvingGridSearchCV(estimator=grid_model, param_grid=[dict1, dict2], n_jobs=-1, cv=3, error_score="raise")
grid_results = grid.fit(X_train, Y_train)
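Converting the labels back with argmax, as mentioned above, is what produces the second error:

# what I tried for the second error: one-hot labels converted back to integers
grid_results = grid.fit(X_train, Y_train.argmax(axis=1))
# -> ValueError: Shapes (10, 1) and (10, 10) are incompatible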
Using tf.keras.losses.SparseCategoricalCrossentropy() as the loss function, you can avoid this error.
This error occurs because the TensorFlow model uses the categorical crossentropy loss function, which needs a one-hot encoded target variable, while scikit-learn's GridSearchCV requires the target as a 1-D array. That is also why the argmax workaround fails: with the labels converted back to integers, the model (still compiled with categorical crossentropy) expects targets of shape (batch, 10) but receives shape (batch, 1).
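The 1-D requirement can be seen directly; in current scikit-learn versions the message above comes from this validation helper:

from sklearn.utils.validation import column_or_1d
from tensorflow.keras.utils import to_categorical

column_or_1d(to_categorical([0, 1, 2]))
# ValueError: y should be a 1d array, got an array of shape (3, 10) instead.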
X_train = X_train.reshape((X_train.shape[0], 28 * 28)) / 255.0
# Y_train = to_categorical(Y_train)
# Keep Y_train as a 1-D array containing the integer class labels,
# and use sparse categorical crossentropy when you compile the model:
model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(),
              optimizer=o,
              metrics=metrics)
For usage, see the TensorFlow beginner tutorial, which trains on MNIST with the sparse categorical crossentropy loss function.
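Putting it together, a minimal sketch of the fixed pipeline (same create_MLP and parameter grids as in the question, with only the loss swapped to SparseCategoricalCrossentropy):

(X_train, Y_train), (X_test, Y_test) = mnist.load_data()
X_train = X_train.reshape((X_train.shape[0], 28 * 28)) / 255.0
# Y_train stays a 1-D array of integer labels 0-9

grid_model = KerasClassifier(model=create_MLP, epochs=30, verbose=4)
grid = HalvingGridSearchCV(estimator=grid_model, param_grid=[dict1, dict2],
                           n_jobs=-1, cv=3, error_score="raise")
grid_results = grid.fit(X_train, Y_train)

# inspect the winning configuration
print(grid_results.best_score_)
print(grid_results.best_params_)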