I've been trying to run a grid search on the MNIST dataset with an MLP. Since MNIST is labeled from 0 to 9 and my output layer has 10 neurons, I'm using one-hot encoding. But as soon as I run the grid search, I get the following error: y should be a 1d array, got an array of shape (54000, 10) instead. When I instead convert the labels back with y.argmax(axis=1), I get another error: ValueError: Shapes (10, 1) and (10, 10) are incompatible. How can I overcome this issue?
I should also mention that I'm new to data science and neural networks. Thanks in advance.
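For reference, this is what the two label encodings look like (a minimal illustration; the shapes are the point):

import numpy as np
from tensorflow.keras.utils import to_categorical

y = np.array([3, 0, 9])                       # 1-D integer labels, shape (3,)
y_onehot = to_categorical(y, num_classes=10)  # one-hot matrix, shape (3, 10)
y_back = y_onehot.argmax(axis=1)              # back to 1-D integer labels, shape (3,)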
import tensorflow as tf
from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import HalvingGridSearchCV
from tensorflow.keras import Sequential
from tensorflow.keras import optimizers
from tensorflow.keras.layers import Dense
from scikeras.wrappers import KerasClassifier
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

# flatten the images and scale pixel values to [0, 1]
X_train = X_train.reshape((X_train.shape[0], 28 * 28)) / 255.0
# one-hot encode the labels: shape (60000,) -> (60000, 10)
Y_train = to_categorical(Y_train)
def create_MLP(hlayer_count=1, hlayer_1_size=64, hlayer_2_size=64,
               activation="relu", optimizer="adam", learning_rate=0.01):
    print(hlayer_count)
    model = Sequential()
    # first hidden layer
    model.add(Dense(hlayer_1_size, activation=activation, input_shape=(28 * 28,)))
    if hlayer_count == 2:
        # second hidden layer
        model.add(Dense(hlayer_2_size, activation=activation))
    # output layer
    model.add(Dense(10, activation="softmax"))
    # compile model
    metrics = ["accuracy"]
    if optimizer == "adam":
        o = optimizers.Adam(learning_rate=learning_rate)
    elif optimizer == "sgd":
        o = optimizers.SGD(learning_rate=learning_rate, momentum=0.9)
    else:
        o = optimizers.RMSprop(learning_rate=learning_rate)
    model.compile(loss="categorical_crossentropy",
                  optimizer=o,
                  metrics=metrics)
    return model
grid_model = KerasClassifier(model=create_MLP, epochs=30, verbose=4)
batch_size = [10, 20, 40, 80]
hlayer_size = [32, 64, 96, 128]
activation = ["relu", "tanh", "sigmoid"]
optimizer = ["adam", "sgd", "rmsprop"]
learning_rate = [0.001, 0.01, 0.05, 0.1]
dict1 = dict(batch_size=batch_size,
             model__hlayer_count=[1],
             model__hlayer_1_size=hlayer_size,
             model__activation=activation,
             model__optimizer=optimizer,
             model__learning_rate=learning_rate)
dict2 = dict(batch_size=batch_size,
             model__hlayer_count=[2],
             model__hlayer_1_size=hlayer_size,
             model__hlayer_2_size=hlayer_size,
             model__activation=activation,
             model__optimizer=optimizer,
             model__learning_rate=learning_rate)
grid = HalvingGridSearchCV(estimator=grid_model, param_grid=[dict1, dict2], n_jobs=-1, cv=3, error_score="raise")
grid_results = grid.fit(X_train, Y_train)
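Converting the labels back with argmax, as mentioned above, is what produces the second error:

# what I tried for the second error: one-hot labels converted back to integers
grid_results = grid.fit(X_train, Y_train.argmax(axis=1))
# -> ValueError: Shapes (10, 1) and (10, 10) are incompatible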
Using tf.keras.losses.SparseCategoricalCrossentropy() as the loss function, you can avoid this error.
This error occurs because the TensorFlow model uses the categorical crossentropy loss function, which needs a one-hot encoded target variable, while scikit-learn's GridSearchCV requires the target as a 1-D array. That is also why the argmax workaround fails: with the labels converted back to integers, the model (still compiled with categorical crossentropy) expects targets of shape (batch, 10) but receives shape (batch, 1).
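The 1-D requirement can be seen directly; in current scikit-learn versions the message above comes from this validation helper:

from sklearn.utils.validation import column_or_1d
from tensorflow.keras.utils import to_categorical

column_or_1d(to_categorical([0, 1, 2]))
# ValueError: y should be a 1d array, got an array of shape (3, 10) instead.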
X_train = X_train.reshape((X_train.shape[0], 28 * 28)) / 255.0
# Y_train = to_categorical(Y_train)
# Keep Y_train as a 1-D array containing the integer class labels,
# and use sparse categorical crossentropy when you compile the model:
model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(),
              optimizer=o,
              metrics=metrics)
For usage, see the TensorFlow beginner tutorial, which trains on MNIST with the sparse categorical crossentropy loss function.
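Putting it together, a minimal sketch of the fixed pipeline (same create_MLP and parameter grids as in the question, with only the loss swapped to SparseCategoricalCrossentropy):

(X_train, Y_train), (X_test, Y_test) = mnist.load_data()
X_train = X_train.reshape((X_train.shape[0], 28 * 28)) / 255.0
# Y_train stays a 1-D array of integer labels 0-9

grid_model = KerasClassifier(model=create_MLP, epochs=30, verbose=4)
grid = HalvingGridSearchCV(estimator=grid_model, param_grid=[dict1, dict2],
                           n_jobs=-1, cv=3, error_score="raise")
grid_results = grid.fit(X_train, Y_train)

# inspect the winning configuration
print(grid_results.best_score_)
print(grid_results.best_params_)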