Given several input features (sequences), I want to train an LSTM RNN that makes (one-hot encoded) multi-class predictions for several features at the same time. That is, my input X has shape (observations, sequence_length, features_in), while the output y has shape (observations, class_prediction, features_out). This particular setting requires that the softmax activation of the final layer is applied along only one axis, which (afaik) can be achieved with a Lambda layer. The output I want is similar to what is described here, but with the difference that the individual "softmax arrays" are organized along another dimension. Actually, in my case, I'm using this answer from the same thread.
Now, training this model seems to work as expected (at least, no errors are thrown). However, when using RandomizedSearchCV to find the hyperparameters, the following error is raised:
ValueError: in user code:
/home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/keras/engine/training.py:853 train_function *
return step_function(self, iterator)
/home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/keras/engine/training.py:842 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
/home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:1286 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2849 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:3632 _call_for_each_replica
return fn(*args, **kwargs)
/home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/keras/engine/training.py:835 run_step **
outputs = model.train_step(data)
/home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/keras/engine/training.py:788 train_step
loss = self.compiled_loss(
/home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/keras/engine/compile_utils.py:201 __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
/home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/keras/losses.py:141 __call__
losses = call_fn(y_true, y_pred)
/home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/keras/losses.py:245 call **
return ag_fn(y_true, y_pred, **self._fn_kwargs)
/home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:206 wrapper
return target(*args, **kwargs)
/home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/keras/losses.py:1665 categorical_crossentropy
return backend.categorical_crossentropy(
/home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:206 wrapper
return target(*args, **kwargs)
/home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/keras/backend.py:4839 categorical_crossentropy
target.shape.assert_is_compatible_with(output.shape)
/home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/tensorflow/python/framework/tensor_shape.py:1161 assert_is_compatible_with
raise ValueError("Shapes %s and %s are incompatible" % (self, other))
ValueError: Shapes (None, 3, 9, 2) and (None, 3, 9) are incompatible
Note: The axis argument of CategoricalCrossentropy was only added in TensorFlow >= 2.6, so you won't be able to run the MWE below with TF v2.5 or earlier.
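If you want to fail fast on an older installation, a small guard like the following would do (my own sketch, not part of the MWE):
import tensorflow as tf
# `axis` was added to CategoricalCrossentropy in TF 2.6
major, minor = (int(p) for p in tf.__version__.split('.')[:2])
assert (major, minor) >= (2, 6), 'CategoricalCrossentropy(axis=...) needs TF >= 2.6'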
import numpy as np
import tensorflow as tf
from numpy.random import standard_normal, choice
from tensorflow import keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, LSTM, Reshape, Lambda
from tensorflow.keras.activations import softmax
from sklearn.model_selection import RandomizedSearchCV
from tensorflow.keras.utils import to_categorical
def build_model(input_shape, n_neurons, output_shape):
    inp = Input(shape=input_shape)
    # Condense each input sequence into a single vector of size n_neurons
    x = LSTM(
        n_neurons,
        return_sequences=False,
        input_shape=[None, input_shape[1]],  # redundant here, since the layer is wired to `inp`
    )(inp)
    # Project to the flattened target size, then restore (classes, features_out)
    x = Dense(np.prod(output_shape))(x)
    x = Reshape(output_shape)(x)
    # Softmax along axis 1 (the class axis): one distribution per output feature
    out = Lambda(lambda x: softmax(x, axis=1))(x)
    model = Model(inp, out)
    optimizer = tf.keras.optimizers.Adam()
    loss = tf.keras.losses.CategoricalCrossentropy(axis=1)  # must match the softmax axis (TF >= 2.6)
    model.compile(
        loss=loss,
        optimizer=optimizer,
    )
    return model
n_observations = 100
sequence_length = 5
n_features_in = 7
n_classes = 3
target_classes = [0, 1, 2] * 3
n_features_out = len(target_classes)
X = standard_normal((n_observations, sequence_length, n_features_in))
y = [
    to_categorical(choice(target_classes, n_features_out, replace=False)).T
    for _ in range(n_observations)
]
y = np.stack(y)
X.shape # (observations, sequence_length, features_in) -> (100, 5, 7)
y.shape # (observations, target classes, features_out) -> (100, 3, 9)
# Fit model (this works!)
# -----------------------------------------------------------------------------
model = build_model(X.shape[1:], 10, y.shape[1:])
_ = model.fit(X, y, epochs=4)
# Randomized Search CV (ERROR!)
# -----------------------------------------------------------------------------
keras_reg = keras.wrappers.scikit_learn.KerasRegressor(build_model)
param_distribs = {
    'n_neurons': np.arange(1, 1000),
    'input_shape': [X.shape[1:]],
    'output_shape': [y.shape[1:]],
}
grid_search_cv = RandomizedSearchCV(keras_reg, param_distribs, n_iter=2)
grid_search_cv.fit(X, y, epochs=4)
As mentioned in the comments, you can easily solve this with keras-tuner:
import numpy as np
import tensorflow as tf
from numpy.random import standard_normal, choice
from tensorflow import keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, LSTM, Reshape, Lambda
from tensorflow.keras.activations import softmax
from tensorflow.keras.utils import to_categorical
import keras_tuner as kt
def build_model(hp):
    inp = Input(shape=(5, 7))
    # Let the tuner pick the number of LSTM units
    x = LSTM(
        hp.Int('n_neurons', min_value=1, max_value=1000, step=1),
        return_sequences=False,
        input_shape=[None, 7],
    )(inp)
    x = Dense(np.prod((3, 9)))(x)
    x = Reshape((3, 9))(x)
    # Softmax along the class axis, as in the question
    out = Lambda(lambda x: softmax(x, axis=1))(x)
    model = Model(inp, out)
    optimizer = tf.keras.optimizers.Adam()
    loss = tf.keras.losses.CategoricalCrossentropy(axis=1)
    model.compile(loss=loss, optimizer=optimizer)
    return model
n_observations = 100
sequence_length = 5
n_features_in = 7
n_classes = 3
target_classes = [0, 1, 2] * 3
n_features_out = len(target_classes)
X = standard_normal((n_observations, sequence_length, n_features_in))
y = [
    to_categorical(choice(target_classes, n_features_out, replace=False)).T
    for _ in range(n_observations)
]
y = np.stack(y)
tuner = kt.RandomSearch(
    build_model,
    objective='loss',
    max_trials=5,
)
tuner.search(X, y, epochs=5)
best_model = tuner.get_best_models()[0]
You can use HyperParameters.Int to define the range you want to test out.
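For instance, you could let the tuner search the learning rate as well; here is a sketch (the 'learning_rate' hyperparameter and its range are my own illustration, not part of the original answer):
def build_model(hp):
    inp = Input(shape=(5, 7))
    x = LSTM(hp.Int('n_neurons', min_value=1, max_value=1000, step=1))(inp)
    x = Dense(np.prod((3, 9)))(x)
    x = Reshape((3, 9))(x)
    out = Lambda(lambda x: softmax(x, axis=1))(x)
    model = Model(inp, out)
    # Sample the learning rate log-uniformly between 1e-4 and 1e-2
    optimizer = tf.keras.optimizers.Adam(
        learning_rate=hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='log'),
    )
    model.compile(loss=tf.keras.losses.CategoricalCrossentropy(axis=1), optimizer=optimizer)
    return model
After the search, tuner.get_best_hyperparameters(num_trials=1)[0] gives you the winning HyperParameters object, from which you can read the chosen values with .get('n_neurons') and .get('learning_rate').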