Given several input features (sequences), I want to train an LSTM RNN that makes (one-hot encoded) multi-class predictions for several features at the same time. That is, my input X has shape (observations, sequence_length, features_in), while the output y has shape (observations, class_prediction, features_out). This particular setting requires that the softmax activation of the final layer is applied along only one axis, which (afaik) can be achieved with a Lambda layer. The output I want is similar to what is described here, but with the difference that the individual "softmax arrays" are organized along another dimension. Actually, in my case, I'm using this answer from the same thread.
Now, training this model seems to work as expected (at least, no errors are thrown). However, when using RandomizedSearchCV to find the hyperparameters, the following error is raised:
ValueError: in user code:
/home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/keras/engine/training.py:853 train_function *
return step_function(self, iterator)
/home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/keras/engine/training.py:842 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
/home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:1286 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2849 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:3632 _call_for_each_replica
return fn(*args, **kwargs)
/home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/keras/engine/training.py:835 run_step **
outputs = model.train_step(data)
/home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/keras/engine/training.py:788 train_step
loss = self.compiled_loss(
/home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/keras/engine/compile_utils.py:201 __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
/home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/keras/losses.py:141 __call__
losses = call_fn(y_true, y_pred)
/home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/keras/losses.py:245 call **
return ag_fn(y_true, y_pred, **self._fn_kwargs)
/home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:206 wrapper
return target(*args, **kwargs)
/home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/keras/losses.py:1665 categorical_crossentropy
return backend.categorical_crossentropy(
/home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:206 wrapper
return target(*args, **kwargs)
/home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/keras/backend.py:4839 categorical_crossentropy
target.shape.assert_is_compatible_with(output.shape)
/home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/tensorflow/python/framework/tensor_shape.py:1161 assert_is_compatible_with
raise ValueError("Shapes %s and %s are incompatible" % (self, other))
ValueError: Shapes (None, 3, 9, 2) and (None, 3, 9) are incompatible
Note: The axis argument of CategoricalCrossentropy was only added in TensorFlow >= 2.6, so you won't be able to run the MWE below with TF v2.5 or earlier.
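If you want to fail fast on an older installation, a small guard like the following would do (my own sketch, not part of the MWE):
import tensorflow as tf
# `axis` was added to CategoricalCrossentropy in TF 2.6
major, minor = (int(p) for p in tf.__version__.split('.')[:2])
assert (major, minor) >= (2, 6), 'CategoricalCrossentropy(axis=...) needs TF >= 2.6'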
import numpy as np
import tensorflow as tf
from numpy.random import standard_normal, choice
from tensorflow import keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, LSTM, Reshape, Lambda
from tensorflow.keras.activations import softmax
from sklearn.model_selection import RandomizedSearchCV
from tensorflow.keras.utils import to_categorical
def build_model(input_shape, n_neurons, output_shape):
    inp = Input(shape=input_shape)
    # Condense each input sequence into a single vector of size n_neurons
    x = LSTM(
        n_neurons,
        return_sequences=False,
        input_shape=[None, input_shape[1]],  # redundant here, since the layer is wired to `inp`
    )(inp)
    # Project to the flattened target size, then restore (classes, features_out)
    x = Dense(np.prod(output_shape))(x)
    x = Reshape(output_shape)(x)
    # Softmax along axis 1 (the class axis): one distribution per output feature
    out = Lambda(lambda x: softmax(x, axis=1))(x)
    model = Model(inp, out)
    optimizer = tf.keras.optimizers.Adam()
    loss = tf.keras.losses.CategoricalCrossentropy(axis=1)  # must match the softmax axis (TF >= 2.6)
    model.compile(
        loss=loss,
        optimizer=optimizer,
    )
    return model
n_observations = 100
sequence_length = 5
n_features_in = 7
n_classes = 3
target_classes = [0, 1, 2] * 3
n_features_out = len(target_classes)
X = standard_normal((n_observations, sequence_length, n_features_in))
y = [
    to_categorical(choice(target_classes, n_features_out, replace=False)).T
    for _ in range(n_observations)
]
y = np.stack(y)
X.shape # (observations, sequence_length, features_in) -> (100, 5, 7)
y.shape # (observations, target classes, features_out) -> (100, 3, 9)
# Fit model (this works!)
# -----------------------------------------------------------------------------
model = build_model(X.shape[1:], 10, y.shape[1:])
_ = model.fit(X, y, epochs=4)
# Randomized Search CV (ERROR!)
# -----------------------------------------------------------------------------
keras_reg = keras.wrappers.scikit_learn.KerasRegressor(build_model)
param_distribs = {
    'n_neurons': np.arange(1, 1000),
    'input_shape': [X.shape[1:]],
    'output_shape': [y.shape[1:]],
}
grid_search_cv = RandomizedSearchCV(keras_reg, param_distribs, n_iter=2)
grid_search_cv.fit(X, y, epochs=4)
As mentioned in the comments, you can easily solve this with keras-tuner:
import numpy as np
import tensorflow as tf
from numpy.random import standard_normal, choice
from tensorflow import keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, LSTM, Reshape, Lambda
from tensorflow.keras.activations import softmax
from tensorflow.keras.utils import to_categorical
import keras_tuner as kt
def build_model(hp):
    inp = Input(shape=(5, 7))
    # Let the tuner pick the number of LSTM units
    x = LSTM(
        hp.Int('n_neurons', min_value=1, max_value=1000, step=1),
        return_sequences=False,
        input_shape=[None, 7],
    )(inp)
    x = Dense(np.prod((3, 9)))(x)
    x = Reshape((3, 9))(x)
    # Softmax along the class axis, as in the question
    out = Lambda(lambda x: softmax(x, axis=1))(x)
    model = Model(inp, out)
    optimizer = tf.keras.optimizers.Adam()
    loss = tf.keras.losses.CategoricalCrossentropy(axis=1)
    model.compile(loss=loss, optimizer=optimizer)
    return model
n_observations = 100
sequence_length = 5
n_features_in = 7
n_classes = 3
target_classes = [0, 1, 2] * 3
n_features_out = len(target_classes)
X = standard_normal((n_observations, sequence_length, n_features_in))
y = [
    to_categorical(choice(target_classes, n_features_out, replace=False)).T
    for _ in range(n_observations)
]
y = np.stack(y)
tuner = kt.RandomSearch(
    build_model,
    objective='loss',
    max_trials=5,
)
tuner.search(X, y, epochs=5)
best_model = tuner.get_best_models()[0]
You can use HyperParameters.Int to define the range you want to test out.
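For instance, you could let the tuner search the learning rate as well; here is a sketch (the 'learning_rate' hyperparameter and its range are my own illustration, not part of the original answer):
def build_model(hp):
    inp = Input(shape=(5, 7))
    x = LSTM(hp.Int('n_neurons', min_value=1, max_value=1000, step=1))(inp)
    x = Dense(np.prod((3, 9)))(x)
    x = Reshape((3, 9))(x)
    out = Lambda(lambda x: softmax(x, axis=1))(x)
    model = Model(inp, out)
    # Sample the learning rate log-uniformly between 1e-4 and 1e-2
    optimizer = tf.keras.optimizers.Adam(
        learning_rate=hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='log'),
    )
    model.compile(loss=tf.keras.losses.CategoricalCrossentropy(axis=1), optimizer=optimizer)
    return model
After the search, tuner.get_best_hyperparameters(num_trials=1)[0] gives you the winning HyperParameters object, from which you can read the chosen values with .get('n_neurons') and .get('learning_rate').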