I'm trying to build a stacked LSTM sequence autoencoder that takes a signal of 430 timesteps, with 1 value per timestep. The code for the model is below:
import keras
from keras import layers

feat_dim = 1
timesteps = 430
inputs = keras.Input(shape = (timesteps, feat_dim), dtype = 'float32')
x = layers.LSTM(320, activation = 'relu', return_sequences = True)(inputs)
x = layers.LSTM(256, activation = 'relu', return_sequences = True)(x)
x = layers.LSTM(128, activation = 'relu', return_sequences = True)(x)
x = layers.LSTM(64, activation = 'relu', return_sequences = True)(x)
x = layers.LSTM(32, activation = 'relu', return_sequences = True)(x)
x = layers.LSTM(16, activation = 'relu', return_sequences = True)(x)
encoded = layers.LSTM(encoding_dim, activation = 'relu', return_sequences = False)(x)
x = layers.RepeatVector(timesteps)(encoded)
x = layers.LSTM(16, activation = 'relu', return_sequences = True)(x)
x = layers.LSTM(32, activation = 'relu', return_sequences = True)(x)
x = layers.LSTM(64, activation = 'relu', return_sequences = True)(x)
x = layers.LSTM(128, activation = 'relu', return_sequences = True)(x)
x = layers.LSTM(256, activation = 'relu', return_sequences = True)(x)
x = layers.LSTM(320, activation = 'relu', return_sequences = True)(x)
decoded = layers.TimeDistributed(layers.Dense(feat_dim, activation = 'sigmoid'))(x)
autoencoder = keras.Model(input_dim, decoded)
encoder = keras.Model(input_dim, encoded)
encoded_input = keras.Input(shape=(timesteps, encoding_dim,))
decoded_layer = autoencoder.layers[-7](encoded_input)
decoded_layer = autoencoder.layers[-6](decoded_layer)
decoded_layer = autoencoder.layers[-5](decoded_layer)
decoded_layer = autoencoder.layers[-4](decoded_layer)
decoded_layer = autoencoder.layers[-3](decoded_layer)
# decoded_layer = autoencoder.layers[-2](decoded_layer)
decoded_layer = autoencoder.layers[-1](decoded_layer)
decoder = keras.Model(encoded_input, decoded)
autoencoder.compile(optimizer = 'adam', loss = 'mean_squared_error')
autoencoder.fit(xtrainsc, xtrainsc,
epochs = 50,
batch_size = 128,
shuffle = True,
validation_data = (xtestsc, xtestsc))
When I run autoencoder.summary() I get the model summary just fine, but when I run autoencoder.fit() I get the following error:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-136-aff0d412291f> in <cell line: 1>()
----> 1 autoencoder.fit(xtrainsc, xtrainsc,
2 epochs = 50,
3 batch_size = 128,
4 shuffle = True,
5 validation_data = (xtestsc, xtestsc))
/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py in error_handler(*args, **kwargs)
120 # To get the full stack trace, call:
121 # `keras.config.disable_traceback_filtering()`
--> 122 raise e.with_traceback(filtered_tb) from None
123 finally:
124 del filtered_tb
/usr/local/lib/python3.10/dist-packages/keras/src/ops/function.py in _run_through_graph(self, inputs, operation_fn, call_fn)
177 output_tensors = []
178 for x in self.outputs:
--> 179 output_tensors.append(tensor_dict[id(x)])
180
181 return tree.pack_sequence_as(self._outputs_struct, output_tensors)
KeyError: 'Exception encountered when calling Functional.call().\n\n139922652929696\n\nArguments received by Functional.call():\n • inputs=tf.Tensor(shape=(None, 430, 1), dtype=float32)\n • training=True\n • mask=None'
I've already reshaped my dataset to (batch, timesteps, features). When I try the code below, adapted from the original Keras tutorial on building autoencoders, it works, except that the train and validation losses start returning NaN values after a number of epochs.
encoded = layers.LSTM(encoding_dim, activation = 'relu')(inputs)
x = layers.RepeatVector(timesteps)(encoded)
decoded = layers.LSTM(encoding_dim, activation = 'relu', return_sequences = True)(x)
decoded = layers.TimeDistributed(layers.Dense(feat_dim, activation = 'sigmoid'))(decoded)
autoencoder = keras.Model(inputs, decoded)
encoder = keras.Model(inputs, encoded)
encoded_input = keras.Input(shape=(timesteps, encoding_dim))
decoder = keras.Model(encoded_input, decoded)
Has anyone encountered a similar error and fixed it? I'd love to get some help on this. Thanks in advance.
To fix the error, replace input_dim with inputs as the first argument of keras.Model(): input_dim is not a defined input tensor, so Keras cannot trace a path from the model's inputs to its outputs, which is what surfaces as the KeyError above. For example:
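autoencoder = keras.Model(inputs, decoded)
encoder = keras.Model(inputs, encoded)

Note that the standalone decoder will likely still fail for a similar reason: decoded belongs to the autoencoder's graph and does not depend on encoded_input, so build it from the tensor you chained off encoded_input, i.e. decoder = keras.Model(encoded_input, decoded_layer).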
For the loss becoming NaN after a few epochs: first check that there are no NaNs in your data, although since the first epochs run fine the data is probably OK. Most likely you are dealing with exploding gradients, since ReLU lets LSTM activations grow without bound (unlike the default tanh). Several possible fixes to try, sketched in the snippet below:
- replace ReLU with tanh
- decrease the learning rate
- add gradient clipping
- add weight regularisation
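A minimal, self-contained sketch with all four suggestions applied to a shallower version of your model; the layer widths, encoding_dim, L2 strength, learning rate and clipnorm values are illustrative assumptions, not tuned recommendations:

import keras
from keras import layers, regularizers

feat_dim = 1
timesteps = 430
encoding_dim = 8  # illustrative latent size, not from the original post

inputs = keras.Input(shape = (timesteps, feat_dim))
# tanh (the LSTM default) keeps activations bounded in [-1, 1], unlike ReLU
x = layers.LSTM(64, activation = 'tanh', return_sequences = True,
                kernel_regularizer = regularizers.l2(1e-4))(inputs)
encoded = layers.LSTM(encoding_dim, activation = 'tanh')(x)
x = layers.RepeatVector(timesteps)(encoded)
x = layers.LSTM(64, activation = 'tanh', return_sequences = True,
                kernel_regularizer = regularizers.l2(1e-4))(x)
decoded = layers.TimeDistributed(layers.Dense(feat_dim, activation = 'sigmoid'))(x)

autoencoder = keras.Model(inputs, decoded)
# smaller learning rate plus gradient norm clipping to tame exploding gradients
opt = keras.optimizers.Adam(learning_rate = 1e-4, clipnorm = 1.0)
autoencoder.compile(optimizer = opt, loss = 'mean_squared_error')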