Tags: python, tensorflow, machine-learning, keras, valueerror

ValueError while trying to run the Sequential Model from Keras


I'm trying to build a NARX neural network with Keras. I'm still not 100% sure about the use of the argument return_sequences=True in the LSTM layers, but before I can check that, I need to get the code running. When I try to run it I get the following message:

ValueError: Error when checking input: expected lstm_84_input to have 3 dimensions, but got array with shape (6686, 3)

See my code below. The error is raised by the model.fit call. My data has the shape 40101 time steps x 6 features (3 exogenous inputs, 3 system responses).

import numpy as np
import pandas as pd
from sklearn.model_selection import TimeSeriesSplit
import tensorflow as tf
from tensorflow.keras import initializers

# --- main
data = pd.read_excel('example.xlsx',usecols=['wave','wind','current','X','Y','RZ'])
data.plot(subplots=True, figsize=[15,10])

x_data = np.array(data.loc[:,['wave','wind','current']])
y_data = np.array(data.loc[:,['X','Y','RZ']])

timeSeriesCrossValidation = TimeSeriesSplit(n_splits=5)
for train, validation in timeSeriesCrossValidation.split(x_data, y_data):  

    # create model
    model = tf.keras.models.Sequential()
    
    # input layer
    model.add(tf.keras.layers.LSTM(units=50,
                                   input_shape=(40101,3),
                                   dropout=0.01,
                                   recurrent_dropout=0.2,
                                   kernel_initializer=initializers.RandomNormal(mean=0,stddev=.5),
                                   bias_initializer=initializers.Zeros(),
                                   return_sequences = True))
    
    # 1st hidden layer
    model.add(tf.keras.layers.LSTM(units=50,
                                   dropout=0.01,
                                   recurrent_dropout=0.2,
                                   kernel_initializer=initializers.RandomNormal(mean=0,stddev=.5),
                                   bias_initializer=initializers.Zeros(),
                                   return_sequences = True))
    
    # 2nd hidden layer
    model.add(tf.keras.layers.LSTM(units=50,
                                   dropout=0.01,
                                   recurrent_dropout=0.2,
                                   kernel_initializer=initializers.RandomNormal(mean=0,stddev=.5),
                                   bias_initializer=initializers.Zeros(),
                                   return_sequences = False))
    
    # output layer
    model.add(tf.keras.layers.Dense(3))
    
    model.compile(loss='mse',optimizer='nadam',metrics=['accuracy'])

    model.fit(x_data[train], y_data[train],
              verbose=2,
              batch_size=None,
              epochs=10,
              validation_data=(x_data[validation], y_data[validation])
              #callbacks=early_stop
              )
    
    prediction = model.predict(x_data[validation])
    y_validation = y_data[validation]

Solution

  • LSTM layers need input in 3 dimensions:

    (n_samples, time_steps, features)
    

    You passed data with this format:

    (n_samples, features)
    

    Since you don't have a function to create time steps, the easiest solution would be to change your input to shape:

    (40101, 1, 3)
    

    Bogus data:

    x_data = np.random.rand(40101, 1, 3)
    y_data = np.random.rand(40101, 3)
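
    If you'd rather keep your real data than use bogus data, the same reshape can be applied directly to your (40101, 3) array (a minimal sketch, assuming x_data is the array you built from the Excel file):

    import numpy as np

    # add a time_steps axis of length 1: (40101, 3) -> (40101, 1, 3)
    x_data = np.expand_dims(x_data, axis=1)
    # equivalently: x_data = x_data.reshape(-1, 1, 3)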
    

    Also, you shouldn't pass the number of samples in the input_shape argument of a Keras layer. Just use this:

    input_shape=(1, 3)
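
    Keras infers the batch dimension on its own; you can verify this with model.summary() (a quick check, not part of the fix):

    import tensorflow as tf

    m = tf.keras.Sequential([tf.keras.layers.LSTM(5, input_shape=(1, 3))])
    m.summary()  # output shape is (None, 5): the batch size stays unspecified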
    

    So here is the corrected code (with bogus data):

    import numpy as np
    from sklearn.model_selection import TimeSeriesSplit
    import tensorflow as tf
    from tensorflow.keras import initializers
    from tensorflow.keras.layers import *
    
    x_data = np.random.rand(40101, 1, 3)
    y_data = np.random.rand(40101, 3)
    
    timeSeriesCrossValidation = TimeSeriesSplit(n_splits=5)
    for train, validation in timeSeriesCrossValidation.split(x_data, y_data):
        # create model
        model = tf.keras.models.Sequential()
    
        # input layer
        model.add(LSTM(units=5,
                       input_shape=(1, 3),
                       dropout=0.01,
                       recurrent_dropout=0.2,
                       kernel_initializer=initializers.RandomNormal(mean=0, stddev=.5),
                       bias_initializer=initializers.Zeros(),
                       return_sequences=True))
    
        # 1st hidden layer
        model.add(LSTM(units=5,
                       dropout=0.01,
                       recurrent_dropout=0.2,
                       kernel_initializer=initializers.RandomNormal(mean=0, stddev=.5),
                       bias_initializer=initializers.Zeros(),
                       return_sequences=True))
    
    # 2nd hidden layer
        model.add(LSTM(units=50,
                       dropout=0.01,
                       recurrent_dropout=0.2,
                       kernel_initializer=initializers.RandomNormal(mean=0, stddev=.5),
                       bias_initializer=initializers.Zeros(),
                       return_sequences=False))
    
        # output layer
        model.add(tf.keras.layers.Dense(3))
    
    model.compile(loss='mse', optimizer='nadam', metrics=['accuracy'])  # note: 'accuracy' is not meaningful for regression; consider 'mae' instead
    
        model.fit(x_data[train], y_data[train],
                  verbose=2,
                  batch_size=None,
                  epochs=1,
                  validation_data=(x_data[validation], y_data[validation])
                  # callbacks=early_stop
                  )
    
        prediction = model.predict(x_data[validation])
        y_validation = y_data[validation]
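
    A side note on the return_sequences question from the post: when LSTM layers are stacked, every layer except the last needs return_sequences=True so that the next LSTM still receives a 3-D sequence; the final LSTM uses return_sequences=False to hand a single vector per sample to the Dense layer. A quick shape check (a sketch, not part of the fix):

    import tensorflow as tf

    x = tf.random.normal((2, 1, 3))                                  # (batch, time_steps, features)
    print(tf.keras.layers.LSTM(5, return_sequences=True)(x).shape)   # (2, 1, 5) -- still a sequence
    print(tf.keras.layers.LSTM(5, return_sequences=False)(x).shape)  # (2, 5)    -- one vector per sample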
    

    If you want a function to create time steps, use this:

    def multivariate_data(dataset, target, start_index, end_index, history_size,
                          target_size, step, single_step=False):
      data = []
      labels = []

      # the first window needs history_size past steps, so start later
      start_index = start_index + history_size
      if end_index is None:
        end_index = len(dataset) - target_size

      for i in range(start_index, end_index):
        # window of the last history_size steps, sampled every `step` steps
        indices = range(i - history_size, i, step)
        data.append(dataset[indices])

        if single_step:
          # predict a single point, target_size steps ahead
          labels.append(target[i + target_size])
        else:
          # predict the next target_size points
          labels.append(target[i:i + target_size])

      return np.array(data), np.array(labels)
    

    It will give you the right shape, e.g.:

    multivariate_data(dataset=np.random.rand(40101, 3),
                      target=np.random.rand(40101, 3),
                      start_index=0, end_index=None,
                      history_size=5, target_size=0,
                      step=1, single_step=True)[0].shape
    
    (40096, 5, 3)
    

    You lose the first 5 data points because at the start of the series there aren't 5 earlier steps to look back on.
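
    If you feed the model windowed data like that, the input_shape of the first LSTM changes accordingly (a sketch using the multivariate_data function above, assuming history_size=5 as in the example):

    import numpy as np
    import tensorflow as tf
    from tensorflow.keras.layers import LSTM

    x_win, y_win = multivariate_data(dataset=np.random.rand(40101, 3),
                                     target=np.random.rand(40101, 3),
                                     start_index=0, end_index=None,
                                     history_size=5, target_size=0,
                                     step=1, single_step=True)

    model = tf.keras.models.Sequential()
    # each sample now spans 5 time steps of 3 features
    model.add(LSTM(units=50, input_shape=(5, 3), return_sequences=True))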