Tags: python, tensorflow, keras, time-series, lstm

Python Keras LSTM "Error when checking input"


I am trying to train an LSTM RNN. This is my first time attempting this, and my first time using Keras; I used a guide to come up with this code.

Here is a sample of the data:

    lat         long        datetime                id  trip_id    mode_cat
0   39.979973   116.305745  2011-08-27 06:13:01     20  1          1
1   39.979957   116.305688  2011-08-27 06:13:02     20  1          1
2   39.979960   116.305693  2011-08-27 06:13:03     20  1          1
3   39.979970   116.305717  2011-08-27 06:13:04     20  1          1
4   39.979985   116.305732  2011-08-27 06:13:05     20  1          1

Here lat, long, datetime, id, and trip_id are the inputs, and mode_cat is the (binary) output.

Here is the part of the code where I create and train the RNN:

def evaluate_model(trainX, trainy, testX, testy):
    verbose, epochs, batch_size = 0, 15, 64
    n_timesteps, n_features, n_outputs = trainX.shape[0], trainX.shape[1], 1
    model = Sequential()
    print("adding LSTM")
    model.add(LSTM(1, input_shape=(n_timesteps,n_features), return_sequences=True))
    print("adding dropout")
    model.add(Dropout(0.5))
    print("adding dense")
    model.add(Dense(100, activation='relu'))
    print("adding dense")
    model.add(Dense(n_outputs, activation='softmax'))
    print("adding compile")
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    print(model.summary())
    # fit network
    model.fit(trainX, trainy, epochs=epochs, batch_size=batch_size, verbose=verbose)
    # evaluate model
    _, accuracy = model.evaluate(testX, testy, batch_size=batch_size, verbose=0)
    return accuracy

def run_experiment(repeats=10):
    # repeat experiment
    scores = list()
    for r in range(repeats):
        score = evaluate_model(train_x, train_y, test_x, test_y)
        score = score * 100.0
        print('>#%d: %.3f' % (r+1, score))
        scores.append(score)
    # summarize results
    summarize_results(scores)

run_experiment()

I get the following result:

adding LSTM
adding dropout
adding dense
adding dense
adding compile
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
lstm_1 (LSTM)                (None, 455414, 1)         28        
_________________________________________________________________
dropout_1 (Dropout)          (None, 455414, 1)         0         
_________________________________________________________________
dense_1 (Dense)              (None, 455414, 100)       200       
_________________________________________________________________
dense_2 (Dense)              (None, 455414, 1)         101       
=================================================================
Total params: 329
Trainable params: 329
Non-trainable params: 0
_________________________________________________________________
None
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-8-5d4acc2b23d8> in <module>()
     31     summarize_results(scores)
     32 
---> 33 run_experiment()

<ipython-input-8-5d4acc2b23d8> in run_experiment(repeats)
     24     scores = list()
     25     for r in range(repeats):
---> 26         score = evaluate_model(train_x, train_y, test_x, test_y)
     27         score = score * 100.0
     28         print('>#%d: %.3f' % (r+1, score))

<ipython-input-8-5d4acc2b23d8> in evaluate_model(trainX, trainy, testX, testy)
     15     print(model.summary())
     16     # fit network
---> 17     model.fit(trainX, trainy, epochs=epochs, batch_size=batch_size, verbose=verbose)
     18     # evaluate model
     19     _, accuracy = model.evaluate(testX, testy, batch_size=batch_size, verbose=0)

~\Anaconda3\lib\site-packages\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
   1152             sample_weight=sample_weight,
   1153             class_weight=class_weight,
-> 1154             batch_size=batch_size)
   1155 
   1156         # Prepare validation data.

~\Anaconda3\lib\site-packages\keras\engine\training.py in _standardize_user_data(self, x, y, sample_weight, class_weight, check_array_lengths, batch_size)
    577             feed_input_shapes,
    578             check_batch_axis=False,  # Don't enforce the batch size.
--> 579             exception_prefix='input')
    580 
    581         if y is not None:

~\Anaconda3\lib\site-packages\keras\engine\training_utils.py in standardize_input_data(data, names, shapes, check_batch_axis, exception_prefix)
    133                         ': expected ' + names[i] + ' to have ' +
    134                         str(len(shape)) + ' dimensions, but got array '
--> 135                         'with shape ' + str(data_shape))
    136                 if not check_batch_axis:
    137                     data_shape = data_shape[1:]

ValueError: Error when checking input: expected lstm_1_input to have 3 dimensions, but got array with shape (455414, 5)

Please advise!


Solution

  • Feeding an RNN is a bit different from feeding other networks, as it works with sequences.

    The problem in your code is the dataset format. An RNN's input should be a 3D tensor with shape [batch, timesteps, features].
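
    For a quick sanity check, here is a minimal sketch of the 3D input an LSTM layer expects (the layer width and dummy shapes are arbitrary here, not taken from your code):

    import numpy as np
    from keras.models import Sequential
    from keras.layers import LSTM

    # An LSTM consumes (batch, timesteps, features); input_shape omits the batch axis
    model = Sequential()
    model.add(LSTM(8, input_shape=(60, 5)))

    X_dummy = np.random.rand(32, 60, 5)  # 32 windows of 60 timesteps x 5 features
    print(model.predict(X_dummy).shape)  # (32, 8)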

    As you have a time-series dataset, you should preprocess your input data with a moving-window scheme.

    You should check this tutorial on time series forecasting, where the windowing scheme is implemented like this:

    import numpy as np

    def multivariate_data(dataset, target, start_index, end_index, history_size,
                          target_size, step, single_step=False):
      data = []
      labels = []
    
      start_index = start_index + history_size
      if end_index is None:
        end_index = len(dataset) - target_size
    
      for i in range(start_index, end_index):
        indices = range(i-history_size, i, step)
        data.append(dataset[indices])
    
        if single_step:
          labels.append(target[i+target_size])
        else:
          labels.append(target[i:i+target_size])
    
      return np.array(data), np.array(labels)
    
    # TRAIN_SPLIT and dataset are defined earlier in that tutorial
    past_history = 720
    future_target = 72
    STEP = 6
    
    x_train_single, y_train_single = multivariate_data(dataset, dataset[:, 1], 0,
                                                       TRAIN_SPLIT, past_history,
                                                       future_target, STEP,
                                                       single_step=True)
    x_val_single, y_val_single = multivariate_data(dataset, dataset[:, 1],
                                                   TRAIN_SPLIT, None, past_history,
                                                   future_target, STEP,
                                                   single_step=True)
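
    As a sanity check on those settings: each window spans the last 720 observations sampled every 6 steps, i.e. 120 timesteps, so (using the tutorial's variable names) the output already has the [batch, timesteps, features] layout the LSTM expects:

    print(x_train_single.shape)  # (num_samples, 120, num_features)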
    

    Here is an illustration of the moving window procedure (taken from this paper):

    [Figure: moving window procedure]

    Your problem is not exactly time-series forecasting, so you should adapt the code from that tutorial. It should look something like this:

    def moving_window(dataset, past_history):
        # datetime is left out of the features since it is not numeric
        features = dataset[['lat', 'long', 'id', 'trip_id']].values
        targets = dataset['mode_cat'].values
        data, labels = [], []
        for i in range(past_history, len(dataset)):
            data.append(features[i - past_history:i])  # window of the last past_history rows
            labels.append(targets[i])                  # label for the current row
        return np.array(data), np.array(labels)
    
    PAST_HISTORY = 60
    X, y = moving_window(dataset, PAST_HISTORY)
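
    With the sample above, the shapes come out as:

    print(X.shape)  # (len(dataset) - PAST_HISTORY, PAST_HISTORY, 4)
    print(y.shape)  # (len(dataset) - PAST_HISTORY,)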
    

    PAST_HISTORY is a hyperparameter that should be tuned.
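
    Finally, once X is 3D you also need to adjust the model definition itself. Here is a sketch of the relevant changes to your evaluate_model (note I would also swap softmax for sigmoid, since softmax over a single unit always outputs 1.0):

    n_timesteps, n_features = trainX.shape[1], trainX.shape[2]  # axes 1 and 2, not 0 and 1
    model = Sequential()
    model.add(LSTM(1, input_shape=(n_timesteps, n_features)))   # no return_sequences: one label per window
    model.add(Dropout(0.5))
    model.add(Dense(100, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))                   # sigmoid for a single binary label
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])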

    Hope it helps! :)