I am trying to train an LSTM RNN. This is my first time attempting this, and my first time using Keras. I used a guide to come up with this code.
Here is a sample of the data:
lat long datetime id trip_id mode_cat
0 39.979973 116.305745 2011-08-27 06:13:01 20 1 1
1 39.979957 116.305688 2011-08-27 06:13:02 20 1 1
2 39.979960 116.305693 2011-08-27 06:13:03 20 1 1
3 39.979970 116.305717 2011-08-27 06:13:04 20 1 1
4 39.979985 116.305732 2011-08-27 06:13:05 20 1 1
Here lat, long, datetime, id and trip_id are the inputs and mode_cat is the output (the output is binary).
Here is the part of the code where I create and train the RNN:
def evaluate_model(trainX, trainy, testX, testy):
    """Build, train and score one LSTM classifier; return its test accuracy.

    The network is LSTM(1) -> Dropout(0.5) -> Dense(100, relu) ->
    Dense(1, softmax), compiled with binary cross-entropy and Adam, then
    fitted for 15 epochs with a batch size of 64.

    Args:
        trainX: Training inputs.
        trainy: Training labels.
        testX: Test inputs.
        testy: Test labels.

    Returns:
        The second value returned by ``model.evaluate`` on the test data,
        which is the 'accuracy' metric requested at compile time.
    """
    verbose = 0
    epochs = 15
    batch_size = 64

    # NOTE(review): shape[0] / shape[1] are the row and column counts of a
    # 2-D array. A Keras LSTM expects 3-D input (batch, timesteps, features),
    # so with a 2-D trainX the model is declared with one timestep per row
    # and fit() rejects the data.
    n_timesteps = trainX.shape[0]
    n_features = trainX.shape[1]
    n_outputs = 1

    model = Sequential()
    print("adding LSTM")
    # NOTE(review): return_sequences=True keeps one output per timestep, so
    # every following layer also emits one value per timestep -- confirm that
    # is intended for a single label per sequence.
    model.add(
        LSTM(1, return_sequences=True, input_shape=(n_timesteps, n_features))
    )
    print("adding dropout")
    model.add(Dropout(0.5))
    print("adding dense")
    model.add(Dense(100, activation='relu'))
    print("adding dense")
    # NOTE(review): softmax over a single unit always yields 1.0; sigmoid is
    # the usual pairing with binary_crossentropy.
    model.add(Dense(n_outputs, activation='softmax'))
    print("adding compile")
    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    print(model.summary())

    # Train on the training split.
    model.fit(
        trainX,
        trainy,
        batch_size=batch_size,
        epochs=epochs,
        verbose=verbose,
    )

    # Score on the held-out split; the loss is discarded.
    _loss, accuracy = model.evaluate(
        testX, testy, batch_size=batch_size, verbose=0
    )
    return accuracy
def run_experiment(repeats=10):
    """Train and score the model ``repeats`` times, then summarise the runs.

    Each run calls ``evaluate_model`` on the module-level train/test arrays,
    converts the returned accuracy to a percentage, prints it with its
    1-based run number, and collects it. The collected percentages are passed
    to ``summarize_results`` once all runs have finished.
    """
    percentages = []
    for run_number in range(1, repeats + 1):
        accuracy = evaluate_model(train_x, train_y, test_x, test_y)
        pct = accuracy * 100.0
        print('>#%d: %.3f' % (run_number, pct))
        percentages.append(pct)
    # Report on all runs together.
    summarize_results(percentages)


run_experiment()
I get the following result:
adding LSTM
adding dropout
adding dense
adding dense
adding compile
Model: "sequential_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
lstm_1 (LSTM) (None, 455414, 1) 28
_________________________________________________________________
dropout_1 (Dropout) (None, 455414, 1) 0
_________________________________________________________________
dense_1 (Dense) (None, 455414, 100) 200
_________________________________________________________________
dense_2 (Dense) (None, 455414, 1) 101
=================================================================
Total params: 329
Trainable params: 329
Non-trainable params: 0
_________________________________________________________________
None
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-8-5d4acc2b23d8> in <module>()
31 summarize_results(scores)
32
---> 33 run_experiment()
<ipython-input-8-5d4acc2b23d8> in run_experiment(repeats)
24 scores = list()
25 for r in range(repeats):
---> 26 score = evaluate_model(train_x, train_y, test_x, test_y)
27 score = score * 100.0
28 print('>#%d: %.3f' % (r+1, score))
<ipython-input-8-5d4acc2b23d8> in evaluate_model(trainX, trainy, testX, testy)
15 print(model.summary())
16 # fit network
---> 17 model.fit(trainX, trainy, epochs=epochs, batch_size=batch_size, verbose=verbose)
18 # evaluate model
19 _, accuracy = model.evaluate(testX, testy, batch_size=batch_size, verbose=0)
~\Anaconda3\lib\site-packages\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
1152 sample_weight=sample_weight,
1153 class_weight=class_weight,
-> 1154 batch_size=batch_size)
1155
1156 # Prepare validation data.
~\Anaconda3\lib\site-packages\keras\engine\training.py in _standardize_user_data(self, x, y, sample_weight, class_weight, check_array_lengths, batch_size)
577 feed_input_shapes,
578 check_batch_axis=False, # Don't enforce the batch size.
--> 579 exception_prefix='input')
580
581 if y is not None:
~\Anaconda3\lib\site-packages\keras\engine\training_utils.py in standardize_input_data(data, names, shapes, check_batch_axis, exception_prefix)
133 ': expected ' + names[i] + ' to have ' +
134 str(len(shape)) + ' dimensions, but got array '
--> 135 'with shape ' + str(data_shape))
136 if not check_batch_axis:
137 data_shape = data_shape[1:]
ValueError: Error when checking input: expected lstm_1_input to have 3 dimensions, but got array with shape (455414, 5)
Please advise!
Feeding an RNN is a bit different from feeding other networks, because it works with sequences.
The problem in your code is the dataset format. An RNN's input should be a 3D tensor with shape [batch, timesteps, features], but you are passing a 2D array of shape (455414, 5), i.e. [rows, features].
As you have a time-series dataset, you should preprocess your input data with a moving window scheme, so that each training sample is a short sequence of consecutive rows rather than a single row.
You should check this tutorial about time series forecasting, where they implement this window scheme like this:
def multivariate_data(dataset, target, start_index, end_index, history_size,
                      target_size, step, single_step=False):
    """Cut a series into (history window, future target) training pairs.

    For every position ``i`` from ``start_index + history_size`` up to (but
    not including) ``end_index``, one sample is produced:

    * the window is ``dataset`` at rows ``i - history_size`` to ``i - 1``,
      keeping every ``step``-th row;
    * the label is ``target[i + target_size]`` when ``single_step`` is true,
      otherwise the slice ``target[i:i + target_size]``.

    Args:
        dataset: Indexable data the windows are taken from.
        target: Indexable series the labels are taken from.
        start_index: First row that may appear in a window.
        end_index: Position to stop at (exclusive); ``None`` means
            ``len(dataset) - target_size``.
        history_size: Number of rows each window looks back over.
        target_size: How far ahead the label lies (or how many future
            values it contains when ``single_step`` is false).
        step: Stride used when sampling rows inside a window.
        single_step: Whether each label is one value or a slice of values.

    Returns:
        A ``(windows, labels)`` tuple of numpy arrays.
    """
    first = start_index + history_size
    last = len(dataset) - target_size if end_index is None else end_index

    windows = []
    targets = []
    for pos in range(first, last):
        window_rows = range(pos - history_size, pos, step)
        windows.append(dataset[window_rows])
        targets.append(
            target[pos + target_size]
            if single_step
            else target[pos:pos + target_size]
        )

    return np.array(windows), np.array(targets)
# Each window looks back over `past_history` rows, keeping every STEP-th one.
# Its label is target[i + future_target], where i is the first row after
# the window.
past_history = 720
future_target = 72
STEP = 6

# Training windows: positions up to TRAIN_SPLIT, with column 1 of `dataset`
# as the target series.
x_train_single, y_train_single = multivariate_data(
    dataset,
    dataset[:, 1],
    start_index=0,
    end_index=TRAIN_SPLIT,
    history_size=past_history,
    target_size=future_target,
    step=STEP,
    single_step=True,
)

# Validation windows: from TRAIN_SPLIT to the end of the data
# (end_index=None lets the helper pick the last usable position).
x_val_single, y_val_single = multivariate_data(
    dataset,
    dataset[:, 1],
    start_index=TRAIN_SPLIT,
    end_index=None,
    history_size=past_history,
    target_size=future_target,
    step=STEP,
    single_step=True,
)
Here is an illustration of the moving window procedure (taken from this paper)
Your problem is not exactly time series forecasting so you should adapt the code from that tutorial to your problem. It should be something like this:
def moving_window(dataset, past_history):
    """Turn a DataFrame of GPS points into fixed-length sequences for an RNN.

    For every row position ``i`` from ``past_history`` to the end, the
    ``past_history`` rows immediately before ``i`` form one input sequence
    and the ``mode_cat`` value of row ``i`` is its label.

    Windows are taken over consecutive rows in their existing order and do
    not look at ``trip_id`` boundaries, so a window can span two trips.

    Args:
        dataset: pandas DataFrame with columns ``lat``, ``long``, ``id``,
            ``trip_id`` and ``mode_cat``.
        past_history: Number of consecutive rows in each input sequence.

    Returns:
        ``(data, labels)`` as numpy arrays. When at least one window fits,
        ``data`` has shape ``(n_windows, past_history, 4)`` and ``labels``
        has shape ``(n_windows,)``, with
        ``n_windows = len(dataset) - past_history``. Both arrays are empty
        if the frame has no more than ``past_history`` rows.
    """
    # Pull the columns out once as plain arrays: this avoids rebuilding the
    # feature matrix on every iteration and makes all indexing positional,
    # so a DataFrame with a non-default index works too.
    features = dataset[['lat', 'long', 'id', 'trip_id']].values
    targets = dataset['mode_cat'].values

    data, labels = [], []
    for i in range(past_history, len(dataset)):
        data.append(features[i - past_history:i])
        labels.append(targets[i])
    return np.array(data), np.array(labels)
# Number of consecutive rows fed to the network as one sequence.
PAST_HISTORY = 60

X, y = moving_window(dataset, past_history=PAST_HISTORY)
PAST_HISTORY is a hyperparameter that should be tuned.
Hope it helps! :)