I have this snippet of a dataframe:
y x1 x2
0 0.198382 15 1.84227
1 0.195289 16 1.88341
2 0.195089 16 1.92455
3 0.201794 16 1.96569
4 0.208498 16 2.00683
I would like to use x1 and x2 to predict y using a Keras LSTM (RNN) model. Each row is a sample from a specific day, and I would eventually like to forecast on a new test set that has 251 days. I created train and test sets:
# Chronological split: the final 251 rows (days) become the test set and
# everything before them is the training set.
y_col = 'y'
train_size = len(df3) - 251
# Slice df3 with an open-ended upper bound so the split never depends on the
# length of a different dataframe.
train, test = df3.iloc[:train_size].copy(), df3.iloc[train_size:].copy()
# Separate the feature columns from the target column.
X_train = train.drop(y_col, axis=1)
X_test = test.drop(y_col, axis=1)
y_train = train[y_col]
y_test = test[y_col]
print(len(train), len(test))
31877 251
This is what the entire data looks like:
And zooming in:
I then normalized the data:
# Scale every feature to the [0, 1] range. The scaler must be fitted before it
# can transform anything: fit it on the training features only, then reuse the
# fitted scaler on the test features so both sets share the same scaling.
Xscaler = MinMaxScaler(feature_range=(0, 1))
scaled_X_train = Xscaler.fit_transform(X_train)
scaled_X_test = Xscaler.transform(X_test)
Then I shifted my 'y' by one step (its length stays the same), following this great blog post:
# Shift the targets one position to the right: prepend a 0, then drop the last
# value so the array keeps its original length.
y_train = np.delete(np.insert(np.array(y_train), 0, 0), -1)
Then I built and trained the model:
from keras.layers import Dense

# Window length: every sample produced by the generator is n_input
# consecutive rows of the scaled features.
n_input = 20
# Number of feature columns, taken from the data instead of relying on a
# value defined elsewhere.
n_features = scaled_X_train.shape[1]
generator = TimeseriesGenerator(scaled_X_train, y_train, length=n_input, batch_size=32)

model = Sequential()
model.add(LSTM(150, activation='relu', input_shape=(n_input, n_features)))
# One output unit: a single predicted y value per input window.
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')
# Train before reading the history; the loss values only exist after fitting.
model.fit_generator(generator, epochs=3)
loss_per_epoch = model.history.history['loss']
So far so good. Now I'm trying to make predictions on the test set, and I get an error related to the shape of X_test:
ValueError: Error when checking input: expected lstm_6_input to have 3 dimensions, but got array with shape (251, 2)
I thought that the train and test sets should have the same dimensions, in my example:
(31877, 2)
(251, 2)
I understand that I should change my test set to be 3D but where should I insert the third D and what should be its values?
UPDATE: Trying to implement @Marco Cerliani's solution, I get an error with the test generator:
# Window length: number of consecutive rows in each generated sample.
n_input = 20
generator = TimeseriesGenerator(scaled_X_train, y_train, length=n_input, batch_size=32)
# NOTE: y_test is still a pandas Series here. The generator looks targets up
# with self.targets[row], which a Series resolves through its index labels
# rather than by position -- this is what raises the KeyError shown below.
test_generator = TimeseriesGenerator(scaled_X_test, y_test, length=n_input, batch_size=32)
(32, 20, 2)
KeyError Traceback (most recent call last)
<ipython-input-55-6feb1cf23e96> in <module>
----> 1 test_generator[0][0].shape
~\AppData\Local\Continuum\anaconda3\envs\keras\lib\site-packages\keras_preprocessing\sequence.py in __getitem__(self, index)
370 samples = np.array([self.data[row - self.length:row:self.sampling_rate]
371 for row in rows])
--> 372 targets = np.array([self.targets[row] for row in rows])
374 if self.reverse:
~\AppData\Local\Continuum\anaconda3\envs\keras\lib\site-packages\keras_preprocessing\sequence.py in <listcomp>(.0)
370 samples = np.array([self.data[row - self.length:row:self.sampling_rate]
371 for row in rows])
--> 372 targets = np.array([self.targets[row] for row in rows])
374 if self.reverse:
~\AppData\Local\Continuum\anaconda3\envs\keras\lib\site-packages\pandas\core\series.py in __getitem__(self, key)
869 key = com.apply_if_callable(key, self)
870 try:
--> 871 result = self.index.get_value(self, key)
873 if not is_scalar(result):
~\AppData\Local\Continuum\anaconda3\envs\keras\lib\site-packages\pandas\core\indexes\base.py in get_value(self, series, key)
4403 k = self._convert_scalar_indexer(k, kind="getitem")
4404 try:
-> 4405 return self._engine.get_value(s, k, tz=getattr(series.dtype, "tz", None))
4406 except KeyError as e1:
4407 if len(self) > 0 and (self.holds_integer() or self.is_boolean()):
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 20
# Rebuild the LSTM model; each input window has shape (n_input, n_features).
model = Sequential()
model.add(LSTM(150, activation='relu', input_shape=(n_input, n_features)))
# Adam optimizer with mean-squared-error loss.
model.compile(optimizer='adam', loss='mse')
KeyError Traceback (most recent call last)
<ipython-input-31-81ef70218432> in <module>
4 model.compile(optimizer='adam', loss='mse')
5 model.fit_generator(generator,epochs=3)
----> 6 model.predict(test_generator)
~\AppData\Local\Continuum\anaconda3\envs\keras\lib\site-packages\keras\engine\training.py in predict(self, x, batch_size, verbose, steps, callbacks, max_queue_size, workers, use_multiprocessing)
1431 max_queue_size=max_queue_size,
1432 workers=workers,
-> 1433 use_multiprocessing=use_multiprocessing)
1435 if x is None and steps is None:
~\AppData\Local\Continuum\anaconda3\envs\keras\lib\site-packages\keras\legacy\interfaces.py in wrapper(*args, **kwargs)
89 warnings.warn('Update your `' + object_name + '` call to the ' +
90 'Keras 2 API: ' + signature, stacklevel=2)
---> 91 return func(*args, **kwargs)
92 wrapper._original_function = func
93 return wrapper
~\AppData\Local\Continuum\anaconda3\envs\keras\lib\site-packages\keras\engine\training.py in predict_generator(self, generator, steps, callbacks, max_queue_size, workers, use_multiprocessing, verbose)
1844 workers=workers,
1845 use_multiprocessing=use_multiprocessing,
-> 1846 verbose=verbose)
~\AppData\Local\Continuum\anaconda3\envs\keras\lib\site-packages\keras\engine\training_generator.py in predict_generator(model, generator, steps, callbacks, max_queue_size, workers, use_multiprocessing, verbose)
490 while steps_done < steps:
--> 491 generator_output = next(output_generator)
492 if isinstance(generator_output, tuple):
493 # Compatibility with the generators
~\AppData\Local\Continuum\anaconda3\envs\keras\lib\site-packages\keras\utils\data_utils.py in get(self)
623 except Exception:
624 self.stop()
--> 625 six.reraise(*sys.exc_info())
~\AppData\Local\Continuum\anaconda3\envs\keras\lib\site-packages\six.py in reraise(tp, value, tb)
701 if value.__traceback__ is not tb:
702 raise value.with_traceback(tb)
--> 703 raise value
704 finally:
705 value = None
~\AppData\Local\Continuum\anaconda3\envs\keras\lib\site-packages\keras\utils\data_utils.py in get(self)
608 try:
609 future = self.queue.get(block=True)
--> 610 inputs = future.get(timeout=30)
611 except mp.TimeoutError:
612 idx = future.idx
~\AppData\Local\Continuum\anaconda3\envs\keras\lib\multiprocessing\pool.py in get(self, timeout)
655 return self._value
656 else:
--> 657 raise self._value
659 def _set(self, i, obj):
~\AppData\Local\Continuum\anaconda3\envs\keras\lib\multiprocessing\pool.py in worker(inqueue, outqueue, initializer, initargs, maxtasks, wrap_exception)
119 job, i, func, args, kwds = task
120 try:
--> 121 result = (True, func(*args, **kwds))
122 except Exception as e:
123 if wrap_exception and func is not _helper_reraises_exception:
~\AppData\Local\Continuum\anaconda3\envs\keras\lib\site-packages\keras\utils\data_utils.py in get_index(uid, i)
404 The value at index `i`.
405 """
--> 406 return _SHARED_SEQUENCES[uid][i]
~\AppData\Local\Continuum\anaconda3\envs\keras\lib\site-packages\keras_preprocessing\sequence.py in __getitem__(self, index)
370 samples = np.array([self.data[row - self.length:row:self.sampling_rate]
371 for row in rows])
--> 372 targets = np.array([self.targets[row] for row in rows])
374 if self.reverse:
~\AppData\Local\Continuum\anaconda3\envs\keras\lib\site-packages\keras_preprocessing\sequence.py in <listcomp>(.0)
370 samples = np.array([self.data[row - self.length:row:self.sampling_rate]
371 for row in rows])
--> 372 targets = np.array([self.targets[row] for row in rows])
374 if self.reverse:
~\AppData\Local\Continuum\anaconda3\envs\keras\lib\site-packages\pandas\core\series.py in __getitem__(self, key)
869 key = com.apply_if_callable(key, self)
870 try:
--> 871 result = self.index.get_value(self, key)
873 if not is_scalar(result):
~\AppData\Local\Continuum\anaconda3\envs\keras\lib\site-packages\pandas\core\indexes\base.py in get_value(self, series, key)
4403 k = self._convert_scalar_indexer(k, kind="getitem")
4404 try:
-> 4405 return self._engine.get_value(s, k, tz=getattr(series.dtype, "tz", None))
4406 except KeyError as e1:
4407 if len(self) > 0 and (self.holds_integer() or self.is_boolean()):
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 20
Thanks to Marco Cerliani's great help, I found the mistake: my y_test was a pandas Series, so I converted it to a numpy array with y_test = np.array(y_test)
and it worked.
you can always initialize a generator for test predictions...
# Pass the targets as a NumPy array: the generator indexes targets by integer
# position, and a pandas Series whose index does not start at 0 raises a
# KeyError on that lookup.
generator_test = TimeseriesGenerator(scaled_X_test, np.asarray(y_test), length=n_input, batch_size=32)
complete dummy example
# Self-contained dummy example: it checks that predictions are the same
# whether the test generator is given real targets or placeholder zeros.
n_sample_train = 100
n_sample_test = 30
n_input = 5      # window length (time steps per sample)
n_features = 2   # feature columns per time step

# Random features and targets, only used to exercise the shapes.
X_train = np.random.uniform(0, 1, (n_sample_train, n_features))
X_test = np.random.uniform(0, 1, (n_sample_test, n_features))
y_train = np.random.uniform(0, 1, n_sample_train)
y_test = np.random.uniform(0, 1, n_sample_test)

generator_train = tf.keras.preprocessing.sequence.TimeseriesGenerator(
    X_train, y_train, length=n_input, batch_size=8)
generator_test = tf.keras.preprocessing.sequence.TimeseriesGenerator(
    X_test, y_test, length=n_input, batch_size=8)
# Same input windows as generator_test, but with all-zero placeholder targets.
generator_test_zeros = tf.keras.preprocessing.sequence.TimeseriesGenerator(
    X_test, np.zeros(len(X_test)), length=n_input, batch_size=8)

model = Sequential()
model.add(LSTM(100, activation='relu', input_shape=(n_input, n_features)))
# Single output unit so every window yields one scalar prediction, matching
# the scalar targets; without it the model emits 100 values per window.
model.add(tf.keras.layers.Dense(1))
model.compile(optimizer='adam', loss='mse')
model.fit(generator_train, steps_per_epoch=3, epochs=3, verbose=1)

yhat_test = model.predict(generator_test, verbose=0)
yhat_test_zeros = model.predict(generator_test_zeros, verbose=0)

# check if they are identical
# np.array_equal compares whole arrays; the built-in all() would iterate over
# rows and fail on rows holding more than one element.
np.array_equal(yhat_test, yhat_test_zeros)  # True