Search code examples
python tensorflow keras lstm recurrent-neural-network

How to shape the input of a RNN with multiple features for each target?


I'm trying to learn how to use RNNs for time-series prediction, and all the examples I've seen use a sequence of prices to predict the following price. In those examples each target (y_train[n]) is associated with a sequence composed of the last 30 prices/steps (the values at positions n-1, n-2, ..., n-30).

However, in the real world, an accurate prediction needs more than the sequence of the last 30 prices; you would also need other inputs (features), such as the last 30 values of volume or the last 30 values of a sentiment index.

So my question is: How do you shape the input of an RNN with two sequences for each target (last 30 prices and last 30 volume values)? This is the example code I'm using with only 1 sequence to use as reference:

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout

# Split the dataset chronologically: first 2/3 for training, last 1/3 for testing.
# NOTE: `df` is the OHLCV dataframe; it is not defined in this snippet.
train_lim = int(len(df) * 2 / 3)
training_set = df[:train_lim][['Close']]
test_set = df[train_lim:][['Close']]

# Normalize to [0, 1]. The scaler is fitted on the training data only and then
# reused on the test data, so the test inputs are on the same scale as the
# data the network is trained on.
sc = MinMaxScaler(feature_range=(0, 1))
training_set_scaled = sc.fit_transform(training_set)
test_set_scaled = sc.transform(test_set)

# Build sliding windows: each sample holds the previous `window` values and
# the target is the value that immediately follows them.
window = 30

X_train = []
y_train = []
X_test = []

# Iterate over rows (shape[0]); `.size` counts every element and would
# overrun the array as soon as more than one column is selected.
for i in range(window, training_set_scaled.shape[0]):
    X_train.append(training_set_scaled[i - window:i, 0])
    y_train.append(training_set_scaled[i, 0])
X_train, y_train = np.array(X_train), np.array(y_train)

for i in range(window, test_set_scaled.shape[0]):
    X_test.append(test_set_scaled[i - window:i, 0])
X_test = np.array(X_test)

# Keras recurrent layers take 3-D input: (samples, timesteps, features).
# Only one feature (Close) is used here, so the last axis has length 1.
# reshape(-1, ...) also copes with the case where no window could be built.
X_train = X_train.reshape(-1, window, 1)
X_test = X_test.reshape(-1, window, 1)

# Stacked-LSTM regressor: five LSTM layers with return_sequences=True, one
# final LSTM layer without it, and a single-unit dense output. Every LSTM
# layer is followed by a Dropout(0.2) layer.
regressor = Sequential()

# First LSTM layer: declares the input shape (timesteps, 1 feature)
regressor.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
regressor.add(Dropout(0.2))

# Four more LSTM layers that hand the whole sequence to the next layer
for _ in range(4):
    regressor.add(LSTM(units=50, return_sequences=True))
    regressor.add(Dropout(0.2))

# Last LSTM layer (no return_sequences), feeding the dense output
regressor.add(LSTM(units=50))
regressor.add(Dropout(0.2))

# Fully connected output layer: one predicted value per sample
regressor.add(Dense(units=1))

# Compile with Adam and mean squared error, then train
regressor.compile(optimizer='adam', loss='mean_squared_error')
regressor.fit(X_train, y_train, epochs=120, batch_size=32)

The dataframe that I'm using is a standard OHLCV with a datetime index so it will look like this:

Datetime     Open         High      Low      Close     Volume
01/01/2021   102.42     103.33     100.57    101.23      1990
02/01/2021   101.23     105.22     99.45     100.11      1970
   ...         ...        ...       ...        ...        ...
 
01/12/2021   203.22     210.34     199.22    201.11      2600

Solution

  • You can follow exactly the same process; the only difference is that the last dimension of the arrays holding the input sequences (X_train and X_test) will have a length greater than one. Its length equals the number of external regressors plus one, where the plus one comes from the past values of the target, which are also used as an input.

    import pandas as pd
    import numpy as np
    import yfinance as yf
    from sklearn.preprocessing import MinMaxScaler
    from keras.models import Sequential
    from keras.layers import Dense, LSTM, Dropout
    pd.options.mode.chained_assignment = None
    
    # column roles: the series to predict and the external regressors
    target = ['Close']
    features = ['Volume', 'High', 'Low']

    # download one year of AAPL data and keep only the columns used by the model
    df = yf.download(tickers=['AAPL'], period='1y')[features + target]

    # chronological split: first two thirds for training, the rest for testing
    split = int(len(df) * 2 / 3)
    df_train = df.iloc[:split].copy()
    df_test = df.iloc[split:].copy()

    # fit both scalers on the training subset only, then apply them to both
    # subsets so the test data is scaled with the training statistics
    target_scaler = MinMaxScaler()
    target_scaler.fit(df_train[target])
    features_scaler = MinMaxScaler()
    features_scaler.fit(df_train[features])

    for subset in (df_train, df_test):
        subset[target] = target_scaler.transform(subset[target])
        subset[features] = features_scaler.transform(subset[features])
    
    # extract the input sequences and output values: every sample is the
    # previous `sequence_length` rows of all columns (features + target),
    # and its output is the target value of the row that follows
    sequence_length = 30

    train_frame = df_train[features + target]
    train_steps = range(sequence_length, df_train.shape[0])
    X_train = np.array([train_frame.iloc[i - sequence_length: i] for i in train_steps])
    y_train = np.array([df_train[target].iloc[i] for i in train_steps])

    test_frame = df_test[features + target]
    test_steps = range(sequence_length, df_test.shape[0])
    X_test = np.array([test_frame.iloc[i - sequence_length: i] for i in test_steps])
    y_test = np.array([df_test[target].iloc[i] for i in test_steps])

    # shapes are (samples, sequence_length, number of columns)
    print(X_train.shape)
    # (138, 30, 4)

    print(X_test.shape)
    # (55, 30, 4)
    
    # build the model: six LSTM layers, each followed by Dropout(0.2), and a
    # single-unit dense output; all LSTM layers except the last one use
    # return_sequences=True
    model = Sequential()

    # the first layer takes its input shape from the data: (timesteps, columns)
    model.add(LSTM(units=50, return_sequences=True, input_shape=X_train.shape[1:]))
    model.add(Dropout(0.2))

    for _ in range(4):
        model.add(LSTM(units=50, return_sequences=True))
        model.add(Dropout(0.2))

    model.add(LSTM(units=50))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))

    # train on the training sequences, then compute the loss on the test set
    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(X_train, y_train, epochs=120, batch_size=32)
    model.evaluate(X_test, y_test)
    
    # generate the test set predictions and map them back from the scaled
    # range to the original price scale
    y_pred = model.predict(X_test)
    y_pred = target_scaler.inverse_transform(y_pred)

    # plot the test set predictions next to the actual closing prices
    df['Predicted Close'] = np.nan

    # Write the predictions into the last len(y_pred) rows with a single
    # positional assignment on `df` itself. Chained indexing
    # (df[col].iloc[...] = ...) may assign into a temporary copy and leave
    # `df` unchanged. Computing the start row explicitly also avoids the
    # `-0:` slice, which would select every row when there are no predictions.
    first_row = df.shape[0] - y_pred.shape[0]
    predicted_col = df.columns.get_loc('Predicted Close')
    df.iloc[first_row:, predicted_col] = y_pred.flatten()

    df[['Close', 'Predicted Close']].plot()