Tags: python, deep-learning, lstm, forex

LSTM error with date format


This is my first attempt at deep learning. The purpose of this code is to predict the direction of the FOREX market.

Here is the code:

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential

column_names = ['Date', 'Time', 'Open', 'High', 'Low','Close', 'Volume']

data = pd.read_csv(r"E:\Tutorial\EURUSD60.csv", header=None, names=column_names)

data['DateTime'] = pd.to_datetime(data.Date + ' ' + data.Time)
del data['Date']
del data['Time']

sequence_length = 21
n_features = len(data.columns)
val_ratio = 0.1
n_epochs = 300
batch_size = 512

data = data.as_matrix()
data_processed = []
for index in range(len(data) - sequence_length):
    data_processed.append(data[index: index + sequence_length])
data_processed = np.array(data_processed)

val_split = round((1 - val_ratio) * data_processed.shape[0])
train = data_processed[: int(val_split), :]
val = data_processed[int(val_split):, :]

print('Training data: {}'.format(train.shape))
print('Validation data: {}'.format(val.shape))

train_samples, train_nx, train_ny = train.shape
val_samples, val_nx, val_ny = val.shape

train = train.reshape((train_samples, train_nx * train_ny))
val = val.reshape((val_samples, val_nx * val_ny))

preprocessor = MinMaxScaler().fit(train)
train = preprocessor.transform(train)
val = preprocessor.transform(val)

train = train.reshape((train_samples, train_nx, train_ny))
val = val.reshape((val_samples, val_nx, val_ny))

X_train = train[:, : -1]
y_train = train[:, -1][:, -1]
X_val = val[:, : -1]
y_val = val[:, -1][:, -1]

X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], n_features))
X_val = np.reshape(X_val, (X_val.shape[0], X_val.shape[1], n_features))

model = Sequential()
model.add(LSTM(input_shape=(X_train.shape[1:]), units=128, return_sequences=True))
model.add(Dropout(0.5))
model.add(LSTM(128, return_sequences=False))
model.add(Dropout(0.25))
model.add(Dense(units=1))
model.add(Activation("linear"))

model.compile(loss="mse", optimizer="adam")

history = model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=n_epochs,
    verbose=2)

preds_val = model.predict(X_val)
diff = []
for i in range(len(y_val)):
    pred = preds_val[i][0]
    diff.append(y_val[i] - pred)

real_min = preprocessor.data_min_[104]
real_max = preprocessor.data_max_[104]
print(preprocessor.data_min_[104])
print(preprocessor.data_max_[104])

preds_real = preds_val * (real_max - real_min) + real_min
y_val_real = y_val * (real_max - real_min) + real_min

plt.plot(preds_real, label='Predictions')
plt.plot(y_val_real, label='Actual values')
plt.xlabel('test')
plt.legend(loc=0)
plt.show()

Here is the error:

Using TensorFlow backend.
2017-12-03 13:26:44.494199: W C:\tf_jenkins\home\workspace\rel-win\M\windows\PY\36\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX instructions, but these are available on your machine and could speed up CPU computations.
2017-12-03 13:26:44.494660: W C:\tf_jenkins\home\workspace\rel-win\M\windows\PY\36\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX2 instructions, but these are available on your machine and could speed up CPU computations.
Training data: (1824, 21, 6)
Validation data: (203, 21, 6)
Traceback (most recent call last):
  File "E:/Tutorial/Deep Learning.py", line 42, in <module>
    preprocessor = MinMaxScaler().fit(train)
  File "C:\Users\sydgo\Anaconda3\lib\site-packages\sklearn\preprocessing\data.py", line 308, in fit
    return self.partial_fit(X, y)
  File "C:\Users\sydgo\Anaconda3\lib\site-packages\sklearn\preprocessing\data.py", line 334, in partial_fit
    estimator=self, dtype=FLOAT_DTYPES)
  File "C:\Users\sydgo\Anaconda3\lib\site-packages\sklearn\utils\validation.py", line 433, in check_array
    array = np.array(array, dtype=dtype, order=order, copy=copy)
TypeError: float() argument must be a string or a number, not 'Timestamp'
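
The traceback comes from MinMaxScaler().fit(train): the scaler casts its input to float, and the DateTime column built above still holds pandas Timestamp objects, which cannot be converted. A minimal sketch that reproduces the same TypeError (the price values are made up):

import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Mixing floats and Timestamps forces the array to dtype 'object';
# fit() then fails when it tries to cast every cell to float.
arr = np.array([[1.1605, pd.Timestamp('2017-12-03 13:00')],
                [1.1612, pd.Timestamp('2017-12-03 14:00')]], dtype=object)
MinMaxScaler().fit(arr)
# TypeError: float() argument must be a string or a number, not 'Timestamp'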


Solution

  • This is the code after fixing the error. MinMaxScaler casts everything it receives to float, so the Timestamp column has to come out of the feature matrix: the fix moves it into the DataFrame index, casts the remaining columns to float, and re-exposes the time information as numeric hour, day, and week features. The key lines:
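
    df.set_index('timestamp', inplace=True)   # move the Timestamp out of the data columns
    df = df.astype(float)                     # every remaining column is now numeric
    df['hour'] = df.index.hour                # re-expose the time information
    df['day'] = df.index.weekday              # as plain numeric features
    df['week'] = df.index.week

  The full script: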

    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd
    from sklearn.preprocessing import MinMaxScaler
    from keras.layers.core import Dense, Activation, Dropout
    from keras.layers.recurrent import LSTM
    from keras.models import Sequential
    
    column_names = ['Date', 'Time', 'Open', 'High', 'Low','Close', 'Volume']
    
    df = pd.read_csv(r"E:\Tutorial\EURUSD60.csv", header=None, names=column_names)
    
    df['timestamp'] = pd.to_datetime(df.Date + ' ' + df.Time)
    del df['Date']
    del df['Time']
    
    df.rename(columns={'Open': 'open', 'High': 'high', 'Low': 'low',
                       'Close': 'close', 'Volume': 'volume'}, inplace=True)
    df.set_index('timestamp', inplace=True)
    df = df.astype(float)
    df['hour'] = df.index.hour
    df['day'] = df.index.weekday
    df['week'] = df.index.week
    
    
    sequence_length = 21
    n_features = len(df.columns)
    val_ratio = 0.1
    n_epochs = 300
    batch_size = 512
    
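    # Slide a window of 21 consecutive hourly bars over the data; each
    # window becomes one sample (20 input timesteps plus 1 target timestep).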
    data = df.as_matrix()
    data_processed = []
    for index in range(len(data) - sequence_length):
        data_processed.append(data[index: index + sequence_length])
    data_processed = np.array(data_processed)
    
    val_split = round((1 - val_ratio) * data_processed.shape[0])
    train = data_processed[: int(val_split), :]
    val = data_processed[int(val_split):, :]
    
    print('Training data: {}'.format(train.shape))
    print('Validation data: {}'.format(val.shape))
    
    train_samples, train_nx, train_ny = train.shape
    val_samples, val_nx, val_ny = val.shape
    
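    # MinMaxScaler only accepts 2-D input, so flatten each window into a
    # single row, scale, then restore the 3-D shape afterwards.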
    train = train.reshape((train_samples, train_nx * train_ny))
    val = val.reshape((val_samples, val_nx * val_ny))
    
    preprocessor = MinMaxScaler().fit(train)
    train = preprocessor.transform(train)
    val = preprocessor.transform(val)
    
    train = train.reshape((train_samples, train_nx, train_ny))
    val = val.reshape((val_samples, val_nx, val_ny))
    
    # Inputs are the first 20 timesteps of each window; the target is the
    # close price of the final timestep.
    close_idx = df.columns.get_loc('close')
    X_train = train[:, : -1]
    y_train = train[:, -1, close_idx]
    X_val = val[:, : -1]
    y_val = val[:, -1, close_idx]
    
    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], n_features))
    X_val = np.reshape(X_val, (X_val.shape[0], X_val.shape[1], n_features))
    
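    # Two stacked LSTM layers with dropout, ending in a single linear unit
    # that regresses the scaled close price.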
    model = Sequential()
    model.add(LSTM(input_shape=(X_train.shape[1:]), units=128, return_sequences=True))
    model.add(Dropout(0.5))
    model.add(LSTM(128, return_sequences=False))
    model.add(Dropout(0.25))
    model.add(Dense(units=1))
    model.add(Activation("linear"))
    
    model.compile(loss="mse", optimizer="adam")
    
    history = model.fit(
        X_train,
        y_train,
        batch_size=batch_size,
        epochs=n_epochs,
        verbose=2)
    
    preds_val = model.predict(X_val)
    diff = []
    for i in range(len(y_val)):
        pred = preds_val[i][0]
        diff.append(y_val[i] - pred)
    
    # Un-scale predictions and targets: look up the min/max of the flattened
    # column that holds the final timestep's close price.
    target_col = (sequence_length - 1) * n_features + df.columns.get_loc('close')
    real_min = preprocessor.data_min_[target_col]
    real_max = preprocessor.data_max_[target_col]
    print(real_min, real_max)
    
    preds_real = preds_val * (real_max - real_min) + real_min
    y_val_real = y_val * (real_max - real_min) + real_min
    
    plt.plot(preds_real, label='Predictions')
    plt.plot(y_val_real, label='Actual values')
    plt.xlabel('test')
    plt.legend(loc=0)
    plt.show()
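
  • Note: some of the calls above have since been removed from their libraries. A sketch of the modern equivalents, assuming pandas 1.1+ and the Keras that ships with TensorFlow 2 (untested against this exact script):

    # DataFrame.as_matrix() was removed in pandas 1.0
    data = df.to_numpy()

    # DatetimeIndex.week was removed in pandas 2.0; use isocalendar()
    df['week'] = df.index.isocalendar().week.astype(float)

    # keras.layers.core / keras.layers.recurrent moved under tensorflow.keras
    from tensorflow.keras.layers import Dense, Activation, Dropout, LSTM
    from tensorflow.keras.models import Sequential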