After getting help on so many things, I'm here one last time with a problem I can't find a solution to.
Following my previous question, a user pointed out that my bad results on my time series prediction may be due to my architecture not converging.
After looking into it and trying some fixes I found in other questions (setting weights, lowering the learning rate, changing the optimizer/activation), I still can't get better results: the accuracy stays at 0 (or 0.0003, which isn't good enough).
My code:
import numpy as np
import tflearn
from pandas import DataFrame
from pandas import Series
from pandas import concat
from pandas import read_csv
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from math import sqrt
import datetime
from tflearn import Accuracy, Momentum

# Preprocessing function
def preprocess(data):
    return np.array(data, dtype=np.int32)
def parser(x):
    return datetime.datetime.strptime(x, '%Y-%m-%d %H:%M:%S')
# frame a sequence as a supervised learning problem
def timeseries_to_supervised(data, lag=1):
    df = DataFrame(data)
    columns = [df.shift(i) for i in range(1, lag + 1)]
    columns.append(df)
    df = concat(columns, axis=1)
    df.fillna(0, inplace=True)
    return df
# create a differenced series
def difference(dataset, interval=1):
    diff = list()
    for i in range(interval, len(dataset)):
        value = dataset[i] - dataset[i - interval]
        diff.append(value)
    return Series(diff)
# invert differenced value
def inverse_difference(history, yhat, interval=1):
    return yhat + history[-interval]
# scale train and test data to [-1, 1]
def scale(train, test):
    # fit scaler
    scaler = MinMaxScaler(feature_range=(-1, 1))
    scaler = scaler.fit(train)
    # transform train
    train = train.reshape(train.shape[0], train.shape[1])
    train_scaled = scaler.transform(train)
    # transform test
    test = test.reshape(test.shape[0], test.shape[1])
    test_scaled = scaler.transform(test)
    return scaler, train_scaled, test_scaled
# inverse scaling for a forecasted value
def invert_scale(scaler, X, value):
    new_row = [x for x in X] + [value]
    array = np.array(new_row)
    array = array.reshape(1, len(array))
    inverted = scaler.inverse_transform(array)
    return inverted[0, -1]
def fit_lstm(train, batch_size, nb_epoch, neurons):
    X, y = train[0:-1], train[:, -1]
    X = X[:, 0].reshape(len(X), 1, 1)
    y = y.reshape(len(y), 1)
    print(X.shape)
    print(y.shape)
    # Build neural network
    net = tflearn.input_data(shape=[None, 1, 1])
    tnorm = tflearn.initializations.uniform(minval=-1.0, maxval=1.0)
    net = tflearn.dropout(net, 0.8)
    net = tflearn.fully_connected(net, 1, activation='linear', weights_init=tnorm)
    net = tflearn.regression(net, optimizer='adam', learning_rate=0.001,
                             loss='mean_square')
    # Define model
    model = tflearn.DNN(net, tensorboard_verbose=3, best_val_accuracy=0.6)
    model.fit(X, y, n_epoch=nb_epoch, batch_size=batch_size, shuffle=False, show_metric=True)
    score = model.evaluate(X, y, batch_size=128)
    print(score)
    return model
# make a one-step forecast
def forecast_lstm(model, X):
    X = X.reshape(len(X), 1, 1)
    yhat = model.predict(X)
    return yhat[0, 0]
# Load CSV file; the first column holds the datetime index
data = read_csv('nowcastScaled.csv', header=0, parse_dates=[0], index_col=0, squeeze=True, date_parser=parser)
# transform data to be stationary
raw_values = data.values
diff_values = difference(raw_values, 1)
# transform data to be supervised learning
supervised = timeseries_to_supervised(diff_values, 1)
supervised_values = supervised.values
# split data into train and test-sets
train, test = supervised_values[0:10000], supervised_values[10000:10100]
# transform the scale of the data
scaler, train_scaled, test_scaled = scale(train, test)
repeats = 1
for r in range(repeats):
    # fit the model
    lstm_model = fit_lstm(train_scaled, 128, 6, 1)
    # forecast the entire training dataset to build up state for forecasting
    train_reshaped = train_scaled[:, 0].reshape(len(train_scaled), 1, 1)
    print(lstm_model.predict(train_reshaped))
    # walk-forward validation on the test data
    predictions = list()
    error_scores = list()
    for i in range(len(test_scaled)):
        # make one-step forecast
        X, y = test_scaled[i, 0:-1], test_scaled[i, -1]
        yhat = forecast_lstm(lstm_model, X)
        # invert scaling
        yhat = invert_scale(scaler, X, yhat)
        # invert differencing
        yhat = inverse_difference(raw_values, yhat, len(test_scaled) + 1 - i)
        # store forecast
        predictions.append(yhat)
    rmse = sqrt(mean_squared_error(raw_values[10000:10100], predictions))
    print('%d) Test RMSE: %.3f' % (1, rmse))
    error_scores.append(rmse)
print(predictions)
print(raw_values[10000:10100])
Here is the result I get from running it (raising the number of epochs doesn't seem to make it better):
Training Step: 472 | total loss: 0.00486 | time: 0.421s
| Adam | epoch: 006 | loss: 0.00486 - binary_acc: 0.0000 -- iter: 9856/9999
Training Step: 473 | total loss: 0.00453 | time: 0.427s
| Adam | epoch: 006 | loss: 0.00453 - binary_acc: 0.0000 -- iter: 9984/9999
Training Step: 474 | total loss: 0.00423 | time: 0.430s
| Adam | epoch: 006 | loss: 0.00423 - binary_acc: 0.0000 -- iter: 9999/9999
I've tried lowering/raising most of the settings, but nothing changed.
Here is an extract of the data I'm using (a univariate time series); using more or less data for training didn't do anything either.
(PS: My code is mostly from this tutorial; I had to change it a bit since I wanted to try using TFLearn.)
You can't define accuracy for a regression problem; you just track the MSE between the predicted and the actual values.
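For illustration, here is a minimal sketch of the regression layer reporting a regression metric in the training log instead of the meaningless binary accuracy. It assumes your TFLearn version bundles the R2 metric (tflearn.metrics.R2) and that X and y are prepared as in fit_lstm above:

# Same architecture as in the question, but reporting R2 (coefficient of
# determination), a regression metric, instead of the default accuracy.
net = tflearn.input_data(shape=[None, 1, 1])
net = tflearn.fully_connected(net, 1, activation='linear')
net = tflearn.regression(net, optimizer='adam', learning_rate=0.001,
                         loss='mean_square', metric=tflearn.metrics.R2())
model = tflearn.DNN(net)
model.fit(X, y, n_epoch=6, batch_size=128, shuffle=False, show_metric=True)

# What actually matters for a regression: the error between predictions
# and ground truth (mean_squared_error is already imported in your script).
print('Train MSE: %.6f' % mean_squared_error(y, model.predict(X)))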
Your training loss is already low, so if the predictions still aren't close, then either your inverse scaling is wrong or you're overfitting.
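To rule out the first cause, round-trip values whose truth you already know through your own helpers. Here is a sketch reusing the names from your script (scaler, train_scaled, supervised_values, diff_values, raw_values); if either assertion fails, the inversion is the problem:

# Sanity check 1: undo the MinMax scaling on the first training row and
# compare against the unscaled supervised value it came from.
row = train_scaled[0]
restored = invert_scale(scaler, row[0:-1], row[-1])
assert np.isclose(restored, supervised_values[0, -1]), 'inverse scaling is off'

# Sanity check 2: undo the differencing on the first difference and compare
# against the raw series (diff_values[0] == raw_values[1] - raw_values[0]).
undone = inverse_difference(raw_values, diff_values[0], len(raw_values))
assert np.isclose(undone, raw_values[1]), 'inverse differencing is off'

If both checks pass, look at overfitting instead, e.g. by passing validation_set to model.fit and comparing the training loss against the validation loss.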