python python-2.7 machine-learning lstm tflearn

TFLearn time series forecasting prediction

After defining my neural network and training my model:

# Build the LSTM regression network, train it, evaluate it and save it.
# NOTE(review): `tflearn`, `timesteps`, `X`, `y`, `nb_epoch` and `batch_size`
# are not defined in this snippet; they must come from earlier code.

# Input layer declared with shape [None, 1, 1]
# (presumably batch x time steps x features -- TODO confirm).
net = tflearn.input_data(shape=[None, 1, 1])
# Uniform initializer with minval=-1.0 / maxval=1.0; used below as the
# weight initializer of the fully connected output layer.
tnorm = tflearn.initializations.uniform(minval=-1.0, maxval=1.0)
# NOTE(review): the second positional argument of tflearn.lstm appears to be
# the number of units, yet `timesteps` is passed here, while the snippets that
# later load 'ModeSpot.tflearn' rebuild this layer with 1 -- confirm that the
# two definitions are meant to match.
net = tflearn.lstm(net, timesteps, dropout=0.8)

# Single linear output unit: the network emits one regression value per sample.
net = tflearn.fully_connected(net, 1, activation='linear', weights_init=tnorm)
# Adam optimizer, mean-square loss, R2 reported as the metric.
net = tflearn.regression(net, optimizer='adam', learning_rate=0.001,
                             loss='mean_square', metric='R2')
# Define model
# NOTE(review): clip_gradients=0. presumably disables gradient clipping --
# confirm against the TFLearn DNN documentation.
model = tflearn.DNN(net, clip_gradients=0.)
# shuffle=False is passed so the samples are not shuffled during training.
model.fit(X, y, n_epoch=nb_epoch, batch_size=batch_size, shuffle=False, show_metric=True)
# Evaluation uses the same X, y that were passed to fit(), i.e. this is not a
# held-out score. `score` is not used further in this snippet.
score = model.evaluate(X, y, batch_size=128)
# Persist the trained model under the name the later snippets load.
model.save('ModeSpot.tflearn')

I now have a problem: most of the tutorials I've found on time series forecasting predict on a testing set (they pass their testing set to .predict()). In reality we don't have those values, since they are exactly what we want to predict.

For now I'm using this:

def forecast_lstm(model, X):
    """Make a one-step forecast.

    X is reshaped to (len(X), 1, 1) and passed to model.predict(); the
    element at index [0, 0] of the prediction is returned.
    """
    batch = X.reshape(len(X), 1, 1)
    predicted = model.predict(batch)
    return predicted[0, 0]

# Walk-forward evaluation on a held-out test set (fragment; the enclosing
# definition and the helpers scale / invert_scale / inverse_difference, as well
# as supervised_values and raw_values, are not shown here).
# split data into train and test-sets: the last 10000 rows become the test set
    train, test = supervised_values[0:-10000], supervised_values[-10000:]

    # transform the scale of the data; `scaler` is kept to undo it later
    scaler, train_scaled, test_scaled = scale(train, test)

    # Build neural network
    # The graph has to be rebuilt before load() can restore the saved weights.
    # NOTE(review): the LSTM here is built with 1 as its second argument and
    # dropout=0.3, whereas the training snippet uses `timesteps` and
    # dropout=0.8 -- confirm the two architectures are meant to match.
    net = tflearn.input_data(shape=[None, 1, 1])
    tnorm = tflearn.initializations.uniform(minval=-1.0, maxval=1.0)
    net = tflearn.lstm(net, 1, dropout=0.3)
    net = tflearn.fully_connected(net, 1, activation='linear', weights_init=tnorm)
    net = tflearn.regression(net, optimizer='adam', learning_rate=0.001,
                             loss='mean_square', metric='R2')
    lstm_model = tflearn.DNN(net, clip_gradients=0.)
    lstm_model.load('ModeSpot.tflearn')

    # forecast the entire training dataset to build up state for forecasting
    # Only column 0 of the scaled training rows is used, reshaped to
    # (n_rows, 1, 1); the return value of predict() is discarded.
    # NOTE(review): whether predict() keeps any LSTM state between calls is
    # not visible here -- confirm this warm-up has the intended effect.
    train_reshaped = train_scaled[:, 0].reshape(len(train_scaled), 1, 1)
    lstm_model.predict(train_reshaped)
    # walk-forward validation on the test data
    predictions = list()
    # NOTE: error_scores is never filled or read in this fragment.
    error_scores = list()
    for i in range(len(test_scaled)):
        # make one-step forecast
        # X is every column but the last of row i; y (the last column) is
        # unpacked but not used below.
        X, y = test_scaled[i, 0:-1], test_scaled[i, -1]
        yhat = forecast_lstm(lstm_model, X)
        # invert scaling
        yhat2 = invert_scale(scaler, X, yhat)
        # invert differencing; the last argument counts down from
        # len(test_scaled) + 1 as i advances
        yhat3 = inverse_difference(raw_values, yhat2, len(test_scaled) + 1 - i)
        # store forecast
        predictions.append(yhat3)

But it only works for my testing set. How do I predict the next x values? I think I've seen somewhere that to predict the value at T I have to pass the value at T-1 to predict (then T for T+1, and so on until I reach the number of predictions I want). Is that a good way to go?

I've tried this:

def forecast_lstm2(model, X):
    """Make a one-step forecast.

    X is reshaped to (-1, 1, 1), so any object exposing reshape() is
    accepted, then passed to model.predict(); the element at index [0, 0]
    of the prediction is returned.
    """
    batch = X.reshape(-1, 1, 1)
    predicted = model.predict(batch)
    return predicted[0, 0]

# Recursive multi-step forecast: every prediction is fed back in as the input
# of the next step. `test` holds the sequence of inputs (seed first).
# NOTE(review): this rebinds the name `test` used for the test split earlier,
# and it appends to a `predictions` list that must already exist -- if the
# walk-forward loop ran before, its results are still in that list.
test = list()
# Seed with the input columns of the first scaled training row; y (the last
# column) is unpacked but not used below.
X, y = train_scaled[0, 0:-1], train_scaled[0, -1]
test.append(X)
for i in range(len(test_scaled)):
    # make one-step forecast from the most recent entry
    yhat = forecast_lstm2(lstm_model, test[i])
    test.append(yhat)

    # invert scaling; test[i+1] is the yhat that was just appended
    yhat2 = invert_scale(scaler, test[i+1], yhat)
    # invert differencing
    # The offset is based on the fixed horizon len(test_scaled), as in the
    # walk-forward loop. It used to be len(test) + 1 - i, but `test` grows by
    # one element per iteration, so that expression was always 3.
    yhat3 = inverse_difference(raw_values, yhat2, len(test_scaled) + 1 - i)
    # store forecast
    predictions.append(yhat3)

But it didn't have the desired effect (after a few predictions it always gave the same result).

Thanks for your attention and time.

Solution

In the end this seems to work:

# make a one-step forecast
def forecast_lstm2(model, X):
    X = X.reshape(-1, 1, 1)
    yhat = model.predict(X)
    return yhat[0, 0]

def prediction(spotId, n_steps=10000, epochs=(5, 15, 25, 35, 45, 50, 100)):
    """Run a recursive multi-step forecast with each saved per-epoch model.

    For every entry of ``epochs`` the network is rebuilt in a fresh default
    graph, the checkpoint ``'ModeSpot<spotId>Epoch<e>.tflearn'`` is loaded,
    and ``n_steps`` forecasts are produced by feeding each prediction back in
    as the next input.

    Args:
        spotId: Identifier used to build the CSV and checkpoint file names.
        n_steps: Number of steps to forecast; also the base of the offset
            handed to ``inverse_difference``. Defaults to 10000, the value
            that used to be hard-coded in both places.
        epochs: Epoch counts whose checkpoints are used. Defaults to the
            list that used to be hard-coded.

    Returns:
        dict mapping each epoch count to the list of its ``n_steps``
        forecasts. (The forecasts used to be built and then thrown away.)
    """
    # None of the data preparation depends on the epoch, so it is done once
    # instead of re-reading and re-scaling the CSV for every checkpoint.

    # Load CSV file, indicate that the first column represents labels
    data = read_csv('nowcastScaled'+str(spotId)+'.csv', header=0, parse_dates=[0], index_col=0, squeeze=True, date_parser=parser)

    # transform data to be stationary
    raw_values = data.values
    diff_values = difference(raw_values, 1)

    # transform data to be supervised learning
    supervised = timeseries_to_supervised(diff_values, 1)
    supervised_values = supervised.values

    # No test split here: the testing data was removed from the input file.
    # NOTE(review): `x` is not defined inside this function; it has to exist
    # at module level or this line raises NameError -- confirm.
    train = supervised_values[x:]

    # transform the scale of the data (training data only)
    scaler, train_scaled = scale(train)
    train_reshaped = train_scaled[:, 0].reshape(len(train_scaled), 1, 1)

    results = {}
    for e in epochs:
        # Start from an empty graph so the layers of the previous iteration
        # do not collide with the ones created below.
        tf.reset_default_graph()

        # Build neural network
        net = tflearn.input_data(shape=[None, 1, 1])
        tnorm = tflearn.initializations.uniform(minval=-1.0, maxval=1.0)
        net = tflearn.lstm(net, 1, dropout=0.8)
        net = tflearn.fully_connected(net, 1, activation='linear', weights_init=tnorm)
        net = tflearn.regression(net, optimizer='adam', learning_rate=0.0001,
                                 loss='mean_square', metric='R2')
        lstm_model = tflearn.DNN(net, clip_gradients=0.)
        lstm_model.load('ModeSpot'+str(spotId)+'Epoch'+str(e)+'.tflearn')

        # forecast the entire training dataset to build up state for forecasting
        lstm_model.predict(train_reshaped)

        # Recursive forecast, seeded with the input columns of the first
        # scaled training row.
        # NOTE(review): seeding from row 0 rather than the last training row
        # is kept as in the original -- confirm this is intended.
        predictions = list()
        step_input = train_scaled[0, 0:-1]
        for i in range(n_steps):
            # make one-step forecast and feed it back as the next input
            yhat = forecast_lstm2(lstm_model, step_input)
            step_input = yhat
            # invert scaling (the value just predicted is also passed as X,
            # exactly as before)
            yhat2 = invert_scale(scaler, step_input, yhat)
            # invert differencing
            yhat3 = inverse_difference(raw_values, yhat2, n_steps + 1 - i)
            predictions.append(yhat3)
        results[e] = predictions
    return results