Tags: python, pandas, tensorflow, machine-learning, keras

Tensorflow Keras ValueError: Unexpected result of `predict_function` (Empty batch_outputs)


I am trying to train some machine learning models to predict the price action for 4 chosen stocks from a list of NASDAQ-100 stocks.

I am very new to Python, so I've run into a few issues I have not been able to fix. The first came up while trying to use an LSTM model. I get the following error when I run my code:

ValueError: Unexpected result of predict_function (Empty batch_outputs). Please use Model.compile(..., run_eagerly=True), or tf.config.run_functions_eagerly(True) for more information of where went wrong, or file a issue/bug to tf.keras.

The model seems to work when I pass in the full DataFrame, but not when I pass in a Series for a single stock. Why is this? I only want to run the model on one stock (e.g. CTSH), so how can I do this?

Setting run_eagerly=True did not provide any additional information about what was going wrong.

This is my code:

# Imports
import os
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import io
from PIL import Image
import statsmodels.api as sm
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_error
import math
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf

# Chosen stocks from NASDAQ-100
chosen_stocks = ['CTSH', 'BKNG', 'REGN', 'MSFT']

def get_data():
    # Get list of tickers
    tickers = open("dataset/nasdaq_100_tickers.txt", "r")
    data = tickers.read().splitlines()

    # Check if the data has already been downloaded, drop NaN values
    if os.path.exists('dataframe.csv'):
        dataframe = pd.read_csv('dataframe.csv', index_col="Date", parse_dates=True).dropna()
    else:
        # Download Close data from Yahoo Finance
        data = yf.download(tickers=data, period='1y', interval='1d')['Close']
        data.to_csv('dataframe.csv')
        # Convert array to Pandas dataframe, drop NaN values
        complete_data = data.dropna()
        dataframe = pd.DataFrame(complete_data)
    dataframe.drop(['GEHC'], axis=1, inplace=True) # Dropping GEHC because it contains NULL values

    return dataframe


def get_ctsh_data(dataframe):
    get_ctsh_data = dataframe.iloc[:, 30]
    return get_ctsh_data


# LSTM stock predictions on Close
def lstm_prediction(dataframe):
    keras = tf.keras
    Sequential = keras.models.Sequential
    Dense = keras.layers.Dense
    LSTM = keras.layers.LSTM
    # Scale down data
    scaler = MinMaxScaler()
    dataframe = scaler.fit_transform(np.array(dataframe).reshape(-1, 1))
    dataframe.shape
    # Use 65% of data for training & 35% for testing
    train_size = int(len(dataframe) * 0.65)
    test_size = len(dataframe) - train_size
    train_data, test_data = dataframe[0:train_size, :], dataframe[train_size:len(dataframe), :1]
    # Create a data matrix
    def create_dataset(dataset, time_step = 1):
        input_data, output_data = [], []
        for i in range(len(dataset)-time_step-1):
           a = dataset[i:(i+time_step), 0]
           input_data.append(a)
           output_data.append(dataset[i + time_step, 0])
        return np.array(input_data), np.array(output_data)

    # Calling the create dataset function to split the data into input output datasets with time
    # step 100
    time_step = 100
    input_train, output_train = create_dataset(train_data, time_step)
    input_test, output_test = create_dataset(test_data, time_step)
    # checking values
    print("Checking values:")
    print(input_train.shape)
    print(input_train)
    print(input_test.shape)
    print(output_test.shape)

    # Create and fit LSTM model - 4 layers (1 input, 2 hidden, 1 Dense output) & 50 neurons
    model = Sequential()
    model.add(LSTM(50, return_sequences=True, input_shape=(input_train.shape[1], 1)))
    model.add(LSTM(50, return_sequences=True))
    model.add(LSTM(50))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam', run_eagerly=True) # Adam optimizer - mean squared error
    model.summary()

    model.fit(input_train, output_train, validation_data=(input_test, output_test), epochs=10, batch_size=64, verbose=1)

    train_predict = model.predict(input_train)
    test_predict = model.predict(input_test)
    # Transform to original form
    train_predict = scaler.inverse_transform(train_predict)
    test_predict = scaler.inverse_transform(test_predict)

    print("Mean Squared Errors:")
    print(math.sqrt(mean_squared_error(output_train, train_predict)))
    print(math.sqrt(mean_squared_error(output_test, test_predict)))
    # If difference is less than 50 - model is good

    look_back = 100 # Takes the number of values behind the current value
    train_predict_plot = np.empty_like(dataframe)
    train_predict_plot[:, :] = np.nan
    train_predict_plot[look_back: len(train_predict) + look_back, :] = train_predict
    test_predict_plot = np.empty_like(dataframe)
    test_predict_plot[:, :] = np.nan
    test_predict_plot[len(train_predict) + (look_back) * 2 + 1: len(dataframe) - 1, :] = test_predict

    plt.plot(scaler.inverse_transform(dataframe))
    plt.plot(train_predict_plot)
    plt.plot(test_predict_plot)
    plt.show()


dataframe = get_data()

# Make Series with CTSH Close data
ctsh_data = get_ctsh_data(dataframe)

lstm_prediction(ctsh_data)

My dataframe looks like this:

                  AAPL        ABNB  ...         ZM          ZS
Date                                ...                       
2022-12-15  136.500000   90.610001  ...  70.199997  117.169998
2022-12-16  134.509995   89.570000  ...  69.860001  114.209999
2022-12-19  132.369995   85.930000  ...  69.089996  112.269997
2022-12-20  132.300003   87.620003  ...  68.559998  113.540001
2022-12-21  135.449997   87.070000  ...  69.930000  112.769997
...                ...         ...  ...        ...         ...
2023-11-28  190.399994  127.559998  ...  67.529999  193.850006
2023-11-29  189.369995  126.480003  ...  67.949997  199.839996
2023-11-30  189.949997  126.339996  ...  67.830002  197.529999
2023-12-01  191.240005  135.020004  ...  70.290001  198.029999
2023-12-04  188.669998  134.539993  ...  67.720001  197.919998

The full traceback is:

C:\Users\xxx\source\repos\Project\venv\Scripts\python.exe C:/Users/xxx/source/repos/Project/main.py 

Checking values:
(56, 100)
[[0.08179096 0.03462697 0.         ... 0.39761184 0.39701493 0.45253732]
 [0.03462697 0.         0.00358217 ... 0.39701493 0.45253732 0.4119403 ]
 [0.         0.00358217 0.03880605 ... 0.45253732 0.4119403  0.3988059 ]
 ...
 [0.45552235 0.38447744 0.39104464 ... 0.76776083 0.76298506 0.67522381]
 [0.38447744 0.39104464 0.35402979 ... 0.76298506 0.67522381 0.6435821 ]
 [0.39104464 0.35402979 0.32238808 ... 0.67522381 0.6435821  0.6597013 ]]
(0,)
(0,)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 lstm (LSTM)                 (None, 100, 50)           10400     
                                                                 
 lstm_1 (LSTM)               (None, 100, 50)           20200     
                                                                 
 lstm_2 (LSTM)               (None, 50)                20200     
                                                                 
 dense (Dense)               (None, 1)                 51        
                                                                 
=================================================================
Total params: 50851 (198.64 KB)
Trainable params: 50851 (198.64 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/10
1/1 [==============================] - 1s 814ms/step - loss: 0.2619
Epoch 2/10
1/1 [==============================] - 1s 671ms/step - loss: 0.1607
Epoch 3/10
1/1 [==============================] - 1s 670ms/step - loss: 0.0753
Epoch 4/10
1/1 [==============================] - 1s 868ms/step - loss: 0.0173
Epoch 5/10
1/1 [==============================] - 1s 613ms/step - loss: 0.0157
Epoch 6/10
1/1 [==============================] - 1s 615ms/step - loss: 0.0539
Epoch 7/10
1/1 [==============================] - 1s 611ms/step - loss: 0.0489
Epoch 8/10
1/1 [==============================] - 1s 616ms/step - loss: 0.0251
Epoch 9/10
1/1 [==============================] - 1s 596ms/step - loss: 0.0098
Epoch 10/10
1/1 [==============================] - 1s 611ms/step - loss: 0.0079
2/2 [==============================] - 0s 225ms/step
Traceback (most recent call last):
  File "C:/Users/xxx/source/repos/Project/main.py", line 395, in <module>
    lstm_prediction(ctsh_data)
  File "C:/Users/xxx/source/repos/Project/main.py", line 313, in lstm_prediction
    test_predict = model.predict(input_test)
  File "C:\Users\xxx\source\repos\Project\venv\lib\site-packages\keras\src\utils\traceback_utils.py", line 70, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "C:\Users\xxx\source\repos\Project\venv\lib\site-packages\keras\src\engine\training.py", line 2579, in predict
    raise ValueError(
ValueError: Unexpected result of `predict_function` (Empty batch_outputs). Please use `Model.compile(..., run_eagerly=True)`, or `tf.config.run_functions_eagerly(True)` for more information of where went wrong, or file a issue/bug to `tf.keras`.

Process finished with exit code 1

Any help is appreciated.


Solution

  • I discovered that the cause of the error was setting time_step to 100, which was far too large for my data. With roughly a year of daily prices, the 35% test split has fewer than time_step + 1 rows, so create_dataset returns empty arrays (the (0,) shapes printed in the output above), and model.predict is then called on empty input, which raises the Empty batch_outputs error. Setting time_step to a lower number (e.g. 10) fixed the issue; see the sketch below.

    As a result, I also had to lower look_back, so I chose 10 for that as well.

    Thank you everybody for your help!
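
For reference, here is a minimal sketch of why the arrays came back empty and of a guard that would have surfaced the problem earlier. It reuses the create_dataset logic from the question; the row counts (n_rows, test_size) and the guard itself are illustrative assumptions, not part of the original code.

import numpy as np

def create_dataset(dataset, time_step=1):
    # Same sliding-window logic as in the question: each input row holds
    # `time_step` consecutive values and the target is the value after them.
    input_data, output_data = [], []
    for i in range(len(dataset) - time_step - 1):
        input_data.append(dataset[i:(i + time_step), 0])
        output_data.append(dataset[i + time_step, 0])
    return np.array(input_data), np.array(output_data)

# Roughly one year of daily closes: ~250 rows, of which 35% go to the test set.
n_rows = 250                               # illustrative row count
test_size = n_rows - int(n_rows * 0.65)    # ~88 rows
test_data = np.random.rand(test_size, 1)

# With time_step = 100 the loop runs over range(88 - 100 - 1), which is empty,
# so both arrays come back with shape (0,) and model.predict() later fails.
x_test, y_test = create_dataset(test_data, time_step=100)
print(x_test.shape, y_test.shape)          # (0,) (0,)

# With time_step = 10 (the value used in the fix) there are enough rows.
x_test, y_test = create_dataset(test_data, time_step=10)
print(x_test.shape, y_test.shape)          # (77, 10) (77,)

# A simple guard before calling model.fit()/model.predict() makes the
# failure mode explicit instead of surfacing as "Empty batch_outputs".
if len(x_test) == 0:
    raise ValueError(
        f"time_step is too large for a test set of {test_size} rows; "
        "use time_step < len(test_data) - 1"
    )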