I am trying to train some machine learning models to predict the price action for 4 chosen stocks from a list of NASDAQ-100 stocks.
I am very new to Python, so I've run into a few issues I have not been able to fix. The first has been while trying to use the ARIMA model. I get the following error upon executing my code:
None if faux_endog else np.any(np.isnan(self.endog))) TypeError: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''
I have already tried using dropna()
, fillna()
and isna()
to find/remove NaN or NULL values. Therefore there should be none left.
This is my code:
# Imports
import os
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import io
from PIL import Image
import statsmodels.api as sm
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_error
import math
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
# Chosen stocks from NASDAQ-100
chosen_stocks = ['CTSH', 'BKNG', 'REGN', 'MSFT']
def get_data():
# Get list of tickers
tickers = open("dataset/nasdaq_100_tickers.txt", "r")
data = tickers.read().splitlines()
# Check if the data has already been downloaded, drop NaN values
if os.path.exists('dataframe.csv'):
dataframe = pd.read_csv('dataframe.csv', index_col="Date", parse_dates=True).dropna()
# Download Close data from Yahoo Finance
data = yf.download(tickers=data, period='1y', interval='1d')['Close']
# Convert array to Pandas dataframe, drop NaN values
complete_data = data.dropna()
dataframe = pd.DataFrame(complete_data)
dataframe.drop(['GEHC'], axis=1, inplace=True) # Dropping GEHC because it contains NULL values
return dataframe
def arima_prediction(stock):
train_data, test_data = stock[3:int(len(dataframe) * 0.5)], stock[int(len(dataframe) * 0.5):]
train_arima = train_data
test_arima = test_data
history = [x for x in train_arima]
y = test_arima
predictions = list()
model = ARIMA(history, order=(1, 1, 0))
model_fit = model.fit()
forecast = model_fit.forecast()[0]
for i in range(1, len(y)):
# Predict
model = ARIMA(history, order=(1, 1, 0))
model_fit = model.fit()
forecast = model_fit.forecast()[0]
# Invert transformed prediction
# Observation
observation = y[i]
# Report performance
mean_squared = mean_squared_error(y, predictions)
print('Mean Squared Error: ' + str(mean_squared))
mean_absolute = mean_absolute_error(y, predictions)
print('Mean Absolute Error: ' + str(mean_absolute))
root_mean_squared = math.sqrt(mean_squared_error(y, predictions))
print('Root Mean Squared Error: ' + str(root_mean_squared))
dataframe = get_data()
for stock in chosen_stocks:
My dataframe looks like this:
Date ...
2022-12-15 136.500000 90.610001 ... 70.199997 117.169998
2022-12-16 134.509995 89.570000 ... 69.860001 114.209999
2022-12-19 132.369995 85.930000 ... 69.089996 112.269997
2022-12-20 132.300003 87.620003 ... 68.559998 113.540001
2022-12-21 135.449997 87.070000 ... 69.930000 112.769997
... ... ... ... ... ...
2023-11-28 190.399994 127.559998 ... 67.529999 193.850006
2023-11-29 189.369995 126.480003 ... 67.949997 199.839996
2023-11-30 189.949997 126.339996 ... 67.830002 197.529999
2023-12-01 191.240005 135.020004 ... 70.290001 198.029999
2023-12-04 188.669998 134.539993 ... 67.720001 197.919998
The full traceback is:
Traceback (most recent call last):
File "C:/Users/xxx/source/repos/Project/main.py", line 370, in <module>
File "C:/Users/xxx/source/repos/Project/main.py", line 217, in arima_prediction
model = ARIMA(history, order=(1, 1, 0))
File "C:\Users\xxx\source\repos\Project\venv\lib\site-packages\statsmodels\tsa\arima\model.py", line 158, in __init__
self._spec_arima = SARIMAXSpecification(
File "C:\Users\xxx\source\repos\Project\venv\lib\site-packages\statsmodels\tsa\arima\specification.py", line 458, in __init__
None if faux_endog else np.any(np.isnan(self.endog)))
TypeError: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''
Process finished with exit code 1
Any help is appreciated.
It turns out the problem was being caused by the stock not being passed in properly, as commenters suggested.
Therefore in order to make sure the stocks were passed in properly instead of as a string, instead of passing in the stock by:
for stock in chosen_stocks:
I've instead used:
def get_stock_data(dataframe):
get_stock_data = dataframe.iloc[:, 30]
return get_stock_data
stock_data = get_stock_data(dataframe)
Thank you to everyone for your help!