I am trying to forecast stock prices (Adj Close) using SVR. I am able to train the model on the training data, but I get an error on the test data. The training data, from 2014 to 2018, is stored in the dataframe df, and the test data, from 2019 until today, is stored in the dataframe test_df. Here is the code:
import pandas as pd
import pandas_datareader.data as web
import datetime
import numpy as np
from matplotlib import style
# Get the stock data using yahoo API:
def _index_by_date(frame):
    """Return *frame* indexed by ``Date`` with rows in chronological order.

    A frame that still carries ``Date`` as an ordinary column gets it
    promoted to the index; a frame that is already indexed by date is only
    sorted.  Calling ``set_index("Date")`` unconditionally raises ``KeyError``
    in the second case.
    """
    if "Date" in frame.columns:
        frame = frame.set_index("Date")
    return frame.sort_index()


# NOTE(review): DataReader appears to return frames already indexed by Date;
# the helper above copes with either layout -- confirm against the reader docs.

# get 2014-2018 data to train our model
start = datetime.datetime(2014, 1, 1)
end = datetime.datetime(2018, 12, 30)
df = _index_by_date(web.DataReader("TSLA", 'yahoo', start, end))

# get 2019 data to test our model on
start = datetime.datetime(2019, 1, 1)
end = datetime.date.today()
test_df = _index_by_date(web.DataReader("TSLA", 'yahoo', start, end))
# Converting dates
import matplotlib.dates as mdates

# Build a separate frame with Date as a regular column; reset_index returns
# a new frame, so df itself keeps its Date index.
dates_df = df.reset_index()
# Keep the real timestamps around for the x-axis of the prediction plots
org_dates = dates_df['Date']
# Replace each timestamp with its matplotlib date number so the model gets
# a numeric feature
dates_df['Date'] = dates_df['Date'].apply(mdates.date2num)
# Use sklearn support vector regression to predict our data:
# pyplot is not imported anywhere in the code above, so bring it in here;
# without it the plotting calls below fail with NameError.
import matplotlib.pyplot as plt
from sklearn.svm import SVR

# scikit-learn expects the features as a 2-D (n_samples, n_features) array,
# so the single date feature becomes one column ...
dates = dates_df['Date'].to_numpy().reshape(-1, 1)
# ... while the target stays 1-D; a column vector here only triggers a
# DataConversionWarning in fit().
prices = df['Adj Close'].to_numpy()

svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)
svr_rbf.fit(dates, prices)

# Draw both curves against the original timestamps so they share one
# date axis.
plt.figure(figsize=(12, 6))
plt.plot(org_dates, prices, color='black', label='Data')
plt.plot(org_dates, svr_rbf.predict(dates), color='red', label='RBF model')
plt.legend()  # the labels above are not shown without an explicit legend
plt.show()
For the training data it works fine up to here. Next, how do I forecast the test data (test_df)?
Following your convention, it should look as follows:
# pyplot is needed for the plots below; importing it here keeps this snippet
# runnable even though the code above never imports it.
import matplotlib.pyplot as plt

# Turn the test dates into the same numeric representation used for training
test_dates_df = test_df.reset_index()
# Store the original dates for plotting the predictions
test_org_dates = test_dates_df['Date']
# Convert the timestamps to matplotlib date numbers
test_dates_df['Date'] = test_dates_df['Date'].map(mdates.date2num)

# Features must be 2-D (n_samples, n_features); the target stays 1-D
test_dates = test_dates_df['Date'].to_numpy().reshape(-1, 1)
test_prices = test_df['Adj Close'].to_numpy()

# Predict on unseen test data
y_hat_test = svr_rbf.predict(test_dates)

# Visualize predictions against real values, both on the real date axis
plt.figure(figsize=(12, 6))
plt.plot(test_org_dates, test_prices, color='black', label='Data')
plt.plot(test_org_dates, y_hat_test, color='red', label='RBF model (test)')
plt.legend()  # the labels above are not shown without an explicit legend
plt.show()