I'm trying to implement this code to predict a future stock price. The code seems to work well the error raise while I run the last line of the code:
predicted_stock_price = scaler.inverse_transform(predicted_stock_price)
Below I add the entire code:
import pandas_datareader as web
from datetime import datetime
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import numpy as np
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
ticker = "AAPL"
while True:
data_inizio = str(input("data d'inizi dell'analisi (inserisci giorno mese ed anno): " ))
try:
start1 = datetime.strptime(data_inizio, '%d %m %Y')
break
except ValueError:
print('il formato inserito non è corretto scrivere la data inserendo giorno mese ed anno separati dallo spazio es. 20 01 2021')
while True:
data_fine = str(input("data di fine analisi (inserisci giorno mese ed anno): " ))
try:
end1 = datetime.strptime(data_fine, '%d %m %Y')
break
except ValueError:
print('il formato inserito non è corretto scrivere la data inserendo giorno mese ed anno separati dallo spazio es. 20 01 2021')
inizio_analisi = start1
fine_analisi = end1
dati_finanziari_ticker = web.DataReader(ticker, data_source="yahoo", start=inizio_analisi, end=fine_analisi)
periodo_analisi = dati_finanziari_ticker.shape[0]
periodo_allenamento_programma = round(periodo_analisi*0.8)
periodo_test_programma = periodo_analisi
dati_finanziari_ticker_scalati = dati_finanziari_ticker.iloc[:, 3:4].values
scaler = MinMaxScaler(feature_range=(0,1))
dati_allenamento_scalati = scaler.fit_transform(dati_finanziari_ticker_scalati)
X_allenamento = []
y_allenamento = []
for i in range(60, periodo_allenamento_programma):
X_allenamento.append(dati_finanziari_ticker_scalati[i-60:i, 0])
y_allenamento.append(dati_finanziari_ticker_scalati[i,0])
X_allenamento, y_allenamento = np.array(X_allenamento), np.array(y_allenamento)
X_allenamento = np.reshape(X_allenamento, (X_allenamento.shape[0], X_allenamento.shape[1], 1))
#LSTM
model1 = Sequential()
model1.add(LSTM(70, activation = "relu", return_sequences = True, input_shape = (X_allenamento.shape[1], 1)))
model1.add(Dropout(0.2))
model1.add(LSTM(90, activation = "relu", return_sequences = True))
model1.add(Dropout(0.3))
model1.add(LSTM(120, activation = "relu", return_sequences = True))
model1.add(Dropout(0.4))
model1.add(LSTM(150, activation = "relu", return_sequences = True))
model1.add(Dropout(0.6))
model1.add(Dense(1))
model1.summary
model1.compile(optimizer = "adam", loss = "mean_squared_error")
model1.fit(X_allenamento, y_allenamento, epochs=10, batch_size=28)
#CREAZIONE DEGLI ARRAY PER IL TEST
X_test = []
y_test = []
for i in range(periodo_allenamento_programma, periodo_test_programma):
X_test.append(dati_finanziari_ticker_scalati[i-60:i, 0])
y_test.append(dati_finanziari_ticker_scalati[i,0])
X_test, y_test = np.array(X_test), np.array(y_test)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
predicted_stock_price = model1.predict(X_test)
predicted_stock_price = scaler.inverse_transform(predicted_stock_price)
That's the entire error:
ValueError Traceback (most recent call last)
<ipython-input-224-dacdc53f9aa9> in <module>
1 #PREDIZIONE
2 predicted_stock_price = model1.predict(X_test)
----> 3 predicted_stock_price = scaler.inverse_transform(predicted_stock_price)
4
5 #nsamples, nx, ny = predicted_stock_price.shape
1 frames
/usr/local/lib/python3.7/dist-packages/sklearn/preprocessing/_data.py in inverse_transform(self, X)
524
525 X = check_array(
--> 526 X, copy=self.copy, dtype=FLOAT_DTYPES, force_all_finite="allow-nan"
527 )
528
/usr/local/lib/python3.7/dist-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator)
794 raise ValueError(
795 "Found array with dim %d. %s expected <= 2."
--> 796 % (array.ndim, estimator_name)
797 )
798
ValueError: Found array with dim 3. Estimator expected <= 2.
I've tried to reshape the array, but probably I've done it bad because it didn't work.
I beleve your scaler applies to the whole dataframe on which you build X
and Y
right ? I would make a scaler for the X values and one for the Y values if necessary in each case, so maybe operate on the numpy
data
I would also recommend you to fit on the train dataset, in order to remove any use of the test dataset in the training process.
Thus I would do:
scaler_target = MinMaxScaler(feature_range=(0,1))
Y_allamento = scaler.fit_transform(Y_allenamento)
And then:
predicted_stock_price = model1.predict(X_test)
predicted_stock_price = scaler.inverse_transform(predicted_stock_price)
Here I removed the X
scaling, but you can put it if you want