I've gotten to the end of Sentdex's tutorial series on RNNs and cryptocurrency. For the life of me I can't figure out where I've gone wrong. My best guess is that there is something wrong with how I'm saving the data, but no matter what I try I keep getting the same ValueError on the line "callbacks=[tensorboard, checkpoint]". Any help would be greatly appreciated.
Code:
import pandas as pd
import os
from sklearn import preprocessing
from collections import deque
import random
import numpy as np
import time
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint
SEQ_LEN = 60
FUTURE_PERIOD_PREDICT = 3
RATIO_TO_PREDICT = "LTC-USD"
EPOCHS = 10
BATCH_SIZE = 64
NAME = f"{RATIO_TO_PREDICT}-{SEQ_LEN}-SEQ-{FUTURE_PERIOD_PREDICT}-PRED-{int(time.time())}"
def classify(current, future):
    if float(future) > float(current):
        return 1
    else:
        return 0

def preprocess_df(df):
    df = df.drop('future', 1)

    for col in df.columns:
        if col != "target":
            df[col] = df[col].pct_change()
            df.dropna(inplace=True)
            df[col] = preprocessing.scale(df[col].values)

    df.dropna(inplace=True)

    sequential_data = []
    prev_days = deque(maxlen=SEQ_LEN)

    for i in df.values:
        prev_days.append([n for n in i[:-1]])
        if len(prev_days) == SEQ_LEN:
            sequential_data.append([np.array(prev_days), i[-1]])

    random.shuffle(sequential_data)

    buys = []
    sells = []

    for seq, target in sequential_data:
        if target == 0:
            sells.append([seq, target])
        elif target == 1:
            buys.append([seq, target])

    random.shuffle(buys)
    random.shuffle(sells)

    lower = min(len(buys), len(sells))

    buys = buys[:lower]
    sells = sells[:lower]

    sequential_data = buys + sells
    random.shuffle(sequential_data)

    X = []
    y = []

    for seq, target in sequential_data:
        X.append(seq)
        y.append(target)

    return np.array(X), y
main_df = pd.DataFrame()
ratios = ["BTC-USD", "LTC-USD", "BCH-USD", "ETH-USD"]
for ratio in ratios:
    ratio = ratio.split('.csv')[0]
    print(ratio)
    dataset = f'C:/crypto_data/{ratio}.csv'  # get the full path to the file.
    df = pd.read_csv(dataset, names=['time', 'low', 'high', 'open', 'close', 'volume'])
    df.rename(columns={"close": f"{ratio}_close", "volume": f"{ratio}_volume"}, inplace=True)
    df.set_index('time', inplace=True)
    df = df[[f"{ratio}_close", f"{ratio}_volume"]]

    if len(main_df) == 0:
        main_df = df
    else:
        main_df = main_df.join(df)
main_df['future'] = main_df[f'{RATIO_TO_PREDICT}_close'].shift(-FUTURE_PERIOD_PREDICT)
main_df['target'] = list(map(classify, main_df[f'{RATIO_TO_PREDICT}_close'], main_df['future']))
main_df.dropna(inplace=True)
times = sorted(main_df.index.values)
last_5pct = sorted(main_df.index.values)[-int(0.05*len(times))]
validation_main_df = main_df[(main_df.index >= last_5pct)]
main_df = main_df[(main_df.index < last_5pct)]
train_x, train_y = preprocess_df(main_df)
validation_x, validation_y = preprocess_df(validation_main_df)
print(f"train data: {len(train_x)} validation: {len(validation_x)}")
print(f"Dont buys: {train_y.count(0)}, buys: {train_y.count(1)}")
print(f"VALIDATION Dont buys: {validation_y.count(0)}, buys: {validation_y.count(1)}")
model = Sequential()
model.add(LSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())
model.add(LSTM(128))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(2, activation='softmax'))
opt = tf.keras.optimizers.Adam(lr=0.001, decay=1e-6)
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy']
)
tensorboard = TensorBoard(log_dir="C:/logs/{}".format(NAME))
filepath = "RNN_Final-{epoch:02d}-{val_accuracy:.3f}" # unique file name that will include the epoch and the validation acc for that epoch
checkpoint = ModelCheckpoint("C:/models/{}.model".format(filepath), monitor='val_accuracy', verbose=1, save_best_only=True, mode='max') # saves only the best ones
history = model.fit(
    train_x, train_y,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(validation_x, validation_y),
    callbacks=[tensorboard, checkpoint],
)
model.save("C:/models/{}".format(NAME))
Error:
ValueError Traceback (most recent call last)
<ipython-input-118-2583585ee356> in <module>
152 epochs=EPOCHS,
153 validation_data=(validation_x, validation_y),
--> 154 callbacks=[tensorboard, checkpoint],
155 )
156
~\anaconda3\envs\tensorflow\lib\site-packages\tensorflow_core\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
817 max_queue_size=max_queue_size,
818 workers=workers,
--> 819 use_multiprocessing=use_multiprocessing)
820
821 def evaluate(self,
~\anaconda3\envs\tensorflow\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
233 max_queue_size=max_queue_size,
234 workers=workers,
--> 235 use_multiprocessing=use_multiprocessing)
236
237 total_samples = _get_total_number_of_samples(training_data_adapter)
~\anaconda3\envs\tensorflow\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in _process_training_inputs(model, x, y, batch_size, epochs, sample_weights, class_weights, steps_per_epoch, validation_split, validation_data, validation_steps, shuffle, distribution_strategy, max_queue_size, workers, use_multiprocessing)
531 'at same time.')
532
--> 533 adapter_cls = data_adapter.select_data_adapter(x, y)
534
535 # Handle validation_split, we want to split the data and get the training
~\anaconda3\envs\tensorflow\lib\site-packages\tensorflow_core\python\keras\engine\data_adapter.py in select_data_adapter(x, y)
996 "Failed to find data adapter that can handle "
997 "input: {}, {}".format(
--> 998 _type_name(x), _type_name(y)))
999 elif len(adapter_cls) > 1:
1000 raise RuntimeError(
ValueError: Failed to find data adapter that can handle input: <class 'numpy.ndarray'>, (<class 'list'> containing values of types {"<class 'numpy.float64'>"})
EDIT:
I found the Sentdex tutorial in question. The page itself doesn't say how old it is, but the accompanying YouTube video is about two years old, so this code may have worked with an older version of TensorFlow.
The error shows a problem with a data type:

ValueError: Failed to find data adapter that can handle input:
<class 'numpy.ndarray'>, (<class 'list'> containing values of types {"<class 'numpy.float64'>"})

reported for the line

--> 154 callbacks=[tensorboard, checkpoint],

which is part of the model.fit() call.
So I took all the variables used in model.fit() and checked their types:
print('tensorboard:', type(tensorboard))
print('checkpoint:', type(checkpoint))
print('train_x:', type(train_x))
print('train_y:', type(train_y))
print('validation_x:', type(validation_x))
print('validation_y:', type(validation_y))
which gives
tensorboard: <class 'tensorflow.python.keras.callbacks.TensorBoard'>
checkpoint: <class 'tensorflow.python.keras.callbacks.ModelCheckpoint'>
train_x: <class 'numpy.ndarray'>
train_y: <class 'list'>
validation_x: <class 'numpy.ndarray'>
validation_y: <class 'list'>
Here you can see the two types mentioned in the error: <class 'numpy.ndarray'> for train_x and validation_x, and <class 'list'> for train_y and validation_y.
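Checking the element type as well (using the same variables as above) shows where the numpy.float64 in the message comes from:

print(type(train_y[0]))  # <class 'numpy.float64'>

So the data adapter is rejecting a plain Python list whose elements are numpy scalars.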
Digging around the internet, I found in the TensorFlow tutorial "Training and evaluation with the built-in methods" these two lines just before model.fit():
y_train = y_train.astype("float32")
y_test = y_test.astype("float32")
which means the y values have to be a numpy array, but in your code train_y is a plain Python list.
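To double-check this, here is a minimal sketch that reproduces the error. It assumes a TensorFlow 2.1-era install like the one in your traceback (newer versions may convert the list automatically): x as a numpy array together with y as a plain list of numpy.float64 triggers the same "Failed to find data adapter" message.

import numpy as np
import tensorflow as tf

x = np.random.random((10, 4)).astype("float32")            # numpy.ndarray, like train_x
y = [np.float64(v) for v in np.random.randint(0, 2, 10)]   # list of numpy.float64, like train_y

model = tf.keras.Sequential([tf.keras.layers.Dense(2, activation='softmax')])
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam')

# model.fit(x, y)          # ValueError: Failed to find data adapter that can handle input: ...
model.fit(x, np.array(y))  # works once y is a numpy array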
Finally, the code works when model.fit() gets np.array(train_y) instead of train_y:
history = model.fit(
    train_x,
    np.array(train_y),  # <--- instead of `train_y`
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(validation_x, validation_y),
    callbacks=[tensorboard, checkpoint],
)
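As a side note, a sketch of an alternative (not what the tutorial does): instead of converting at the call site, you could convert once inside preprocess_df, so that both train_y and validation_y come back as numpy arrays:

    return np.array(X), np.array(y)  # last line of preprocess_df(), instead of `return np.array(X), y`

If you go that way, the train_y.count(0) calls in your print statements would stop working, because numpy arrays have no .count() method; you would count with something like (train_y == 0).sum() instead.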