I am attempting to do a hyper-parameter optimization task on a LSTM model (purly Tensorflow) using the scikit optimize package. I am using the Bayesian optimization method using Gaussian Processes (gp_minimize) for this. The demo code provided for the function can be found through this link. When I try to run my code I keep getting the below error:
ValueError: Not all points are within the bounds of the space.
My complete code is shown below:
import skopt
from skopt import gp_minimize, forest_minimize
from skopt.space import Real, Categorical, Integer
from skopt.plots import plot_convergence
from skopt.plots import plot_objective, plot_evaluations
from skopt.utils import use_named_args
import csv
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from math import sqrt
import atexit
from time import time, strftime, localtime
from datetime import timedelta
input_size = 1
num_layers = 1
hidden1_activation = tf.nn.relu
hidden2_activation = tf.nn.relu
lstm_activation = tf.nn.relu
columns = ['Sales', 'DayOfWeek', 'SchoolHoliday', 'Promo']
features = len(columns)
fileName = None
column_min_max = None
# fileNames = ['store2_1.csv', 'store85_1.csv', 'store259_1.csv', 'store519_1.csv', 'store725_1.csv', 'store749_1.csv', 'store934_1.csv', 'store1019_1.csv']
# column_min_max_all = [[[0, 11000], [1, 7]], [[0, 17000], [1, 7]], [[0, 23000], [1, 7]], [[0, 14000], [1, 7]], [[0, 14000], [1, 7]], [[0, 15000], [1, 7]], [[0, 17000], [1, 7]], [[0, 25000], [1, 7]]]
fileNames = ['store2_1.csv']
column_min_max_all = [[[0, 11000], [1, 7]]]
num_steps = None
lstm_size = None
batch_size = None
init_learning_rate = 0.01
learning_rate_decay = None
init_epoch = None # 5
max_epoch = None # 100 or 50
hidden1_nodes = None
hidden2_nodes = None
dropout_rate= None
best_accuracy = 0.0
start = None
lstm_num_steps = Categorical(categories=[2,3,4,5,6,7,8,9,10,11,12,13,14], name ='lstm_num_steps')
size = Categorical(categories=[8,16,32,64,128], name ='size')
lstm_hidden1_nodes = Categorical(categories=[4,8,16,32,64], name= 'lstm_hidden1_nodes')
lstm_hidden2_nodes = Categorical(categories=[2,4,8,16,32],name= 'lstm_hidden2_nodes')
lstm_learning_rate_decay = Categorical(categories=[0.99,0.8,0.7], name='lstm_learning_rate_decay')
lstm_max_epoch = Categorical(categories=[60,50,100,120,150,200], name='lstm_max_epoch')
lstm_init_epoch = Categorical(categories=[5, 10, 15, 20],name='lstm_init_epoch')
lstm_batch_size = Categorical(categories=[5, 8, 16, 30, 31, 64] , name = 'lstm_batch_size')
lstm_dropout_rate = Categorical(categories=[0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9] , name = 'lstm_dropout_rate')
dimensions = [lstm_num_steps, size,lstm_hidden1_nodes, lstm_hidden2_nodes,lstm_init_epoch,lstm_max_epoch,lstm_learning_rate_decay,lstm_batch_size, lstm_dropout_rate]
default_parameters = [5,35,30,15,5,60,0.99,8,0.1]
# def log_dir_name(lstm_num_steps, size,lstm_hidden1_nodes, lstm_hidden2_nodes,lstm_learning_rate,lstm_init_epoch,lstm_max_epoch,lstm_learning_rate_decay,lstm_batch_size):
#
# # The dir-name for the TensorBoard log-dir.
# s = "./19_logs/{1}_{2}_{3}_{4}_{5}_{6}_{7}_{8}_{9}/"
#
# # Insert all the hyper-parameters in the dir-name.
# log_dir = s.format(lstm_num_steps, size,lstm_hidden1_nodes, lstm_hidden2_nodes,lstm_learning_rate,lstm_init_epoch,lstm_max_epoch,lstm_learning_rate_decay,lstm_batch_size)
#
# return log_dir
def secondsToStr(elapsed=None):
if elapsed is None:
return strftime("%Y-%m-%d %H:%M:%S", localtime())
else:
return str(timedelta(seconds=elapsed))
def log(s, elapsed=None):
line = "="*40
print(line)
print(secondsToStr(), '-', s)
if elapsed:
print("Elapsed time:", elapsed)
print(line)
print()
def endlog():
end = time()
elapsed = end-start
log("End Program", secondsToStr(elapsed))
def generate_batches(train_X, train_y, batch_size):
num_batches = int(len(train_X)) // batch_size
if batch_size * num_batches < len(train_X):
num_batches += 1
batch_indices = range(num_batches)
for j in batch_indices:
batch_X = train_X[j * batch_size: (j + 1) * batch_size]
batch_y = train_y[j * batch_size: (j + 1) * batch_size]
# assert set(map(len, batch_X)) == {num_steps}
yield batch_X, batch_y
def segmentation(data):
seq = [price for tup in data[columns].values for price in tup]
seq = np.array(seq)
# split into items of features
seq = [np.array(seq[i * features: (i + 1) * features])
for i in range(len(seq) // features)]
# split into groups of num_steps
X = np.array([seq[i: i + num_steps] for i in range(len(seq) - num_steps)])
y = np.array([seq[i + num_steps] for i in range(len(seq) - num_steps)])
# get only sales value
y = [[y[i][0]] for i in range(len(y))]
y = np.asarray(y)
return X, y
def scale(data):
for i in range (len(column_min_max)):
data[columns[i]] = (data[columns[i]] - column_min_max[i][0]) / ((column_min_max[i][1]) - (column_min_max[i][0]))
return data
def rescle(test_pred):
prediction = [(pred * (column_min_max[0][1] - column_min_max[0][0])) + column_min_max[0][0] for pred in test_pred]
return prediction
def pre_process():
store_data = pd.read_csv(fileName)
# sftp://[email protected]/home/wso2/suleka/salesPred/store2_1.csv
store_data = store_data.drop(store_data[(store_data.Open == 0) & (store_data.Sales == 0)].index)
#
# store_data = store_data.drop(store_data[(store_data.Open != 0) & (store_data.Sales == 0)].index)
# ---for segmenting original data --------------------------------
original_data = store_data.copy()
## train_size = int(len(store_data) * (1.0 - test_ratio))
validation_len = len(store_data[(store_data.Month == 6) & (store_data.Year == 2015)].index)
test_len = len(store_data[(store_data.Month == 7) & (store_data.Year == 2015)].index)
train_size = int(len(store_data) - (validation_len+test_len))
train_data = store_data[:train_size]
validation_data = store_data[(train_size-num_steps): validation_len+train_size]
test_data = store_data[((validation_len+train_size) - num_steps): ]
original_val_data = validation_data.copy()
original_test_data = test_data.copy()
# -------------- processing train data---------------------------------------
scaled_train_data = scale(train_data)
train_X, train_y = segmentation(scaled_train_data)
# -------------- processing validation data---------------------------------------
scaled_validation_data = scale(validation_data)
val_X, val_y = segmentation(scaled_validation_data)
# -------------- processing test data---------------------------------------
scaled_test_data = scale(test_data)
test_X, test_y = segmentation(scaled_test_data)
# ----segmenting original validation data-----------------------------------------------
nonescaled_val_X, nonescaled_val_y = segmentation(original_val_data)
# ----segmenting original test data-----------------------------------------------
nonescaled_test_X, nonescaled_test_y = segmentation(original_test_data)
return train_X, train_y, test_X, test_y, val_X, val_y, nonescaled_test_y,nonescaled_val_y
def setupRNN(inputs):
cell = tf.contrib.rnn.LSTMCell(lstm_size, state_is_tuple=True, activation=lstm_activation)
val1, _ = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)
val = tf.transpose(val1, [1, 0, 2])
last = tf.gather(val, int(val.get_shape()[0]) - 1, name="last_lstm_output")
# hidden layer
hidden1 = tf.layers.dense(last, units=hidden1_nodes, activation=hidden2_activation)
hidden2 = tf.layers.dense(hidden1, units=hidden2_nodes, activation=hidden1_activation)
dropout = tf.layers.dropout(hidden2, rate=dropout_rate, training=True)
weight = tf.Variable(tf.truncated_normal([hidden2_nodes, input_size]))
bias = tf.Variable(tf.constant(0.1, shape=[input_size]))
prediction = tf.matmul(dropout, weight) + bias
return prediction
# saver = tf.train.Saver()
# saver.save(sess, "checkpoints_sales/sales_pred.ckpt")
@use_named_args(dimensions=dimensions)
def fitness(lstm_num_steps, size,lstm_hidden1_nodes,lstm_hidden2_nodes,lstm_init_epoch,lstm_max_epoch,
lstm_learning_rate_decay,lstm_batch_size,lstm_dropout_rate):
global num_steps, lstm_size, hidden2_nodes, hidden2_activation, hidden1_activation, hidden1_nodes, lstm_activation, init_epoch, max_epoch, learning_rate_decay, dropout_rate
num_steps = lstm_num_steps
lstm_size = size
batch_size = lstm_batch_size
learning_rate_decay = lstm_learning_rate_decay
init_epoch = lstm_init_epoch
max_epoch = lstm_max_epoch
hidden1_nodes = lstm_hidden1_nodes
hidden2_nodes = lstm_hidden2_nodes
dropout_rate = lstm_dropout_rate
# log_dir = log_dir_name(lstm_num_steps, size,lstm_hidden1_nodes,lstm_hidden2_nodes,lstm_learning_rate,lstm_init_epoch,lstm_max_epoch,
# lstm_learning_rate_decay,lstm_batch_size)
train_X, train_y, test_X, test_y, val_X, val_y, nonescaled_test_y, nonescaled_val_y = pre_process()
inputs = tf.placeholder(tf.float32, [None, num_steps, features], name="inputs")
targets = tf.placeholder(tf.float32, [None, input_size], name="targets")
learning_rate = tf.placeholder(tf.float32, None, name="learning_rate")
prediction = setupRNN(inputs)
with tf.name_scope('loss'):
model_loss = tf.losses.mean_squared_error(targets, prediction)
with tf.name_scope('adam_optimizer'):
train_step = tf.train.AdamOptimizer(learning_rate).minimize(model_loss)
train_step = train_step
# with tf.name_scope('accuracy'):
# correct_prediction = tf.sqrt(tf.losses.mean_squared_error(prediction, targets))
#
# accuracy = correct_prediction
sess = tf.Session()
sess.run(tf.global_variables_initializer())
learning_rates_to_use = [
init_learning_rate * (
learning_rate_decay ** max(float(i + 1 - init_epoch), 0.0)
) for i in range(max_epoch)]
for epoch_step in range(max_epoch):
current_lr = learning_rates_to_use[epoch_step]
for batch_X, batch_y in generate_batches(train_X, train_y, batch_size):
train_data_feed = {
inputs: batch_X,
targets: batch_y,
learning_rate: current_lr,
}
sess.run(train_step, train_data_feed)
val_data_feed = {
inputs: val_X,
targets: val_y,
learning_rate: 0.0,
}
pred = sess.run(prediction, val_data_feed)
pred_vals = rescle(pred)
pred_vals = np.array(pred_vals)
pred_vals = pred_vals.flatten()
pred_vals = pred_vals.tolist()
nonescaled_y = nonescaled_val_y.flatten()
nonescaled_y = nonescaled_y.tolist()
val_accuracy = sqrt(mean_squared_error(nonescaled_y, pred_vals))
global best_accuracy
if val_accuracy < best_accuracy:
# Save the new model to harddisk.
saver = tf.train.Saver()
saver.save(sess, "checkpoints_sales/sales_pred.ckpt")
with open("best_configs.csv", "a") as f:
writer = csv.writer(f)
writer.writerows(zip([fileName], [num_steps], [lstm_size], [hidden2_nodes], [hidden2_activation], [hidden1_activation], [hidden1_nodes], [lstm_size], [lstm_activation], [init_epoch], [max_epoch], [learning_rate_decay], [dropout_rate],[val_accuracy]))
# Update the classification accuracy.
best_accuracy = val_accuracy
# Clear the Keras session, otherwise it will keep adding new
# models to the same TensorFlow graph each time we create
# a model with a different set of hyper-parameters.
# sess.clear_session()
sess.close()
tf.reset_default_graph()
# NOTE: Scikit-optimize does minimization so it tries to
# find a set of hyper-parameters with the LOWEST fitness-value.
# Because we are interested in the HIGHEST classification
# accuracy, we need to negate this number so it can be minimized.
return val_accuracy
if __name__ == '__main__':
start = time()
for i in range(len(fileNames)):
fileName = '{}{}'.format('home/suleka/Documents/sales_prediction/', fileNames[i])
#/home/suleka/Documents/sales_prediction/
column_min_max = column_min_max_all[i]
#Bayesian optimization using Gaussian Processes.
#acq_func -> https://arxiv.org/pdf/1807.02811.pdf
search_result = gp_minimize(func=fitness,
dimensions=dimensions,
acq_func='EI', # Expected Improvement.
n_calls=40,
x0=default_parameters)
atexit.register(endlog)
log("Start Program")
Shown below is the complete stack trace:
/home/wso2/anaconda3/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`. from ._conv import register_converters as _register_converters auto_LSTM_skopt.py:138: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy data[columns[i]] = (data[columns[i]] - column_min_max[i][0]) / ((column_min_max[i][1]) - (column_min_max[i][0])) /home/wso2/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/gradients_impl.py:100: UserWarning: Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory. "Converting sparse IndexedSlices to a dense Tensor of unknown shape. " Traceback (most recent call last): File "auto_LSTM_skopt.py", line 365, in <module>
x0=default_parameters) File "/home/wso2/anaconda3/lib/python3.6/site-packages/skopt/optimizer/gp.py", line 228, in gp_minimize
callback=callback, n_jobs=n_jobs) File "/home/wso2/anaconda3/lib/python3.6/site-packages/skopt/optimizer/base.py", line 240, in base_minimize
result = optimizer.tell(x0, y0) File "/home/wso2/anaconda3/lib/python3.6/site-packages/skopt/optimizer/optimizer.py", line 432, in tell
check_x_in_space(x, self.space) File "/home/wso2/anaconda3/lib/python3.6/site-packages/skopt/utils.py", line 186, in check_x_in_space
raise ValueError("Not all points are within the bounds of"
ValueError: Not all points are within the bounds of the space.
Issue is with your size dimension. All values in default_parameters must be in the lists of the parameter dimensions to be optimized, if not skopt
throws the Not all points are within the bounds of the space error.
You currently have: size = Categorical(categories=[8,16,32,64,128], name ='size')
In your default parameters: default_parameters = [5,35,30,15,5,60,0.99,8,0.1]
the second item (representing 'size') has the value of 35, which is not part of the size parameters to search.
FIX 1. Include 35 in size space:
size = Categorical(categories=[8,16,32,35,64,128], name ='size')
FIX 2. Change 35 to '32' in default_parameters:
default_parameters = [5,32,30,15,5,60,0.99,8,0.1]
Use any of the fixes above and your code will run like a charm :)