I want to run a grid search on time series data. Is there a function that will search over the candidate values I have listed in "lgb_params", for example?
# Hyperparameter grid for the search. List-valued entries hold the candidate
# values to try; scalar entries are fixed settings shared by every run.
lgb_params = {
"learning_rate": [0.001, 0.01, 0.1, 0.2],
"max_depth": [3, 5, 7, 9],
"num_leaves": [5, 10, 15],
"num_boost_round": 10000,
"early_stopping_rounds": 300,
"feature_fraction": [0.2, 0.3, 0.5, 0.7, 0.8],
"verbose": 0
}
# Training set, plus a validation set built with the training set as its
# reference. X_train / y_train / X_val / y_val / cols are defined elsewhere.
lgbtrain = lgb.Dataset(data=X_train, label=y_train, feature_name=cols)
lgbval = lgb.Dataset(data=X_val, label=y_val, reference=lgbtrain, feature_name=cols)
# NOTE: lgb_params is passed to lgb.train unchanged, i.e. still containing the
# list-valued entries above. A single training run needs one value per
# parameter, so this call does not work as written; it only illustrates the
# intent. Evaluation is reported on both the training and validation sets,
# using lgbm_smape (defined elsewhere; presumably a custom SMAPE metric).
model = lgb.train(lgb_params, lgbtrain,
valid_sets=[lgbtrain, lgbval],
num_boost_round=lgb_params['num_boost_round'],
early_stopping_rounds=lgb_params['early_stopping_rounds'],
feval=lgbm_smape,
verbose_eval=100)
The code above does not work, of course, because lgb_params contains keys that hold more than one value (e.g., learning_rate, max_depth, etc.). Those are exactly the parameters I want to search over, and that is where the problem lies.
I think I have come up with a solution. It is currently running and hasn't finished yet, since it searches through a lot of values, but here is the function I wrote in case anyone needs it:
def param_search(lgb_param_dict):
    """Exhaustively grid-search four LightGBM hyperparameters.

    Trains one model for every combination of the candidate values given for
    ``learning_rate``, ``max_depth``, ``num_leaves`` and ``feature_fraction``
    and keeps the combination with the lowest validation score.

    The function relies on names defined at module level: ``lgb`` (the
    LightGBM module), the ``lgbtrain`` / ``lgbval`` datasets and the
    ``lgbm_smape`` evaluation function.

    Args:
        lgb_param_dict: Mapping that holds a sequence of candidate values
            under each of the four searched keys, plus scalar
            ``"num_boost_round"`` and ``"early_stopping_rounds"`` entries
            that are applied to every training run.

    Returns:
        A ``(min_error, best_params, best_iter)`` tuple: the lowest score
        seen, the parameters of the model that achieved it, and that model's
        best iteration. If any candidate list is empty, no model is trained
        and ``(inf, {}, inf)`` is returned.

    Raises:
        KeyError: If ``lgb_param_dict`` lacks one of the required keys.
    """
    # Local import keeps this snippet self-contained.
    from itertools import product

    searched_keys = ("learning_rate", "max_depth", "num_leaves", "feature_fraction")
    # Labels used in the progress output (only the first one is capitalised).
    labels = ("Learning_rate", "max_depth", "num_leaves", "feature_fraction")

    # Read the fixed training settings from the argument itself rather than
    # from an unrelated module-level dictionary.
    num_boost_round = lgb_param_dict["num_boost_round"]
    early_stopping_rounds = lgb_param_dict["early_stopping_rounds"]

    def _report(params):
        # Print the combination currently being evaluated.
        for label, key in zip(labels, searched_keys):
            print(label + " = " + str(params[key]))

    min_error = float("inf")
    best_params = dict()
    best_iter = float("inf")

    # product() varies the last key fastest, i.e. the same visiting order as
    # four nested loops with learning_rate outermost.
    for combo in product(*(lgb_param_dict[key] for key in searched_keys)):
        lgb_params = dict(zip(searched_keys, combo))

        print(" ")
        print("##########")
        _report(lgb_params)

        model = lgb.train(lgb_params, lgbtrain,
                          valid_sets=[lgbtrain, lgbval],
                          num_boost_round=num_boost_round,
                          early_stopping_rounds=early_stopping_rounds,
                          feval=lgbm_smape,
                          verbose_eval=500)

        # Repeat the combination after the (long) training log so each result
        # can be matched to its parameters.
        _report(lgb_params)

        # "valid_1" is the second entry of valid_sets (lgbval); "SMAPE" is
        # presumably the metric name reported by lgbm_smape - verify there.
        score = model.best_score["valid_1"]["SMAPE"]
        # Strict comparison: on ties the earliest combination is kept.
        if score < min_error:
            min_error = score
            best_params = model.params
            best_iter = model.best_iteration

    return min_error, best_params, best_iter
The print statements are there for readability. There is probably a better way to write this function, but I'll accept it as the answer if it finishes without any problems.
Edit: It worked!