I used bayes_opt to tune the hyper-parameters of CatBoostRegressor (from catboost) for regression and got the following error: CatBoostError: catboost/private/libs/target/data_providers.cpp:603: Currently only multi-regression, multilabel and survival objectives work with multidimensional target
Here is the code:
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score
from catboost import Pool, CatBoostRegressor
from bayes_opt import BayesianOptimization
from bayes_opt.util import Colours
from sklearn.metrics import accuracy_score
def get_data():
    """Return the train/test splits as ``(trainx, testx, trainy, testy)``.

    Nothing is computed here: the four names are read from the enclosing
    (module) scope, so they must already exist when this function is called.
    """
    # NOTE(review): the split below is commented out, so the four names are
    # presumably produced elsewhere (e.g. in an earlier notebook cell) - confirm.
    # trainx, testx, trainy, testy= train_test_split(XN, YN, test_size=0.2, random_state= 31)
    return (trainx, testx, trainy, testy)
def CBR_cv(iterations, learning_rate, depth, l2_leaf_reg, min_child_samples, trainx, testx, trainy, testy):
    """Fit a CatBoostRegressor on the training split and score it on the test split.

    Args:
        iterations: Number of boosting iterations passed to CatBoostRegressor.
        learning_rate: Learning rate passed to CatBoostRegressor.
        depth: Tree depth passed to CatBoostRegressor.
        l2_leaf_reg: L2 regularisation coefficient passed to CatBoostRegressor.
        min_child_samples: Minimum samples per leaf passed to CatBoostRegressor.
        trainx: Training features.
        testx: Test features.
        trainy: Training target(s); may have one or several columns.
        testy: Test target(s) used for scoring.

    Returns:
        The R^2 score of the model's predictions on ``testx`` against ``testy``.
    """
    # Imported locally: the file's top-level imports do not provide these names
    # (only accuracy_score is imported from sklearn.metrics).
    import numpy as np
    from sklearn.metrics import r2_score

    # 'RMSE' only accepts a one-dimensional target; CatBoost raises
    # "only multi-regression ... objectives work with multidimensional target"
    # otherwise. Switch to the multi-regression objective when the target has
    # more than one column.
    target_shape = np.shape(trainy)
    is_multi_target = len(target_shape) > 1 and target_shape[1] > 1
    loss_function = "MultiRMSE" if is_multi_target else "RMSE"

    train_pool = Pool(trainx, trainy)
    test_pool = Pool(testx)

    model = CatBoostRegressor(
        iterations=iterations,
        learning_rate=learning_rate,
        depth=depth,
        l2_leaf_reg=l2_leaf_reg,
        min_child_samples=min_child_samples,
        loss_function=loss_function,
    )

    # Fit on the training pool, then evaluate on the held-out features.
    model.fit(train_pool)
    yhat = model.predict(test_pool)
    return r2_score(testy, yhat)
def optimize_XGB(trainx2, testx2, trainy2, testy2):
    """Apply Bayesian Optimization to CatBoostRegressor hyper-parameters.

    Despite the function name, the model being tuned is the CatBoostRegressor
    built inside ``CBR_cv``; the objective maximised is the value ``CBR_cv``
    returns.

    Args:
        trainx2: Training features forwarded to ``CBR_cv`` as ``trainx``.
        testx2: Test features forwarded to ``CBR_cv`` as ``testx``.
        trainy2: Training target(s) forwarded to ``CBR_cv`` as ``trainy``.
        testy2: Test target(s) forwarded to ``CBR_cv`` as ``testy``.

    Returns:
        None. The best result found is printed.
    """
    # CatBoost supports tree depths up to 16; larger values are rejected at
    # fit time, so both the search bounds and the clamp below stop there.
    max_depth = 16

    def CBR_crossval(iterations, learning_rate, depth, l2_leaf_reg, min_child_samples):
        """Adapt the optimizer's float-valued suggestions for ``CBR_cv``.

        ``iterations``, ``depth`` and ``min_child_samples`` are cast to int
        because the optimizer proposes floats. ``learning_rate`` and
        ``l2_leaf_reg`` are clamped to [1e-3, 0.5] and [1.0, 5.5] respectively,
        and ``depth`` is capped at ``max_depth``.
        """
        return CBR_cv(
            iterations=int(iterations),
            learning_rate=max(min(learning_rate, 0.5), 1e-3),
            depth=min(int(depth), max_depth),
            l2_leaf_reg=max(min(l2_leaf_reg, 5.5), 1.0),
            min_child_samples=int(min_child_samples),
            trainx=trainx2,
            testx=testx2,
            trainy=trainy2,
            testy=testy2,
        )

    optimizer = BayesianOptimization(
        f=CBR_crossval,
        pbounds={
            "iterations": (50, 500),
            "depth": (2, max_depth),
            "learning_rate": (0.01, 0.5),
            "l2_leaf_reg": (1.0, 5.5),
            "min_child_samples": (1, 50),
        },
        random_state=1234,
        verbose=2,
    )
    optimizer.maximize(n_iter=1000)
    print("Final result:", optimizer.max)
if __name__ == "__main__":
    # Script entry point: fetch the four splits, print a banner, run the search.
    trainx2, testx2, trainy2, testy2 = get_data()
    banner = Colours.green("--- Optimizing XGB ---")
    print(banner)
    optimize_XGB(trainx2=trainx2, testx2=testx2, trainy2=trainy2, testy2=testy2)
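In the line model = CatBoostRegressor(iterations = iterations, learning_rate = learning_rate, depth = depth,l2_leaf_reg = l2_leaf_reg, min_child_samples = min_child_samples, loss_function='RMSE' ),
change the loss_function from 'RMSE' to 'MultiRMSE'.
As the error message says, only multi-regression (and multilabel or survival) objectives accept a multidimensional target; 'RMSE' expects a single target column, whereas 'MultiRMSE' is the multi-regression objective.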