Search code examples

Currently only multi-regression, multilabel and survival objectives work with multidimensional target

I used bayes_optto tunse hper-parameter of CatBoostRegressor (from catboost) for regression and got the following error: CatBoostError: catboost/private/libs/target/data_providers.cpp:603: Currently only multi-regression, multilabel and survival objectives work with multidimensional target

Here is the code:

from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score
from catboost import Pool, CatBoostRegressor
from bayes_opt import BayesianOptimization
from bayes_opt.util import Colours
from sklearn.metrics import accuracy_score

def get_data():
    """ Preparing data ."""
    # trainx, testx, trainy, testy= train_test_split(XN, YN, test_size=0.2, random_state= 31)
    return trainx, testx, trainy, testy

def CBR_cv(iterations, learning_rate, depth, l2_leaf_reg, min_child_samples, trainx, testx, trainy, testy):

    train_pool = Pool(trainx, trainy)
    test_pool = Pool(testx) 

    model = CatBoostRegressor(iterations = iterations, learning_rate = learning_rate, depth = depth,
                              l2_leaf_reg = l2_leaf_reg, min_child_samples = min_child_samples, loss_function='RMSE' )

    # param['learning_rate'] = trial.suggest_discrete_uniform("learning_rate", 0.001, 0.02, 0.001)
    # param['depth'] = trial.suggest_int('depth', 9, 15)
    # param['l2_leaf_reg'] = trial.suggest_discrete_uniform('l2_leaf_reg', 1.0, 5.5, 0.5)
    # param['min_child_samples'] = trial.suggest_categorical('min_child_samples', [1, 4, 8, 16, 32])

    # cval = cross_val_score(model, trainx, trainy, scoring='accuracy', cv=4)
    # return cval.mean()

    ## fit the model
    ## evaluate performance
    yhat = model.predict(test_pool)
    score = r2_score(testy, yhat)
    return score

def optimize_XGB(trainx2, testx2, trainy2, testy2):
    """Apply Bayesian Optimization to Random Forest parameters."""
    def CBR_crossval(iterations, learning_rate, depth, l2_leaf_reg, min_child_samples):
        """Wrapper of RandomForest cross validation.

        Notice how we ensure n_estimators and min_samples_split are casted
        to integer before we pass them along. Moreover, to avoid max_features
        taking values outside the (0, 1) range, we also ensure it is capped
        return CBR_cv(iterations = int(iterations),
                      learning_rate =  max(min(learning_rate, 0.5), 1e-3),
                      depth = int(depth),
                      l2_leaf_reg = max(min(l2_leaf_reg, 5.5), 1.0),
                      min_child_samples = int(min_child_samples),
                      trainx = trainx2, testx= testx2, trainy = trainy2, testy= testy2)

    optimizer = BayesianOptimization(
            "iterations": (50, 500),
            "depth": (2, 25),
            "learning_rate": (0.01, 0.5),
            "l2_leaf_reg": (1.0, 5.5),
            "min_child_samples": (1, 50),

    print("Final result:", optimizer.max)

if __name__ == "__main__":
    trainx2, testx2, trainy2, testy2 = get_data()

    print("--- Optimizing XGB ---"))
    optimize_XGB(trainx2, testx2, trainy2, testy2)


  • In model = CatBoostRegressor(iterations = iterations, learning_rate = learning_rate, depth = depth,l2_leaf_reg = l2_leaf_reg, min_child_samples = min_child_samples, loss_function='RMSE' ) change the loss_function to 'MultiRMSE'