Search code examples
pythonscikit-learnxgboostpython-classxgbregressor

how to properly initialize a child class of XGBRegressor?


I want to build a quantile regressor based on XGBRegressor, the scikit-learn wrapper class for XGBoost. I have the following two versions: the second version is simply trimmed from the first one, but it no longer works.

I am wondering why I need to list every parameter of XGBRegressor in the child class's __init__. What if I just want to keep all the default parameter values except for max_depth?

(My XGBoost is of version 1.4.2.)

No.1 the full version that works as expected:

class XGBoostQuantileRegressor(XGBRegressor):
    """XGBRegressor subclass that trains with an asymmetric log-cosh objective.

    Every constructor argument except ``quant_alpha`` is forwarded to
    ``XGBRegressor.__init__`` under the same keyword. ``fit`` replaces the
    ``objective`` parameter with ``log_cosh_loss`` bound to ``quant_alpha``,
    so the ``objective`` passed to the constructor is overridden as soon as
    ``fit`` is called.
    """

    def __init__(
            self, quant_alpha, n_estimators=100, max_depth=3, base_score=0.5, gpu_id=None,
            booster='gbtree', colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1, gamma=0,
            importance_type=None, interaction_constraints=None, n_jobs=1, random_state=0,
            tree_method='auto', missing=1, objective='reg:linear', learning_rate=0.1,
            max_delta_step=0, min_child_weight=1, monotone_constraints=None, num_parallel_tree=1,
            reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=0.5, validate_parameters=False,
            verbosity=0
    ):
        """Store ``quant_alpha`` and pass all other arguments to XGBRegressor.

        Args:
            quant_alpha: Asymmetry weight used by ``log_cosh_loss``
                (presumably the target quantile in (0, 1) -- confirm with callers).
            Remaining arguments: forwarded unchanged to ``XGBRegressor.__init__``.
        """
        self.quant_alpha = quant_alpha
        super().__init__(
            # ``missing`` is forwarded like every other argument so that a
            # non-default value supplied by the caller is actually honoured.
            n_jobs=n_jobs, random_state=random_state, tree_method=tree_method, missing=missing,
            n_estimators=n_estimators, max_depth=max_depth, objective=objective,
            base_score=base_score, booster=booster, colsample_bylevel=colsample_bylevel,
            colsample_bynode=colsample_bynode, colsample_bytree=colsample_bytree, gamma=gamma,
            gpu_id=gpu_id, importance_type=importance_type, learning_rate=learning_rate,
            interaction_constraints=interaction_constraints, max_delta_step=max_delta_step,
            min_child_weight=min_child_weight, monotone_constraints=monotone_constraints,
            num_parallel_tree=num_parallel_tree, reg_alpha=reg_alpha, reg_lambda=reg_lambda,
            scale_pos_weight=scale_pos_weight, validate_parameters=validate_parameters,
            verbosity=verbosity, subsample=subsample)

    def fit(self, X, y):
        """Fit on ``(X, y)`` using the custom objective and return ``self``.

        The ``objective`` parameter is overwritten here with ``log_cosh_loss``
        bound to the current ``quant_alpha`` before delegating to the parent
        ``fit``.
        """
        super().set_params(
            objective=partial(XGBoostQuantileRegressor.log_cosh_loss, alpha=self.quant_alpha))
        super().fit(X, y)
        return self

    def predict(self, X):
        """Return the parent class's predictions for ``X``."""
        return super().predict(X)

    @staticmethod
    def log_cosh_loss(y_true, y_pred, alpha):
        """Return the ``(grad, hess)`` pair used as the training objective.

        The error ``y_pred - y_true`` is scaled by ``alpha`` where it is
        negative and by ``1 - alpha`` elsewhere; ``grad`` is ``tanh`` of the
        scaled error and ``hess`` is ``1 / cosh(scaled error) ** 2``.
        """
        err = y_pred - y_true
        # Asymmetric scaling: under-predictions and over-predictions are
        # weighted differently according to ``alpha``.
        err = np.where(err < 0, alpha * err, (1 - alpha) * err)
        grad = np.tanh(err)
        hess = 1 / np.cosh(err)**2
        return grad, hess

No. 2 the trimmed version that no longer works:

class XGBoostQuantileRegressor(XGBRegressor):
    """Trimmed XGBRegressor subclass exposing only ``quant_alpha`` and ``max_depth``.

    NOTE: the constructor accepts no other keyword arguments; anything else
    passed to it raises ``TypeError``.
    """

    def __init__(self, quant_alpha, max_depth=3):
        """Remember ``quant_alpha`` and give ``max_depth`` to XGBRegressor."""
        self.quant_alpha = quant_alpha
        super().__init__(max_depth=max_depth)

    def fit(self, X, y):
        """Install the custom objective, train on ``(X, y)`` and return ``self``."""
        # Bind the instance's alpha so the objective has the two-argument
        # (y_true, y_pred) call shape.
        bound_objective = partial(
            XGBoostQuantileRegressor.log_cosh_loss,
            alpha=self.quant_alpha,
        )
        super().set_params(objective=bound_objective)
        super().fit(X, y)
        return self

    def predict(self, X):
        """Delegate prediction for ``X`` to the parent class."""
        predictions = super().predict(X)
        return predictions

    @staticmethod
    def log_cosh_loss(y_true, y_pred, alpha):
        """Compute ``(grad, hess)`` from the asymmetrically scaled error.

        Negative errors (``y_pred < y_true``) are multiplied by ``alpha``,
        all others by ``1 - alpha``; the gradient is ``tanh`` of the result
        and the hessian is ``1 / cosh(...) ** 2``.
        """
        residual = y_pred - y_true
        is_under = residual < 0
        scaled = np.where(is_under, alpha * residual, (1 - alpha) * residual)
        gradient = np.tanh(scaled)
        hessian = 1 / np.cosh(scaled) ** 2
        return gradient, hessian

Here is the traceback:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
/temp.py in <module>
      230 z1 = xgboost_quantile_regressor(alpha=0.95)[0][1]
----> 231 z1.fit(x_trainval, y_trainval)
      232 pred_y1 = z1.predict(x_trainval)
      233 
      234 z2 = xgboost_quantile_regressor(alpha=0.05)[0][1]

/temp.py in fit(self, X, y)

~/.local/lib/python3.9/site-packages/optuna/integration/sklearn.py in fit(self, X, y, groups, **fit_params)
    873         )
    874 
--> 875         self.study_.optimize(
    876             objective, n_jobs=self.n_jobs, n_trials=self.n_trials, timeout=self.timeout
    877         )

~/.local/lib/python3.9/site-packages/optuna/study/study.py in optimize(self, func, n_trials, timeout, n_jobs, catch, callbacks, gc_after_trial, show_progress_bar)
    398             )
    399 
--> 400         _optimize(
    401             study=self,
    402             func=func,

~/.local/lib/python3.9/site-packages/optuna/study/_optimize.py in _optimize(study, func, n_trials, timeout, n_jobs, catch, callbacks, gc_after_trial, show_progress_bar)
     64     try:
     65         if n_jobs == 1:
---> 66             _optimize_sequential(
     67                 study,
     68                 func,

~/.local/lib/python3.9/site-packages/optuna/study/_optimize.py in _optimize_sequential(study, func, n_trials, timeout, catch, callbacks, gc_after_trial, reseed_sampler_rng, time_start, progress_bar)
    161 
    162         try:
--> 163             trial = _run_trial(study, func, catch)
    164         except Exception:
    165             raise

~/.local/lib/python3.9/site-packages/optuna/study/_optimize.py in _run_trial(study, func, catch)
    262 
    263     if state == TrialState.FAIL and func_err is not None and not isinstance(func_err, catch):
--> 264         raise func_err
    265     return trial
    266 

~/.local/lib/python3.9/site-packages/optuna/study/_optimize.py in _run_trial(study, func, catch)
    211 
    212     try:
--> 213         value_or_values = func(trial)
    214     except exceptions.TrialPruned as e:
    215         # TODO(mamu): Handle multi-objective cases.

~/.local/lib/python3.9/site-packages/optuna/integration/sklearn.py in __call__(self, trial)
    219     def __call__(self, trial: Trial) -> float:
    220 
--> 221         estimator = clone(self.estimator)
    222         params = self._get_params(trial)
    223 

~/.local/lib/python3.9/site-packages/sklearn/base.py in clone(estimator, safe)
     80     for name, param in new_object_params.items():
     81         new_object_params[name] = clone(param, safe=False)
---> 82     new_object = klass(**new_object_params)
     83     params_set = new_object.get_params(deep=False)
     84 

TypeError: __init__() got an unexpected keyword argument 'objective'

Solution

  • I am not an expert with scikit-learn but it seems that one of the requirements of various objects used by this framework is that they can be cloned by calling the sklearn.base.clone method. This appears to be something that the existing XGBRegressor class does, so is something your subclass of XGBRegressor must also do.

    What may help is to pass any other unexpected keyword arguments as a **kwargs parameter. In your constructor, kwargs will contain a dict of all of the other keyword parameters that weren't assigned to other constructor parameters. You can pass this dict of parameters on to the call to the superclass constructor by referring to them as **kwargs again: this will cause Python to expand them out:

    class XGBoostQuantileRegressor(XGBRegressor):
        """XGBRegressor subclass whose constructor accepts arbitrary extra keywords."""

        def __init__(self, quant_alpha, max_depth=3, **kwargs):
            """Forward ``max_depth`` and any other keywords upward; keep ``quant_alpha``."""
            # ``kwargs`` collects every keyword not named in the signature and
            # is expanded again for the superclass constructor.
            super().__init__(max_depth=max_depth, **kwargs)
            self.quant_alpha = quant_alpha

        # other methods unchanged and omitted for brevity.
    

    I have answered a question from you previously, and I will reiterate here two points I made in that answer.

    Firstly, I am not a data scientist. I have never worked with scikit-learn before, so I have not tested the code I posted above.

    Secondly, this is another situation where I believe you should prefer composition over inheritance. You have chosen to use inheritance, and you have hit a problem because of that choice. If your class did not inherit from XGBRegressor but instead had simply created an XGBRegressor and stored it in an attribute, e.g. using a line self.xgb_regressor = XGBRegressor(max_depth=max_depth), and the calls to predict and fit had called self.xgb_regressor.predict and self.xgb_regressor.fit, you wouldn't have had this problem.