Search code examples
machine-learning scikit-learn hyperparameters lightgbm optuna

Optuna Light GBM classifier : All estimators failed to fit


I am currently working on the "Bank Scoring Case" Kaggle competition (https://www.kaggle.com/competitions/bank-scoring-case). I am trying to optimize the hyperparameters of my LightGBM classifier with Optuna, using a custom score to reduce false negatives. Unfortunately, I get a "NotFittedError: All estimators failed to fit" error whenever I use Optuna, and I cannot figure out why. Any help is much appreciated.

import optuna
from optuna.distributions import IntDistribution as IntUniDist
from optuna.distributions import UniformDistribution as UniDist
from optuna.distributions import LogUniformDistribution as LogUniDist
from optuna.distributions import CategoricalDistribution as CatDist
from lightgbm import LGBMClassifier
from sklearn.pipeline import Pipeline
from optuna.distributions import FloatDistribution as fl

# Standalone call on a log-uniform distribution; its return value is neither
# stored nor used by the rest of this snippet.
LogUniDist(1e-5, 1e0).to_internal_repr(1e-4)


# Search space handed to OptunaSearchCV. Keys use the ``clf__`` prefix, which
# matches the pipeline step registered under the name 'clf'.
# NOTE(review): every entry is a FloatDistribution, so n_estimators,
# num_leaves and max_depth are sampled as non-integer floats (the failed-trial
# log shows e.g. n_estimators=1152.99...).
param_distributions = { 
                       'clf__n_estimators': fl(10, 2000),
                       'clf__num_leaves': fl(10, 100),
                       'clf__reg_alpha': fl(1e-5, 1e0),
                       'clf__reg_lambda':  fl(1e-3, 1e0),
                       'clf__learning_rate': fl(0.01,0.1),
                       'clf__max_depth': fl(-1,20),
                        }



# LightGBM classifier with a fixed random seed; the class_weight option is
# left commented out.
model=LGBMClassifier(random_state=8)#,class_weight='balanced'
# Single-step pipeline: the classifier is registered as 'clf'. The
# preprocessing step ('prepro') is commented out, so data reaches the
# classifier unchanged by this pipeline.
model_optuna=Pipeline(steps=[#('prepro',preprocessor_ord),
                                 ('clf',model)])

from sklearn.metrics import confusion_matrix
from sklearn.metrics import fbeta_score, make_scorer

def customScore(y_test, y_pred, fn_weight=10, fp_weight=1):
    """Return a misclassification cost that penalises false negatives most.

    The cost is ``fn_weight * FN + fp_weight * FP``, where FN and FP are the
    false-negative and false-positive counts taken from the confusion matrix.
    With the defaults a false negative costs ten times a false positive.
    Because this is a cost, smaller values mean a better model.

    Args:
        y_test: True class labels.
        y_pred: Predicted class labels (not probabilities).
        fn_weight: Cost applied to each false negative.
        fp_weight: Cost applied to each false positive.

    Returns:
        The weighted error count.

    Raises:
        ValueError: If the confusion matrix is not 2x2 (the four-way
            unpacking below needs exactly two distinct labels across
            ``y_test`` and ``y_pred``).
    """
    # ravel() flattens the 2x2 matrix row by row: TN, FP, FN, TP.
    _, fp, fn, _ = confusion_matrix(y_test, y_pred).ravel()
    return fn_weight * fn + fp_weight * fp

#scorer = make_scorer(customScore ,greater_is_better = False)   
# create our own scorer with make_scorer



# Hyperparameter search over the pipeline using Optuna's scikit-learn wrapper.
opt_search_hp = optuna.integration.OptunaSearchCV(
                                                  model_optuna,
                                                  param_distributions,
                                                  # No trial cap is set; the timeout below is the only
                                                  # limit configured here.
                                                  n_trials=None,
                                                  verbose=3,
                                                  refit=True,
                                                  timeout=1800,
                                                  # NOTE(review): metric_custom_perte is not defined in the
                                                  # code shown here (the function shown is customScore).
                                                  # needs_proba=True presumably feeds probabilities to the
                                                  # metric — confirm the metric accepts them.
                                                  scoring= make_scorer(metric_custom_perte,needs_proba=True), 
                                                  cv=4,
                                                  return_train_score=False
                                                  )


# Run the search; x_train and y_train are defined outside this snippet.
opt_search_hp.fit(x_train,y_train)

The error I get is as follows:

[I 2023-02-20 16:17:29,106] A new study created in memory with name: no-name-55879412-2d54-486c-af2b-3c4d74911d05
[I 2023-02-20 16:17:29,108] Searching the best hyperparameters using 128290 samples...
[W 2023-02-20 16:17:33,370] Trial 0 failed with parameters: {'clf__n_estimators': 1152.9940273025156, 'clf__num_leaves': 78.72037192055771, 'clf__reg_alpha': 0.16664156644372716, 'clf__reg_lambda': 0.7704390387651396, 'clf__learning_rate': 0.0994775554864861, 'clf__max_depth': 2.3794054350476657} because of the following error: NotFittedError('All estimators failed to fit').
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.7/site-packages/optuna/study/_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "/opt/anaconda3/lib/python3.7/site-packages/optuna/integration/sklearn.py", line 239, in __call__
    scoring=self.scoring,
  File "/opt/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_validation.py", line 292, in cross_validate
    _insert_error_scores(results, error_score)
  File "/opt/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_validation.py", line 331, in _insert_error_scores
    raise NotFittedError("All estimators failed to fit")
sklearn.exceptions.NotFittedError: All estimators failed to fit
[W 2023-02-20 16:17:33,376] Trial 0 failed with value None.
---------------------------------------------------------------------------
NotFittedError                            Traceback (most recent call last)
<ipython-input-113-2e14a5b0857d> in <module>
----> 1 opt_search_hp.fit(x_train,y_train)

/opt/anaconda3/lib/python3.7/site-packages/optuna/integration/sklearn.py in fit(self, X, y, groups, **fit_params)
    903             n_trials=self.n_trials,
    904             timeout=self.timeout,
--> 905             callbacks=self.callbacks,
    906         )
    907 

/opt/anaconda3/lib/python3.7/site-packages/optuna/study/study.py in optimize(self, func, n_trials, timeout, n_jobs, catch, callbacks, gc_after_trial, show_progress_bar)
    432             callbacks=callbacks,
    433             gc_after_trial=gc_after_trial,
--> 434             show_progress_bar=show_progress_bar,
    435         )
    436 

/opt/anaconda3/lib/python3.7/site-packages/optuna/study/_optimize.py in _optimize(study, func, n_trials, timeout, n_jobs, catch, callbacks, gc_after_trial, show_progress_bar)
     74                 reseed_sampler_rng=False,
     75                 time_start=None,
---> 76                 progress_bar=progress_bar,
     77             )
     78         else:

/opt/anaconda3/lib/python3.7/site-packages/optuna/study/_optimize.py in _optimize_sequential(study, func, n_trials, timeout, catch, callbacks, gc_after_trial, reseed_sampler_rng, time_start, progress_bar)
    161 
    162         try:
--> 163             frozen_trial = _run_trial(study, func, catch)
    164         finally:
    165             # The following line mitigates memory problems that can be occurred in some

/opt/anaconda3/lib/python3.7/site-packages/optuna/study/_optimize.py in _run_trial(study, func, catch)
    249         and not isinstance(func_err, catch)
    250     ):
--> 251         raise func_err
    252     return frozen_trial
    253 

/opt/anaconda3/lib/python3.7/site-packages/optuna/study/_optimize.py in _run_trial(study, func, catch)
    198     with get_heartbeat_thread(trial._trial_id, study._storage):
    199         try:
--> 200             value_or_values = func(trial)
    201         except exceptions.TrialPruned as e:
    202             # TODO(mamu): Handle multi-objective cases.

/opt/anaconda3/lib/python3.7/site-packages/optuna/integration/sklearn.py in __call__(self, trial)
    237                 groups=self.groups,
    238                 return_train_score=self.return_train_score,
--> 239                 scoring=self.scoring,
    240             )
    241 

/opt/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_validation.py in cross_validate(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score, return_estimator, error_score)
    290     # the correct key.
    291     if callable(scoring):
--> 292         _insert_error_scores(results, error_score)
    293 
    294     results = _aggregate_score_dicts(results)

/opt/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_validation.py in _insert_error_scores(results, error_score)
    329 
    330     if successful_score is None:
--> 331         raise NotFittedError("All estimators failed to fit")
    332 
    333     if isinstance(successful_score, dict):

# NotFittedError: All estimators failed to fit


Solution

  • Some of your parameters (n_estimators, num_leaves and max_depth) must be integers but are defined with a float distribution, so every fit fails. Use IntDistribution for them instead:

    from optuna.distributions import IntDistribution as intd
    
    # Corrected search space: the integer-valued hyperparameters
    # (n_estimators, num_leaves, max_depth) use IntDistribution, while the
    # continuous ones keep FloatDistribution.
    param_distributions = { 
                       'clf__n_estimators': intd(10, 2000),
                       'clf__num_leaves': intd(10, 100),
                       'clf__reg_alpha': fl(1e-5, 1e0),
                       'clf__reg_lambda':  fl(1e-3, 1e0),
                       'clf__learning_rate': fl(0.01,0.1),
                       'clf__max_depth': intd(-1,20),
                        }