Search code examples
python scikit-learn parameter-passing random-forest

How to Pass Hyperparameters to RandomForestRegressor as a Single Variable in For Loop


I'm trying to use a for loop to send different sets of hyperparameters to RandomForestRegressor().

I'm using the code below to create hyperparams (list? array?) to control the loop. I keep getting the error listed after the code which occurs when I try to fit the model.

Is what I'm trying to do possible, and if so, how would I do it?

hyperparams = [{
                'n_estimators':460,
                'bootstrap':False,
                'criterion':'poisson',
                'max_depth':60,
                'max_features':2,
                'min_samples_leaf':1,
                'min_samples_split':2
            },
            {
                'n_estimators':60,
                'bootstrap':False,
                'criterion':'friedman_mse',
                'max_depth':90,
                'max_features':3,
                'min_samples_leaf':1,
                'min_samples_split':2
            }]
for hparams in hyperparams:
    model_regressor = RandomForestRegressor(hparams)
    print(model_regressor.get_params())
    print(model_regressor.get_params())

    total_r2_score_value = 0
    total_mean_squared_error_array = 0

    total_explained_variance_score_value = 0
    total_max_error_value = 0
    total_mean_absolute_error_value = 0
    total_mean_absolute_percent_value = 0
    total_median_absolute_error_value = 0
    total_mean_tweedie_deviance_value = 0
    total_mean_pinball_loss_value = 0
    total_d2_pinball_score_value = 0
    total_d2_absolute_error_score_value = 0
    
    total_tests = 10
    for index in range(1, total_tests+1):
        
        # model fitting
        model_regressor.fit(X_train, y_train)

ERROR:

Traceback (most recent call last):
File "c:\Projects\Python\DATA260\data_260_python\src\DATA_280A_Course\src\week6_project_work\jess_obesity_dataset_optimized_RFR.py", line 283, in <module>      
  main()
File "c:\Projects\Python\DATA260\data_260_python\src\DATA_280A_Course\src\week6_project_work\jess_obesity_dataset_optimized_RFR.py", line 210, in main
  model_regressor.fit(X_train, y_train)
File "C:\Users\Jess\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\base.py", line 1144, in wrapper
  estimator._validate_params()
File "C:\Users\Jess\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\base.py", line 637, in _validate_params
  validate_parameter_constraints(
File "C:\Users\Jess\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\utils\_param_validation.py", line 95, in validate_parameter_constraints   
  raise InvalidParameterError(
sklearn.utils._param_validation.InvalidParameterError: The 'n_estimators' parameter of RandomForestRegressor must be an int in the range [1, inf). Got {'n_estimators': 460, 'bootstrap': False, 'criterion': 'poisson', 'max_depth': 60, 'max_features': 2, 'min_samples_leaf': 1, 'min_samples_split': 2} instead.

Solution

  • You are looping over Python dictionaries (not JSON objects) and passing each one to the constructor as a single positional argument, as you can see in this example:

    from sklearn.ensemble import RandomForestRegressor
    
    hyperparams = [{
        'n_estimators': 460,
        'bootstrap': False,
        'criterion': 'poisson',
        'max_depth': 60,
        'max_features': 2,
        'min_samples_leaf': 1,
        'min_samples_split': 2
    },
        {
            'n_estimators': 60,
            'bootstrap': False,
            'criterion': 'friedman_mse',
            'max_depth': 90,
            'max_features': 3,
            'min_samples_leaf': 1,
            'min_samples_split': 2
        }]
    for hparams in hyperparams:
        model_regressor = RandomForestRegressor(hparams)
        print(model_regressor.get_params())
    
    
    {'bootstrap': True, 'ccp_alpha': 0.0, 'criterion': 'squared_error', 'max_depth': None, 'max_features': 1.0, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': {'n_estimators': 460, 'bootstrap': False, 'criterion': 'poisson', 'max_depth': 60, 'max_features': 2, 'min_samples_leaf': 1, 'min_samples_split': 2}, 'n_jobs': None, 'oob_score': False, 'random_state': None, 'verbose': 0, 'warm_start': False}
    {'bootstrap': True, 'ccp_alpha': 0.0, 'criterion': 'squared_error', 'max_depth': None, 'max_features': 1.0, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': {'n_estimators': 460, 'bootstrap': False, 'criterion': 'poisson', 'max_depth': 60, 'max_features': 2, 'min_samples_leaf': 1, 'min_samples_split': 2}, 'n_jobs': None, 'oob_score': False, 'random_state': None, 'verbose': 0, 'warm_start': False}
    {'bootstrap': True, 'ccp_alpha': 0.0, 'criterion': 'squared_error', 'max_depth': None, 'max_features': 1.0, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': {'n_estimators': 60, 'bootstrap': False, 'criterion': 'friedman_mse', 'max_depth': 90, 'max_features': 3, 'min_samples_leaf': 1, 'min_samples_split': 2}, 'n_jobs': None, 'oob_score': False, 'random_state': None, 'verbose': 0, 'warm_start': False}
    {'bootstrap': True, 'ccp_alpha': 0.0, 'criterion': 'squared_error', 'max_depth': None, 'max_features': 1.0, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': {'n_estimators': 60, 'bootstrap': False, 'criterion': 'friedman_mse', 'max_depth': 90, 'max_features': 3, 'min_samples_leaf': 1, 'min_samples_split': 2}, 'n_jobs': None, 'oob_score': False, 'random_state': None, 'verbose': 0, 'warm_start': False}
    

    In the printed output, the key 'n_estimators' is associated with the whole dictionary, because the dictionary was passed as the first positional argument. The other keys were not set from it and keep their default values.

    Compare this with the following code, which uses a list with a single element and passes the values explicitly:

    hyperparams = [{
        'n_estimators': 460,
        'criterion': 'poisson',
        'bootstrap': False,
        'max_depth': 60,
        'max_features': 2,
        'min_samples_leaf': 1,
        'min_samples_split': 2
    },
       ]
    for hparams in hyperparams:
        model_regressor = RandomForestRegressor(hparams['n_estimators'], criterion=hparams['criterion'])
        print(model_regressor.get_params())
    
    {'bootstrap': True, 'ccp_alpha': 0.0, 'criterion': 'poisson', 'max_depth': None, 'max_features': 1.0, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 460, 'n_jobs': None, 'oob_score': False, 'random_state': None, 'verbose': 0, 'warm_start': False}
    

    Look at the documentation for the parameters of RandomForestRegressor: the first one is n_estimators, so a single positional argument is always interpreted as n_estimators. The remaining ones are optional keyword arguments.

    If you want to pass all of the parameters at once, unpack the dictionary into keyword arguments with the ** operator:

    model_regressor = RandomForestRegressor(**hparams)