I'm trying to use a for-loop to pass different sets of hyperparameters to RandomForestRegressor().
I'm using the code below to create hyperparams (a list? an array?) to control the loop. I keep getting the error listed after the code, which occurs when I try to fit the model.
Is what I'm trying to do possible, and if so, how would I do it?
# Candidate hyperparameter sets: a list of dicts, one dict per model configuration.
hyperparams = [{
'n_estimators':460,
'bootstrap':False,
'criterion':'poisson',
'max_depth':60,
'max_features':2,
'min_samples_leaf':1,
'min_samples_split':2
},
{
'n_estimators':60,
'bootstrap':False,
'criterion':'friedman_mse',
'max_depth':90,
'max_features':3,
'min_samples_leaf':1,
'min_samples_split':2
}]
# Build one regressor per hyperparameter set.
for hparams in hyperparams:
# NOTE(review): hparams is passed as a single positional argument, so the whole
# dict is taken as n_estimators; this is what the InvalidParameterError reports.
model_regressor = RandomForestRegressor(hparams)
print(model_regressor.get_params())
print(model_regressor.get_params())
# Per-metric totals, all starting at zero; presumably accumulated in loop code
# that is not shown in this excerpt.
total_r2_score_value = 0
total_mean_squared_error_array = 0
total_explained_variance_score_value = 0
total_max_error_value = 0
total_mean_absolute_error_value = 0
total_mean_absolute_percent_value = 0
total_median_absolute_error_value = 0
total_mean_tweedie_deviance_value = 0
total_mean_pinball_loss_value = 0
total_d2_pinball_score_value = 0
total_d2_absolute_error_score_value = 0
# Number of times the model is fitted.
total_tests = 10
for index in range(1, total_tests+1):
# model fitting
model_regressor.fit(X_train, y_train)
ERROR:
Traceback (most recent call last):
File "c:\Projects\Python\DATA260\data_260_python\src\DATA_280A_Course\src\week6_project_work\jess_obesity_dataset_optimized_RFR.py", line 283, in <module>
main()
File "c:\Projects\Python\DATA260\data_260_python\src\DATA_280A_Course\src\week6_project_work\jess_obesity_dataset_optimized_RFR.py", line 210, in main
model_regressor.fit(X_train, y_train)
File "C:\Users\Jess\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\base.py", line 1144, in wrapper
estimator._validate_params()
File "C:\Users\Jess\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\base.py", line 637, in _validate_params
validate_parameter_constraints(
File "C:\Users\Jess\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\utils\_param_validation.py", line 95, in validate_parameter_constraints
raise InvalidParameterError(
sklearn.utils._param_validation.InvalidParameterError: The 'n_estimators' parameter of RandomForestRegressor must be an int in the range [1, inf). Got {'n_estimators': 460, 'bootstrap': False, 'criterion': 'poisson', 'max_depth': 60, 'max_features': 2, 'min_samples_leaf': 1, 'min_samples_split': 2} instead.
I think the problem is that you are looping over dictionaries and passing each one whole to the constructor as a single positional argument, as you can see in this example:
from sklearn.ensemble import RandomForestRegressor
# Same hyperparameter sets as in the question: a list of dicts.
hyperparams = [{
'n_estimators': 460,
'bootstrap': False,
'criterion': 'poisson',
'max_depth': 60,
'max_features': 2,
'min_samples_leaf': 1,
'min_samples_split': 2
},
{
'n_estimators': 60,
'bootstrap': False,
'criterion': 'friedman_mse',
'max_depth': 90,
'max_features': 3,
'min_samples_leaf': 1,
'min_samples_split': 2
}]
for hparams in hyperparams:
# The dict is passed positionally, so it is bound to the first parameter
# (n_estimators) and every other parameter keeps its default value.
model_regressor = RandomForestRegressor(hparams)
# Printing the parameters makes the mis-assignment visible.
print(model_regressor.get_params())
{'bootstrap': True, 'ccp_alpha': 0.0, 'criterion': 'squared_error', 'max_depth': None, 'max_features': 1.0, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': {'n_estimators': 460, 'bootstrap': False, 'criterion': 'poisson', 'max_depth': 60, 'max_features': 2, 'min_samples_leaf': 1, 'min_samples_split': 2}, 'n_jobs': None, 'oob_score': False, 'random_state': None, 'verbose': 0, 'warm_start': False}
{'bootstrap': True, 'ccp_alpha': 0.0, 'criterion': 'squared_error', 'max_depth': None, 'max_features': 1.0, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': {'n_estimators': 460, 'bootstrap': False, 'criterion': 'poisson', 'max_depth': 60, 'max_features': 2, 'min_samples_leaf': 1, 'min_samples_split': 2}, 'n_jobs': None, 'oob_score': False, 'random_state': None, 'verbose': 0, 'warm_start': False}
{'bootstrap': True, 'ccp_alpha': 0.0, 'criterion': 'squared_error', 'max_depth': None, 'max_features': 1.0, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': {'n_estimators': 60, 'bootstrap': False, 'criterion': 'friedman_mse', 'max_depth': 90, 'max_features': 3, 'min_samples_leaf': 1, 'min_samples_split': 2}, 'n_jobs': None, 'oob_score': False, 'random_state': None, 'verbose': 0, 'warm_start': False}
{'bootstrap': True, 'ccp_alpha': 0.0, 'criterion': 'squared_error', 'max_depth': None, 'max_features': 1.0, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': {'n_estimators': 60, 'bootstrap': False, 'criterion': 'friedman_mse', 'max_depth': 90, 'max_features': 3, 'min_samples_leaf': 1, 'min_samples_split': 2}, 'n_jobs': None, 'oob_score': False, 'random_state': None, 'verbose': 0, 'warm_start': False}
In the output, the key 'n_estimators' is associated with the whole dictionary, and the other keys have not been given the values you intended: they keep their defaults.
Consider this code, with a single element in the list:
# A single hyperparameter set, used to show values being passed individually.
hyperparams = [{
'n_estimators': 460,
'criterion': 'poisson',
'bootstrap': False,
'max_depth': 60,
'max_features': 2,
'min_samples_leaf': 1,
'min_samples_split': 2
},
]
for hparams in hyperparams:
# n_estimators is passed positionally and criterion by keyword; the remaining
# entries of hparams are not passed, so those parameters keep their defaults.
model_regressor = RandomForestRegressor(hparams['n_estimators'], criterion=hparams['criterion'])
print(model_regressor.get_params())
{'bootstrap': True, 'ccp_alpha': 0.0, 'criterion': 'poisson', 'max_depth': None, 'max_features': 1.0, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 460, 'n_jobs': None, 'oob_score': False, 'random_state': None, 'verbose': 0, 'warm_start': False}
Check the documentation for the parameters of RandomForestRegressor: the first one is n_estimators, and the others are optional keyword arguments.
If you want to pass all the parameters at once, the best approach is to unpack the dictionary with `**`:
# ** unpacks the dict so that each key/value pair is passed as a keyword argument.
model_regressor = RandomForestRegressor(**hparams)