Search code examples
machine-learning scikit-learn bayesian hyperopt

TypeError: 'float' object cannot be interpreted as an integer | XGBoost | Hyperopt | Bayesian


The error I'm facing occurs on the line `best = fmin(fn=objective, ...)`. The error message is: 'float' object cannot be interpreted as an integer.

# Objective function minimised by Hyperopt
def objective(params):
    """Fit an XGBoost regressor with ``params`` and report its loss.

    The model is trained on ``X_train``/``y_train`` and scored on
    ``X_test``/``y_test``. The R2 score is negated because the optimiser
    minimises the returned loss.

    Args:
        params: Keyword arguments forwarded unchanged to ``xgb.XGBRegressor``.

    Returns:
        A dict holding the negated R2 score under ``'loss'`` and
        ``STATUS_OK`` under ``'status'``.
    """
    regressor = xgb.XGBRegressor(**params)
    regressor.fit(X_train, y_train)
    predictions = regressor.predict(X_test)
    # Negative R2 score for minimization
    return {'loss': -r2_score(y_test, predictions), 'status': STATUS_OK}

# Define the search space for hyperparameters.
# NOTE: hp.quniform samples are floats even when q=1 (e.g. 150.0, not 150),
# so 'n_estimators' and 'min_child_weight' reach the objective as floats.
param_space = {
    'n_estimators': hp.quniform('n_estimators', 100, 300, 1),
    'max_depth': hp.randint('max_depth', 3, 6),  # Use hp.randint for integer choices
    'learning_rate': hp.uniform('learning_rate', 0.01, 0.2),
    'min_child_weight': hp.quniform('min_child_weight', 1, 3, 1),
    'reg_alpha': hp.uniform('reg_alpha', 0, 1),
    'reg_lambda': hp.uniform('reg_lambda', 0, 1)
}

# Create an XGBoost model
# NOTE(review): this instance is not referenced again in the code shown here;
# the objective and the final fit each build their own regressor.
model = xgb.XGBRegressor()

# Initialize Hyperopt Trials (collects the result of every evaluation)
trials = Trials()

# Set a random seed for Hyperopt
# NOTE(review): this seeds NumPy's global RNG only; fmin below is not given an
# explicit `rstate`, so confirm the search is actually reproducible.
np.random.seed(42)

# Perform Bayesian hyperparameter tuning.
# This is the call where the reported TypeError surfaces: fmin invokes
# objective() with the sampled parameters, including the float-valued ones.
best = fmin(fn=objective,
            space=param_space,
            algo=tpe.suggest,
            max_evals=50,  # Number of optimization iterations
            trials=trials)

# Get the best hyperparameters from the optimization.
# Integer-valued parameters are cast explicitly before reuse.
best_n_estimators = int(best['n_estimators'])
best_max_depth = int(best['max_depth'])  # Cast to an integer
best_learning_rate = best['learning_rate']
best_min_child_weight = int(best['min_child_weight'])
best_reg_alpha = best['reg_alpha']
best_reg_lambda = best['reg_lambda']

# Train the final model with the best hyperparameters
best_params = {
    'n_estimators': best_n_estimators,
    'max_depth': int(best_max_depth),  # Already an int; this second cast is redundant
    'learning_rate': best_learning_rate,
    'min_child_weight': int(best_min_child_weight),  # Likewise already an int
    'reg_alpha': best_reg_alpha,
    'reg_lambda': best_reg_lambda
}

final_model = xgb.XGBRegressor(**best_params)
final_model.fit(X_train, y_train)

# Make predictions on training and testing data
y_train_pred = final_model.predict(X_train)
y_test_pred = final_model.predict(X_test)

# Calculate R2 scores for training and testing data
train_r2 = r2_score(y_train, y_train_pred)
test_r2 = r2_score(y_test, y_test_pred)

# Print the best hyperparameters and R2 scores
print(f'Best Hyperparameters: {best_params}')
print(f'Training R2 Score: {train_r2:.2f}')
print(f'Testing R2 Score: {test_r2:.2f}')

The code uses Hyperopt for Bayesian hyperparameter tuning of an XGBoost regressor. However, there are a few issues and missing parts in the code that need to be addressed. Can you please help me with the problem? All the imports are already included.


Solution

  • The error occurs because XGBoost parameters such as n_estimators and max_depth must be integers, but the values Hyperopt samples for them can be floats: hp.quniform, despite being quantized, returns float values (e.g. 150.0 rather than 150), and those are what fmin passes to the objective. To fix this, explicitly cast the affected hyperparameters to integers by wrapping them in scope.int() from Hyperopt (min_child_weight is wrapped as well so that it stays integral). Here is the code you shared with those changes applied, which should hopefully work.

    import numpy as np
    import xgboost as xgb
    from hyperopt import STATUS_OK, Trials, fmin, hp, tpe
    from hyperopt.pyll import scope
    from sklearn.metrics import r2_score
    
    # Objective function minimised by Hyperopt
    def objective(params):
        """Fit an XGBoost regressor with ``params`` and report its loss.

        The model is trained on ``X_train``/``y_train`` and scored on
        ``X_test``/``y_test``. The R2 score is negated because the optimiser
        minimises the returned loss.

        Args:
            params: Keyword arguments forwarded unchanged to
                ``xgb.XGBRegressor``.

        Returns:
            A dict holding the negated R2 score under ``'loss'`` and
            ``STATUS_OK`` under ``'status'``.
        """
        regressor = xgb.XGBRegressor(**params)
        regressor.fit(X_train, y_train)
        predictions = regressor.predict(X_test)
        # Negative R2 score for minimization
        return {'loss': -r2_score(y_test, predictions), 'status': STATUS_OK}
    
    # Define the search space for hyperparameters.
    # hp.quniform samples floats even with q=1, so it is wrapped in scope.int
    # to hand the objective genuine integers.
    param_space = {
        'n_estimators': scope.int(hp.quniform('n_estimators', 100, 300, 1)),
        'max_depth': hp.randint('max_depth', 3, 6),  # Use hp.randint for integer choices
        'learning_rate': hp.uniform('learning_rate', 0.01, 0.2),
        'min_child_weight': scope.int(hp.quniform('min_child_weight', 1, 3, 1)),
        'reg_alpha': hp.uniform('reg_alpha', 0, 1),
        'reg_lambda': hp.uniform('reg_lambda', 0, 1),
    }

    # Initialize Hyperopt Trials (collects the result of every evaluation)
    trials = Trials()

    # Seed NumPy's global RNG (kept for any other code that relies on it).
    np.random.seed(42)

    # Perform Bayesian hyperparameter tuning.
    # fmin draws from its own `rstate` generator rather than NumPy's global
    # state, so the seed must be passed explicitly for a reproducible search.
    # hyperopt >= 0.2.7 expects a numpy Generator here; older releases expect
    # np.random.RandomState(42) instead.
    best = fmin(fn=objective,
                space=param_space,
                algo=tpe.suggest,
                max_evals=50,  # Number of optimization iterations
                trials=trials,
                rstate=np.random.default_rng(42))

    # Extract the best hyperparameters.
    # `best` maps each hp label to its raw sampled value (scope.int is not
    # applied to it), so the integer-valued entries still need a cast here.
    best_n_estimators = int(best['n_estimators'])
    best_max_depth = int(best['max_depth'])
    best_learning_rate = best['learning_rate']
    best_min_child_weight = int(best['min_child_weight'])
    best_reg_alpha = best['reg_alpha']
    best_reg_lambda = best['reg_lambda']

    best_params = {
        'n_estimators': best_n_estimators,
        'max_depth': best_max_depth,
        'learning_rate': best_learning_rate,
        'min_child_weight': best_min_child_weight,
        'reg_alpha': best_reg_alpha,
        'reg_lambda': best_reg_lambda,
    }

    # Train the final model with the best hyperparameters
    final_model = xgb.XGBRegressor(**best_params)
    final_model.fit(X_train, y_train)

    # Make predictions
    y_train_pred = final_model.predict(X_train)
    y_test_pred = final_model.predict(X_test)

    # Calculate R2 scores.
    # NOTE: the objective tunes against X_test/y_test, so the test score below
    # is optimistic; tune on a separate validation split (or cross-validate on
    # the training data) for an unbiased estimate.
    train_r2 = r2_score(y_train, y_train_pred)
    test_r2 = r2_score(y_test, y_test_pred)

    # Print results
    print(f'Best Hyperparameters: {best_params}')
    print(f'Training R2 Score: {train_r2:.2f}')
    print(f'Testing R2 Score: {test_r2:.2f}')