All my models are initialized with the below:
def intiailize_clf_models(self):
    """Build the candidate classifiers and append them to ``self.clf_models``.

    One instance of each classifier class listed below is constructed with
    ``random_state=42`` and appended, in the listed order, to the existing
    ``self.clf_models`` collection. Nothing is returned.
    """
    # The tuple order is the order in which the instances are appended.
    for build_classifier in (
        RandomForestClassifier,
        ExtraTreesClassifier,
        MLPClassifier,
        LogisticRegression,
        xgb.XGBClassifier,
        lgb.LGBMClassifier,
    ):
        self.clf_models.append(build_classifier(random_state=42))
I then loop through the models and perform k-fold cross-validation with:
def kfold_cross_validation(self):
    """Cross-validate every model from ``self.get_models()`` with ``cv=4``.

    ``self.results`` is reset to an empty dict and then filled with the mean
    cross-validation score of each model, keyed by the model's class name.
    ``self.current_model_name`` and ``self.mean_cross_validation_score`` are
    overwritten on every iteration, so after the loop they describe the last
    model evaluated. Nothing is returned.
    """
    candidates = self.get_models()
    self.results = {}
    # Evaluated models are collected here; the list is not read again
    # within this method.
    evaluated = []
    for estimator in candidates:
        estimator_name = estimator.__class__.__name__
        self.current_model_name = estimator_name
        fold_scores = cross_val_score(estimator, self.xtrain, self.ytrain, cv=4)
        mean_score = fold_scores.mean()
        self.mean_cross_validation_score = mean_score
        print("Kfold cross validation for", estimator_name)
        self.results[estimator_name] = mean_score
        evaluated.append(estimator)
Every time I run this cross-validation, I get a different result, even though I have set a random state on each of the models. I would like to know why I get different results in cross-validation and what can be done about it.
I found the solution to my question.
Setting NumPy's global random seed with the line below solved the problem:
# np.random.seed() returns None, so the seed value is kept in its own
# variable and then used to seed NumPy's global random number generator.
seed = 22
np.random.seed(seed)