I have a table assessing the classification quality of 6 models at the same time. It bothers me that the code takes up so much space, so I would like to ask whether anyone could rewrite this with a loop.
import numpy as np  # was missing: np is used below
from prettytable import PrettyTable
from sklearn import metrics  # was missing: metrics is used below
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier  # duplicate import removed
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier  # was missing: CBC is built below

# The six classifiers being compared; assumes X_train/y_train and
# X_test/y_test exist at module level (defined elsewhere in the notebook).
NBC = GaussianNB()
LRE = LogisticRegression(solver='lbfgs')
GBC = GradientBoostingClassifier()
RFC = RandomForestClassifier()
LGBM = LGBMClassifier()
CBC = CatBoostClassifier(verbose=0, n_estimators=100)
classifiers = [NBC, LRE, GBC, RFC, LGBM, CBC]

# Fit every model on the training split.
for cls in classifiers:
    cls.fit(X_train, y_train)

# ROC-AUC of the positive class, rounded to 3 decimals.
# A* = train split, B* = test split; one pair per model.
AUC_A1 = np.round(metrics.roc_auc_score(y_train, NBC.predict_proba(X_train)[:, 1]), decimals=3)
AUC_B1 = np.round(metrics.roc_auc_score(y_test, NBC.predict_proba(X_test)[:, 1]), decimals=3)
AUC_A2 = np.round(metrics.roc_auc_score(y_train, LRE.predict_proba(X_train)[:, 1]), decimals=3)
AUC_B2 = np.round(metrics.roc_auc_score(y_test, LRE.predict_proba(X_test)[:, 1]), decimals=3)
AUC_A3 = np.round(metrics.roc_auc_score(y_train, GBC.predict_proba(X_train)[:, 1]), decimals=3)
AUC_B3 = np.round(metrics.roc_auc_score(y_test, GBC.predict_proba(X_test)[:, 1]), decimals=3)
AUC_A4 = np.round(metrics.roc_auc_score(y_train, RFC.predict_proba(X_train)[:, 1]), decimals=3)
AUC_B4 = np.round(metrics.roc_auc_score(y_test, RFC.predict_proba(X_test)[:, 1]), decimals=3)
AUC_A5 = np.round(metrics.roc_auc_score(y_train, LGBM.predict_proba(X_train)[:, 1]), decimals=3)
AUC_B5 = np.round(metrics.roc_auc_score(y_test, LGBM.predict_proba(X_test)[:, 1]), decimals=3)
AUC_A6 = np.round(metrics.roc_auc_score(y_train, CBC.predict_proba(X_train)[:, 1]), decimals=3)
AUC_B6 = np.round(metrics.roc_auc_score(y_test, CBC.predict_proba(X_test)[:, 1]), decimals=3)

t = PrettyTable(['Name', 'GN', 'LogReg', 'GradBoos', 'RandFor', 'LGBM', 'CatBoost'])
t.add_row(['AUC_train: ', AUC_A1, AUC_A2, AUC_A3, AUC_A4, AUC_A5, AUC_A6])
# BUG FIX: the test row previously reused the TRAIN scores AUC_A5/AUC_A6 for
# LGBM and CatBoost; it now reports the test scores AUC_B5/AUC_B6.
t.add_row(['AUC_test: ', AUC_B1, AUC_B2, AUC_B3, AUC_B4, AUC_B5, AUC_B6])
print(t)
Try defining a function and iterating over the models with a for loop:
import numpy as np
from prettytable import PrettyTable
from sklearn import metrics
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
# Instantiate the six classifiers once; the unpacking keeps the individual
# aliases (NBC, LRE, ...) available alongside the list used by the loop below.
NBC, LRE, GBC, RFC, LGBM, CBC = classifiers = [
    GaussianNB(),
    LogisticRegression(solver='lbfgs'),
    GradientBoostingClassifier(),
    RandomForestClassifier(),
    LGBMClassifier(),
    CatBoostClassifier(verbose=0, n_estimators=100),
]

# Table-row accumulators: the first cell is the row label, one AUC per model follows.
AUC_train = ['AUC_train: ']
AUC_test = ['AUC_test: ']
def compute_metric(model, X_tr=None, y_tr=None, X_te=None, y_te=None):
    """Return (train_auc, test_auc) ROC-AUC scores for a fitted binary classifier.

    Parameters
    ----------
    model : fitted estimator exposing ``predict_proba``.
    X_tr, y_tr, X_te, y_te : optional train/test features and labels.
        When omitted they default to the module-level ``X_train``/``y_train``
        and ``X_test``/``y_test``, so existing single-argument calls keep
        working unchanged (the original relied on those globals implicitly).

    Returns
    -------
    tuple of two floats, each rounded to 3 decimal places.
    """
    # Backward-compatible fallback to the notebook-level data split.
    if X_tr is None:
        X_tr, y_tr = X_train, y_train
    if X_te is None:
        X_te, y_te = X_test, y_test
    # ROC-AUC uses the predicted probability of the positive class (column 1).
    auc_train = np.round(metrics.roc_auc_score(y_tr, model.predict_proba(X_tr)[:, 1]), decimals=3)
    auc_test = np.round(metrics.roc_auc_score(y_te, model.predict_proba(X_te)[:, 1]), decimals=3)
    return auc_train, auc_test
# Fit each model on the training split, collect its train/test AUC pair,
# then render a single comparison table (one column per model).
for model in classifiers:
    model.fit(X_train, y_train)
    train_auc, test_auc = compute_metric(model)
    AUC_train.append(train_auc)
    AUC_test.append(test_auc)

table = PrettyTable(['Name', 'GN', 'LogReg', 'GradBoos', 'RandFor', 'LGBM', 'CatBoost'])
table.add_row(AUC_train)
table.add_row(AUC_test)
print(table)