I have a function that does classification. I am encountering a problem when exporting data. Here is the stack trace:
[Errno 36] File name too long: "testLogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n intercept_scaling=1, l1_ratio=None, max_iter=100,\n multi_class='warn', n_jobs=None, penalty='l2',\n random_state=None, solver='warn', tol=0.0001, verbose=0,\n warm_start=False).xlsx"
My code:
def classifieur(X, y):
    """Cross-validate five classifiers on (X, y) and export their predictions.

    Each estimator is evaluated with a 10-fold, unshuffled ``KFold`` split.
    The estimator and its accuracy are printed, and the expected and
    predicted labels are written to one Excel file per estimator.
    """
    X = matrix(X)
    estimators = [
        LinearSVC(),
        MultinomialNB(),
        LogisticRegression(),
        RandomForestClassifier(),
        KNeighborsClassifier(),
    ]
    folds = KFold(n_splits=10, shuffle=False, random_state=None)

    for estimator in estimators:
        predictions = cross_val_predict(estimator, X, y, cv=folds)
        print("Model: {}".format(estimator))
        print("Accuracy: {}".format(accuracy_score(y, predictions)))

        # export
        results = pd.DataFrame()
        results['Expected Output'] = y
        results['Predicted Output'] = predictions
        # NOTE(review): `output` is not defined inside this function; it has
        # to exist at module level for this line to run.
        print(output.head())
        # Formatting the estimator object itself puts its full repr (every
        # hyperparameter) into the file name, which is what produces the
        # reported "[Errno 36] File name too long" error.
        results.to_excel("test{}.xlsx".format(estimator))
# Run the evaluation; X and y are not defined in this snippet and must already exist.
classifieur(X, y)
The function itself works; only the name of the exported file causes a problem. I am working in a Linux environment.
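Formatting the estimator object into the file name inserts its full repr (every hyperparameter), which makes the name longer than the file system allows. Create a dict that maps a short model name to each model, and use that short name in the file name instead: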
def classifieur(X, y):
    """Cross-validate five classifiers on (X, y) and export their predictions.

    Each estimator is evaluated with a 10-fold, unshuffled ``KFold`` split.
    The estimator and its accuracy are printed, and the expected and
    predicted labels are written to ``test<model_name>.xlsx``, where
    ``model_name`` is the short key the estimator is registered under.

    Args:
        X: Feature data; passed through ``matrix`` before use.
        y: Expected labels, one per sample in ``X``.
    """
    X = matrix(X)
    # Short, file-system-safe names are used for the output files instead of
    # the estimators' repr, which is far too long to serve as a file name.
    models = {
        'model_l': LinearSVC(),
        'model_m': MultinomialNB(),
        'model_lr': LogisticRegression(),
        'model_r': RandomForestClassifier(),
        'model_k': KNeighborsClassifier(),
    }
    cv_splitter = KFold(n_splits=10, shuffle=False, random_state=None)

    for model_name, model in models.items():
        y_pred = cross_val_predict(model, X, y, cv=cv_splitter)
        print("Model: {}".format(model))
        print("Accuracy: {}".format(accuracy_score(y, y_pred)))

        # export
        res = pd.DataFrame()
        res['Expected Output'] = y
        res['Predicted Output'] = y_pred
        # Preview the frame that was just built; the original referenced an
        # undefined name (`output`) here.
        print(res.head())
        res.to_excel("test{}.xlsx".format(model_name))