python machine-learning scikit-learn svm cross-validation

Getting Precision and Recall using sklearn

Using the code below, I have the Accuracy . Now I am trying to

1) find the precision and recall for each fold (10 folds total)

2) get the mean for precision

3) get the mean for recall

This could be similar to print(scores) and print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2)) below.

Any thoughts?

import numpy as np
from sklearn import cross_validation
from sklearn import datasets
from sklearn import svm
from sklearn.model_selection import StratifiedKFold

iris = datasets.load_iris()
skf = StratifiedKFold(n_splits=10)
clf = svm.SVC(kernel='linear', C=1)
scores = cross_validation.cross_val_score(clf, iris.data, iris.target, cv=10)
print(scores)  #[ 1. 0.93333333   1.  1. 0.86666667  1.  0.93333333   1.  1.  1.]
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2)) # Accuracy: 0.97 (+/- 0.09)

Solution

This is a bit different, because cross_val_score can't calculate precision/recall for non-binary classification, so you need to use recision_score, recall_score and make cross-validation manually. Parameter average='micro' calculates global precision/recall.

import numpy as np
from sklearn import cross_validation
from sklearn import datasets
from sklearn import svm
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import precision_score, recall_score

iris = datasets.load_iris()
skf = StratifiedKFold(n_splits=10)
clf = svm.SVC(kernel='linear', C=1)

X = iris.data
y = iris.target
precision_scores = []
recall_scores = []
for train_index, test_index in skf.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    y_pred = clf.fit(X_train, y_train).predict(X_test)
    precision_scores.append(precision_score(y_test, y_pred, average='micro'))
    recall_scores.append(recall_score(y_test, y_pred, average='micro'))

print(precision_scores)
print("Recall: %0.2f (+/- %0.2f)" % (np.mean(precision_scores), np.std(precision_scores) * 2))
print(recall_scores)
print("Recall: %0.2f (+/- %0.2f)" % (np.mean(recall_scores), np.std(recall_scores) * 2))