Search code examples
python, machine-learning, scikit-learn, svm, cross-validation

Getting Precision and Recall using sklearn


Using the code below, I can get the accuracy. Now I am trying to:

1) find the precision and recall for each fold (10 folds total)

2) get the mean for precision

3) get the mean for recall

This could be similar to print(scores) and print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2)) below.

Any thoughts?

import numpy as np
from sklearn import cross_validation
from sklearn import datasets
from sklearn import svm
from sklearn.model_selection import StratifiedKFold

# Score a linear SVM on the iris data with 10-fold stratified cross-validation.
iris = datasets.load_iris()
clf = svm.SVC(kernel='linear', C=1)

# Build the splitter once and actually hand it to the scorer below
# (previously it was created but never used).
skf = StratifiedKFold(n_splits=10)

# Take cross_val_score from the same module as StratifiedKFold so the
# splitter object can be passed directly as `cv`.
from sklearn.model_selection import cross_val_score

# One score per fold.
scores = cross_val_score(clf, iris.data, iris.target, cv=skf)
print(scores)  #[ 1. 0.93333333   1.  1. 0.86666667  1.  0.93333333   1.  1.  1.]
# Report the mean across folds with a +/- two-standard-deviation spread.
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2)) # Accuracy: 0.97 (+/- 0.09)

Solution

  • This is a bit different, because cross_val_score can't calculate precision/recall for non-binary classification, so you need to use precision_score and recall_score and do the cross-validation manually. The parameter average='micro' calculates global precision/recall.

    import numpy as np
    from sklearn import cross_validation
    from sklearn import datasets
    from sklearn import svm
    from sklearn.model_selection import StratifiedKFold
    from sklearn.metrics import precision_score, recall_score
    
    # Manual 10-fold stratified cross-validation that records precision and
    # recall for every fold, then reports the per-fold values and their mean.
    iris = datasets.load_iris()
    skf = StratifiedKFold(n_splits=10)
    clf = svm.SVC(kernel='linear', C=1)

    X = iris.data
    y = iris.target
    precision_scores = []  # one entry per fold
    recall_scores = []     # one entry per fold
    for train_index, test_index in skf.split(X, y):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        # Fit on this fold's training part and predict its held-out part.
        y_pred = clf.fit(X_train, y_train).predict(X_test)
        # average='micro' gives the global (all classes pooled) precision/recall.
        precision_scores.append(precision_score(y_test, y_pred, average='micro'))
        recall_scores.append(recall_score(y_test, y_pred, average='micro'))

    # Per-fold values followed by mean (+/- two standard deviations).
    print(precision_scores)
    # Label fixed: this line summarizes precision, not recall.
    print("Precision: %0.2f (+/- %0.2f)" % (np.mean(precision_scores), np.std(precision_scores) * 2))
    print(recall_scores)
    print("Recall: %0.2f (+/- %0.2f)" % (np.mean(recall_scores), np.std(recall_scores) * 2))