Search code examples
python scikit-learn decision-tree attributeerror precision-recall

AttributeError: 'DecisionTreeClassifier' object has no attribute 'precision_score'


I recently started learning data science. This is what I wrote:

import pandas as pd
from sklearn.tree import DecisionTreeClassifier
 from sklearn.linear_model import LogisticRegression
 from sklearn.model_selection import KFold
 from sklearn.metrics import precision_score, recall_score
 import numpy as np

 # Read the data and derive a boolean 'male' column from 'Sex'.
 df = pd.read_csv('titanic.csv')
 df['male'] = df['Sex'] == 'male'
 # Feature matrix (six columns) and target vector, taken out of the DataFrame via .values.
 X = df[['Pclass', 'male', 'Age', 'Siblings/Spouses', 'Parents/Children', 'Fare']].values
 y = df['Survived'].values

 # Set up 5-fold cross-validation (4+1 == 5) and one score container per model.
 kf = KFold(n_splits=4+1, shuffle=True, random_state=10)
 tree_scores = {'accuracy_scores':[],'precision_scores':[],'recall_scores':[]}
 logistic_scores = {'accuracy_scores':[],'precision_scores':[],'recall_scores':[]}

 # For each fold: fit both models on the training rows and score them on the held-out rows.
 for train_indexes, test_indexes in kf.split(X):
     X_train, X_test = X[train_indexes], X[test_indexes]
     y_train, y_test = y[train_indexes], y[test_indexes]

     # Decision tree: store the estimator's own score() result, then predict on the test rows.
     tree = DecisionTreeClassifier()
     tree.fit(X_train, y_train)
     tree_scores['accuracy_scores'].append(tree.score(X_test,y_test))
     tree_prediction = tree.predict(X_test)
     # NOTE: the two disabled calls below are the ones that raise the
     # AttributeError. precision_score/recall_score are imported from
     # sklearn.metrics as plain functions (compare the logistic-regression
     # calls further down); they are not methods of the estimator. While these
     # lines stay disabled, the tree's precision and recall lists remain empty.
     #tree_scores['precision_scores'].append(tree.precision_score(y_test,tree_prediction))
     #tree_scores['recall_scores'].append(tree.recall_score(y_test,tree_prediction))

     # Logistic regression: same procedure, with precision and recall computed
     # by calling the sklearn.metrics functions on (y_test, predictions).
     logistic = LogisticRegression()
     logistic.fit(X_train,y_train)
     logistic_scores['accuracy_scores'].append(logistic.score(X_test,y_test))
     logistic_prediction = logistic.predict(X_test)
     logistic_scores['precision_scores'].append(precision_score(y_test,logistic_prediction))
     logistic_scores['recall_scores'].append(recall_score(y_test,logistic_prediction))

 # Report the mean of each collected metric across the folds, per model.
 print("Decision Tree")
 print("  accuracy:", np.mean(tree_scores['accuracy_scores']))
 print("  precision:", np.mean(tree_scores['precision_scores']))
 print("  recall:", np.mean(tree_scores['recall_scores']))
 print("Logistic Regression")
 print("  accuracy:", np.mean(logistic_scores['accuracy_scores']))
 print("  precision:", np.mean(logistic_scores['precision_scores']))
 print("  recall:", np.mean(logistic_scores['recall_scores']))

The two commented-out lines in the for loop raise this error for both precision and recall, and I don't know why, although both calls worked when I ran them earlier. I can't find any spelling mistake either.

I wonder whether mixing different Python syntaxes is interfering with sklearn, because I once tried a combination like this:

# NOTE(review): a single slice passed to .loc selects rows by index label, not
# columns, and 'Plass' looks like a typo for 'Pclass'. Selecting a range of
# columns needs the two-axis form df.loc[:, start:end] -- confirm against the
# pandas indexing documentation.
X = df.loc['Plass':'Fare'].values
y = df.Survived.values

and it gave errors, but when I used the usual approach it worked fine.

(Note: the code may be wrongly indented; this is my first time using Stack Exchange.)


Solution

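  • Indeed, DecisionTreeClassifier has no precision_score or recall_score method; these are standalone functions in sklearn.metrics (which you already import) that take the true labels and the predictions as arguments.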

    You need to change:

    tree_scores['precision_scores'].append(tree.precision_score(y_test,tree_prediction))
    tree_scores['recall_scores'].append(tree.recall_score(y_test,tree_prediction))
    

    to:

    tree_scores['precision_scores'].append(precision_score(y_test,tree_prediction))
    tree_scores['recall_scores'].append(recall_score(y_test,tree_prediction))
    

    and you're good to go.