Search code examples
python naivebayes

AttributeError: 'GaussianNB' object has no attribute 'accuracy_score'


While executing the following code, I got the error "AttributeError: 'GaussianNB' object has no attribute 'accuracy_score'":

    # Driver script from the question: train a Gaussian naive Bayes classifier
    # on the preprocessed e-mail data and print its accuracy on the test split.
    import sys
    from time import time  # imported but not used in this snippet
    # Make the modules in ../tools/ importable before importing email_preprocess.
    sys.path.append("../tools/")
    from email_preprocess import preprocess
    # preprocess() returns the train/test features and the train/test labels.
    features_train, features_test, labels_train, labels_test = preprocess()
    from sklearn.naive_bayes import GaussianNB
    clf = GaussianNB()
    # Fit on the training split, then predict labels for the test split.
    clf.fit(features_train, labels_train)
    pred=clf.predict(features_test)
    from sklearn.metrics import accuracy_score
    # This is the line that raises the reported AttributeError: accuracy_score
    # is the function imported just above from sklearn.metrics, not an
    # attribute of the GaussianNB instance bound to clf.
    print clf.accuracy_score(pred, labels_test)

The email_preprocess.py module is as follows:

   import pickle
   import cPickle
   import numpy
   from sklearn import cross_validation
   from sklearn.feature_extraction.text import TfidfVectorizer
   from sklearn.feature_selection import SelectPercentile, f_classif
   def preprocess(words_file="../tools/word_data.pkl",
                  authors_file="../tools/email_authors.pkl"):
       """Load the pickled e-mail data and return vectorized train/test splits.

       Steps, in order:
         1. Unpickle the author labels from ``authors_file`` and the e-mail
            texts from ``words_file``.
         2. Split texts and labels in half (``test_size=0.5``) with a fixed
            ``random_state=42`` so the split is reproducible.
         3. Turn the texts into TF-IDF features (sublinear term frequency,
            terms with document frequency above 0.5 dropped, English stop
            words removed). The vectorizer is fitted on the training texts
            only and then applied to the test texts.
         4. Keep the top 10 percent of features as scored by ``f_classif``
            on the training data, and convert both matrices to dense arrays.
         5. Print how many training e-mails belong to each author.

       Args:
           words_file: Path to the pickle holding the e-mail texts.
           authors_file: Path to the pickle holding the author labels.

       Returns:
           A 4-tuple ``(features_train, features_test, labels_train,
           labels_test)`` where the two feature entries are the dense arrays
           produced in step 4.
       """
       # NOTE(review): pickle can execute arbitrary code while loading; only
       # point these paths at trusted files.
       # The original open mode "r" is kept on purpose; the context managers
       # only guarantee the handles are closed even if unpickling raises.
       with open(authors_file, "r") as authors_file_handler:
           authors = pickle.load(authors_file_handler)
       with open(words_file, "r") as words_file_handler:
           word_data = cPickle.load(words_file_handler)

       features_train, features_test, labels_train, labels_test = (
           cross_validation.train_test_split(
               word_data, authors, test_size=0.5, random_state=42))

       vectorizer = TfidfVectorizer(sublinear_tf=True, max_df=0.5,
                                    stop_words='english')
       # Fit on the training texts only so the test texts do not influence
       # the vocabulary or the IDF weights.
       features_train_transformed = vectorizer.fit_transform(features_train)
       features_test_transformed = vectorizer.transform(features_test)

       selector = SelectPercentile(f_classif, percentile=10)
       selector.fit(features_train_transformed, labels_train)
       features_train_transformed = selector.transform(
           features_train_transformed).toarray()
       features_test_transformed = selector.transform(
           features_test_transformed).toarray()

       # sum(labels_train) is reported as the Chris count, so labels are
       # presumably 1 for Chris and 0 for Sara -- confirm against the data.
       # A single pre-formatted argument prints the same text as the original
       # two-item print statements.
       chris_count = sum(labels_train)
       print("no. of Chris training emails: %s" % chris_count)
       print("no. of Sara training emails: %s"
             % (len(labels_train) - chris_count))

       return (features_train_transformed, features_test_transformed,
               labels_train, labels_test)

Can anyone help me with this?


Solution

  • You have:

    from sklearn.metrics import accuracy_score
    # Looks up accuracy_score as an attribute of clf (the GaussianNB instance),
    # which is what raises the AttributeError.
    print clf.accuracy_score(pred, labels_test)
    

    You need to remove the leading `clf.`, because it makes Python look for `accuracy_score` as an attribute of the GaussianNB instance instead of calling the function you imported from sklearn.metrics.

    Try:

    # accuracy_score is a standalone function imported from sklearn.metrics,
    # so it is called directly rather than looked up on the classifier.
    from sklearn.metrics import accuracy_score
    print accuracy_score(pred, labels_test)