Search code examples
python-3.xclassclassificationtypeerrorweka

error with classifier.score() python-weka


I'm trying to do a weka classifier for python to use python libraries with weka models, like @fracpete do on https://github.com/fracpete/sklearn-weka-plugin, but by my own.

Right now I have that and it works for predictions, use the SHAP library, etc.

from sklearn.base import BaseEstimator
from weka.classifiers import Classifier
from weka.core.dataset import Attribute, Instance, Instances
from sklearn.metrics import accuracy_score

class weka_classifier(BaseEstimator):
    
    def __init__(self, classifier = None, dataset = None, index = None):
        #Classifier: es el pww3/weka model
        #Dataset: Data for fit the model
        if classifier is not None:
            self.classifier = classifier
            
        elif dataset is not None:
            self.dataset = dataset
            self.dataset.class_is_last()
               
    def fit(self, X, y):
        return self.fit()
    
    def fit(self):
        return self.classifier.build_classifier(self.dataset)
    
    def predict_instance(self,x):
        x.append(0.0)
        inst = Instance.create_instance(x,classname='weka.core.DenseInstance', weight=1.0)
        inst.dataset = self.dataset
        return self.classifier.classify_instance(inst)
    
    def predict_proba_instance(self,x):
        x.append(0.0)
        inst = Instance.create_instance(x,classname='weka.core.DenseInstance', weight=1.0)
        inst.dataset = self.dataset
        return self.classifier.distribution_for_instance(inst)
    
    def predict_proba(self,X):
        prediction = []
        
        for i in range(X.shape[0]):
            instance = []
            for j in range(X.shape[1]):
                instance.append(X[i][j])
            instance.append(0.0)
            instance = Instance.create_instance(instance,classname='weka.core.DenseInstance', weight=1.0)
            instance.dataset=self.dataset
            
            prediction.append(self.classifier.distribution_for_instance(instance))

        return np.asarray(prediction)    
    
    def predict(self,X):
        prediction = []
        for i in range(X.shape[0]):
            instance = []
            for j in range(X.shape[1]):
                instance.append(X[i][j])
            instance.append(0.0)
            instance = Instance.create_instance(instance,classname='weka.core.DenseInstance', weight=1.0)
            instance.dataset=self.dataset
            prediction.append(self.classifier.classify_instance(instance))

        return np.asarray(prediction)
    

    def set_data(self,dataset):
        self.dataset = dataset
        self.dataset.class_is_last()
        
          
    def score(self,X,y):
        y_pred = self.predict(X)
        score = accuracy_score(y, y_pred)
        
        return score

But when I try to evaluate the classifier with this funcion

evaluate_models_cv(sci_Model_1,X,y,10)

from sklearn.model_selection import cross_validate

def evaluate_models_cv(weka_model, X, y, cv, scoring = None):
    return_train_score = True
    n_jobs = -1
    
    scores = cross_validate(weka_model, X, y, cv = cv, n_jobs = n_jobs, scoring = scoring, return_train_score = return_train_score)
    
    scores = pd.DataFrame(scores)
    means = scores.mean(axis = 0)
    sds = scores.std(axis = 0)
    
    results = dict(mean = means, sd = sds)
    results = pd.DataFrame(results)
    
    return results

I'm getting this error:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
/tmp/ipykernel_2608/4224958314.py in <module>
----> 1 evaluate_models_cv(sci_Model_1,X,y,10)

/tmp/ipykernel_2608/3738102976.py in evaluate_models_cv(weka_model, X, y, cv, scoring)
      5     n_jobs = -1
      6 
----> 7     scores = cross_validate(weka_model, X, y, cv = cv, n_jobs = n_jobs, scoring = scoring, return_train_score = return_train_score)
      8 
      9     scores = pd.DataFrame(scores)

/usr/local/lib/python3.8/dist-packages/sklearn/model_selection/_validation.py in cross_validate(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score, return_estimator, error_score)
    258         scorers = scoring
    259     elif scoring is None or isinstance(scoring, str):
--> 260         scorers = check_scoring(estimator, scoring)
    261     else:
    262         scorers = _check_multimetric_scoring(estimator, scoring)

/usr/local/lib/python3.8/dist-packages/sklearn/metrics/_scorer.py in check_scoring(estimator, scoring, allow_none)
    475             return None
    476         else:
--> 477             raise TypeError(
    478                 "If no scoring is specified, the estimator passed should "
    479                 "have a 'score' method. The estimator %r does not." % estimator

TypeError: If no scoring is specified, the estimator passed should have a 'score' method. The estimator weka_classifier(classifier=AttributeSelectedClassifier:

So I tried to solve it makeing the score() function on the class, but that didn't work.

I'm doing something wrong?


Solution

  • Ok it works, I didn't extends the ClassifierMixin class like:

    class weka_classifier(BaseEstimator, ClassifierMixin):