I'm trying to do a weka classifier for python to use python libraries with weka models, like @fracpete do on https://github.com/fracpete/sklearn-weka-plugin, but by my own.
Right now I have that and it works for predictions, use the SHAP library, etc.
from sklearn.base import BaseEstimator
from weka.classifiers import Classifier
from weka.core.dataset import Attribute, Instance, Instances
from sklearn.metrics import accuracy_score
class weka_classifier(BaseEstimator):
def __init__(self, classifier = None, dataset = None, index = None):
#Classifier: es el pww3/weka model
#Dataset: Data for fit the model
if classifier is not None:
self.classifier = classifier
elif dataset is not None:
self.dataset = dataset
self.dataset.class_is_last()
def fit(self, X, y):
return self.fit()
def fit(self):
return self.classifier.build_classifier(self.dataset)
def predict_instance(self,x):
x.append(0.0)
inst = Instance.create_instance(x,classname='weka.core.DenseInstance', weight=1.0)
inst.dataset = self.dataset
return self.classifier.classify_instance(inst)
def predict_proba_instance(self,x):
x.append(0.0)
inst = Instance.create_instance(x,classname='weka.core.DenseInstance', weight=1.0)
inst.dataset = self.dataset
return self.classifier.distribution_for_instance(inst)
def predict_proba(self,X):
prediction = []
for i in range(X.shape[0]):
instance = []
for j in range(X.shape[1]):
instance.append(X[i][j])
instance.append(0.0)
instance = Instance.create_instance(instance,classname='weka.core.DenseInstance', weight=1.0)
instance.dataset=self.dataset
prediction.append(self.classifier.distribution_for_instance(instance))
return np.asarray(prediction)
def predict(self,X):
prediction = []
for i in range(X.shape[0]):
instance = []
for j in range(X.shape[1]):
instance.append(X[i][j])
instance.append(0.0)
instance = Instance.create_instance(instance,classname='weka.core.DenseInstance', weight=1.0)
instance.dataset=self.dataset
prediction.append(self.classifier.classify_instance(instance))
return np.asarray(prediction)
def set_data(self,dataset):
self.dataset = dataset
self.dataset.class_is_last()
def score(self,X,y):
y_pred = self.predict(X)
score = accuracy_score(y, y_pred)
return score
But when I try to evaluate the classifier with this funcion
evaluate_models_cv(sci_Model_1,X,y,10)
from sklearn.model_selection import cross_validate
def evaluate_models_cv(weka_model, X, y, cv, scoring = None):
return_train_score = True
n_jobs = -1
scores = cross_validate(weka_model, X, y, cv = cv, n_jobs = n_jobs, scoring = scoring, return_train_score = return_train_score)
scores = pd.DataFrame(scores)
means = scores.mean(axis = 0)
sds = scores.std(axis = 0)
results = dict(mean = means, sd = sds)
results = pd.DataFrame(results)
return results
I'm getting this error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
/tmp/ipykernel_2608/4224958314.py in <module>
----> 1 evaluate_models_cv(sci_Model_1,X,y,10)
/tmp/ipykernel_2608/3738102976.py in evaluate_models_cv(weka_model, X, y, cv, scoring)
5 n_jobs = -1
6
----> 7 scores = cross_validate(weka_model, X, y, cv = cv, n_jobs = n_jobs, scoring = scoring, return_train_score = return_train_score)
8
9 scores = pd.DataFrame(scores)
/usr/local/lib/python3.8/dist-packages/sklearn/model_selection/_validation.py in cross_validate(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score, return_estimator, error_score)
258 scorers = scoring
259 elif scoring is None or isinstance(scoring, str):
--> 260 scorers = check_scoring(estimator, scoring)
261 else:
262 scorers = _check_multimetric_scoring(estimator, scoring)
/usr/local/lib/python3.8/dist-packages/sklearn/metrics/_scorer.py in check_scoring(estimator, scoring, allow_none)
475 return None
476 else:
--> 477 raise TypeError(
478 "If no scoring is specified, the estimator passed should "
479 "have a 'score' method. The estimator %r does not." % estimator
TypeError: If no scoring is specified, the estimator passed should have a 'score' method. The estimator weka_classifier(classifier=AttributeSelectedClassifier:
So I tried to solve it makeing the score() function on the class, but that didn't work.
I'm doing something wrong?
Ok it works, I didn't extends the ClassifierMixin class like:
class weka_classifier(BaseEstimator, ClassifierMixin):