I'm trying to set up a model for cross-validation, but I can't figure out why the prediction function isn't working. Here is my code:
# 3-fold cross-validation for a LightGBM binary classifier.
# Stores, per fold: the fitted estimator, its ROC-AUC on the held-out fold,
# and its feature importances (as lists, so folds are not overwritten).
results = {"estimator": [], "score": [], "feature_importances": []}
kf = KFold(n_splits=3, shuffle=True)
for fold, (train_index, test_index) in enumerate(kf.split(X, y), start=1):
    print(fold)
    mdl = lgb.LGBMClassifier(
        objective="binary", n_estimators=50, importance_type="gain"
    )
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y[train_index], y[test_index]
    # eval_metric must be a LightGBM metric name (or a LightGBM-style
    # callable returning (name, value, is_higher_better)), not an sklearn
    # scorer like roc_auc_score; it also only has an effect when an
    # eval_set is supplied.
    mdl.fit(
        X=X_train,
        y=y_train,
        eval_set=[(X_test, y_test)],
        eval_metric="auc",
    )
    # predict_proba takes only the feature matrix. The second positional
    # parameter is raw_score (a bool), so passing y_test there is what
    # raised "The truth value of a Series is ambiguous".
    y_proba = mdl.predict_proba(X_test)
    results["estimator"].append(mdl)
    results["score"].append(roc_auc_score(y_test, y_proba[:, 1]))
    # The fitted-attribute name is feature_name_ (feature_names does not
    # exist on LGBMClassifier).
    results["feature_importances"].append(
        [mdl.feature_name_, mdl.feature_importances_]
    )
And here is the error stack:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-24-d5ef259cd27f> in <module>
10 mdl.fit(X=X_train, y=y_train, eval_metric=roc_auc_score, )
11 #import pdb;pdb.set_trace()
---> 12 y_proba = mdl.predict_proba(X_test, y_test)
13 results["estimator"] =mdl
14 results["score"] = roc_auc_score(y_test, y_proba[:, 1])
C:\ProgramData\Anaconda3\lib\site-packages\lightgbm\sklearn.py in predict_proba(self, X, raw_score, start_iteration, num_iteration, pred_leaf, pred_contrib, **kwargs)
918 pred_leaf=False, pred_contrib=False, **kwargs):
919 """Docstring is set after definition, using a template."""
--> 920 result = super().predict(X, raw_score, start_iteration, num_iteration, pred_leaf, pred_contrib, **kwargs)
921 if callable(self._objective) and not (raw_score or pred_leaf or pred_contrib):
922 _log_warning("Cannot compute class probabilities or labels "
C:\ProgramData\Anaconda3\lib\site-packages\lightgbm\sklearn.py in predict(self, X, raw_score, start_iteration, num_iteration, pred_leaf, pred_contrib, **kwargs)
724 "input n_features is %s "
725 % (self._n_features, n_features))
--> 726 return self._Booster.predict(X, raw_score=raw_score, start_iteration=start_iteration, num_iteration=num_iteration,
727 pred_leaf=pred_leaf, pred_contrib=pred_contrib, **kwargs)
728
C:\ProgramData\Anaconda3\lib\site-packages\lightgbm\basic.py in predict(self, data, start_iteration, num_iteration, raw_score, pred_leaf, pred_contrib, data_has_header, is_reshape, **kwargs)
3140 else:
3141 num_iteration = -1
-> 3142 return predictor.predict(data, start_iteration, num_iteration,
3143 raw_score, pred_leaf, pred_contrib,
3144 data_has_header, is_reshape)
C:\ProgramData\Anaconda3\lib\site-packages\lightgbm\basic.py in predict(self, data, start_iteration, num_iteration, raw_score, pred_leaf, pred_contrib, data_has_header, is_reshape)
698 data = _data_from_pandas(data, None, None, self.pandas_categorical)[0]
699 predict_type = C_API_PREDICT_NORMAL
--> 700 if raw_score:
701 predict_type = C_API_PREDICT_RAW_SCORE
702 if pred_leaf:
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\generic.py in __nonzero__(self)
1440 @final
1441 def __nonzero__(self):
-> 1442 raise ValueError(
1443 f"The truth value of a {type(self).__name__} is ambiguous. "
1444 "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().
I can't think of a reason why it would be trying to test the truthiness of an array in this instance.
According to the docs
https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.LGBMClassifier.html
the second positional argument of `predict_proba` is `raw_score`, a single True/False boolean — but you are passing `y_test` (a whole Series) in that position, which is what triggers the ambiguous-truth-value error. Call it as `mdl.predict_proba(X_test)` instead.