Search code examples

SequentialFeatureSelector ValueError: continuous format is not supported

I am new to machine learning and am trying to understand the SequentialFeatureSelector concept from sklearn. I am using Anaconda and a Jupyter notebook for a proof of concept (PoC). I have imported

from mlxtend.feature_selection import SequentialFeatureSelector as SFS

package. By default, the mlxtend package is not part of Anaconda, so I installed it with the pip install mlxtend command.

I used the sklearn Boston housing dataset for this PoC and wrote the code below. While fitting sfs, I get an error.

How can I fix this error?

import numpy as np

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from mlxtend.feature_selection import SequentialFeatureSelector as sfs
from sklearn.metrics import roc_curve, roc_auc_score
%matplotlib inline
data = load_boston()
X = pd.DataFrame(
X.columns = data.feature_names
y =


ValueError                                Traceback (most recent call last)
<ipython-input-77-96b29660189d> in <module>
      2 X_train.shape
----> 3,y_train)

C:\ProgramData\Anaconda3\lib\site-packages\mlxtend\feature_selection\ in fit(self, X, y, custom_feature_names, **fit_params)
    371                         X=X_,
    372                         y=y,
--> 373                         **fit_params
    374                     )
    375                 else:

C:\ProgramData\Anaconda3\lib\site-packages\mlxtend\feature_selection\ in _inclusion(self, orig_set, subset, X, y, ignore_feature, **fit_params)
    528                              tuple(subset | {feature}),
    529                              **fit_params)
--> 530                             for feature in remaining
    531                             if feature != ignore_feature)

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\ in __call__(self, iterable)
    915             # remaining jobs.
    916             self._iterating = False
--> 917             if self.dispatch_one_batch(iterator):
    918                 self._iterating = self._original_iterator is not None

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\ in dispatch_one_batch(self, iterator)
    757                 return False
    758             else:
--> 759                 self._dispatch(tasks)
    760                 return True

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\ in _dispatch(self, batch)
    714         with self._lock:
    715             job_idx = len(self._jobs)
--> 716             job = self._backend.apply_async(batch, callback=cb)
    717             # A job can complete so quickly than its callback is
    718             # called before we get here, causing self._jobs to

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\ in apply_async(self, func, callback)
    180     def apply_async(self, func, callback=None):
    181         """Schedule a func to be run"""
--> 182         result = ImmediateResult(func)
    183         if callback:
    184             callback(result)

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\ in __init__(self, batch)
    547         # Don't delay the application, to avoid keeping the input
    548         # arguments in memory
--> 549         self.results = batch()
    551     def get(self):

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\ in __call__(self)
    223         with parallel_backend(self._backend, n_jobs=self._n_jobs):
    224             return [func(*args, **kwargs)
--> 225                     for func, args, kwargs in self.items]
    227     def __len__(self):

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\ in <listcomp>(.0)
    223         with parallel_backend(self._backend, n_jobs=self._n_jobs):
    224             return [func(*args, **kwargs)
--> 225                     for func, args, kwargs in self.items]
    227     def __len__(self):

C:\ProgramData\Anaconda3\lib\site-packages\mlxtend\feature_selection\ in _calc_score(selector, X, y, indices, **fit_params)
     32                                  n_jobs=1,
     33                                  pre_dispatch=selector.pre_dispatch,
---> 34                                  fit_params=fit_params)
     35     else:
     36[:, indices], y, **fit_params)

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\ in cross_val_score(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, error_score)
    400                                 fit_params=fit_params,
    401                                 pre_dispatch=pre_dispatch,
--> 402                                 error_score=error_score)
    403     return cv_results['test_score']

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\ in cross_validate(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score, return_estimator, error_score)
    238             return_times=True, return_estimator=return_estimator,
    239             error_score=error_score)
--> 240         for train, test in cv.split(X, y, groups))
    242     zipped_scores = list(zip(*scores))

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\ in __call__(self, iterable)
    915             # remaining jobs.
    916             self._iterating = False
--> 917             if self.dispatch_one_batch(iterator):
    918                 self._iterating = self._original_iterator is not None

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\ in dispatch_one_batch(self, iterator)
    757                 return False
    758             else:
--> 759                 self._dispatch(tasks)
    760                 return True

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\ in _dispatch(self, batch)
    714         with self._lock:
    715             job_idx = len(self._jobs)
--> 716             job = self._backend.apply_async(batch, callback=cb)
    717             # A job can complete so quickly than its callback is
    718             # called before we get here, causing self._jobs to

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\ in apply_async(self, func, callback)
    180     def apply_async(self, func, callback=None):
    181         """Schedule a func to be run"""
--> 182         result = ImmediateResult(func)
    183         if callback:
    184             callback(result)

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\ in __init__(self, batch)
    547         # Don't delay the application, to avoid keeping the input
    548         # arguments in memory
--> 549         self.results = batch()
    551     def get(self):

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\ in __call__(self)
    223         with parallel_backend(self._backend, n_jobs=self._n_jobs):
    224             return [func(*args, **kwargs)
--> 225                     for func, args, kwargs in self.items]
    227     def __len__(self):

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\ in <listcomp>(.0)
    223         with parallel_backend(self._backend, n_jobs=self._n_jobs):
    224             return [func(*args, **kwargs)
--> 225                     for func, args, kwargs in self.items]
    227     def __len__(self):

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\ in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, error_score)
    566         fit_time = time.time() - start_time
    567         # _score will return dict if is_multimetric is True
--> 568         test_scores = _score(estimator, X_test, y_test, scorer, is_multimetric)
    569         score_time = time.time() - start_time - fit_time
    570         if return_train_score:

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\ in _score(estimator, X_test, y_test, scorer, is_multimetric)
    603     """
    604     if is_multimetric:
--> 605         return _multimetric_score(estimator, X_test, y_test, scorer)
    606     else:
    607         if y_test is None:

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\ in _multimetric_score(estimator, X_test, y_test, scorers)
    633             score = scorer(estimator, X_test)
    634         else:
--> 635             score = scorer(estimator, X_test, y_test)
    637         if hasattr(score, 'item'):

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\metrics\ in __call__(self, clf, X, y, sample_weight)
    174         y_type = type_of_target(y)
    175         if y_type not in ("binary", "multilabel-indicator"):
--> 176             raise ValueError("{0} format is not supported".format(y_type))
    178         if is_regressor(clf):

ValueError: continuous format is not supported


  • Looking closely at the trace, you will see that the error is not raised by mlxtend - it is raised by the metrics (scorer) module of scikit-learn, because the roc_auc_score you are using is suitable for classification problems only; for regression problems, such as yours here, it is meaningless.

    From the docs (emphasis added):

    sklearn.metrics.roc_auc_score(y_true, y_score, average='macro', sample_weight=None, max_fpr=None)

    Compute Area Under the Receiver Operating Characteristic Curve (ROC AUC) from prediction scores.

    Note: this implementation is restricted to the binary classification task or multilabel classification task in label indicator format.

    See also the scikit-learn list of metrics per kind of problem, where you can confirm that roc_auc is not suitable for regression.

    So, change the scoring argument in your sfs definition to something like

        scoring='neg_mean_squared_error'

    like in the docs example of SequentialFeatureSelector, or to any other metric suitable for regression, and you will be fine.