Search code examples
python, oop, inheritance, scikit-learn, super

Custom Transformer Class Inheritance


I'm attempting to put together a custom transformer for sklearn which returns either a DataFrame or an array for my X data. It inherits from sklearn's base classes and from LowessSmoother in a library called tsmoothie. However, I'm not quite sure about the use of super() and inheritance. I'm getting this error:

TypeError: __init__() got an unexpected keyword argument 'smooth_fraction'

My code:

import pandas as pd
from tsmoothie.smoother import LowessSmoother
from sklearn.base import BaseEstimator, TransformerMixin

class LowessSmootherWrap(TransformerMixin, BaseEstimator, LowessSmoother):
    """Transformer that subclasses the sklearn base classes and LowessSmoother.

    ``df`` selects the output of ``transform``: a pandas DataFrame when it is
    true, otherwise the transposed ``smooth_data`` as-is.
    """

    def __init__(self, df=True):
        # NOTE(review): ``df`` is the only parameter declared here, so any other
        # keyword a caller passes (e.g. ``smooth_fraction``) is rejected before
        # this body runs. The smoother settings are hard-coded in the call
        # below; which base-class __init__ receives them depends on the MRO
        # (presumably LowessSmoother's -- confirm).
        super().__init__(smooth_fraction=0.01, iterations=2)
        self.df = df

    def fit(self, X, y=None):
        """Mark the instance as fitted and, in DataFrame mode, remember X's labels."""
        self._is_fitted = True
        if self.df == True:
            # Stored so transform() can rebuild a labelled DataFrame.
            self.feature_names_ = X.columns
            self.index_ = X.index
        return self

    def transform(self, X, y=None, **kwargs):
        """Smooth the transposed ``X`` and return the transposed result.

        ``y`` and ``**kwargs`` are accepted but not used.
        """
        # ``smooth`` and ``smooth_data`` are not defined in this class;
        # presumably they come from LowessSmoother -- confirm.
        self.smooth(X.T)
        # DataFrame mode reuses the index and columns captured in fit().
        return pd.DataFrame(self.smooth_data.T,
                            index=self.index_,
                            columns=self.feature_names_) \
            if self.df == True else self.smooth_data.T

    def fit_transform(self, X, y=None, **kwargs):
        """Fit on ``X`` and transform the same ``X``; ``y`` is not forwarded."""
        return self.fit(X).transform(X)

# NOTE: this call raises the TypeError quoted above -- __init__ is declared as
# __init__(self, df=True), so smooth_fraction and iterations are not accepted.
smoother = LowessSmootherWrap(smooth_fraction=0.01, iterations=2, df=False)

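Solution

The error occurs because `__init__` in the question only declares `df`, so passing `smooth_fraction` and `iterations` to the constructor is rejected. Rather than inheriting from `LowessSmoother`, keep a `LowessSmoother` instance inside the transformer (composition) and expose the smoother settings as explicit constructor parameters. The transformer is then created with `LowessSmootherWrap(smoothing_fraction=0.01, n_iterations=2, df=False)`: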

  • class LowessSmootherWrap(TransformerMixin, BaseEstimator):
        def __init__(self, smoothing_fraction, n_iterations, df=True, **kwargs):
            super().__init__(**kwargs)
            self.smoothing_fraction = smoothing_fraction
            self.n_iterations = n_iterations
            self.df = df
    
        def fit(self, X, y=None):
            self._is_fitted = True
            if self.df == True:
                self.feature_names_ = X.columns
                self.index_ = X.index
            return self
    
        def transform(self, X, y=None, **kwargs):
            self.smoother = LowessSmoother(smooth_fraction=self.smoothing_fraction,
                                           iterations=self.n_iterations)
            self.smoother.smooth(X.copy().T)
            return pd.DataFrame(self.smoother.smooth_data.T,
                                index=self.index_,
                                columns=self.feature_names_) \
                if self.df == True else self.smoother.smooth_data.T
    
        def fit_transform(self, X, y=None, **kwargs):
            return super().fit_transform(X)