I have the piece of code below, where I am trying to use a one-hot encoder, but I get the error `ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().`
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
import pandas as pd
# Split the target column 'y' off from the feature frame.
target=train_features_df['y']
train_features_df=train_features_df.drop(['y'], axis=1)
# Boolean mask over the columns: True where the column dtype is object.
# This is how the categorical features are identified. Note that the mask
# is a pandas Series, not a plain list.
categorical_feature_mask = train_features_df.dtypes==object
# Filter the categorical column names using the mask and turn them into a list
categorical_cols = train_features_df.columns[categorical_feature_mask].tolist()
# Instantiate the LabelEncoder object
le = LabelEncoder()
# Apply le to the categorical feature columns; fit_transform is called once
# per column, so the same encoder object is refit for every column.
train_features_df[categorical_cols] = train_features_df[categorical_cols].apply(lambda col:
le.fit_transform(col))
# Preview the first 10 rows of the label-encoded columns
train_features_df[categorical_cols].head(10)
# Instantiate OneHotEncoder
# NOTE(review): the boolean mask Series is passed as `categories` here. The
# traceback below shows the encoder evaluating `self.categories != 'auto'`
# inside an `if`, which is where the "truth value of a Series is ambiguous"
# ValueError is raised.
ohe = OneHotEncoder(categories = categorical_feature_mask, sparse=False )
# Intended meaning: categorical_features = boolean mask for categorical columns
# (the keyword actually passed above is `categories`, not `categorical_features`)
# sparse = False: output an array, not a sparse matrix
# Apply OneHotEncoder to the feature frame (the whole frame is passed, not
# only the categorical columns)
ohe.fit_transform(train_features_df)
I get this error on the last line, `ohe.fit_transform(train_features_df)`: "ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all()."
Full traceback message as requested is below:-
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-12-72e45bd93f15> in <module>
23
24 # apply OneHotEncoder on categorical feature columns
---> 25 ohe.fit_transform(train_features_df)
26 #train_encoded_df=pd.DataFrame(data = ohe.fit_transform(train_features_df)) # It returns an numpy array
~\Anaconda3\lib\site-packages\sklearn\preprocessing\_encoders.py in fit_transform(self, X, y)
408 """
409 self._validate_keywords()
--> 410 return super().fit_transform(X, y)
411
412 def transform(self, X):
~\Anaconda3\lib\site-packages\sklearn\base.py in fit_transform(self, X, y, **fit_params)
688 if y is None:
689 # fit method of arity 1 (unsupervised transformation)
--> 690 return self.fit(X, **fit_params).transform(X)
691 else:
692 # fit method of arity 2 (supervised transformation)
~\Anaconda3\lib\site-packages\sklearn\preprocessing\_encoders.py in fit(self, X, y)
383 """
384 self._validate_keywords()
--> 385 self._fit(X, handle_unknown=self.handle_unknown)
386 self.drop_idx_ = self._compute_drop_idx()
387 return self
~\Anaconda3\lib\site-packages\sklearn\preprocessing\_encoders.py in _fit(self, X, handle_unknown)
74 X_list, n_samples, n_features = self._check_X(X)
75
---> 76 if self.categories != 'auto':
77 if len(self.categories) != n_features:
78 raise ValueError("Shape mismatch: if categories is an array,"
~\Anaconda3\lib\site-packages\pandas\core\generic.py in __nonzero__(self)
1477 def __nonzero__(self):
1478 raise ValueError(
-> 1479 f"The truth value of a {type(self).__name__} is ambiguous. "
1480 "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
1481 )
ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().
Invictus,
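The error occurs because you are passing the `categories` parameter something
the encoder does not expect: your boolean mask, which is a pandas Series.
As the traceback shows, the encoder evaluates `self.categories != 'auto'` inside
an `if`; comparing a Series yields another Series, whose truth value is ambiguous.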
If you want to encode just the categorical columns, select them with the mask first, like this:
# Let the encoder infer the categories from the data and return a dense array.
# NOTE(review): recent scikit-learn releases rename `sparse` to `sparse_output`;
# check this against the installed version.
ohe = OneHotEncoder(categories='auto', sparse=False)

# Use the boolean mask to pick the categorical column labels, then slice the
# frame so that only those columns are handed to the encoder.
categorical_column_labels = train_features_df.columns[categorical_feature_mask]
selection = train_features_df[categorical_column_labels]

# Fit on the selected columns and one-hot encode them in a single step.
encoded = ohe.fit_transform(selection)
and then merge the encoded result with the non-categorical columns
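If you want to use the `categories` parameter to pass the category values
explicitly, give it a list with one array of categories per feature (the traceback shows the encoder checking `len(self.categories)` against the number of features); see the example in the scikit-learn `OneHotEncoder` documentation.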
A more elegant approach would be to use the pandas function for one-hot encoding:
# Column labels selected by the boolean mask; only these are expanded into
# indicator columns by get_dummies.
dummy_column_labels = train_features_df.columns[categorical_feature_mask]
pd.get_dummies(data=train_features_df, columns=dummy_column_labels)