python scikit-learn pytorch ensemble-learning skorch

RuntimeError when using StackingClassifier with PyTorch neural network wrapped by skorch and Random Forest

I'm trying to create a stacking ensemble for binary classification using the Breast Cancer Wisconsin Dataset. My base models are a PyTorch neural network wrapped by skorch and a Random Forest, and my meta model is a Logistic Regression. I'm using StackingClassifier from scikit-learn for stacking.

However, I'm encountering a RuntimeError: Found dtype Long but expected Float.

What could be the cause of this error, and how can I fix it?

Here's my code:

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from skorch import NeuralNetBinaryClassifier

# Fetch the Breast Cancer Wisconsin dataset and pull out features and labels
data = load_breast_cancer()
X = data.data
y = data.target

# Hold out 20% of the samples for testing; the fixed seed makes the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training split only, then apply it to both splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Define the PyTorch neural network
class SimpleNN(nn.Module):
    """Small fully connected network for binary classification.

    Three linear layers (num_features -> 32 -> 16 -> 1) with ReLU after the
    first two and a sigmoid on the output, so ``forward`` returns values in
    [0, 1] rather than raw logits.
    """

    def __init__(self, num_features):
        """Build the three linear layers.

        Args:
            num_features: Size of the last dimension of the input tensor.
        """
        super().__init__()
        self.fc1 = nn.Linear(num_features, 32)
        self.fc2 = nn.Linear(32, 16)
        self.fc3 = nn.Linear(16, 1)

    def forward(self, x):
        """Return one sigmoid output per sample.

        Args:
            x: Float tensor whose last dimension is ``num_features``.

        Returns:
            The sigmoid outputs with the trailing size-1 dimension removed,
            i.e. shape ``(batch,)`` for a ``(batch, num_features)`` input.
        """
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.sigmoid(self.fc3(x))
        # Squeeze only the last dimension: a bare squeeze() would also drop
        # the batch dimension for a single-sample batch, producing a 0-d
        # tensor whose shape no longer matches the targets.
        return x.squeeze(-1)

# Create a skorch-wrapped PyTorch neural network
# The input width is taken from the number of columns in the training matrix.
num_features = X_train.shape[1]
# SimpleNN is passed as a class (not an instance) together with the loss and
# optimizer classes. nn.BCELoss is paired with the sigmoid output of SimpleNN.
# Training runs on the GPU when CUDA is available, otherwise on the CPU.
net = NeuralNetBinaryClassifier(
    SimpleNN, module__num_features=num_features, criterion=nn.BCELoss, optimizer=optim.Adam,
    lr=0.001, max_epochs=20, batch_size=64, device='cuda' if torch.cuda.is_available() else 'cpu'
)

# Create a Random Forest classifier (100 trees, fixed seed for reproducibility)
rf = RandomForestClassifier(n_estimators=100, random_state=42)

# Create the StackingClassifier with the base models and a Logistic Regression meta-model
# Each base model is registered as a (name, estimator) pair.
estimators = [
    ('nn', net),
    ('rf', rf),
]
stacking_clf = StackingClassifier(estimators=estimators, final_estimator=LogisticRegression())

# Train the stacking model
# The features are cast to float32; the targets are passed unchanged.
# This is the call that raises the reported RuntimeError.
stacking_clf.fit(X_train.astype(np.float32), y_train)

# Evaluate the stacking model on the held-out test split and report the score
score = stacking_clf.score(X_test.astype(np.float32), y_test)
print(f"Stacking Classifier Accuracy: {score:.4f}")

And the error message:

RuntimeError                              Traceback (most recent call last)
Cell In[1], line 56
     53 stacking_clf = StackingClassifier(estimators=estimators, final_estimator=LogisticRegression())
     55 # Train the stacking model
---> 56 stacking_clf.fit(X_train.astype(np.float32), y_train)
     58 # Evaluate the stacking model
     59 score = stacking_clf.score(X_test.astype(np.float32), y_test)

File ~/.pyenv/versions/3.10.3/lib/python3.10/site-packages/sklearn/ensemble/_stacking.py:660, in StackingClassifier.fit(self, X, y, sample_weight)
    658     self.classes_ = self._label_encoder.classes_
    659     y_encoded = self._label_encoder.transform(y)
--> 660 return super().fit(X, y_encoded, sample_weight)

File ~/.pyenv/versions/3.10.3/lib/python3.10/site-packages/sklearn/ensemble/_stacking.py:209, in _BaseStacking.fit(self, X, y, sample_weight)
    204             self.estimators_.append(estimator)
    205 else:
    206     # Fit the base estimators on the whole training data. Those
    207     # base estimators will be used in transform, predict, and
    208     # predict_proba. They are exposed publicly.
--> 209     self.estimators_ = Parallel(n_jobs=self.n_jobs)(
    210         delayed(_fit_single_estimator)(clone(est), X, y, sample_weight)
    211         for est in all_estimators
    212         if est != "drop"
    213     )
...
   3095     new_size = _infer_size(target.size(), weight.size())
   3096     weight = weight.expand(new_size)
-> 3098 return torch._C._nn.binary_cross_entropy(input, target, weight, reduction_enum)

RuntimeError: Found dtype Long but expected Float

I've already tried converting the target labels to float32 before fitting the stacking model, but the same error persists.

# Train the stacking model, this time casting the targets to float32 as well
# (the same RuntimeError is still raised)
stacking_clf.fit(X_train.astype(np.float32), y_train.astype(np.float32))

Solution

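StackingClassifier encodes the targets with an internal LabelEncoder before fitting the base estimators (see `y_encoded = self._label_encoder.transform(y)` in the traceback), which turns them back into integers. That is why casting y_train to float32 before calling fit has no effect: the network still receives integer (Long) labels, while nn.BCELoss requires float targets. You can use a custom NeuralNetBinaryClassifier that casts the labels inside its .fit method, and use it in place of skorch's class when building `net`.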

import numpy as np
import skorch

class NeuralNetBinaryClassifier(skorch.NeuralNetBinaryClassifier):
    """skorch binary classifier that always trains on float32 targets."""

    def fit(self, X, y, **fit_params):
        """Cast ``y`` to a float32 array, then delegate to the parent ``fit``.

        The cast happens here rather than at the call site so that it still
        applies when a caller hands this estimator integer labels.
        """
        float_targets = np.asarray(y, dtype=np.float32)
        return super().fit(X, float_targets, **fit_params)

...