I want to use skorch to do multi-output regression. I've created a small toy example as can be seen below. In the example, the NN should predict 5 outputs. I also want to use a preprocessing step that is incorporated using sklearn pipelines (in this example PCA is used, but it could be any other preprocessor). When executing this example I get the following error in the Variable._execution_engine.run_backward step of torch:
RuntimeError: Found dtype Double but expected Float
Am I forgetting something? I suspect that something needs to be cast somewhere, but since skorch handles much of the PyTorch plumbing, I can't see what or where.
Example:
import torch
import skorch
from sklearn.datasets import make_classification, make_regression
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.decomposition import PCA
# Toy data set: 1000 samples, 40 features, 5 regression targets.
X, y = make_regression(
    n_samples=1000,
    n_features=40,
    n_targets=5,
)
# Only the features are cast; y is left in whatever dtype make_regression
# returned.
X = X.astype('float32')
class RegressionModule(torch.nn.Module):
    """Toy regression network: two stacked linear layers with 5 outputs.

    Args:
        input_dim: Number of input features accepted by the first layer.
    """

    def __init__(self, input_dim=80):
        super().__init__()
        self.l0 = torch.nn.Linear(input_dim, 10)
        self.l1 = torch.nn.Linear(10, 5)

    def forward(self, X):
        """Apply both linear layers to ``X``; the last axis of the result has size 5."""
        # There is no activation between the layers, so the overall map
        # stays linear.
        y = self.l0(X)
        y = self.l1(y)
        return y
class InputShapeSetter(skorch.callbacks.Callback):
    """Callback that sets the module's ``input_dim`` from the training data."""

    def on_train_begin(self, net, X, y):
        """Set ``module__input_dim`` on ``net`` to the size of X's last axis."""
        # X is the data handed to the net at fit time, so the module does
        # not need to know the feature count in advance.
        net.set_params(module__input_dim=X.shape[-1])
# Wrap the module in skorch's regressor; the callback sets input_dim when
# training begins.
shape_setter = InputShapeSetter()
net = skorch.NeuralNetRegressor(RegressionModule, callbacks=[shape_setter])

# Reduce the features to 10 principal components before they reach the net.
reducer = PCA(n_components=10)
pipe = make_pipeline(reducer, net)
pipe.fit(X, y)

predictions = pipe.predict(X)
print(predictions)
Edit 1:
Casting X to float32 at the start won't work for every preprocessor, as the following example shows:
import pandas as pd
import torch
import skorch
from sklearn.datasets import make_classification, make_regression
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.decomposition import PCA
from category_encoders import OneHotEncoder

# Toy data set: 1000 samples, 40 features, 5 regression targets.
X, y = make_regression(n_samples=1000, n_features=40, n_targets=5)
# Move the features into a DataFrame with named columns so one of them can
# be turned into a categorical feature.
X = pd.DataFrame(X, columns=[f'feature_{i}' for i in range(X.shape[1])])
# Bin feature_1 into three quantile-based categories.
X['feature_1'] = pd.qcut(X['feature_1'], 3, labels=["good", "medium", "bad"])
# Here the targets are cast; the features are left to the preprocessor.
y = y.astype('float32')
class RegressionModule(torch.nn.Module):
    """Toy regression network: two stacked linear layers with 5 outputs.

    Args:
        input_dim: Number of input features accepted by the first layer.
    """

    def __init__(self, input_dim=80):
        super().__init__()
        self.l0 = torch.nn.Linear(input_dim, 10)
        self.l1 = torch.nn.Linear(10, 5)

    def forward(self, X):
        """Apply both linear layers to ``X``; the last axis of the result has size 5."""
        # There is no activation between the layers, so the overall map
        # stays linear.
        y = self.l0(X)
        y = self.l1(y)
        return y
class InputShapeSetter(skorch.callbacks.Callback):
    """Callback that sets the module's ``input_dim`` from the training data."""

    def on_train_begin(self, net, X, y):
        """Set ``module__input_dim`` on ``net`` to the size of X's last axis."""
        # X is the data handed to the net at fit time, so the module does
        # not need to know the feature count in advance.
        net.set_params(module__input_dim=X.shape[-1])
# Wrap the module in skorch's regressor; the callback sets input_dim when
# training begins.
shape_setter = InputShapeSetter()
net = skorch.NeuralNetRegressor(RegressionModule, callbacks=[shape_setter])

# One-hot encode the categorical column; return_df=False requests array
# output from the encoder instead of a DataFrame.
encoder = OneHotEncoder(cols=['feature_1'], return_df=False)
pipe = make_pipeline(encoder, net)
pipe.fit(X, y)

predictions = pipe.predict(X)
print(predictions)
By default, `OneHotEncoder` returns a NumPy array of dtype `float64`, so one can simply cast the input data `X` when it is fed into the model's `forward()`:
class RegressionModule(torch.nn.Module):
    """Toy regression network that casts its input to the layers' dtype.

    Args:
        input_dim: Number of input features accepted by the first layer.
    """

    def __init__(self, input_dim=80):
        super().__init__()
        self.l0 = torch.nn.Linear(input_dim, 10)
        self.l1 = torch.nn.Linear(10, 5)

    def forward(self, X):
        """Cast ``X`` to the module's dtype, then apply both linear layers."""
        # Preprocessors may hand over float64 data. Casting to the first
        # layer's weight dtype (float32 for a freshly built module) avoids
        # the Double/Float mismatch and keeps working if the module is
        # later converted with .double() or .half().
        X = X.to(self.l0.weight.dtype)
        y = self.l0(X)
        y = self.l1(y)
        return y