Tags: python, pytorch, batch-normalization

Pytorch Expected more than 1 value per channel when training when using BatchNorm


I've written this code:

import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader

inputDim = 10
n = 1000
X = np.random.rand(n,inputDim)
y = np.random.randint(0,2,n)

tensor_x = torch.Tensor(X)
tensor_y = torch.Tensor(y)
Xy = TensorDataset(tensor_x, tensor_y)
XyLoader = DataLoader(Xy, batch_size = 16, shuffle = True, drop_last = True)

model = torch.nn.Sequential(
  torch.nn.Linear(inputDim, 200),
  torch.nn.ReLU(),
  torch.nn.BatchNorm1d(num_features=200),
  torch.nn.Linear(200,100),
  torch.nn.Tanh(),
  torch.nn.BatchNorm1d(num_features=100),
  torch.nn.Linear(100,1),
  torch.nn.Sigmoid()
)

optimizer = torch.optim.Adam(model.parameters(), lr= 0.001)
loss_fn = torch.nn.BCELoss()

nepochs = 1000
for epochs in range(nepochs):
  for X,y in XyLoader:
    batch_size = X.shape[0]
    y_hat = model(X.view(batch_size,-1)).squeeze(1)  # (B, 1) -> (B,) to match y's shape
    loss = loss_fn(y_hat, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()


with torch.no_grad():
  xt = torch.tensor(np.random.rand(1,inputDim))
  y2 = model(xt.float())
  print(y2.detach().numpy()[0][0])

What am I doing wrong with torch.nn.BatchNorm1d? If I run the code without the two BatchNorm1d lines, everything goes "ok", so what's the problem?


Solution

  • In your case, PyTorch is complaining about the shape of the input to nn.BatchNorm1d, which expects a shape of (B, C) or (B, C, L). C is the channel/embedding dimension and L is the length (timesteps) of the input sequence; "1d" in PyTorch usually refers to a sequence, e.g. a tokenized sentence, in which each of the L tokens is represented as a C-dimensional vector and stacked over dim 1. In training mode, batch norm needs more than one value per channel to compute batch statistics, and your final single-sample prediction gives each of the 200 channels exactly one value, hence the error. To fix it, you can add a channel dimension so that each channel sees L values per sample:

    tensor_x = torch.as_tensor(X, dtype=torch.float32).unsqueeze(1)  # (n, 1, inputDim); cast to float32 to match the model's weights
    tensor_y = torch.as_tensor(y, dtype=torch.float32)               # BCELoss needs a float target
    
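    To see the constraint concretely, here is a minimal standalone sketch (module and variable names are just for illustration) of when nn.BatchNorm1d in its default training mode accepts a single sample and when it raises this exact error:

    import torch

    bn_3d = torch.nn.BatchNorm1d(num_features=1)
    x_3d = torch.randn(1, 1, 200)       # (B, C, L): each channel sees B * L = 200 values
    print(bn_3d(x_3d).shape)            # works, even though B == 1

    bn_2d = torch.nn.BatchNorm1d(num_features=200)
    x_2d = torch.randn(1, 200)          # (B, C): each channel sees B == 1 value
    try:
        bn_2d(x_2d)
    except ValueError as err:
        print(err)                      # Expected more than 1 value per channel when training, ...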

    The full modified code:

    import numpy as np
    import torch
    from torch.utils.data import TensorDataset, DataLoader
    
    inputDim = 10
    n = 1000
    X = np.random.rand(n, inputDim)
    y = np.random.randint(0, 2, n)
    
    tensor_x = torch.as_tensor(X, dtype=torch.float32).unsqueeze(1)
    tensor_y = torch.as_tensor(y, dtype=torch.float32)
    Xy = TensorDataset(tensor_x, tensor_y)
    XyLoader = DataLoader(Xy, batch_size = 16, shuffle = True, drop_last = True)
    
    model = torch.nn.Sequential(
      torch.nn.Linear(inputDim, 200),
      torch.nn.ReLU(),
      torch.nn.BatchNorm1d(num_features=1),  # input here is (B, 1, 200)
      torch.nn.Linear(200, 100),
      torch.nn.Tanh(),
      torch.nn.BatchNorm1d(num_features=1),  # input here is (B, 1, 100)
      torch.nn.Linear(100, 1),
      torch.nn.Sigmoid()
    )
    
    optimizer = torch.optim.Adam(model.parameters(), lr= 0.001)
    loss_fn = torch.nn.BCELoss()
    
    nepochs = 1000
    for epochs in range(nepochs):
        for X, y in XyLoader:
            y_hat = model(X).squeeze()  # keep the (B, 1, L) shape for BatchNorm1d; (B, 1, 1) -> (B,)
            loss = loss_fn(y_hat, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
    
    
    with torch.no_grad():
        xt = torch.as_tensor(np.random.rand(1, inputDim), dtype=torch.float32).unsqueeze(1)
        y2 = model(xt)        # xt is already float32, shape (1, 1, inputDim)
        print(y2.item())      # output has shape (1, 1, 1)
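
    As a final note, the prediction above still runs with the model in training mode, which is what makes single samples fragile in the first place. Calling model.eval() beforehand makes BatchNorm normalize with its running statistics instead of batch statistics; that is the standard practice before inference, and for the original (B, C) layout with num_features=200 it would avoid the error on its own:

    model.eval()  # BatchNorm now uses running stats, so a batch of one is safe
    with torch.no_grad():
        xt = torch.as_tensor(np.random.rand(1, inputDim), dtype=torch.float32).unsqueeze(1)
        print(model(xt).item())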