Tags: python, pytorch, batch-normalization

Pytorch Expected more than 1 value per channel when training when using BatchNorm


I've written this code:

import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader

inputDim = 10
n = 1000
X = np.random.rand(n,inputDim)
y = np.random.randint(0,2,n)

tensor_x = torch.Tensor(X)
tensor_y = torch.Tensor(y)
Xy = TensorDataset(tensor_x, tensor_y)
XyLoader = DataLoader(Xy, batch_size = 16, shuffle = True, drop_last = True)

model = torch.nn.Sequential(
  torch.nn.Linear(inputDim, 200),
  torch.nn.ReLU(),
  torch.nn.BatchNorm1d(num_features=200),
  torch.nn.Linear(200,100),
  torch.nn.Tanh(),
  torch.nn.BatchNorm1d(num_features=100),
  torch.nn.Linear(100,1),
  torch.nn.Sigmoid()
)

optimizer = torch.optim.Adam(model.parameters(), lr= 0.001)
loss_fn = torch.nn.BCELoss()

nepochs = 1000
for epochs in range(nepochs):
  for X,y in XyLoader:
    batch_size = X.shape[0]
    y_hat = model(X.view(batch_size,-1)).squeeze(1)  # (B, 1) -> (B,) to match y's shape
    loss = loss_fn(y_hat, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()


with torch.no_grad():
  xt = torch.tensor(np.random.rand(1,inputDim))
  y2 = model(xt.float())
  print(y2.detach().numpy()[0][0])

What am I doing wrong with torch.nn.BatchNorm1d? If I run the code without the two BatchNorm1d lines, everything goes "ok", so what's the problem?


Solution

  • In your case, PyTorch is complaining about the shape of the input to nn.BatchNorm1d, which expects a shape of (B, C) or (B, C, L). C is the channel/embedding dimension and L is the length (timesteps) of the input sequence; "1d" in PyTorch usually refers to a sequence, e.g. a tokenized sentence, in which each of the L tokens is represented as a C-dimensional vector and stacked over dim 1. In training mode, batch norm needs more than one value per channel to compute batch statistics, and your final single-sample prediction gives each of the 200 channels exactly one value, hence the error. To fix it, you can add a channel dimension so that each channel sees L values per sample:

    tensor_x = torch.as_tensor(X, dtype=torch.float32).unsqueeze(1)  # (n, 1, inputDim); cast to float32 to match the model's weights
    tensor_y = torch.as_tensor(y, dtype=torch.float32)               # BCELoss needs a float target
    
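    To see the constraint concretely, here is a minimal standalone sketch (module and variable names are just for illustration) of when nn.BatchNorm1d in its default training mode accepts a single sample and when it raises this exact error:

    import torch

    bn_3d = torch.nn.BatchNorm1d(num_features=1)
    x_3d = torch.randn(1, 1, 200)       # (B, C, L): each channel sees B * L = 200 values
    print(bn_3d(x_3d).shape)            # works, even though B == 1

    bn_2d = torch.nn.BatchNorm1d(num_features=200)
    x_2d = torch.randn(1, 200)          # (B, C): each channel sees B == 1 value
    try:
        bn_2d(x_2d)
    except ValueError as err:
        print(err)                      # Expected more than 1 value per channel when training, ...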

    The full modified code:

    import numpy as np
    import torch
    from torch.utils.data import TensorDataset, DataLoader
    
    inputDim = 10
    n = 1000
    X = np.random.rand(n, inputDim)
    y = np.random.randint(0, 2, n)
    
    tensor_x = torch.as_tensor(X, dtype=torch.float32).unsqueeze(1)
    tensor_y = torch.as_tensor(y, dtype=torch.float32)
    Xy = TensorDataset(tensor_x, tensor_y)
    XyLoader = DataLoader(Xy, batch_size = 16, shuffle = True, drop_last = True)
    
    model = torch.nn.Sequential(
      torch.nn.Linear(inputDim, 200),
      torch.nn.ReLU(),
      torch.nn.BatchNorm1d(num_features=1),  # input here is (B, 1, 200)
      torch.nn.Linear(200, 100),
      torch.nn.Tanh(),
      torch.nn.BatchNorm1d(num_features=1),  # input here is (B, 1, 100)
      torch.nn.Linear(100, 1),
      torch.nn.Sigmoid()
    )
    
    optimizer = torch.optim.Adam(model.parameters(), lr= 0.001)
    loss_fn = torch.nn.BCELoss()
    
    nepochs = 1000
    for epochs in range(nepochs):
        for X, y in XyLoader:
            y_hat = model(X).squeeze()  # keep the (B, 1, L) shape for BatchNorm1d; (B, 1, 1) -> (B,)
            loss = loss_fn(y_hat, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
    
    
    with torch.no_grad():
        xt = torch.as_tensor(np.random.rand(1, inputDim), dtype=torch.float32).unsqueeze(1)
        y2 = model(xt)        # xt is already float32, shape (1, 1, inputDim)
        print(y2.item())      # output has shape (1, 1, 1)
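
    As a final note, the prediction above still runs with the model in training mode, which is what makes single samples fragile in the first place. Calling model.eval() beforehand makes BatchNorm normalize with its running statistics instead of batch statistics; that is the standard practice before inference, and for the original (B, C) layout with num_features=200 it would avoid the error on its own:

    model.eval()  # BatchNorm now uses running stats, so a batch of one is safe
    with torch.no_grad():
        xt = torch.as_tensor(np.random.rand(1, inputDim), dtype=torch.float32).unsqueeze(1)
        print(model(xt).item())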