Search code examples
pytorchbatch-normalization

Expected more than 1 value per channel when training, got input size torch.Size([1, **])


I met an error when I use BatchNorm1d, code:

##% first I set a model
class net(nn.Module):
    def __init__(self, max_len, feature_linear, rnn, input_size, hidden_size, output_dim, num__rnn_layers, bidirectional, batch_first=True, p=0.2):
        super(net, self).__init__()
        self.max_len = max_len
        self.feature_linear = feature_linear
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.bidirectional = bidirectional
        self.num_directions = 2 if bidirectional == True else 1
        self.p = p
        self.batch_first = batch_first
        self.linear1 = nn.Linear(max_len, feature_linear) 
        init.kaiming_normal_(self.linear1.weight, mode='fan_in')
        self.BN1 = BN(feature_linear) 
        
    def forward(self, xb, seq_len_crt):
        rnn_input = torch.zeros(xb.shape[0], self.feature_linear, self.input_size)
        for i in range(self.input_size): 
            out = self.linear1(xb[:, :, i]) # xb[:,:,i].shape:(1,34), out.shape(1,100)
            out = F.relu(out) # 输入:out.shape(1,100), 输出:out.shape(1,100)
            out = self.BN1(out) # 输入:out.shape(1,100),输出:out.shape(1,100)
        
        return y_hat.squeeze(-1)

##% make the model as a function and optimize it
input_size = 5
hidden_size = 32
output_dim = 1
num_rnn_layers = 2
bidirectional = True
rnn = nn.LSTM
batch_size = batch_size
feature_linear = 60
BN = nn.BatchNorm1d

model = net(max_len, feature_linear, rnn, input_size, hidden_size, output_dim, num_rnn_layers, bidirectional, p=0.1)
loss_func = nn.MSELoss(reduction='none')
# optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
# optimizer = optim.Adam(model.parameters(), lr=0.01)
optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=0.05)


##% use this model to predict data   
def predict(xb, model, seq_len):
    # xb's shape should be (batch_size, seq_len, n_features)
    if xb.ndim == 2:  # suitable for both ndarray and Tensor   
       # add a {batch_size} dim
       xb = xb[None, ]
            
    if not isinstance(xb, torch.Tensor):  
       xb = torch.Tensor(xb)
    return model(xb, seq_len) # xb.shape(1,34,5)

##% create training/valid/test data    
seq_len_train_iter = []
for i in range(0, len(seq_len_train), batch_size):
    if i + batch_size <= len(seq_len_train):
       seq_len_train_iter.append(seq_len_train[i:i+batch_size])
    else: 
       seq_len_train_iter.append(seq_len_train[i:])
        
seq_len_valid_iter = []
for i in range(0, len(seq_len_valid), batch_size):
    if i + batch_size <= len(seq_len_valid):
        seq_len_valid_iter.append(seq_len_valid[i:i+batch_size])
    else: 
        seq_len_valid_iter.append(seq_len_valid[i:])
        
seq_len_test_iter = []
for i in range(0, len(seq_len_test), batch_size):
    if i + batch_size <= len(seq_len_test):
        seq_len_test_iter.append(seq_len_test[i:i+batch_size])
    else: 
        seq_len_test_iter.append(seq_len_test[i:])

##% fit model
def fit(epochs, model, loss_func, optimizer, train_dl, valid_dl, valid_ds, seq_len_train_iter, seq_len_valid_iter):
    train_loss_record = []
    valid_loss_record = []
    mean_pct_final = []
    mean_abs_final = []
    
    is_better = False
    last_epoch_abs_error = 0
    last_epoch_pct_error = 0

    mean_pct_final_train = []
    mean_abs_final_train = []
    
    for epoch in range(epochs):
        # seq_len_crt: current batch seq len
        for batches, ((xb, yb), seq_len_crt) in enumerate(zip(train_dl, seq_len_train_iter)):
            if isinstance(seq_len_crt, np.int64):   
                seq_len_crt = [seq_len_crt]
            y_hat = model(xb, seq_len_crt)
            packed_yb = nn.utils.rnn.pack_padded_sequence(yb, seq_len_crt, batch_first=True, enforce_sorted=False)
            final_yb, input_sizes = nn.utils.rnn.pad_packed_sequence(packed_yb)
            final_yb = final_yb.permute(1, 0)
            # assert torch.all(torch.tensor(seq_len_crt).eq(input_sizes))
            loss = loss_func(y_hat, final_yb)

            batch_size_crt = final_yb.shape[0]
            loss = (loss.sum(-1) / input_sizes).sum() / batch_size_crt

            loss.backward()
            optimizer.step()
            # scheduler.step()
            optimizer.zero_grad()
#             print(i)

            with torch.no_grad():
                train_loss_record.append(loss.item())
                if batches % 50 == 0 and epoch % 1 == 0:
#                     print(f'Epoch {epoch}, batch {i} training loss: {loss.item()}')

                    y_hat = predict(xb[0], model, torch.tensor([seq_len_crt[0]])).detach().numpy().squeeze() # xb[0].shape(34,5)
                    label = yb[0][:len(y_hat)]
                    # plt.ion()
                    plt.plot(y_hat, label='predicted')
                    plt.plot(label, label='label')
                    plt.legend(loc='upper right')
                    plt.title('training mode')
                    plt.text(len(y_hat)+1, max(y_hat.max(), label.max()), f'Epoch {epoch}, batch {batches} training loss: {loss.item()}')
                    plt.show()
    return train_loss_record

but I met:Expected more than 1 value per channel when training, got input size torch.Size([1, 60]) the error message is:

ValueError                                Traceback (most recent call last)
<ipython-input-119-fb062ad3f20e> in <module>
----> 1 fit(500, model, loss_func, optimizer, train_dl, valid_dl, valid_ds, seq_len_train_iter, seq_len_valid_iter)

<ipython-input-118-2eb946c379bf> in fit(epochs, model, loss_func, optimizer, train_dl, valid_dl, valid_ds, seq_len_train_iter, seq_len_valid_iter)
     38 #                     print(f'Epoch {epoch}, batch {i} training loss: {loss.item()}')
     39 
---> 40                     y_hat = predict(xb[0], model, torch.tensor([seq_len_crt[0]])).detach().numpy().squeeze() # xb[0].shape(34,5)
     41                     label = yb[0][:len(y_hat)]
     42                     # plt.ion()

<ipython-input-116-28afce77e325> in predict(xb, model, seq_len)
      7     if not isinstance(xb, torch.Tensor):
      8         xb = torch.Tensor(xb)
----> 9     return model(xb, seq_len) # xb.shape(None,34,5)

D:\Anaconda3\envs\LSTM\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

<ipython-input-114-3e9c30d20ed6> in forward(self, xb, seq_len_crt)
     50             out = self.linear1(xb[:, :, i]) # xb[:,:,i].shape:(None,34), out.shape(None,100)
     51             out = F.relu(out) # 输入:out.shape(None,100), 输出:out.shape(None,100)
---> 52             out = self.BN1(out) # 输入:out.shape(None,100),输出:out.shape(None,100)
     53 
     54             out = self.linear2(out)

D:\Anaconda3\envs\LSTM\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

D:\Anaconda3\envs\LSTM\lib\site-packages\torch\nn\modules\batchnorm.py in forward(self, input)
    129         used for normalization (i.e. in eval mode when buffers are not None).
    130         """
--> 131         return F.batch_norm(
    132             input,
    133             # If buffers are not to be tracked, ensure that they won't be updated

D:\Anaconda3\envs\LSTM\lib\site-packages\torch\nn\functional.py in batch_norm(input, running_mean, running_var, weight, bias, training, momentum, eps)
   2052                 bias=bias, training=training, momentum=momentum, eps=eps)
   2053     if training:
-> 2054         _verify_batch_size(input.size())
   2055 
   2056     return torch.batch_norm(

D:\Anaconda3\envs\LSTM\lib\site-packages\torch\nn\functional.py in _verify_batch_size(size)
   2035         size_prods *= size[i + 2]
   2036     if size_prods == 1:
-> 2037         raise ValueError('Expected more than 1 value per channel when training, got input size {}'.format(size))
   2038 
   2039 

ValueError: Expected more than 1 value per channel when training, got input size torch.Size([1, 60])

I have checked and I found that in out = self.BN1(out),out.shape = (1,60),it seems that batchsize=1 is not permitted in BatchNorm1d .But I don't know how to modify it.


Solution

  • what does BatchNorm1d do mathematically?
    try and write down the equation for the case of batch_size=1 and you'll understand why pytorch is angry with you.

    How to solve it?
    It is simple: BatchNorm has two "modes of operation": one is for training where it estimates the current batch's mean and variance (this is why you must have batch_size>1 for training).
    The other "mode" is for evaluation: it uses accumulated mean and variance to normalize new inputs without re-estimating the mean and variance. In this mode there is no problem processing samples one by one.

    When evaluating your model use model.eval() before and model.train() after.