
input.size(-1) must be equal to input_size. Expected 763, got 1


I am trying to train my model with a batch size of 50. However, I am getting this error:

input.size(-1) must be equal to input_size. Expected 763, got 1

My code is:

for epoch in range(1, n_epochs + 1):

    for i, (x_batch, y_batch) in enumerate(trn_dl):
        #model.to(device)
        #model.train()
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)
        #sched.step()

        print('shape of the input batch')
        print(x_batch.shape)
        opt.zero_grad()
        x_batch = torch.unsqueeze(x_batch, 2)
        print(x_batch.shape)
        print(x_batch)

        out = model(x_batch)  # here I am getting the error
        y_batch = torch.unsqueeze(y_batch, 0)

        print('NOW')
        print(y_batch.dtype)
        y_batch = y_batch.to(torch.float32)

        out = out.to(torch.float32)
        out = torch.transpose(out, 1, 0)
        loss = loss_function(out, torch.max(y_batch, 1)[1])
        #(out, y_batch)
        #targets = targets.to(torch.float32)
        loss.backward()
        opt.step()

My model is:

class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size)
        self.linear = nn.Linear(hidden_size, output_size)
        self.hidden_cell = (torch.zeros(1, 1, self.hidden_size),
                            torch.zeros(1, 1, self.hidden_size))

    def forward(self, input_seq):
        h0 = torch.zeros(1, input_seq.size(0), self.hidden_size).to(device)
        c0 = torch.zeros(1, input_seq.size(0), self.hidden_size).to(device)
        lstm_out, _ = self.lstm(input_seq, (h0, c0))
        lstm_out = self.fc(lstm_out[:, -1, :])
        predictions = self.Linear(lstm_out.view(len(input_seq), -1))
        print("predictions", predictions)
        return predictions[-1]

Could anyone please look into it and help me?


Solution

  • By the looks of it, you are trying to pick the last time step of the LSTM's output with lstm_out[:, -1, :]. However, by default nn.RNN modules (including nn.LSTM) put the batch axis second, not first: the expected layout is (sequence_length, batch_size, features). So you end up picking the last batch element, not the last time step. You might want to pass batch_first=True when initializing your nn.LSTM:

    Something like:

    self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
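
    With batch_first=True, both the input and the output of the LSTM use the layout (batch_size, sequence_length, features), so lstm_out[:, -1, :] selects the last time step of every sequence in the batch. Note that the error message is also telling you something about the feature axis: input.size(-1), the last dimension of whatever you feed in, must equal input_size (763 here). Below is a minimal, self-contained sketch of how the model could look with batch_first=True; the hidden size, output size, and sequence length are made-up values for illustration, not taken from your code:

    import torch
    import torch.nn as nn

    class LSTM(nn.Module):
        def __init__(self, input_size, hidden_size, output_size):
            super().__init__()
            self.hidden_size = hidden_size
            # batch_first=True -> input/output layout is (batch, seq, features)
            self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
            self.linear = nn.Linear(hidden_size, output_size)

        def forward(self, input_seq):
            # input_seq: (batch_size, sequence_length, input_size)
            h0 = torch.zeros(1, input_seq.size(0), self.hidden_size, device=input_seq.device)
            c0 = torch.zeros(1, input_seq.size(0), self.hidden_size, device=input_seq.device)
            lstm_out, _ = self.lstm(input_seq, (h0, c0))
            # lstm_out: (batch_size, sequence_length, hidden_size);
            # [:, -1, :] now picks the last time step, not the last batch element
            return self.linear(lstm_out[:, -1, :])

    # Quick shape check (hypothetical sizes: batch of 50, 10 time steps, 763 features)
    model = LSTM(input_size=763, hidden_size=64, output_size=5)
    x = torch.randn(50, 10, 763)
    print(model(x).shape)  # torch.Size([50, 5])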