This is my LSTM model, and I am running into a peculiar problem while training it.
import torch
import torch.nn as nn

class LSTM1(nn.Module):
    def __init__(self, num_classes, input_size, hidden_size, num_layers, seq_length, drop_prob=0.0):
        super(LSTM1, self).__init__()
        self.num_classes = num_classes  # number of classes
        self.num_layers = num_layers    # number of stacked LSTM layers
        self.input_size = input_size    # input size
        self.hidden_size = hidden_size  # hidden state size
        self.seq_length = seq_length    # sequence length

        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, dropout=drop_prob, batch_first=True)  # lstm
        # self.dropout = nn.Dropout(drop_prob)
        # self.fc_1 = nn.Linear(hidden_size, num_classes)
        self.fc_1 = nn.Linear(hidden_size, 64)  # fully connected 1
        self.fc = nn.Linear(64, num_classes)    # fully connected last layer
        self.relu = nn.ReLU()

    def forward(self, x):
        # h_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size)).to(device)  # hidden state
        # c_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size)).to(device)  # internal state
        # Propagate input through LSTM
        output, (hn, cn) = self.lstm(x)     # lstm with input, hidden, and internal state
        hn = hn.view(-1, self.hidden_size)  # reshaping the data for the Dense layer next
        # out = self.dropout(hn)
        out = self.relu(hn)
        out = self.fc_1(out)  # first Dense
        out = self.relu(out)  # relu
        out = self.fc(out)    # final output
        out = self.relu(out)  # relu
        return out
My training data has these dimensions, where the first is the input and the second is the labels:
Training Shape torch.Size([8051, 1, 201]) torch.Size([8051, 1])
When I train with num_layers = 1 for the LSTM layer it works fine. However, when I increase num_layers to 2 I get the following error:
Training Shape torch.Size([8051, 1, 201]) torch.Size([8051, 1])
Testing Shape torch.Size([4930, 1, 201]) torch.Size([4930, 1])
C:\Users\adity\miniconda3\envs\pytorch3\lib\site-packages\torch\nn\modules\loss.py:528: UserWarning: Using a target size (torch.Size([8051, 1])) that is different to the input size (torch.Size([16102, 1])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.
return F.mse_loss(input, target, reduction=self.reduction)
Traceback (most recent call last):
File "C:\Users\adity\OneDrive - Louisiana State University\Documents\CSC 7343 HW\hw1.py", line 135, in <module>
loss = criterion(outputs.to(device), y_train_tensors.to(device))
File "C:\Users\adity\miniconda3\envs\pytorch3\lib\site-packages\torch\nn\modules\module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "C:\Users\adity\miniconda3\envs\pytorch3\lib\site-packages\torch\nn\modules\loss.py", line 528, in forward
return F.mse_loss(input, target, reduction=self.reduction)
File "C:\Users\adity\miniconda3\envs\pytorch3\lib\site-packages\torch\nn\functional.py", line 3089, in mse_loss
expanded_input, expanded_target = torch.broadcast_tensors(input, target)
File "C:\Users\adity\miniconda3\envs\pytorch3\lib\site-packages\torch\functional.py", line 73, in broadcast_tensors
return _VF.broadcast_tensors(tensors) # type: ignore[attr-defined]
RuntimeError: The size of tensor a (16102) must match the size of tensor b (8051) at non-singleton dimension 0
When I changed num_layers to 3, the error said the size of the tensor should be 24153.
Why are the input dimensions changing when I increase num_layers?
The issue is that you are flattening hn. According to the documentation, its shape is (D * num_layers, N, H_out), where D is 2 for a bidirectional LSTM and 1 otherwise, i.e. it depends on the number of stacked layers. With num_layers = 2, hn.view(-1, self.hidden_size) therefore produces 2 * 8051 = 16102 rows instead of 8051, which is exactly the mismatch MSELoss reports. You will either have to change the fully connected layer that follows, or take only the last layer's hidden state of your LSTM.