Tags: python, pytorch, sequence, lstm, dimensions

LSTM in Pytorch: how to add/change sequence length dimension?


I am running an LSTM in PyTorch, but as I understand it, it only takes a sequence length of 1. When I reshape the input to have a sequence length of 4 (or any other number), I get an error about mismatched lengths between input and target. If I reshape both input and target, the model complains that it does not accept multi-target labels.

My training dataset has 66512 rows and 16839 columns, with 3 categories/classes in the target. I would like to use a batch size of 200 and a sequence length of 4, i.e. use 4 rows of data per sequence.

Please advise how to adjust my model and/or data so that the model can run with various sequence lengths (e.g., 4).
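For context, with batch_first=True an nn.LSTM expects input of shape (batch, seq_len, features), and nn.CrossEntropyLoss expects exactly one class index per batch element, which is where the length mismatch comes from. A minimal sketch of the problem, using made-up small sizes rather than my real data:

import torch
import torch.nn as nn

lstm = nn.LSTM(input_size=10, hidden_size=8, batch_first=True)

x = torch.randn(200, 10)                # 200 rows of features
targets = torch.randint(0, 3, (200,))   # one label per row

# Grouping the 200 rows into sequences of 4 leaves only 50 sequences...
x_seq = x.reshape(50, 4, 10)
out, _ = lstm(x_seq)                    # out: (50, 4, 8)

# ...so one prediction per sequence (50) no longer matches the 200 targets.
print(out.shape, targets.shape)         # torch.Size([50, 4, 8]) torch.Size([200])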

import torch
import numpy as np
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader

batch_size = 200

# One-hot label columns -> a single class index per row
train_target = torch.tensor(train_data[['Label1','Label2','Label3']].values.astype(np.float32))
train_target = torch.argmax(train_target, dim=1)
train = torch.tensor(train_data.drop(['Label1','Label2','Label3'], axis=1).values.astype(np.float32))
# unsqueeze(1) adds a sequence-length dimension of 1: (66512, 1, 16839)
train_tensor = TensorDataset(train.unsqueeze(1), train_target)
train_loader = DataLoader(dataset=train_tensor, batch_size=batch_size, shuffle=True)

print(train.shape)
print(train_target.shape)

torch.Size([66512, 16839])
torch.Size([66512])
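For reference, pulling one batch from this loader shows why the sequence length is stuck at 1 (a quick check; the shapes follow from the unsqueeze(1) above):

xb, yb = next(iter(train_loader))
print(xb.shape)   # torch.Size([200, 1, 16839]) -> each sample is a sequence of length 1
print(yb.shape)   # torch.Size([200])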


import torch.nn as nn

class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(LSTMModel, self).__init__()
        # Hidden dimensions
        self.hidden_dim = hidden_dim

        # Number of hidden layers
        self.layer_dim = layer_dim

        # Building LSTM
        self.lstm = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)

        # Readout layer
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):

        # Initialize hidden state with zeros, on the same device as the input
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim, device=x.device)

        # Initialize cell state
        c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim, device=x.device)

        out, (hn, cn) = self.lstm(x, (h0,c0))

        # Index hidden state of last time step
        out = self.fc(out[:, -1, :]) 

        return out        
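For reference, a quick shape check of what this forward pass expects (a small sketch with a dummy batch; batch size 8 and sequence length 4 are arbitrary here):

m = LSTMModel(input_dim=16839, hidden_dim=100, layer_dim=1, output_dim=3)
dummy = torch.randn(8, 4, 16839)   # (batch, seq_len, input_dim) because batch_first=True
print(m(dummy).shape)              # torch.Size([8, 3]) -> one logit vector per sequence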


input_dim = 16839
hidden_dim = 100
output_dim = 3
layer_dim = 1

batch_size = batch_size
num_epochs = 1

model = LSTMModel(input_dim, hidden_dim, layer_dim, output_dim)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)

criterion = nn.CrossEntropyLoss()
learning_rate = 0.1

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)        

print(len(list(model.parameters())))
for i in range(len(list(model.parameters()))):
    print(list(model.parameters())[i].size())

6
torch.Size([400, 16839])
torch.Size([400, 100])
torch.Size([400])
torch.Size([400])
torch.Size([3, 100])
torch.Size([3])
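The [400, ...] shapes are the four LSTM gates stacked along the first dimension, i.e. 4 * hidden_dim = 400. A quick sanity check using the standard nn.LSTM parameter names:

assert model.lstm.weight_ih_l0.shape == (4 * hidden_dim, input_dim)    # (400, 16839)
assert model.lstm.weight_hh_l0.shape == (4 * hidden_dim, hidden_dim)   # (400, 100)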


for epoch in range(num_epochs):
    for i, (train, train_target) in enumerate(train_loader):
        # Load data as a torch tensor with gradient accumulation abilities
        train = train.requires_grad_().to(device)
        train_target = train_target.to(device)

        # Clear gradients w.r.t. parameters
        optimizer.zero_grad()

        # Forward pass to get output/logits
        outputs = model(train)

        # Calculate Loss: softmax --> cross entropy loss
        loss = criterion(outputs, train_target)

        # Getting gradients w.r.t. parameters
        loss.backward()

        # Updating parameters
        optimizer.step()

        # Track accuracy on the current batch so it can be reported below
        accuracy = (outputs.argmax(dim=1) == train_target).float().mean().item()

print('Epoch: {}. Loss: {}. Accuracy: {}'.format(epoch, np.around(loss.item(), 4), np.around(accuracy, 4)))

Solution

  • This is what worked eventually: reshaping the input data into sequences of 4 and keeping one target value per sequence, for which I picked the last value of each target sequence, per my problem's logic. It seems easy now but was tricky at the time. The rest of the posted code is the same; a quick shape check of the result is sketched after the code below.

    # Keep only the last label of every group of 4 rows -> one target per sequence
    train_target = torch.tensor(train_data[['Label1','Label2','Label3']].iloc[3::4].values.astype(np.float32))
    train_target = torch.argmax(train_target, dim=1)
    # Reshape the features into sequences of length 4: (16628, 4, 16839)
    train = torch.tensor(train_data.drop(['Label1','Label2','Label3'], axis=1).values.reshape(-1, 4, 16839).astype(np.float32))
    train_tensor = TensorDataset(train, train_target)
    train_loader = DataLoader(dataset=train_tensor, batch_size=batch_size, shuffle=True)
    
    print(train.shape)
    print(train_target.shape)
    
    torch.Size([16628, 4, 16839])
    torch.Size([16628])
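With this layout each batch is (batch, 4, 16839) and the model produces one logit vector per sequence, so outputs and targets line up for CrossEntropyLoss. A quick check on one batch (a small sketch, reusing the model and device defined above):

    xb, yb = next(iter(train_loader))
    print(xb.shape, yb.shape)      # torch.Size([200, 4, 16839]) torch.Size([200])
    out = model(xb.to(device))
    print(out.shape)               # torch.Size([200, 3]) -> one prediction per sequence of 4 rows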