I am running an LSTM in PyTorch, but as far as I understand it is only taking a sequence length of 1. When I reshape the input to a sequence length of 4 (or any other number), I get an error about mismatching lengths between input and target. If I reshape both input and target, the model complains that it does not accept multi-target labels.
My training dataset has 66512 rows and 16839 columns, with 3 categories/classes in the target. I would like to use a batch size of 200 and a sequence length of 4, i.e. use 4 rows of data per sequence.
Please advise how to adjust my model and/or data so that I can run the model with various sequence lengths (e.g., 4).
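To make the goal concrete, this is the batch shape I am after; a minimal sketch with random placeholder tensors (dummy_input and dummy_target are not my real data):
import torch
# Desired shapes for one batch: 200 sequences of 4 rows each,
# 16839 features per row, and one class index (0, 1 or 2) per sequence
dummy_input = torch.randn(200, 4, 16839)      # (batch_size, seq_len, input_dim)
dummy_target = torch.randint(0, 3, (200,))    # (batch_size,)
print(dummy_input.shape, dummy_target.shape)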
batch_size = 200
import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader
# train_data is a pandas DataFrame with 16839 feature columns plus the
# one-hot label columns Label1, Label2, Label3
train_target = torch.tensor(train_data[['Label1','Label2','Label3']].values.astype(np.float32))
train_target = torch.argmax(train_target, dim=1)  # one-hot labels -> class indices 0/1/2
train = torch.tensor(train_data.drop(['Label1','Label2','Label3'], axis=1).values.astype(np.float32))
# unsqueeze(1) adds a sequence dimension of length 1: (66512, 1, 16839)
train_tensor = TensorDataset(train.unsqueeze(1), train_target)
train_loader = DataLoader(dataset=train_tensor, batch_size=batch_size, shuffle=True)
print(train.shape)
print(train_target.shape)
torch.Size([66512, 16839])
torch.Size([66512])
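Checking one batch from this loader shows why the model effectively only ever sees a sequence length of 1:
batch_x, batch_y = next(iter(train_loader))
print(batch_x.shape)   # torch.Size([200, 1, 16839]) -> sequence length is 1
print(batch_y.shape)   # torch.Size([200])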
import torch.nn as nn
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(LSTMModel, self).__init__()
        # Hidden dimensions
        self.hidden_dim = hidden_dim
        # Number of hidden layers
        self.layer_dim = layer_dim
        # Building the LSTM; batch_first=True expects input of shape (batch, seq_len, input_dim)
        self.lstm = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
        # Readout layer
        self.fc = nn.Linear(hidden_dim, output_dim)
    def forward(self, x):
        # Initialize hidden and cell states with zeros on the input's device
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim, device=x.device)
        c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim, device=x.device)
        out, (hn, cn) = self.lstm(x, (h0, c0))
        # Use the hidden state of the last time step for classification
        out = self.fc(out[:, -1, :])
        return out
input_dim = 16839
hidden_dim = 100
output_dim = 3
layer_dim = 1
batch_size = batch_size
num_epochs = 1
model = LSTMModel(input_dim, hidden_dim, layer_dim, output_dim)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)
criterion = nn.CrossEntropyLoss()
learning_rate = 0.1
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
print(len(list(model.parameters())))
for i in range(len(list(model.parameters()))):
    print(list(model.parameters())[i].size())
6
torch.Size([400, 16839])
torch.Size([400, 100])
torch.Size([400])
torch.Size([400])
torch.Size([3, 100])
torch.Size([3])
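For reference, the 400 in the first two shapes is 4 * hidden_dim: nn.LSTM stacks the weights of its four gates (input, forget, cell, output) along the first dimension. A quick check of the named parameters confirms this:
# weight_ih_l0 is (4 * hidden_dim, input_dim), weight_hh_l0 is (4 * hidden_dim, hidden_dim)
print(model.lstm.weight_ih_l0.shape)  # torch.Size([400, 16839])
print(model.lstm.weight_hh_l0.shape)  # torch.Size([400, 100])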
for epoch in range(num_epochs):
    for i, (train, train_target) in enumerate(train_loader):
        # Load the batch as tensors with gradient tracking enabled
        train = train.requires_grad_().to(device)
        train_target = train_target.to(device)
        # Clear gradients w.r.t. parameters
        optimizer.zero_grad()
        # Forward pass to get output/logits
        outputs = model(train)
        # Calculate loss: softmax --> cross-entropy loss
        loss = criterion(outputs, train_target)
        # Getting gradients w.r.t. parameters
        loss.backward()
        # Updating parameters
        optimizer.step()
    # Accuracy on the last processed batch
    accuracy = (outputs.argmax(dim=1) == train_target).float().mean().item()
    print('Epoch: {}. Loss: {}. Accuracy: {}'.format(epoch, np.around(loss.item(), 4), np.around(accuracy, 4)))
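The target shape is also where the "multi-target" complaint mentioned above comes from: nn.CrossEntropyLoss expects raw logits of shape (N, C) and class-index targets of shape (N,), so reshaping the target to one label per time step gives it a 2-D target it will not accept. A minimal illustration (standalone, with random tensors):
import torch
import torch.nn as nn
criterion = nn.CrossEntropyLoss()
logits = torch.randn(200, 3)            # (N, C) raw scores from the model
targets = torch.randint(0, 3, (200,))   # (N,) class indices: one per sequence, not one per row
print(criterion(logits, targets).item())  # works with 1-D class-index targets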
This is what eventually worked: reshaping the input data into sequences of 4 and keeping one target value per sequence, for which I picked the last value of each target sequence, per my problem's logic. It seems very easy now, but it was quite tricky at the time. The rest of the posted code stays the same.
# Keep the label of the last row of each group of 4 rows (rows 3, 7, 11, ...)
train_target = torch.tensor(train_data[['Label1','Label2','Label3']].iloc[3::4].values.astype(np.float32))
train_target = torch.argmax(train_target, dim=1)
# Group the 66512 feature rows into 16628 non-overlapping sequences of length 4
train = torch.tensor(train_data.drop(['Label1','Label2','Label3'], axis=1).values.reshape(-1, 4, 16839).astype(np.float32))
train_tensor = TensorDataset(train, train_target)
train_loader = DataLoader(dataset=train_tensor, batch_size=batch_size, shuffle=True)
print(train.shape)
print(train_target.shape)
torch.Size([16628, 4, 16839])
torch.Size([16628])
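As a quick sanity check (a sketch reusing the model and device from the question, which stay unchanged), one batch now goes through with sequence length 4 and the logits line up with the 1-D targets that nn.CrossEntropyLoss expects:
batch_x, batch_y = next(iter(train_loader))
print(batch_x.shape, batch_y.shape)   # torch.Size([200, 4, 16839]) torch.Size([200])
outputs = model(batch_x.to(device))
print(outputs.shape)                  # torch.Size([200, 3]) -> matches the (200,) class-index targets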