This is the code I am implementing: I am using a subset of the CalTech256 dataset to classify images of 10 different kinds of animals. We will go over the dataset preparation, data augmentation and then steps to build the classifier.
def train_and_validate(model, loss_criterion, optimizer, epochs=25):
'''
Function to train and validate
Parameters
:param model: Model to train and validate
:param loss_criterion: Loss Criterion to minimize
:param optimizer: Optimizer for computing gradients
:param epochs: Number of epochs (default=25)
Returns
model: Trained Model with best validation accuracy
history: (dict object): Having training loss, accuracy and validation loss, accuracy
'''
start = time.time()
history = []
best_acc = 0.0
for epoch in range(epochs):
epoch_start = time.time()
print("Epoch: {}/{}".format(epoch+1, epochs))
# Set to training mode
model.train()
# Loss and Accuracy within the epoch
train_loss = 0.0
train_acc = 0.0
valid_loss = 0.0
valid_acc = 0.0
for i, (inputs, labels) in enumerate(train_data_loader):
inputs = inputs.to(device)
labels = labels.to(device)
# Clean existing gradients
optimizer.zero_grad()
# Forward pass - compute outputs on input data using the model
outputs = model(inputs)
# Compute loss
loss = loss_criterion(outputs, labels)
# Backpropagate the gradients
loss.backward()
# Update the parameters
optimizer.step()
# Compute the total loss for the batch and add it to train_loss
train_loss += loss.item() * inputs.size(0)
# Compute the accuracy
ret, predictions = torch.max(outputs.data, 1)
correct_counts = predictions.eq(labels.data.view_as(predictions))
# Convert correct_counts to float and then compute the mean
acc = torch.mean(correct_counts.type(torch.FloatTensor))
# Compute total accuracy in the whole batch and add to train_acc
train_acc += acc.item() * inputs.size(0)
#print("Batch number: {:03d}, Training: Loss: {:.4f}, Accuracy: {:.4f}".format(i, loss.item(), acc.item()))
# Validation - No gradient tracking needed
with torch.no_grad():
# Set to evaluation mode
model.eval()
# Validation loop
for j, (inputs, labels) in enumerate(valid_data_loader):
inputs = inputs.to(device)
labels = labels.to(device)
# Forward pass - compute outputs on input data using the model
outputs = model(inputs)
# Compute loss
loss = loss_criterion(outputs, labels)
# Compute the total loss for the batch and add it to valid_loss
valid_loss += loss.item() * inputs.size(0)
# Calculate validation accuracy
ret, predictions = torch.max(outputs.data, 1)
correct_counts = predictions.eq(labels.data.view_as(predictions))
# Convert correct_counts to float and then compute the mean
acc = torch.mean(correct_counts.type(torch.FloatTensor))
# Compute total accuracy in the whole batch and add to valid_acc
valid_acc += acc.item() * inputs.size(0)
#print("Validation Batch number: {:03d}, Validation: Loss: {:.4f}, Accuracy: {:.4f}".format(j, loss.item(), acc.item()))
# Find average training loss and training accuracy
avg_train_loss = train_loss/train_data_size
avg_train_acc = train_acc/train_data_size
# Find average training loss and training accuracy
avg_valid_loss = valid_loss/valid_data_size
avg_valid_acc = valid_acc/valid_data_size
history.append([avg_train_loss, avg_valid_loss, avg_train_acc, avg_valid_acc])
epoch_end = time.time()
print("Epoch : {:03d}, Training: Loss: {:.4f}, Accuracy: {:.4f}%, \n\t\tValidation : Loss : {:.4f}, Accuracy: {:.4f}%, Time: {:.4f}s".format(epoch, avg_train_loss, avg_train_acc*100, avg_valid_loss, avg_valid_acc*100, epoch_end-epoch_start))
# Save if the model has best accuracy till now
torch.save(model, dataset+'_model_'+str(epoch)+'.pt')
return model, history
# Load pretrained ResNet50 Model
resnet50 = models.resnet50(pretrained=True)
#resnet50 = resnet50.to('cuda:0')
# Freeze model parameters
for param in resnet50.parameters():
param.requires_grad = False
# Change the final layer of ResNet50 Model for Transfer Learning
fc_inputs = resnet50.fc.in_features
resnet50.fc = nn.Sequential(
nn.Linear(fc_inputs, 256),
nn.ReLU(),
nn.Dropout(0.4),
nn.Linear(256, num_classes), # Since 10 possible outputs
nn.LogSoftmax(dim=1) # For using NLLLoss()
)
# Convert model to be used on GPU
# resnet50 = resnet50.to('cuda:0')
# Change the final layer of ResNet50 Model for Transfer Learning
fc_inputs = resnet50.fc.in_features
resnet50.fc = nn.Sequential(
nn.Linear(fc_inputs, 256),
nn.ReLU(),
nn.Dropout(0.4),
nn.Linear(256, num_classes), # Since 10 possible outputs
nn.LogSoftmax(dienter code herem=1) # For using NLLLoss()
)
# Convert model to be used on GPU
# resnet50 = resnet50.to('cuda:0')`enter code here`
Error is this:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last) <ipython-input-30-c7a333acb694> in <module>()
6 # Train the model for 25 epochs
7 num_epochs = 30
----> 8 trained_model, history = train_and_validate(resnet50, loss_func, optimizer, num_epochs)
9
10 torch.save(history, dataset+'_history.pt')
<ipython-input-29-239f590d090e> in train_and_validate(model, loss_criterion, optimizer, epochs)
43
44 # Compute loss
---> 45 loss = loss_criterion(outputs, labels)
46
47 # Backpropagate the gradients
~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in
__call__(self, *input, **kwargs)
539 result = self._slow_forward(*input, **kwargs)
540 else:
--> 541 result = self.forward(*input, **kwargs)
542 for hook in self._forward_hooks.values():
543 hook_result = hook(self, input, result)
~\Anaconda3\lib\site-packages\torch\nn\modules\loss.py in forward(self, input, target)
202
203 def forward(self, input, target):
--> 204 return F.nll_loss(input, target, weight=self.weight, ignore_index=self.ignore_index, reduction=self.reduction)
205
206
~\Anaconda3\lib\site-packages\torch\nn\functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce, reduction) 1836 .format(input.size(0), target.size(0))) 1837 if dim == 2:
-> 1838 ret = torch._C._nn.nll_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index) 1839 elif dim == 4: 1840 ret = torch._C._nn.nll_loss2d(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
RuntimeError: Assertion `cur_target >= 0 && cur_target < n_classes' failed. at C:\Users\builder\AppData\Local\Temp\pip-req-build-0i480kur\aten\src\THNN/generic/ClassNLLCriterion.c:97
This happens when there are either incorrect labels in your dataset, or the labels are 1-indexed (instead of 0-indexed). As from the error message, cur_target
must be smaller than the total number of classes (10). To verify the issue, check the maximum and minimum label in your dataset. If the data is indeed 1-indexed, just minus one from all annotations and you should be fine.
Note, another possible reason is that there exists some -1 labels in the data. Some (esp older) datasets use -1 as indication of a wrong/dubious label. If you find such labels, just discard them.