python deep-learning pytorch computer-vision resnet

Facing this error while classifying Images, containing 10 classes in pytorch, in ResNet50. My code is:

This is the code I am implementing: I am using a subset of the CalTech256 dataset to classify images of 10 different kinds of animals. We will go over the dataset preparation, data augmentation and then steps to build the classifier.

def train_and_validate(model, loss_criterion, optimizer, epochs=25):
    '''
    Function to train and validate
    Parameters
        :param model: Model to train and validate
        :param loss_criterion: Loss Criterion to minimize
        :param optimizer: Optimizer for computing gradients
        :param epochs: Number of epochs (default=25)
  
    Returns
        model: Trained Model with best validation accuracy
        history: (dict object): Having training loss, accuracy and validation loss, accuracy
    '''
    
    start = time.time()
    history = []
    best_acc = 0.0

    for epoch in range(epochs):
        epoch_start = time.time()
        print("Epoch: {}/{}".format(epoch+1, epochs))
        
        # Set to training mode
        model.train()
        
        # Loss and Accuracy within the epoch
        train_loss = 0.0
        train_acc = 0.0
        
        valid_loss = 0.0
        valid_acc = 0.0
        
        for i, (inputs, labels) in enumerate(train_data_loader):

            inputs = inputs.to(device)
            labels = labels.to(device)
            
            # Clean existing gradients
            optimizer.zero_grad()
            
            # Forward pass - compute outputs on input data using the model
            outputs = model(inputs)
            
            # Compute loss
            loss = loss_criterion(outputs, labels)
            
            # Backpropagate the gradients
            loss.backward()
            
            # Update the parameters
            optimizer.step()
            
            # Compute the total loss for the batch and add it to train_loss
            train_loss += loss.item() * inputs.size(0)
            
            # Compute the accuracy
            ret, predictions = torch.max(outputs.data, 1)
            correct_counts = predictions.eq(labels.data.view_as(predictions))
            
            # Convert correct_counts to float and then compute the mean
            acc = torch.mean(correct_counts.type(torch.FloatTensor))
            
            # Compute total accuracy in the whole batch and add to train_acc
            train_acc += acc.item() * inputs.size(0)
            
            #print("Batch number: {:03d}, Training: Loss: {:.4f}, Accuracy: {:.4f}".format(i, loss.item(), acc.item()))

            
        # Validation - No gradient tracking needed
        with torch.no_grad():

            # Set to evaluation mode
            model.eval()

            # Validation loop
            for j, (inputs, labels) in enumerate(valid_data_loader):
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Forward pass - compute outputs on input data using the model
                outputs = model(inputs)

                # Compute loss
                loss = loss_criterion(outputs, labels)

                # Compute the total loss for the batch and add it to valid_loss
                valid_loss += loss.item() * inputs.size(0)

                # Calculate validation accuracy
                ret, predictions = torch.max(outputs.data, 1)
                correct_counts = predictions.eq(labels.data.view_as(predictions))

                # Convert correct_counts to float and then compute the mean
                acc = torch.mean(correct_counts.type(torch.FloatTensor))

                # Compute total accuracy in the whole batch and add to valid_acc
                valid_acc += acc.item() * inputs.size(0)

                #print("Validation Batch number: {:03d}, Validation: Loss: {:.4f}, Accuracy: {:.4f}".format(j, loss.item(), acc.item()))
            
        # Find average training loss and training accuracy
        avg_train_loss = train_loss/train_data_size 
        avg_train_acc = train_acc/train_data_size

        # Find average training loss and training accuracy
        avg_valid_loss = valid_loss/valid_data_size 
        avg_valid_acc = valid_acc/valid_data_size

        history.append([avg_train_loss, avg_valid_loss, avg_train_acc, avg_valid_acc])
                
        epoch_end = time.time()
    
        print("Epoch : {:03d}, Training: Loss: {:.4f}, Accuracy: {:.4f}%, \n\t\tValidation : Loss : {:.4f}, Accuracy: {:.4f}%, Time: {:.4f}s".format(epoch, avg_train_loss, avg_train_acc*100, avg_valid_loss, avg_valid_acc*100, epoch_end-epoch_start))
        
        # Save if the model has best accuracy till now
        torch.save(model, dataset+'_model_'+str(epoch)+'.pt')
            
    return model, history

# Load pretrained ResNet50 Model
resnet50 = models.resnet50(pretrained=True)
#resnet50 = resnet50.to('cuda:0')


# Freeze model parameters
for param in resnet50.parameters():
    param.requires_grad = False
# Change the final layer of ResNet50 Model for Transfer Learning
fc_inputs = resnet50.fc.in_features

resnet50.fc = nn.Sequential(
    nn.Linear(fc_inputs, 256),
    nn.ReLU(),
    nn.Dropout(0.4),
    nn.Linear(256, num_classes), # Since 10 possible outputs
    nn.LogSoftmax(dim=1) # For using NLLLoss()
)

# Convert model to be used on GPU
# resnet50 = resnet50.to('cuda:0')

# Change the final layer of ResNet50 Model for Transfer Learning
fc_inputs = resnet50.fc.in_features

resnet50.fc = nn.Sequential(
    nn.Linear(fc_inputs, 256),
    nn.ReLU(),
    nn.Dropout(0.4),
    nn.Linear(256, num_classes), # Since 10 possible outputs
    nn.LogSoftmax(dienter code herem=1) # For using NLLLoss()
)

# Convert model to be used on GPU
# resnet50 = resnet50.to('cuda:0')`enter code here`

Error is this:

--------------------------------------------------------------------------- 
RuntimeError                              Traceback (most recent call last) <ipython-input-30-c7a333acb694> in <module>()
      6 # Train the model for 25 epochs
      7 num_epochs = 30
----> 8 trained_model, history = train_and_validate(resnet50, loss_func, optimizer, num_epochs)
      9 
     10 torch.save(history, dataset+'_history.pt')

<ipython-input-29-239f590d090e> in train_and_validate(model, loss_criterion, optimizer, epochs)
     43 
     44             # Compute loss
---> 45             loss = loss_criterion(outputs, labels)
     46 
     47             # Backpropagate the gradients

~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in
__call__(self, *input, **kwargs)
    539             result = self._slow_forward(*input, **kwargs)
    540         else:
--> 541             result = self.forward(*input, **kwargs)
    542         for hook in self._forward_hooks.values():
    543             hook_result = hook(self, input, result)

~\Anaconda3\lib\site-packages\torch\nn\modules\loss.py in forward(self, input, target)
    202 
    203     def forward(self, input, target):
--> 204         return F.nll_loss(input, target, weight=self.weight, ignore_index=self.ignore_index, reduction=self.reduction)
    205 
    206 

~\Anaconda3\lib\site-packages\torch\nn\functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce, reduction)    1836                          .format(input.size(0), target.size(0)))    1837     if dim == 2:
-> 1838         ret = torch._C._nn.nll_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)    1839     elif dim == 4:    1840         ret = torch._C._nn.nll_loss2d(input, target, weight, _Reduction.get_enum(reduction), ignore_index)

RuntimeError: Assertion `cur_target >= 0 && cur_target < n_classes' failed.  at C:\Users\builder\AppData\Local\Temp\pip-req-build-0i480kur\aten\src\THNN/generic/ClassNLLCriterion.c:97

Solution

This happens when there are either incorrect labels in your dataset, or the labels are 1-indexed (instead of 0-indexed). As from the error message, cur_target must be smaller than the total number of classes (10). To verify the issue, check the maximum and minimum label in your dataset. If the data is indeed 1-indexed, just minus one from all annotations and you should be fine.

Note, another possible reason is that there exists some -1 labels in the data. Some (esp older) datasets use -1 as indication of a wrong/dubious label. If you find such labels, just discard them.