Search code examples
pythondeep-learningpytorchcomputer-visionobject-detection

How can I determine validation loss for faster RCNN (PyTorch)?


I followed this tutorial for object detection: https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html

and their GitHub repository that contains the following train_one_epoch and evaluate functions:

https://github.com/pytorch/vision/blob/main/references/detection/engine.py

However, I also want to calculate the losses during validation. I implemented the following function for the validation loss; note that model.train() has to be on for the model to return losses:

@torch.no_grad()
def evaluate_loss(model, data_loader, device):
    """Return the mean per-batch loss of ``model`` over ``data_loader``.

    The model is switched to train mode for the duration of the pass so that
    calling it with targets yields a loss dict, and gradients are disabled by
    the ``torch.no_grad()`` decorator. The mode the model was in on entry is
    restored before returning, even if a batch raises.

    NOTE(review): layers that behave differently in train mode (e.g. dropout
    or non-frozen batch norm) stay active during this pass -- confirm that is
    acceptable for the model in use.

    Args:
        model: detection model that returns a dict of losses when called
            with ``(images, targets)`` in train mode.
        data_loader: iterable yielding ``(images, targets)`` batches.
        device: device every image and target tensor is moved to.

    Returns:
        The sum of the reduced losses of every batch divided by the number
        of batches.

    Raises:
        ValueError: if ``data_loader`` yields no batches.
    """
    was_training = model.training
    model.train()

    total_loss = 0
    num_batches = 0
    try:
        for images, targets in data_loader:
            images = [image.to(device) for image in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            loss_dict = model(images, targets)

            # reduce losses over all GPUs for logging purposes
            loss_dict_reduced = utils.reduce_dict(loss_dict)
            total_loss += sum(loss for loss in loss_dict_reduced.values())
            num_batches += 1
    finally:
        # Put the model back in whatever mode the caller had it in.
        model.train(was_training)

    if num_batches == 0:
        raise ValueError("data_loader yielded no batches; cannot compute a validation loss")

    return total_loss / num_batches

I then place it after the learning rate scheduler step in my for loop:

 for epoch in range(args.num_epochs):
        # train for one epoch, printing every 10 iterations
        train_one_epoch(model, optimizer, train_data_loader, device, epoch, print_freq=10)
    
        # update the learning rate
        lr_scheduler.step()

        # loss over the validation loader for this epoch, as computed by
        # evaluate_loss; the value is only stored here, not yet acted upon
        validation_loss = evaluate_loss(model, valid_data_loader, device=device)

        # run the reference evaluate() on the same validation loader
        evaluate(model, valid_data_loader, device=device)

Does this look correct or can it interfere with training or produce inaccurate validation losses?

If this is OK, is there a simple way to apply early stopping based on the validation loss?

I'm considering just adding something like this after the evaluate model function shown above:

# Checkpoint the current epoch. The names match the training loop this is
# meant to be placed in: `model` is the network being trained and
# `validation_loss` is the value returned by evaluate_loss for this epoch.
torch.save(
    {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'validation loss': validation_loss,
    },
    PATH,
)

where I also aim to save the model at every epoch for checkpointing purposes. However I need to determine the validation "loss" for saving the "best" model.


Solution

  • It turns out that no stage of the PyTorch (torchvision) Faster R-CNN returns losses when model.eval() is set. However, you can manually replicate the forward code to generate the losses in evaluation mode:

    from typing import Tuple, List, Dict, Optional
    import torch
    from torch import Tensor
    from collections import OrderedDict
    from torchvision.models.detection.roi_heads import fastrcnn_loss
    from torchvision.models.detection.rpn import concat_box_prediction_layers
    def eval_forward(
        model: torch.nn.Module,
        images: List[Tensor],
        targets: Optional[List[Dict[str, Tensor]]],
    ) -> Tuple[Dict[str, Tensor], List[Dict[str, Tensor]]]:
        """Compute both the losses and the detections of a detection model in eval mode.

        The model's forward pass is replayed step by step (transform, backbone,
        RPN, RoI heads) so that the RPN and box-head losses can be computed
        after ``model.eval()`` has been called.

        Args:
            model: detection model exposing ``transform``, ``backbone``, ``rpn``
                and ``roi_heads``.
            images: images to be processed.
            targets: one ground-truth dict per image; each is read for its
                ``"boxes"`` entry here and handed on to the RPN / RoI heads.
                Required in practice: the loss computation asserts that it is
                not ``None``.

        Returns:
            A ``(losses, detections)`` tuple. ``losses`` maps
            ``loss_classifier``, ``loss_box_reg``, ``loss_objectness`` and
            ``loss_rpn_box_reg`` to tensors. ``detections`` holds one dict per
            image with ``boxes``, ``labels`` and ``scores``, after
            ``model.transform.postprocess`` has been applied.

        Raises:
            ValueError: if a target box does not have positive height and width.
            AssertionError: if ``targets`` is ``None`` (when assertions are
                enabled).

        Note:
            The model is left in eval mode on return. Autograd is not disabled
            by this function; wrap the call in ``torch.no_grad()`` if the
            losses are only needed as values.
        """
        model.eval()

        original_image_sizes: List[Tuple[int, int]] = []
        for img in images:
            val = img.shape[-2:]
            assert len(val) == 2
            original_image_sizes.append((val[0], val[1]))

        image_list, targets = model.transform(images, targets)

        # Check for degenerate boxes
        if targets is not None:
            for target_idx, target in enumerate(targets):
                boxes = target["boxes"]
                degenerate_boxes = boxes[:, 2:] <= boxes[:, :2]
                if degenerate_boxes.any():
                    # report the first degenerate box
                    bb_idx = torch.where(degenerate_boxes.any(dim=1))[0][0]
                    degen_bb: List[float] = boxes[bb_idx].tolist()
                    raise ValueError(
                        "All bounding boxes should have positive height and width."
                        f" Found invalid box {degen_bb} for target at index {target_idx}."
                    )

        features = model.backbone(image_list.tensors)
        if isinstance(features, torch.Tensor):
            features = OrderedDict([("0", features)])

        # Flag only the RPN as training while the rest of the model stays in
        # eval mode. NOTE(review): presumably this makes RPN helpers that branch
        # on `training` behave as they do during training -- confirm.
        model.rpn.training = True
        try:
            # ---- equivalent of: proposals, proposal_losses = model.rpn(images, features, targets)
            features_rpn = list(features.values())
            objectness, pred_bbox_deltas = model.rpn.head(features_rpn)
            anchors = model.rpn.anchor_generator(image_list, features_rpn)

            num_images = len(anchors)
            num_anchors_per_level_shape_tensors = [o[0].shape for o in objectness]
            num_anchors_per_level = [s[0] * s[1] * s[2] for s in num_anchors_per_level_shape_tensors]
            objectness, pred_bbox_deltas = concat_box_prediction_layers(objectness, pred_bbox_deltas)
            # apply pred_bbox_deltas to anchors to obtain the decoded proposals
            # note that we detach the deltas because Faster R-CNN do not backprop through
            # the proposals
            proposals = model.rpn.box_coder.decode(pred_bbox_deltas.detach(), anchors)
            proposals = proposals.view(num_images, -1, 4)
            proposals, _ = model.rpn.filter_proposals(
                proposals, objectness, image_list.image_sizes, num_anchors_per_level
            )

            assert targets is not None
            labels, matched_gt_boxes = model.rpn.assign_targets_to_anchors(anchors, targets)
            regression_targets = model.rpn.box_coder.encode(matched_gt_boxes, anchors)
            loss_objectness, loss_rpn_box_reg = model.rpn.compute_loss(
                objectness, pred_bbox_deltas, labels, regression_targets
            )
            proposal_losses = {
                "loss_objectness": loss_objectness,
                "loss_rpn_box_reg": loss_rpn_box_reg,
            }

            # ---- equivalent of: detections, detector_losses = model.roi_heads(features, proposals, image_sizes, targets)
            image_shapes = image_list.image_sizes
            proposals, matched_idxs, labels, regression_targets = model.roi_heads.select_training_samples(
                proposals, targets
            )
            box_features = model.roi_heads.box_roi_pool(features, proposals, image_shapes)
            box_features = model.roi_heads.box_head(box_features)
            class_logits, box_regression = model.roi_heads.box_predictor(box_features)

            loss_classifier, loss_box_reg = fastrcnn_loss(class_logits, box_regression, labels, regression_targets)
            detector_losses = {"loss_classifier": loss_classifier, "loss_box_reg": loss_box_reg}

            boxes, scores, labels = model.roi_heads.postprocess_detections(
                class_logits, box_regression, proposals, image_shapes
            )
            detections: List[Dict[str, Tensor]] = [
                {"boxes": img_boxes, "labels": img_labels, "scores": img_scores}
                for img_boxes, img_labels, img_scores in zip(boxes, labels, scores)
            ]
            detections = model.transform.postprocess(  # type: ignore[operator]
                detections, image_list.image_sizes, original_image_sizes
            )
        finally:
            # Always clear the training flags, even when a step above raised,
            # so a failed call cannot leave the RPN flagged as training.
            model.rpn.training = False
            model.roi_heads.training = False

        # Detector losses first, then RPN losses (same key order as before).
        losses = {**detector_losses, **proposal_losses}
        return losses, detections
    

    Testing this code gives me:

    import torchvision
    from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
    
    # Start from a Faster R-CNN (ResNet-50 FPN) created with pretrained=True
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

    # Swap the box predictor for a fresh one sized for our own label set:
    # 1 class (person) + background
    num_classes = 2
    # width of the features feeding the existing classification layer
    predictor_in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(predictor_in_features, num_classes)

    # One random 300x300 image and a single ground-truth box to drive the call
    dummy_images = torch.randn([1,3,300,300])
    dummy_targets = [{'boxes':torch.tensor([[100,100,200,200]]),'labels':torch.tensor([0])}]
    losses, detections = eval_forward(model, dummy_images, dummy_targets)
    
    {'loss_classifier': tensor(0.6594, grad_fn=<NllLossBackward0>),
    'loss_box_reg': tensor(0., grad_fn=<DivBackward0>),
     'loss_objectness': tensor(0.5108, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>),
     'loss_rpn_box_reg': tensor(0.0160, grad_fn=<DivBackward0>)}