Search code examples
tensorflow object-detection-api

Tensorflow object detection API classification loss increases


I'm training tensorflow object detection API with my own data. The model I'm using is ssd_mobilenet_v1 with pretrained coco checkpoint.
My dataset consists of 12 classes, and each class has 110 images, so there are 1320 images in total.
This works fine, but classification loss increases at some point.
enter image description here

I don't think the small size of the dataset matters in the training phase, because the images are all similar; I actually extracted them from a video.

So what should I do? Should I stop training at around 10k iterations? Or is there any parameter tuning or data augmentation that could help?

This is my config file; I only adjusted the directories, data_augmentation_options, and hard_example_miner.

# Model definition: a single `ssd` message holds every detector setting below.
model {
  ssd {
    # Matches the 12 classes described in the question text.
    num_classes: 12
    box_coder {
      faster_rcnn_box_coder {
        y_scale: 10.0
        x_scale: 10.0
        height_scale: 5.0
        width_scale: 5.0
      }
    }
    # matched_threshold and unmatched_threshold are both set to 0.5.
    matcher {
      argmax_matcher {
        matched_threshold: 0.5
        unmatched_threshold: 0.5
        ignore_thresholds: false
        negatives_lower_than_unmatched: true
        force_match_for_each_row: true
      }
    }
    similarity_calculator {
      iou_similarity {
      }
    }
    # num_layers: 6, scales from 0.2 to 0.95, five aspect_ratios entries.
    anchor_generator {
      ssd_anchor_generator {
        num_layers: 6
        min_scale: 0.2
        max_scale: 0.95
        aspect_ratios: 1.0
        aspect_ratios: 2.0
        aspect_ratios: 0.5
        aspect_ratios: 3.0
        aspect_ratios: 0.3333
      }
    }
    # Fixed input shape: height 320, width 180.
    image_resizer {
      fixed_shape_resizer {
        height: 320
        width: 180
      }
    }
    box_predictor {
      convolutional_box_predictor {
        min_depth: 0
        max_depth: 0
        num_layers_before_predictor: 0
        # NOTE(review): use_dropout is false, so dropout_keep_probability
        # presumably has no effect here - confirm.
        use_dropout: false
        dropout_keep_probability: 0.8
        kernel_size: 1
        box_code_size: 4
        apply_sigmoid_to_scores: false
        conv_hyperparams {
          activation: RELU_6,
          regularizer {
            l2_regularizer {
              weight: 0.00004
            }
          }
          initializer {
            truncated_normal_initializer {
              stddev: 0.03
              mean: 0.0
            }
          }
          batch_norm {
            train: true,
            scale: true,
            center: true,
            decay: 0.9997,
            epsilon: 0.001,
          }
        }
      }
    }
    feature_extractor {
      type: 'ssd_mobilenet_v1'
      min_depth: 16
      depth_multiplier: 1.0
      # Carries the same values as the box_predictor's conv_hyperparams above.
      conv_hyperparams {
        activation: RELU_6,
        regularizer {
          l2_regularizer {
            weight: 0.00004
          }
        }
        initializer {
          truncated_normal_initializer {
            stddev: 0.03
            mean: 0.0
          }
        }
        batch_norm {
          train: true,
          scale: true,
          center: true,
          decay: 0.9997,
          epsilon: 0.001,
        }
      }
    }
    # Classification and localization losses are weighted equally (1.0 each).
    loss {
      classification_loss {
        weighted_sigmoid {
        }
      }
      localization_loss {
        weighted_smooth_l1 {
        }
      }
      # One of the sections the asker reports having adjusted.
      hard_example_miner {
        num_hard_examples: 600
        iou_threshold: 0.99
        loss_type: CLASSIFICATION
        max_negatives_per_positive: 3
        min_negatives_per_image: 0
      }
      classification_weight: 1.0
      localization_weight: 1.0
    }
    normalize_loss_by_num_matches: true
    # Both max_detections_per_class and max_total_detections are 100.
    post_processing {
      batch_non_max_suppression {
        score_threshold: 1e-8
        iou_threshold: 0.6
        max_detections_per_class: 100
        max_total_detections: 100
      }
      score_converter: SIGMOID
    }
  }
}

# Training settings: batch size, optimizer, fine-tuning checkpoint, step count
# and data augmentations.
train_config: {
  batch_size: 96
  optimizer {
    rms_prop_optimizer: {
      learning_rate: {
        exponential_decay_learning_rate {
          initial_learning_rate: 0.004
          # NOTE(review): decay_steps (800720) exceeds num_steps (100000) set
          # below, so not even one full decay interval elapses in this run.
          decay_steps: 800720
          decay_factor: 0.95
        }
      }
      momentum_optimizer_value: 0.9
      decay: 0.9
      epsilon: 1.0
    }
  }
  # Fine-tunes from the ssd_mobilenet_v1_coco_2018_01_28 checkpoint.
  fine_tune_checkpoint: "/home/dev1/tensorflow/training/data/checkpoint/ssd_mobilenet_v1_coco_2018_01_28/model.ckpt"
  fine_tune_checkpoint_type:  "detection"
  from_detection_checkpoint: true
  num_steps: 100000
  # Three augmentations, each with default (empty) options: horizontal flip,
  # SSD random crop, brightness adjustment.
  data_augmentation_options {
    random_horizontal_flip {
    }
  }
  data_augmentation_options {
    ssd_random_crop {
    }
  }
  data_augmentation_options {
    random_adjust_brightness {
    }
  }
}

# Training data: a TFRecord file plus the label map.
train_input_reader: {
  tf_record_input_reader {
    input_path: "/home/dev1/tensorflow/training/data/train.record"
  }
  label_map_path: "/home/dev1/tensorflow/training/data/config/label.pbtxt"
}

# Evaluation settings.
# NOTE(review): 132 is one tenth of the 1320 images mentioned in the question
# text; presumably the size of the held-out test split - confirm.
eval_config: {
  num_examples: 132
  max_evals: 20
}

# Evaluation data: test.record, with the same label map path as
# train_input_reader; shuffling is off and a single reader is used.
eval_input_reader: {
  tf_record_input_reader {
    input_path: "/home/dev1/tensorflow/training/data/test.record"
  }
  label_map_path: "/home/dev1/tensorflow/training/data/config/label.pbtxt"
  shuffle: false
  num_readers: 1
}


Solution

  • Providing the solution here (Answer Section) even though it is present in the Comment Section (Thanks to Shayan Tabatabaee), for the benefit of the community.

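    This issue was due to the learning rate staying too high: with decay_steps: 800720 and num_steps: 100000, the exponential decay never completed even one decay interval during training. It was fixed by decreasing decay_steps from 800720 to 5000.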

    Please refer to the modified config file below:

    # Model definition: a single `ssd` message holds every detector setting
    # below. The values match the model block posted in the question.
    model {
      ssd {
        # Matches the 12 classes described in the question text.
        num_classes: 12
        box_coder {
          faster_rcnn_box_coder {
            y_scale: 10.0
            x_scale: 10.0
            height_scale: 5.0
            width_scale: 5.0
          }
        }
        # matched_threshold and unmatched_threshold are both set to 0.5.
        matcher {
          argmax_matcher {
            matched_threshold: 0.5
            unmatched_threshold: 0.5
            ignore_thresholds: false
            negatives_lower_than_unmatched: true
            force_match_for_each_row: true
          }
        }
        similarity_calculator {
          iou_similarity {
          }
        }
        # num_layers: 6, scales from 0.2 to 0.95, five aspect_ratios entries.
        anchor_generator {
          ssd_anchor_generator {
            num_layers: 6
            min_scale: 0.2
            max_scale: 0.95
            aspect_ratios: 1.0
            aspect_ratios: 2.0
            aspect_ratios: 0.5
            aspect_ratios: 3.0
            aspect_ratios: 0.3333
          }
        }
        # Fixed input shape: height 320, width 180.
        image_resizer {
          fixed_shape_resizer {
            height: 320
            width: 180
          }
        }
        box_predictor {
          convolutional_box_predictor {
            min_depth: 0
            max_depth: 0
            num_layers_before_predictor: 0
            # NOTE(review): use_dropout is false, so dropout_keep_probability
            # presumably has no effect here - confirm.
            use_dropout: false
            dropout_keep_probability: 0.8
            kernel_size: 1
            box_code_size: 4
            apply_sigmoid_to_scores: false
            conv_hyperparams {
              activation: RELU_6,
              regularizer {
                l2_regularizer {
                  weight: 0.00004
                }
              }
              initializer {
                truncated_normal_initializer {
                  stddev: 0.03
                  mean: 0.0
                }
              }
              batch_norm {
                train: true,
                scale: true,
                center: true,
                decay: 0.9997,
                epsilon: 0.001,
              }
            }
          }
        }
        feature_extractor {
          type: 'ssd_mobilenet_v1'
          min_depth: 16
          depth_multiplier: 1.0
          # Carries the same values as the box_predictor's conv_hyperparams above.
          conv_hyperparams {
            activation: RELU_6,
            regularizer {
              l2_regularizer {
                weight: 0.00004
              }
            }
            initializer {
              truncated_normal_initializer {
                stddev: 0.03
                mean: 0.0
              }
            }
            batch_norm {
              train: true,
              scale: true,
              center: true,
              decay: 0.9997,
              epsilon: 0.001,
            }
          }
        }
        # Classification and localization losses are weighted equally (1.0 each).
        loss {
          classification_loss {
            weighted_sigmoid {
            }
          }
          localization_loss {
            weighted_smooth_l1 {
            }
          }
          hard_example_miner {
            num_hard_examples: 600
            iou_threshold: 0.99
            loss_type: CLASSIFICATION
            max_negatives_per_positive: 3
            min_negatives_per_image: 0
          }
          classification_weight: 1.0
          localization_weight: 1.0
        }
        normalize_loss_by_num_matches: true
        # Both max_detections_per_class and max_total_detections are 100.
        post_processing {
          batch_non_max_suppression {
            score_threshold: 1e-8
            iou_threshold: 0.6
            max_detections_per_class: 100
            max_total_detections: 100
          }
          score_converter: SIGMOID
        }
      }
    }
    
    # Training settings: batch size, optimizer, fine-tuning checkpoint, step
    # count and data augmentations.
    train_config: {
      batch_size: 96
      optimizer {
        rms_prop_optimizer: {
          learning_rate: {
            exponential_decay_learning_rate {
              initial_learning_rate: 0.004
              # The fix described in the answer text: lowered from 800720 to
              # 5000. With num_steps: 100000 below, that gives 20 decay
              # intervals over the run.
              decay_steps: 5000
              decay_factor: 0.95
            }
          }
          momentum_optimizer_value: 0.9
          decay: 0.9
          epsilon: 1.0
        }
      }
      # Fine-tunes from the ssd_mobilenet_v1_coco_2018_01_28 checkpoint.
      fine_tune_checkpoint: "/home/dev1/tensorflow/training/data/checkpoint/ssd_mobilenet_v1_coco_2018_01_28/model.ckpt"
      fine_tune_checkpoint_type:  "detection"
      from_detection_checkpoint: true
      num_steps: 100000
      # Three augmentations, each with default (empty) options: horizontal
      # flip, SSD random crop, brightness adjustment.
      data_augmentation_options {
        random_horizontal_flip {
        }
      }
      data_augmentation_options {
        ssd_random_crop {
        }
      }
      data_augmentation_options {
        random_adjust_brightness {
        }
      }
    }
    
    # Training data: a TFRecord file plus the label map.
    train_input_reader: {
      tf_record_input_reader {
        input_path: "/home/dev1/tensorflow/training/data/train.record"
      }
      label_map_path: "/home/dev1/tensorflow/training/data/config/label.pbtxt"
    }
    
    # Evaluation settings.
    # NOTE(review): 132 is one tenth of the 1320 images mentioned in the
    # question text; presumably the size of the held-out test split - confirm.
    eval_config: {
      num_examples: 132
      max_evals: 20
    }
    
    # Evaluation data: test.record, with the same label map path as
    # train_input_reader; shuffling is off and a single reader is used.
    eval_input_reader: {
      tf_record_input_reader {
        input_path: "/home/dev1/tensorflow/training/data/test.record"
      }
      label_map_path: "/home/dev1/tensorflow/training/data/config/label.pbtxt"
      shuffle: false
      num_readers: 1
    }