Tags: image-processing, deep-learning, caffe, lmdb

The loss remains constant every time I run the Caffe model on the dataset


The initial loss computed when my model is run on the image dataset remains constant every time I run Caffe. Isn't this behavior strange, since the initial loss should be (at least slightly) different on each run?

The loss value also remains constant across further iterations when I use the SoftmaxWithLoss function in the loss layer. Likewise, if I set lr_mult: 0 in the deconvolution layer, the loss stays constant across iterations. If I set lr_mult: 1, the loss value does change, but it returns to the initial loss value after a few thousand iterations.
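
One way to check whether the deconvolution weights are actually being updated is to step the solver from Python and compare the layer's parameter blob before and after. This is only a diagnostic sketch, assuming pycaffe is available and that solver.prototxt is the solver file shown below (with a net: line pointing at the train prototxt):

import numpy as np
import caffe

caffe.set_mode_gpu()
solver = caffe.SGDSolver('solver.prototxt')  # placeholder path

# Copy the deconvolution weights before stepping.
w_before = solver.net.params['deconv1'][0].data.copy()

solver.step(10)  # run 10 training iterations

w_after = solver.net.params['deconv1'][0].data
print('max weight change:', np.abs(w_after - w_before).max())
# With lr_mult: 0 on deconv1 this prints 0.0, i.e. the layer is
# a fixed bilinear upsampler whose weights never move.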

Any ideas or help in this regard would be highly appreciated.

The following is my solver file:

test_iter: 100
test_interval: 100 # py solving tests
display: 100
#average_loss: 100
lr_policy: "fixed"
base_lr: 0.0000001
momentum: 0.5
iter_size: 1
# base_lr: 1e-9
# momentum: 0.99
# iter_size: 1
max_iter: 1000000
#weight_decay: 0.0005
snapshot: 1000
snapshot_diff: true 
#test_initialization: false
solver_mode: GPU

The following is the architecture of my convolutional neural network.

# Input 128 640 3
# Conv1 (kernel=3) 126 638 64
# ReLU
# Conv2 (kernel=3) 124 636 128
# ReLU
# Conv3 (kernel=3) 122 634 128
# ReLU
# Pool1 (kernel=2) 61 317 128
# Conv4 (kernel=4) 58 314 128
# ReLU
# Conv5 (kernel=4) 55 311 256
# ReLU
# Conv6 (kernel=4) 52 308 128
# ReLU
# Pool2 (kernel=2) 26 154 128
# Conv7 (kernel=4,stride=2,pad=3) 15 79 5
# ReLU
# Decon1 (kernel=16,stride=8,pad=0) 128 640 5
# ReLU
# Loss
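
The spatial sizes above follow Caffe's shape arithmetic: for convolution and pooling, out = (in + 2*pad - kernel) / stride + 1; for deconvolution, out = stride * (in - 1) + kernel - 2*pad. A small sanity-check sketch (the layer list is transcribed from the table above) that recomputes the height/width columns:

def conv_out(n, k, s=1, p=0):
    # Caffe convolution output size (pooling rounds up instead,
    # but every size here divides evenly, so floor matches).
    return (n + 2 * p - k) // s + 1

def deconv_out(n, k, s=1, p=0):
    # Caffe deconvolution output size
    return s * (n - 1) + k - 2 * p

h, w = 128, 640
layers = [('conv1', 3, 1, 0), ('conv2', 3, 1, 0), ('conv3', 3, 1, 0),
          ('pool1', 2, 2, 0), ('conv4', 4, 1, 0), ('conv5', 4, 1, 0),
          ('conv6', 4, 1, 0), ('pool2', 2, 2, 0), ('conv7', 4, 2, 3)]
for name, k, s, p in layers:
    h, w = conv_out(h, k, s, p), conv_out(w, k, s, p)
    print(name, h, w)
print('deconv1', deconv_out(h, 16, 8, 0), deconv_out(w, 16, 8, 0))  # 128 640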

name: "Conv-Deconv-Net"
layer {
  name: "data"
  type: "Data"
  top: "data"
  include {
    phase: TRAIN
  }
  data_param {
    source: "F:/shripati/v9/Models/3_Conv_Deconv_Arch_SoftmaxWithLoss/Data/training_lmdb_files/training_files_orig_IMG_LMDB_olpywm"
    batch_size: 4
    backend: LMDB
  }
}
layer {
  name: "label"
  type: "Data"
  top: "label"
  include {
    phase: TRAIN
  }
  data_param {
    source: "F:/shripati/v9/Models/3_Conv_Deconv_Arch_SoftmaxWithLoss/Data/training_lmdb_files/training_files_orig_LBL_LMDB_olpywm"
    batch_size: 4
    backend: LMDB
  }
}
layer {
  name: "data"
  type: "Data"
  top: "data"
  include {
    phase: TEST
  }
  data_param {
    source: "F:/shripati/v9/Models/3_Conv_Deconv_Arch_SoftmaxWithLoss/Data/testing_lmdb_files/testing_files_IMG_LMDB_olpywm"
    batch_size: 4
    backend: LMDB
  }
}
layer {
  name: "label"
  type: "Data"
  top: "label"
  include {
    phase: TEST
  }
  data_param {
    source: "F:/shripati/v9/Models/3_Conv_Deconv_Arch_SoftmaxWithLoss/Data/testing_lmdb_files/testing_files_LBL_LMDB_olpywm"
    batch_size: 4
    backend: LMDB
  }
}
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 64
    kernel_size: 3
    stride: 1
    pad: 0
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "conv1"
  top: "relu1"
}
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "relu1"
  top: "conv2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 128
    pad: 0
    kernel_size: 3
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu2"
  type: "ReLU"
  bottom: "conv2"
  top: "relu2"
}
layer {
  name: "conv3"
  type: "Convolution"
  bottom: "relu2"
  top: "conv3"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 128
    pad: 0
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.001
    }
  }
}
layer {
  name: "relu3"
  type: "ReLU"
  bottom: "conv3"
  top: "relu3"
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "relu3"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}

layer {
  name: "conv4"
  type: "Convolution"
  bottom: "pool1"
  top: "conv4"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 128
    pad: 0
    kernel_size: 4
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.001
    }
  }
}
layer {
  name: "relu4"
  type: "ReLU"
  bottom: "conv4"
  top: "relu4"
}

layer {
  name: "conv5"
  type: "Convolution"
  bottom: "relu4"
  top: "conv5"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 0
    kernel_size: 4
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.001
    }
  }
}
layer {
  name: "relu5"
  type: "ReLU"
  bottom: "conv5"
  top: "relu5"
}

layer {
  name: "conv6"
  type: "Convolution"
  bottom: "relu5"
  top: "conv6"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 128
    pad: 0
    kernel_size: 4
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.001
    }
  }
}
layer {
  name: "relu6"
  type: "ReLU"
  bottom: "conv6"
  top: "relu6"
}

layer {
  name: "pool2"
  type: "Pooling"
  bottom: "relu6"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}

layer {
  name: "conv7"
  type: "Convolution"
  bottom: "pool2"
  top: "conv7"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 5
    pad: 3
    kernel_size: 4
    stride: 2
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.001
    }
  }
}
layer {
  name: "relu7"
  type: "ReLU"
  bottom: "conv7"
  top: "relu7"
}


layer {
  name: "deconv1"
  type: "Deconvolution"
  bottom: "relu7"
  top: "deconv1"
  param {
    lr_mult: 1
  }
  convolution_param {
    num_output: 5
    bias_term: false
    kernel_size: 16
    stride: 8
    pad: 0
    weight_filler {
      type: "bilinear"
    }
  }
}
layer {
  name: "relu8"
  type: "ReLU"
  bottom: "deconv1"
  top: "relu8"
}


#layer {
#  name: "crop"
#  type: "Crop"
#  bottom: "deconv3"
#  bottom: "data"
#  top: "score"
#}
layer {
  name: "prob"
  type: "SoftmaxWithLoss"
  bottom: "relu8"
  bottom: "label"
  top: "loss"
  loss_param {
    # ignore_label: 255
    # normalize: true
    normalize: false
  }
}

Solution

  • It seems your base_lr is too small. Start by keeping it at 0.00001, and try a momentum of 0.9. If the learning rate is too small, convergence will be very slow; if it is too high, gradient descent will overshoot the local minimum (that is when you see the loss shoot up). So you have to arrive at an optimal value iteratively; there is no magic number for this. A sketch of such a sweep follows below.
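
For instance, here is a minimal sketch of probing a few base_lr values programmatically with pycaffe, patching the solver definition through protobuf. The file names, the 0.9 momentum, and the 200-iteration probe length are placeholder choices, not part of the original setup:

import caffe
from caffe.proto import caffe_pb2
from google.protobuf import text_format

for base_lr in [1e-3, 1e-4, 1e-5]:
    # Load the solver definition and override the settings under test.
    params = caffe_pb2.SolverParameter()
    with open('solver.prototxt') as f:  # placeholder path
        text_format.Merge(f.read(), params)
    params.base_lr = base_lr
    params.momentum = 0.9
    with open('solver_tmp.prototxt', 'w') as f:
        f.write(text_format.MessageToString(params))

    solver = caffe.SGDSolver('solver_tmp.prototxt')
    solver.step(200)  # short probe run per setting
    loss = float(solver.net.blobs['loss'].data)
    print('base_lr=%g -> training loss after 200 iters: %.4f' % (base_lr, loss))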