MobileNet-Caffe icon indicating copy to clipboard operation
MobileNet-Caffe copied to clipboard

Loss = -nan

Open 3ntr0phy opened this issue 6 years ago • 3 comments

Hi, I'm trying to train on my own data. This is the solver:

net: "train_val.prototxt"
#test_initialization: false
#test_iter: 100
#test_interval: 1000
display: 20
average_loss: 20
base_lr: 0.000001
lr_policy: "poly"
power: 1.0
max_iter: 500
momentum: 0.9
weight_decay: 0.0001
snapshot: 100
snapshot_prefix: "mobilenet"

This is the train_val:

name: "MOBILENET"

transform_param {

scale: 0.017

mirror: false

crop_size: 224

mean_value: [103.94,116.78,123.68]

}

layer { name: "data" type: "ImageData" top: "data" top: "label" include { phase: TRAIN } transform_param { mean_file: "imagenet_mean.binaryproto" mirror:false } image_data_param { source: "./train.txt" batch_size: 16 new_height: 256 new_width: 256 root_folder: "/" } }

layer { name: "conv1" type: "Convolution" bottom: "data" top: "conv1" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 32 bias_term: false pad: 1 kernel_size: 3 stride: 2 weight_filler { type: "msra" } } } layer { name: "conv1/bn" type: "BatchNorm" bottom: "conv1" top: "conv1" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv1/scale" type: "Scale" bottom: "conv1" top: "conv1" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu1" type: "ReLU" bottom: "conv1" top: "conv1" } layer { name: "conv2_1/dw" type: "Convolution" bottom: "conv1" top: "conv2_1/dw" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 32 bias_term: false pad: 1 kernel_size: 3 group: 32 engine: CAFFE stride: 1 weight_filler { type: "msra" } } } layer { name: "conv2_1/dw/bn" type: "BatchNorm" bottom: "conv2_1/dw" top: "conv2_1/dw" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv2_1/dw/scale" type: "Scale" bottom: "conv2_1/dw" top: "conv2_1/dw" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu2_1/dw" type: "ReLU" bottom: "conv2_1/dw" top: "conv2_1/dw" } layer { name: "conv2_1/sep" type: "Convolution" bottom: "conv2_1/dw" top: "conv2_1/sep" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 64 bias_term: false pad: 0 kernel_size: 1 stride: 1 weight_filler { type: "msra" } } } layer { name: "conv2_1/sep/bn" type: "BatchNorm" bottom: "conv2_1/sep" top: "conv2_1/sep" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 
0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv2_1/sep/scale" type: "Scale" bottom: "conv2_1/sep" top: "conv2_1/sep" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu2_1/sep" type: "ReLU" bottom: "conv2_1/sep" top: "conv2_1/sep" } layer { name: "conv2_2/dw" type: "Convolution" bottom: "conv2_1/sep" top: "conv2_2/dw" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 64 bias_term: false pad: 1 kernel_size: 3 group: 64 engine: CAFFE stride: 2 weight_filler { type: "msra" } } } layer { name: "conv2_2/dw/bn" type: "BatchNorm" bottom: "conv2_2/dw" top: "conv2_2/dw" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv2_2/dw/scale" type: "Scale" bottom: "conv2_2/dw" top: "conv2_2/dw" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu2_2/dw" type: "ReLU" bottom: "conv2_2/dw" top: "conv2_2/dw" } layer { name: "conv2_2/sep" type: "Convolution" bottom: "conv2_2/dw" top: "conv2_2/sep" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 128 bias_term: false pad: 0 kernel_size: 1 stride: 1 weight_filler { type: "msra" } } } layer { name: "conv2_2/sep/bn" type: "BatchNorm" bottom: "conv2_2/sep" top: "conv2_2/sep" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv2_2/sep/scale" type: "Scale" bottom: "conv2_2/sep" top: "conv2_2/sep" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu2_2/sep" type: "ReLU" bottom: 
"conv2_2/sep" top: "conv2_2/sep" } layer { name: "conv3_1/dw" type: "Convolution" bottom: "conv2_2/sep" top: "conv3_1/dw" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 128 bias_term: false pad: 1 kernel_size: 3 group: 128 engine: CAFFE stride: 1 weight_filler { type: "msra" } } } layer { name: "conv3_1/dw/bn" type: "BatchNorm" bottom: "conv3_1/dw" top: "conv3_1/dw" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv3_1/dw/scale" type: "Scale" bottom: "conv3_1/dw" top: "conv3_1/dw" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu3_1/dw" type: "ReLU" bottom: "conv3_1/dw" top: "conv3_1/dw" } layer { name: "conv3_1/sep" type: "Convolution" bottom: "conv3_1/dw" top: "conv3_1/sep" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 128 bias_term: false pad: 0 kernel_size: 1 stride: 1 weight_filler { type: "msra" } } } layer { name: "conv3_1/sep/bn" type: "BatchNorm" bottom: "conv3_1/sep" top: "conv3_1/sep" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv3_1/sep/scale" type: "Scale" bottom: "conv3_1/sep" top: "conv3_1/sep" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu3_1/sep" type: "ReLU" bottom: "conv3_1/sep" top: "conv3_1/sep" } layer { name: "conv3_2/dw" type: "Convolution" bottom: "conv3_1/sep" top: "conv3_2/dw" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 128 bias_term: false pad: 1 kernel_size: 3 group: 128 engine: CAFFE stride: 2 weight_filler { type: "msra" } } } layer { name: "conv3_2/dw/bn" type: "BatchNorm" 
bottom: "conv3_2/dw" top: "conv3_2/dw" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv3_2/dw/scale" type: "Scale" bottom: "conv3_2/dw" top: "conv3_2/dw" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu3_2/dw" type: "ReLU" bottom: "conv3_2/dw" top: "conv3_2/dw" } layer { name: "conv3_2/sep" type: "Convolution" bottom: "conv3_2/dw" top: "conv3_2/sep" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 256 bias_term: false pad: 0 kernel_size: 1 stride: 1 weight_filler { type: "msra" } } } layer { name: "conv3_2/sep/bn" type: "BatchNorm" bottom: "conv3_2/sep" top: "conv3_2/sep" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv3_2/sep/scale" type: "Scale" bottom: "conv3_2/sep" top: "conv3_2/sep" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu3_2/sep" type: "ReLU" bottom: "conv3_2/sep" top: "conv3_2/sep" } layer { name: "conv4_1/dw" type: "Convolution" bottom: "conv3_2/sep" top: "conv4_1/dw" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 256 bias_term: false pad: 1 kernel_size: 3 group: 256 engine: CAFFE stride: 1 weight_filler { type: "msra" } } } layer { name: "conv4_1/dw/bn" type: "BatchNorm" bottom: "conv4_1/dw" top: "conv4_1/dw" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv4_1/dw/scale" type: "Scale" bottom: "conv4_1/dw" top: "conv4_1/dw" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 
0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu4_1/dw" type: "ReLU" bottom: "conv4_1/dw" top: "conv4_1/dw" } layer { name: "conv4_1/sep" type: "Convolution" bottom: "conv4_1/dw" top: "conv4_1/sep" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 256 bias_term: false pad: 0 kernel_size: 1 stride: 1 weight_filler { type: "msra" } } } layer { name: "conv4_1/sep/bn" type: "BatchNorm" bottom: "conv4_1/sep" top: "conv4_1/sep" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv4_1/sep/scale" type: "Scale" bottom: "conv4_1/sep" top: "conv4_1/sep" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu4_1/sep" type: "ReLU" bottom: "conv4_1/sep" top: "conv4_1/sep" } layer { name: "conv4_2/dw" type: "Convolution" bottom: "conv4_1/sep" top: "conv4_2/dw" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 256 bias_term: false pad: 1 kernel_size: 3 group: 256 engine: CAFFE stride: 2 weight_filler { type: "msra" } } } layer { name: "conv4_2/dw/bn" type: "BatchNorm" bottom: "conv4_2/dw" top: "conv4_2/dw" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv4_2/dw/scale" type: "Scale" bottom: "conv4_2/dw" top: "conv4_2/dw" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu4_2/dw" type: "ReLU" bottom: "conv4_2/dw" top: "conv4_2/dw" } layer { name: "conv4_2/sep" type: "Convolution" bottom: "conv4_2/dw" top: "conv4_2/sep" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 512 bias_term: false pad: 0 
kernel_size: 1 stride: 1 weight_filler { type: "msra" } } } layer { name: "conv4_2/sep/bn" type: "BatchNorm" bottom: "conv4_2/sep" top: "conv4_2/sep" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv4_2/sep/scale" type: "Scale" bottom: "conv4_2/sep" top: "conv4_2/sep" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu4_2/sep" type: "ReLU" bottom: "conv4_2/sep" top: "conv4_2/sep" } layer { name: "conv5_1/dw" type: "Convolution" bottom: "conv4_2/sep" top: "conv5_1/dw" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 512 bias_term: false pad: 1 kernel_size: 3 group: 512 engine: CAFFE stride: 1 weight_filler { type: "msra" } } } layer { name: "conv5_1/dw/bn" type: "BatchNorm" bottom: "conv5_1/dw" top: "conv5_1/dw" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv5_1/dw/scale" type: "Scale" bottom: "conv5_1/dw" top: "conv5_1/dw" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu5_1/dw" type: "ReLU" bottom: "conv5_1/dw" top: "conv5_1/dw" } layer { name: "conv5_1/sep" type: "Convolution" bottom: "conv5_1/dw" top: "conv5_1/sep" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 512 bias_term: false pad: 0 kernel_size: 1 stride: 1 weight_filler { type: "msra" } } } layer { name: "conv5_1/sep/bn" type: "BatchNorm" bottom: "conv5_1/sep" top: "conv5_1/sep" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv5_1/sep/scale" type: 
"Scale" bottom: "conv5_1/sep" top: "conv5_1/sep" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu5_1/sep" type: "ReLU" bottom: "conv5_1/sep" top: "conv5_1/sep" } layer { name: "conv5_2/dw" type: "Convolution" bottom: "conv5_1/sep" top: "conv5_2/dw" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 512 bias_term: false pad: 1 kernel_size: 3 group: 512 engine: CAFFE stride: 1 weight_filler { type: "msra" } } } layer { name: "conv5_2/dw/bn" type: "BatchNorm" bottom: "conv5_2/dw" top: "conv5_2/dw" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv5_2/dw/scale" type: "Scale" bottom: "conv5_2/dw" top: "conv5_2/dw" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu5_2/dw" type: "ReLU" bottom: "conv5_2/dw" top: "conv5_2/dw" } layer { name: "conv5_2/sep" type: "Convolution" bottom: "conv5_2/dw" top: "conv5_2/sep" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 512 bias_term: false pad: 0 kernel_size: 1 stride: 1 weight_filler { type: "msra" } } } layer { name: "conv5_2/sep/bn" type: "BatchNorm" bottom: "conv5_2/sep" top: "conv5_2/sep" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv5_2/sep/scale" type: "Scale" bottom: "conv5_2/sep" top: "conv5_2/sep" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu5_2/sep" type: "ReLU" bottom: "conv5_2/sep" top: "conv5_2/sep" } layer { name: "conv5_3/dw" type: "Convolution" bottom: "conv5_2/sep" 
top: "conv5_3/dw" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 512 bias_term: false pad: 1 kernel_size: 3 group: 512 engine: CAFFE stride: 1 weight_filler { type: "msra" } } } layer { name: "conv5_3/dw/bn" type: "BatchNorm" bottom: "conv5_3/dw" top: "conv5_3/dw" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv5_3/dw/scale" type: "Scale" bottom: "conv5_3/dw" top: "conv5_3/dw" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu5_3/dw" type: "ReLU" bottom: "conv5_3/dw" top: "conv5_3/dw" } layer { name: "conv5_3/sep" type: "Convolution" bottom: "conv5_3/dw" top: "conv5_3/sep" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 512 bias_term: false pad: 0 kernel_size: 1 stride: 1 weight_filler { type: "msra" } } } layer { name: "conv5_3/sep/bn" type: "BatchNorm" bottom: "conv5_3/sep" top: "conv5_3/sep" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv5_3/sep/scale" type: "Scale" bottom: "conv5_3/sep" top: "conv5_3/sep" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu5_3/sep" type: "ReLU" bottom: "conv5_3/sep" top: "conv5_3/sep" } layer { name: "conv5_4/dw" type: "Convolution" bottom: "conv5_3/sep" top: "conv5_4/dw" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 512 bias_term: false pad: 1 kernel_size: 3 group: 512 engine: CAFFE stride: 1 weight_filler { type: "msra" } } } layer { name: "conv5_4/dw/bn" type: "BatchNorm" bottom: "conv5_4/dw" top: "conv5_4/dw" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 
} param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv5_4/dw/scale" type: "Scale" bottom: "conv5_4/dw" top: "conv5_4/dw" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu5_4/dw" type: "ReLU" bottom: "conv5_4/dw" top: "conv5_4/dw" } layer { name: "conv5_4/sep" type: "Convolution" bottom: "conv5_4/dw" top: "conv5_4/sep" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 512 bias_term: false pad: 0 kernel_size: 1 stride: 1 weight_filler { type: "msra" } } } layer { name: "conv5_4/sep/bn" type: "BatchNorm" bottom: "conv5_4/sep" top: "conv5_4/sep" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv5_4/sep/scale" type: "Scale" bottom: "conv5_4/sep" top: "conv5_4/sep" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu5_4/sep" type: "ReLU" bottom: "conv5_4/sep" top: "conv5_4/sep" } layer { name: "conv5_5/dw" type: "Convolution" bottom: "conv5_4/sep" top: "conv5_5/dw" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 512 bias_term: false pad: 1 kernel_size: 3 group: 512 engine: CAFFE stride: 1 weight_filler { type: "msra" } } } layer { name: "conv5_5/dw/bn" type: "BatchNorm" bottom: "conv5_5/dw" top: "conv5_5/dw" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv5_5/dw/scale" type: "Scale" bottom: "conv5_5/dw" top: "conv5_5/dw" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: 
"relu5_5/dw" type: "ReLU" bottom: "conv5_5/dw" top: "conv5_5/dw" } layer { name: "conv5_5/sep" type: "Convolution" bottom: "conv5_5/dw" top: "conv5_5/sep" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 512 bias_term: false pad: 0 kernel_size: 1 stride: 1 weight_filler { type: "msra" } } } layer { name: "conv5_5/sep/bn" type: "BatchNorm" bottom: "conv5_5/sep" top: "conv5_5/sep" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv5_5/sep/scale" type: "Scale" bottom: "conv5_5/sep" top: "conv5_5/sep" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu5_5/sep" type: "ReLU" bottom: "conv5_5/sep" top: "conv5_5/sep" } layer { name: "conv5_6/dw" type: "Convolution" bottom: "conv5_5/sep" top: "conv5_6/dw" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 512 bias_term: false pad: 1 kernel_size: 3 group: 512 engine: CAFFE stride: 2 weight_filler { type: "msra" } } } layer { name: "conv5_6/dw/bn" type: "BatchNorm" bottom: "conv5_6/dw" top: "conv5_6/dw" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv5_6/dw/scale" type: "Scale" bottom: "conv5_6/dw" top: "conv5_6/dw" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu5_6/dw" type: "ReLU" bottom: "conv5_6/dw" top: "conv5_6/dw" } layer { name: "conv5_6/sep" type: "Convolution" bottom: "conv5_6/dw" top: "conv5_6/sep" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 1024 bias_term: false pad: 0 kernel_size: 1 stride: 1 weight_filler { type: "msra" } } } layer { name: "conv5_6/sep/bn" type: 
"BatchNorm" bottom: "conv5_6/sep" top: "conv5_6/sep" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv5_6/sep/scale" type: "Scale" bottom: "conv5_6/sep" top: "conv5_6/sep" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu5_6/sep" type: "ReLU" bottom: "conv5_6/sep" top: "conv5_6/sep" } layer { name: "conv6/dw" type: "Convolution" bottom: "conv5_6/sep" top: "conv6/dw" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 1024 bias_term: false pad: 1 kernel_size: 3 group: 1024 engine: CAFFE stride: 1 weight_filler { type: "msra" } } } layer { name: "conv6/dw/bn" type: "BatchNorm" bottom: "conv6/dw" top: "conv6/dw" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv6/dw/scale" type: "Scale" bottom: "conv6/dw" top: "conv6/dw" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu6/dw" type: "ReLU" bottom: "conv6/dw" top: "conv6/dw" } layer { name: "conv6/sep" type: "Convolution" bottom: "conv6/dw" top: "conv6/sep" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 1024 bias_term: false pad: 0 kernel_size: 1 stride: 1 weight_filler { type: "msra" } } } layer { name: "conv6/sep/bn" type: "BatchNorm" bottom: "conv6/sep" top: "conv6/sep" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv6/sep/scale" type: "Scale" bottom: "conv6/sep" top: "conv6/sep" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { 
filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu6/sep" type: "ReLU" bottom: "conv6/sep" top: "conv6/sep" } layer { name: "pool6" type: "Pooling" bottom: "conv6/sep" top: "pool6" pooling_param { pool: AVE global_pooling: true } } layer { name: "fc7" type: "Convolution" bottom: "pool6" top: "fc7" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 42 kernel_size: 1 weight_filler { type: "msra" } bias_filler { type: "constant" value: 0 } } } layer { name: "loss" type: "SoftmaxWithLoss" bottom: "fc7" bottom: "label" top: "loss" } layer { name: "top1/acc" type: "Accuracy" bottom: "fc7" bottom: "label" top: "top1/acc" include { phase: TEST } } layer { name: "top5/acc" type: "Accuracy" bottom: "fc7" bottom: "label" top: "top5/acc" include { phase: TEST } accuracy_param { top_k: 5 } }

I also tried replacing the mean as described in your tutorial, but I still get:

I0421 13:23:50.226541 2808 solver.cpp:218] Iteration 0 (-1.82169e-44 iter/s, 40.202s/20 iters), loss = -nan
I0421 13:23:50.226686 2808 solver.cpp:237] Train net output #0: loss = -nan (* 1 = -nan loss)
I0421 13:23:50.226697 2808 sgd_solver.cpp:105] Iteration 0, lr = 1e-06

I really don't understand why loss goes to -nan

3ntr0phy avatar Apr 21 '18 20:04 3ntr0phy

I have this problem too. Have you found the reason?

zhangnn016 avatar May 18 '18 02:05 zhangnn016

@Jacoppy @zhangnn016 try removing every "use_global_stats: true" from the batch_norm_param blocks, so that the BatchNorm layers fall back to their default behavior.

TerryBryant avatar May 22 '18 13:05 TerryBryant

@Jacoppy @zhangnn016 try removing every "use_global_stats: true" from the batch_norm_param blocks, so that the BatchNorm layers fall back to their default behavior.

Thank you very much, it works for me!

NarcissusInMirror avatar Oct 21 '19 02:10 NarcissusInMirror