NVCaffe's BatchNormLayer is incompatible with BVLC caffe
While loading a pretrained YOLO v2 model (based on @gklz1982's work), which contains BatchNorm layers, I got the error message above.
So I suspected there is some difference between NVCaffe and BVLC Caffe, specifically in the BatchNorm layer.
In BVLC Caffe, the BatchNorm layer uses only three blobs:
template <typename Dtype>
void BatchNormLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  BatchNormParameter param = this->layer_param_.batch_norm_param();
  moving_average_fraction_ = param.moving_average_fraction();
  use_global_stats_ = this->phase_ == TEST;
  if (param.has_use_global_stats())
    use_global_stats_ = param.use_global_stats();
  if (bottom[0]->num_axes() == 1)
    channels_ = 1;
  else
    channels_ = bottom[0]->shape(1);
  eps_ = param.eps();
  if (this->blobs_.size() > 0) {
    LOG(INFO) << "Skipping parameter initialization";
  } else {
    this->blobs_.resize(3);
    vector<int> sz;
    sz.push_back(channels_);
    this->blobs_[0].reset(new Blob<Dtype>(sz));  // mean
    this->blobs_[1].reset(new Blob<Dtype>(sz));  // variance
    sz[0] = 1;
    this->blobs_[2].reset(new Blob<Dtype>(sz));  // moving-average factor
    for (int i = 0; i < 3; ++i) {
      caffe_set(this->blobs_[i]->count(), Dtype(0),
                this->blobs_[i]->mutable_cpu_data());
    }
  }
  // Mask statistics from optimization by setting local learning rates
  // for mean, variance, and the bias correction to zero.
  for (int i = 0; i < this->blobs_.size(); ++i) {
    if (this->layer_param_.param_size() == i) {
      ParamSpec* fixed_param_spec = this->layer_param_.add_param();
      fixed_param_spec->set_lr_mult(0.f);
    } else {
      CHECK_EQ(this->layer_param_.param(i).lr_mult(), 0.f)
          << "Cannot configure batch normalization statistics as layer "
          << "parameters.";
    }
  }
}
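For reference, blobs_[0] and blobs_[1] hold the accumulated mean and variance, and the scalar blobs_[2] is the moving-average factor the statistics are divided by at inference time. A toy sketch of that recovery step (the numbers are made up, not taken from any real model):

import numpy as np

# Made-up stand-ins for the three BVLC BatchNorm blobs (channels = 2).
acc_mean  = np.array([4.0, -2.0])   # blobs_[0], shape (channels,)
acc_var   = np.array([8.0,  6.0])   # blobs_[1], shape (channels,)
ma_factor = np.array([2.0])         # blobs_[2], shape (1,)

# At test time BVLC Caffe divides the accumulated statistics by blobs_[2]
# to obtain the effective running mean/variance.
scale = 0.0 if ma_factor[0] == 0 else 1.0 / ma_factor[0]
running_mean = acc_mean * scale     # -> [ 2., -1.]
running_var  = acc_var * scale      # -> [ 4.,  3.]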
NVCaffe, however, sizes this->blobs_ to 5 (or 3 when scale_bias is disabled), and it initializes the third blob to 1 as a variance correction:
template<typename Ftype, typename Btype>
void BatchNormLayer<Ftype, Btype>::LayerSetUp(const vector<Blob*>& bottom,
    const vector<Blob*>& top) {
  BatchNormParameter param = this->layer_param_.batch_norm_param();
  moving_average_fraction_ = param.moving_average_fraction();
  clip_variance_ = false;
  //use_global_stats_ = false;
  use_global_stats_ = param.use_global_stats();
  if (bottom[0]->num_axes() == 1)
    channels_ = 1;
  else
    channels_ = bottom[0]->shape(1);
  eps_ = std::max<float>(param.eps(), 0.00001f);
  scale_bias_ = false;
  scale_bias_ = param.scale_bias();  // by default = false;
  if (param.has_scale_filler() || param.has_bias_filler()) {  // implicit set
    scale_bias_ = true;
  }
  if (this->blobs_.size() > 0) {
    LOG(INFO) << "Skipping parameter initialization";
  } else {
    if (scale_bias_)
      this->blobs_.resize(5);
    else
      this->blobs_.resize(3);
    const Type btype = blobs_type();
    const vector<int> shape { channels_ };
    const vector<int> shape1 { 1 };
    this->blobs_[0] = Blob::create(btype, btype);  // mean
    this->blobs_[0]->Reshape(shape);
    this->blobs_[0]->set_data(0.);
    this->blobs_[1] = Blob::create(btype, btype);  // variance
    this->blobs_[1]->Reshape(shape);
    this->blobs_[1]->set_data(0.);
    this->blobs_[2] = Blob::create(btype, btype);  // variance correction
    this->blobs_[2]->Reshape(shape1);
    this->blobs_[2]->set_data(1.);
    if (scale_bias_) {
      this->blobs_[3] = Blob::create(btype, btype);  // scale
      this->blobs_[3]->Reshape(shape);
      this->blobs_[4] = Blob::create(btype, btype);  // bias
      this->blobs_[4]->Reshape(shape);
      if (param.has_scale_filler()) {
        // TODO
        if (btype == tp<Ftype>()) {
          shared_ptr<Filler<Ftype>> scale_filler(
              GetFiller<Ftype>(this->layer_param_.batch_norm_param().scale_filler()));
          scale_filler->Fill(this->blobs_[3].get());
        } else {
          shared_ptr<Filler<float>> scale_filler(
              GetFiller<float>(this->layer_param_.batch_norm_param().scale_filler()));
          scale_filler->Fill(this->blobs_[3].get());
        }
      } else {
        this->blobs_[3]->set_data(1.);
      }
      if (param.has_bias_filler()) {
        // TODO
        if (btype == tp<Ftype>()) {
          shared_ptr<Filler<Ftype>> bias_filler(
              GetFiller<Ftype>(this->layer_param_.batch_norm_param().bias_filler()));
          bias_filler->Fill(this->blobs_[4].get());
        } else {
          shared_ptr<Filler<float>> bias_filler(
              GetFiller<float>(this->layer_param_.batch_norm_param().bias_filler()));
          bias_filler->Fill(this->blobs_[4].get());
        }
      } else {
        this->blobs_[4]->set_data(0.);
      }
    }
    iter_ = 0;
  }
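So a BVLC-trained .caffemodel carries three blobs per BatchNorm layer, while an NVCaffe build with scale_bias enabled expects five, which is exactly the kind of shape mismatch that appears when loading pretrained weights. A minimal sketch for checking what a given .caffemodel actually stores (assuming the generated protobuf bindings from pycaffe are importable; the file name is hypothetical):

from caffe.proto import caffe_pb2

net_param = caffe_pb2.NetParameter()
with open('yolov2_pretrained.caffemodel', 'rb') as f:  # hypothetical path
    net_param.ParseFromString(f.read())

# Print the number of stored parameter blobs for every BatchNorm layer.
# (Very old models serialize layers under net_param.layers instead.)
for layer in net_param.layer:
    if layer.type == 'BatchNorm':
        print(layer.name, len(layer.blobs))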
I thought I could keep NVCaffe from touching the 4th and 5th elements of this->blobs_ by setting scale_bias to false; the NVCaffe documentation also lists a scale_bias setting under batch_norm_param.
However, I got an error message saying there is no field named "scale_bias" in "caffe.BatchNormParameter".
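That error suggests the caffe.proto compiled into my build simply does not contain the scale_bias field. A minimal sketch of one way to check this against whatever build is installed (assuming the Python bindings come from that same build):

from caffe.proto import caffe_pb2

fields = [f.name for f in caffe_pb2.BatchNormParameter.DESCRIPTOR.fields]
print(fields)                  # should include 'use_global_stats', 'eps', ...
print('scale_bias' in fields)  # False means the prototxt setting will be rejected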
Below is my prototxt for classification with Darknet-19 (the base network for YOLO v2).
Has anyone resolved this issue?
name: "YOLONET"
layer{
name: "train-data"
type: "Data"
top: "data"
top: "label"
data_param {
batch_size: 16
}
image_data_param {
shuffle: true
new_height: 416
new_width: 416
}
transform_param {
mirror: true
}
include: { stage: "train" }
}
layer{
name: "val-data"
type: "Data"
top: "data"
top: "label"
data_param {
batch_size: 16
}
image_data_param {
shuffle: true
new_height: 416
new_width: 416
}
transform_param {
mirror: true
}
include: { stage: "val" }
}
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 32
kernel_size: 3
pad: 1
stride: 1
bias_term: false
weight_filler {
type: "xavier"
std: 0.01
}
}
}
layer {
name: "bn1"
type: "BatchNorm"
bottom: "conv1"
top: "bn1"
batch_norm_param {
scale_bias: false
}
}
layer {
name: "scale1"
type: "Scale"
bottom: "bn1"
top: "scale1"
scale_param {
bias_term: false
}
}
layer {
name: "relu1"
type: "ReLU"
bottom: "scale1"
top: "scale1"
relu_param{
negative_slope: 0.1
}
}
layer {
name: "pool1"
type: "Pooling"
bottom: "scale1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer{
name: "conv2"
type: "Convolution"
bottom: "pool1"
top: "conv2"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 64
kernel_size: 3
pad: 1
stride: 1
bias_term: false
weight_filler {
type: "xavier"
std: 0.01
}
}
}
layer {
name: "bn2"
type: "BatchNorm"
bottom: "conv2"
top: "bn2"
}
layer {
name: "scale2"
type: "Scale"
bottom: "bn2"
top: "scale2"
scale_param {
bias_term: true
}
}
layer {
name: "relu2"
type: "ReLU"
bottom: "scale2"
top: "scale2"
relu_param{
negative_slope: 0.1
}
}
layer {
name: "pool2"
type: "Pooling"
bottom: "scale2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer{
name: "conv3"
type: "Convolution"
bottom: "pool2"
top: "conv3"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 128
kernel_size: 3
pad: 1
stride: 1
bias_term: false
weight_filler {
type: "xavier"
std: 0.01
}
}
}
layer {
name: "bn3"
type: "BatchNorm"
bottom: "conv3"
top: "bn3"
}
layer {
name: "scale3"
type: "Scale"
bottom: "bn3"
top: "scale3"
scale_param {
bias_term: true
}
}
layer {
name: "relu3"
type: "ReLU"
bottom: "scale3"
top: "scale3"
relu_param{
negative_slope: 0.1
}
}
layer{
name: "conv4"
type: "Convolution"
bottom: "scale3"
top: "conv4"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 64
kernel_size: 1
pad: 0 #??
stride: 1
bias_term: false
weight_filler {
type: "xavier"
std: 0.01
}
}
}
layer {
name: "bn4"
type: "BatchNorm"
bottom: "conv4"
top: "bn4"
}
layer {
name: "scale4"
type: "Scale"
bottom: "bn4"
top: "scale4"
scale_param {
bias_term: true
}
}
layer {
name: "relu4"
type: "ReLU"
bottom: "scale4"
top: "scale4"
relu_param{
negative_slope: 0.1
}
}
layer{
name: "conv5"
type: "Convolution"
bottom: "scale4"
top: "conv5"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 128
kernel_size: 3
pad: 1
stride: 1
bias_term: false
weight_filler {
type: "xavier"
std: 0.01
}
}
}
layer {
name: "bn5"
type: "BatchNorm"
bottom: "conv5"
top: "bn5"
}
layer {
name: "scale5"
type: "Scale"
bottom: "bn5"
top: "scale5"
scale_param {
bias_term: true
}
}
layer {
name: "relu5"
type: "ReLU"
bottom: "scale5"
top: "scale5"
relu_param{
negative_slope: 0.1
}
}
layer {
name: "pool5"
type: "Pooling"
bottom: "scale5"
top: "pool5"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer{
name: "conv6"
type: "Convolution"
bottom: "pool5"
top: "conv6"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 256
kernel_size: 3
pad: 1
stride: 1
bias_term: false
weight_filler {
type: "xavier"
std: 0.01
}
}
}
layer {
name: "bn6"
type: "BatchNorm"
bottom: "conv6"
top: "bn6"
}
layer {
name: "scale6"
type: "Scale"
bottom: "bn6"
top: "scale6"
scale_param {
bias_term: true
}
}
layer {
name: "relu6"
type: "ReLU"
bottom: "scale6"
top: "scale6"
relu_param{
negative_slope: 0.1
}
}
layer{
name: "conv7"
type: "Convolution"
bottom: "scale6"
top: "conv7"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 128
kernel_size: 1
pad: 0
stride: 1
bias_term: false
weight_filler {
type: "xavier"
std: 0.01
}
}
}
layer {
name: "bn7"
type: "BatchNorm"
bottom: "conv7"
top: "bn7"
}
layer {
name: "scale7"
type: "Scale"
bottom: "bn7"
top: "scale7"
scale_param {
bias_term: true
}
}
layer {
name: "relu7"
type: "ReLU"
bottom: "scale7"
top: "scale7"
relu_param{
negative_slope: 0.1
}
}
layer{
name: "conv8"
type: "Convolution"
bottom: "scale7"
top: "conv8"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 256
kernel_size: 3
pad: 1
stride: 1
bias_term: false
weight_filler {
type: "xavier"
std: 0.01
}
}
}
layer {
name: "bn8"
type: "BatchNorm"
bottom: "conv8"
top: "bn8"
}
layer {
name: "scale8"
type: "Scale"
bottom: "bn8"
top: "scale8"
scale_param {
bias_term: true
}
}
layer {
name: "relu8"
type: "ReLU"
bottom: "scale8"
top: "scale8"
relu_param{
negative_slope: 0.1
}
}
layer {
name: "pool8"
type: "Pooling"
bottom: "scale8"
top: "pool8"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer{
name: "conv9"
type: "Convolution"
bottom: "pool8"
top: "conv9"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 512
kernel_size: 3
pad: 1
stride: 1
bias_term: false
weight_filler {
type: "xavier"
std: 0.01
}
}
}
layer {
name: "bn9"
type: "BatchNorm"
bottom: "conv9"
top: "bn9"
}
layer {
name: "scale9"
type: "Scale"
bottom: "bn9"
top: "scale9"
scale_param {
bias_term: true
}
}
layer {
name: "relu9"
type: "ReLU"
bottom: "scale9"
top: "scale9"
relu_param{
negative_slope: 0.1
}
}
layer{
name: "conv10"
type: "Convolution"
bottom: "scale9"
top: "conv10"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 256
kernel_size: 1
pad: 0
stride: 1
bias_term: false
weight_filler {
type: "xavier"
std: 0.01
}
}
}
layer {
name: "bn10"
type: "BatchNorm"
bottom: "conv10"
top: "bn10"
}
layer {
name: "scale10"
type: "Scale"
bottom: "bn10"
top: "scale10"
scale_param {
bias_term: true
}
}
layer {
name: "relu10"
type: "ReLU"
bottom: "scale10"
top: "scale10"
relu_param{
negative_slope: 0.1
}
}
layer{
name: "conv11"
type: "Convolution"
bottom: "scale10"
top: "conv11"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 512
kernel_size: 3
pad: 1
stride: 1
bias_term: false
weight_filler {
type: "xavier"
std: 0.01
}
}
}
layer {
name: "bn11"
type: "BatchNorm"
bottom: "conv11"
top: "bn11"
}
layer {
name: "scale11"
type: "Scale"
bottom: "bn11"
top: "scale11"
scale_param {
bias_term: true
}
}
layer {
name: "relu11"
type: "ReLU"
bottom: "scale11"
top: "scale11"
relu_param{
negative_slope: 0.1
}
}
layer{
name: "conv12"
type: "Convolution"
bottom: "scale11"
top: "conv12"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 256
kernel_size: 1
pad: 0
stride: 1
bias_term: false
weight_filler {
type: "xavier"
std: 0.01
}
}
}
layer {
name: "bn12"
type: "BatchNorm"
bottom: "conv12"
top: "bn12"
}
layer {
name: "scale12"
type: "Scale"
bottom: "bn12"
top: "scale12"
scale_param {
bias_term: true
}
}
layer {
name: "relu12"
type: "ReLU"
bottom: "scale12"
top: "scale12"
relu_param{
negative_slope: 0.1
}
}
layer{
name: "conv13"
type: "Convolution"
bottom: "scale12"
top: "conv13"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 512
kernel_size: 3
pad: 1
stride: 1
bias_term: false
weight_filler {
type: "xavier"
std: 0.01
}
}
}
layer {
name: "bn13"
type: "BatchNorm"
bottom: "conv13"
top: "bn13"
}
layer {
name: "scale13"
type: "Scale"
bottom: "bn13"
top: "scale13"
scale_param {
bias_term: true
}
}
layer {
name: "relu13"
type: "ReLU"
bottom: "scale13"
top: "scale13"
relu_param{
negative_slope: 0.1
}
}
layer {
name: "pool13"
type: "Pooling"
bottom: "scale13"
top: "pool13"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer{
name: "conv14"
type: "Convolution"
bottom: "pool13"
top: "conv14"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 1024
kernel_size: 3
pad: 1
stride: 1
bias_term: false
weight_filler {
type: "xavier"
std: 0.01
}
}
}
layer {
name: "bn14"
type: "BatchNorm"
bottom: "conv14"
top: "bn14"
}
layer {
name: "scale14"
type: "Scale"
bottom: "bn14"
top: "scale14"
scale_param {
bias_term: true
}
}
layer {
name: "relu14"
type: "ReLU"
bottom: "scale14"
top: "scale14"
relu_param{
negative_slope: 0.1
}
}
layer{
name: "conv15"
type: "Convolution"
bottom: "scale14"
top: "conv15"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 512
kernel_size: 1
pad: 0
stride: 1
bias_term: false
weight_filler {
type: "xavier"
std: 0.01
}
}
}
layer {
name: "bn15"
type: "BatchNorm"
bottom: "conv15"
top: "bn15"
}
layer {
name: "scale15"
type: "Scale"
bottom: "bn15"
top: "scale15"
scale_param {
bias_term: true
}
}
layer {
name: "relu15"
type: "ReLU"
bottom: "scale15"
top: "scale15"
relu_param{
negative_slope: 0.1
}
}
layer{
name: "conv16"
type: "Convolution"
bottom: "scale15"
top: "conv16"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 1024
kernel_size: 3
pad: 1
stride: 1
bias_term: false
weight_filler {
type: "xavier"
std: 0.01
}
}
}
layer {
name: "bn16"
type: "BatchNorm"
bottom: "conv16"
top: "bn16"
}
layer {
name: "scale16"
type: "Scale"
bottom: "bn16"
top: "scale16"
scale_param {
bias_term: true
}
}
layer {
name: "relu16"
type: "ReLU"
bottom: "scale16"
top: "scale16"
relu_param{
negative_slope: 0.1
}
}
layer{
name: "conv17"
type: "Convolution"
bottom: "scale16"
top: "conv17"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 512
kernel_size: 1
pad: 0
stride: 1
bias_term: false
weight_filler {
type: "xavier"
std: 0.01
}
}
}
layer {
name: "bn17"
type: "BatchNorm"
bottom: "conv17"
top: "bn17"
}
layer {
name: "scale17"
type: "Scale"
bottom: "bn17"
top: "scale17"
scale_param {
bias_term: true
}
}
layer {
name: "relu17"
type: "ReLU"
bottom: "scale17"
top: "scale17"
relu_param{
negative_slope: 0.1
}
}
layer{
name: "conv18"
type: "Convolution"
bottom: "scale17"
top: "conv18"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 1024
kernel_size: 3
pad: 1
stride: 1
bias_term: false
weight_filler {
type: "xavier"
std: 0.01
}
}
}
layer {
name: "bn18"
type: "BatchNorm"
bottom: "conv18"
top: "bn18"
}
layer {
name: "scale18"
type: "Scale"
bottom: "bn18"
top: "scale18"
scale_param {
bias_term: true
}
}
layer {
name: "relu18"
type: "ReLU"
bottom: "scale18"
top: "scale18"
relu_param{
negative_slope: 0.1
}
}
layer {
name: "scale19"
type: "InnerProduct"
bottom: "scale18"
top: "scale19"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
# If num_output is left unset, DIGITS automatically sets it to the
# number of classes in the dataset; here it is set explicitly:
num_output: 10
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer{
name: "loss3/loss"
type: "SoftmaxWithLoss"
bottom: "scale19"
bottom: "label"
top: "loss"
loss_weight: 1
exclude { stage: "deploy" }
}
layer{
name: "loss3/top"
type: "Accuracy"
bottom: "scale19"
bottom: "label"
top: "accuracy"
include { stage: "val" }
}
layer{
name: "loss3/top-5"
type: "Accuracy"
bottom: "scale19"
bottom: "label"
top: "accuracy-top5"
include { stage: "val" }
accuracy_param {
top_k: 5
}
}
layer{
name: "softmax"
type: "Softmax"
bottom: "scale19"
top: "softmax"
include { stage: "deploy" }
}
I have met the same problem, exactly like yours. I have no idea where this NVCaffe even is; I only installed DIGITS, CUDA, and cuDNN. Is it possible NVCaffe was compiled along with one of them? Waiting for answers.
This problem seems to be related to NVIDIA/caffe version 0.15. It looks like NVIDIA fixed it in later releases.
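If you are not sure which Caffe build DIGITS is actually picking up (as asked above), a rough sketch for checking is to print the location and version string of the caffe package that Python imports; DIGITS usually locates Caffe through an environment variable such as CAFFE_ROOT or through whatever caffe package is on PYTHONPATH:

import os
import caffe

print(os.path.dirname(caffe.__file__))             # where the imported caffe lives
print(getattr(caffe, '__version__', 'unknown'))    # version string, if the build provides one
print(os.environ.get('CAFFE_ROOT'), os.environ.get('CAFFE_HOME'))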