NVCaffe's BatchNormLayer is incompatible with BVLC caffe
While loading a pretrained YOLO v2 model (based on @gklz1982's work), which contains BatchNorm layers, I got the error message above.
So I suspected there is some difference between NVCaffe and BVLC Caffe, specifically in the BatchNorm layer.
In BVLC Caffe, the BatchNorm layer uses only three blobs:
template <typename Dtype>
void BatchNormLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  BatchNormParameter param = this->layer_param_.batch_norm_param();
  moving_average_fraction_ = param.moving_average_fraction();
  use_global_stats_ = this->phase_ == TEST;
  if (param.has_use_global_stats())
    use_global_stats_ = param.use_global_stats();
  if (bottom[0]->num_axes() == 1)
    channels_ = 1;
  else
    channels_ = bottom[0]->shape(1);
  eps_ = param.eps();
  if (this->blobs_.size() > 0) {
    LOG(INFO) << "Skipping parameter initialization";
  } else {
    this->blobs_.resize(3);
    vector<int> sz;
    sz.push_back(channels_);
    this->blobs_[0].reset(new Blob<Dtype>(sz));  // mean
    this->blobs_[1].reset(new Blob<Dtype>(sz));  // variance
    sz[0] = 1;
    this->blobs_[2].reset(new Blob<Dtype>(sz));  // moving-average factor
    for (int i = 0; i < 3; ++i) {
      caffe_set(this->blobs_[i]->count(), Dtype(0),
                this->blobs_[i]->mutable_cpu_data());
    }
  }
  // Mask statistics from optimization by setting local learning rates
  // for mean, variance, and the bias correction to zero.
  for (int i = 0; i < this->blobs_.size(); ++i) {
    if (this->layer_param_.param_size() == i) {
      ParamSpec* fixed_param_spec = this->layer_param_.add_param();
      fixed_param_spec->set_lr_mult(0.f);
    } else {
      CHECK_EQ(this->layer_param_.param(i).lr_mult(), 0.f)
          << "Cannot configure batch normalization statistics as layer "
          << "parameters.";
    }
  }
}
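For reference, blobs_[0] and blobs_[1] hold the accumulated mean and variance, and the scalar blobs_[2] is the moving-average factor the statistics are divided by at inference time. A toy sketch of that recovery step (the numbers are made up, not taken from any real model):

import numpy as np

# Made-up stand-ins for the three BVLC BatchNorm blobs (channels = 2).
acc_mean  = np.array([4.0, -2.0])   # blobs_[0], shape (channels,)
acc_var   = np.array([8.0,  6.0])   # blobs_[1], shape (channels,)
ma_factor = np.array([2.0])         # blobs_[2], shape (1,)

# At test time BVLC Caffe divides the accumulated statistics by blobs_[2]
# to obtain the effective running mean/variance.
scale = 0.0 if ma_factor[0] == 0 else 1.0 / ma_factor[0]
running_mean = acc_mean * scale     # -> [ 2., -1.]
running_var  = acc_var * scale      # -> [ 4.,  3.]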
NVCaffe, however, sizes this->blobs_ to 5 (or 3 when scale_bias is disabled), and it initializes the third blob to 1 as a variance correction:
template<typename Ftype, typename Btype>
void BatchNormLayer<Ftype, Btype>::LayerSetUp(const vector<Blob*>& bottom,
    const vector<Blob*>& top) {
  BatchNormParameter param = this->layer_param_.batch_norm_param();
  moving_average_fraction_ = param.moving_average_fraction();
  clip_variance_ = false;
  //use_global_stats_ = false;
  use_global_stats_ = param.use_global_stats();
  if (bottom[0]->num_axes() == 1)
    channels_ = 1;
  else
    channels_ = bottom[0]->shape(1);
  eps_ = std::max<float>(param.eps(), 0.00001f);
  scale_bias_ = false;
  scale_bias_ = param.scale_bias();  // by default = false;
  if (param.has_scale_filler() || param.has_bias_filler()) {  // implicit set
    scale_bias_ = true;
  }
  if (this->blobs_.size() > 0) {
    LOG(INFO) << "Skipping parameter initialization";
  } else {
    if (scale_bias_)
      this->blobs_.resize(5);
    else
      this->blobs_.resize(3);
    const Type btype = blobs_type();
    const vector<int> shape { channels_ };
    const vector<int> shape1 { 1 };
    this->blobs_[0] = Blob::create(btype, btype);  // mean
    this->blobs_[0]->Reshape(shape);
    this->blobs_[0]->set_data(0.);
    this->blobs_[1] = Blob::create(btype, btype);  // variance
    this->blobs_[1]->Reshape(shape);
    this->blobs_[1]->set_data(0.);
    this->blobs_[2] = Blob::create(btype, btype);  // variance correction
    this->blobs_[2]->Reshape(shape1);
    this->blobs_[2]->set_data(1.);
    if (scale_bias_) {
      this->blobs_[3] = Blob::create(btype, btype);  // scale
      this->blobs_[3]->Reshape(shape);
      this->blobs_[4] = Blob::create(btype, btype);  // bias
      this->blobs_[4]->Reshape(shape);
      if (param.has_scale_filler()) {
        // TODO
        if (btype == tp<Ftype>()) {
          shared_ptr<Filler<Ftype>> scale_filler(
              GetFiller<Ftype>(this->layer_param_.batch_norm_param().scale_filler()));
          scale_filler->Fill(this->blobs_[3].get());
        } else {
          shared_ptr<Filler<float>> scale_filler(
              GetFiller<float>(this->layer_param_.batch_norm_param().scale_filler()));
          scale_filler->Fill(this->blobs_[3].get());
        }
      } else {
        this->blobs_[3]->set_data(1.);
      }
      if (param.has_bias_filler()) {
        // TODO
        if (btype == tp<Ftype>()) {
          shared_ptr<Filler<Ftype>> bias_filler(
              GetFiller<Ftype>(this->layer_param_.batch_norm_param().bias_filler()));
          bias_filler->Fill(this->blobs_[4].get());
        } else {
          shared_ptr<Filler<float>> bias_filler(
              GetFiller<float>(this->layer_param_.batch_norm_param().bias_filler()));
          bias_filler->Fill(this->blobs_[4].get());
        }
      } else {
        this->blobs_[4]->set_data(0.);
      }
    }
    iter_ = 0;
  }
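So a BVLC-trained .caffemodel carries three blobs per BatchNorm layer, while an NVCaffe build with scale_bias enabled expects five, which is exactly the kind of shape mismatch that appears when loading pretrained weights. A minimal sketch for checking what a given .caffemodel actually stores (assuming the generated protobuf bindings from pycaffe are importable; the file name is hypothetical):

from caffe.proto import caffe_pb2

net_param = caffe_pb2.NetParameter()
with open('yolov2_pretrained.caffemodel', 'rb') as f:  # hypothetical path
    net_param.ParseFromString(f.read())

# Print the number of stored parameter blobs for every BatchNorm layer.
# (Very old models serialize layers under net_param.layers instead.)
for layer in net_param.layer:
    if layer.type == 'BatchNorm':
        print(layer.name, len(layer.blobs))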
I thought I could keep NVCaffe from touching the 4th and 5th elements of this->blobs_ by setting scale_bias to false; the NVCaffe documentation also lists a scale_bias setting under batch_norm_param.
However, I got an error message saying there is no field named "scale_bias" in "caffe.BatchNormParameter".
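That error suggests the caffe.proto compiled into my build simply does not contain the scale_bias field. A minimal sketch of one way to check this against whatever build is installed (assuming the Python bindings come from that same build):

from caffe.proto import caffe_pb2

fields = [f.name for f in caffe_pb2.BatchNormParameter.DESCRIPTOR.fields]
print(fields)                  # should include 'use_global_stats', 'eps', ...
print('scale_bias' in fields)  # False means the prototxt setting will be rejected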
Below is my prototxt for classification with Darknet-19 (the base network for YOLO v2).
Has anyone resolved this issue?
name: "YOLONET"
layer{
name: "train-data"
type: "Data"
top: "data"
top: "label"
data_param {
batch_size: 16
}
image_data_param {
shuffle: true
new_height: 416
new_width: 416
}
transform_param {
mirror: true
}
include: { stage: "train" }
}
layer{
name: "val-data"
type: "Data"
top: "data"
top: "label"
data_param {
batch_size: 16
}
image_data_param {
shuffle: true
new_height: 416
new_width: 416
}
transform_param {
mirror: true
}
include: { stage: "val" }
}
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 32
kernel_size: 3
pad: 1
stride: 1
bias_term: false
weight_filler {
type: "xavier"
std: 0.01
}
}
}
layer {
name: "bn1"
type: "BatchNorm"
bottom: "conv1"
top: "bn1"
batch_norm_param {
scale_bias: false
}
}
layer {
name: "scale1"
type: "Scale"
bottom: "bn1"
top: "scale1"
scale_param {
bias_term: false
}
}
layer {
name: "relu1"
type: "ReLU"
bottom: "scale1"
top: "scale1"
relu_param{
negative_slope: 0.1
}
}
layer {
name: "pool1"
type: "Pooling"
bottom: "scale1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer{
name: "conv2"
type: "Convolution"
bottom: "pool1"
top: "conv2"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 64
kernel_size: 3
pad: 1
stride: 1
bias_term: false
weight_filler {
type: "xavier"
std: 0.01
}
}
}
layer {
name: "bn2"
type: "BatchNorm"
bottom: "conv2"
top: "bn2"
}
layer {
name: "scale2"
type: "Scale"
bottom: "bn2"
top: "scale2"
scale_param {
bias_term: true
}
}
layer {
name: "relu2"
type: "ReLU"
bottom: "scale2"
top: "scale2"
relu_param{
negative_slope: 0.1
}
}
layer {
name: "pool2"
type: "Pooling"
bottom: "scale2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer{
name: "conv3"
type: "Convolution"
bottom: "pool2"
top: "conv3"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 128
kernel_size: 3
pad: 1
stride: 1
bias_term: false
weight_filler {
type: "xavier"
std: 0.01
}
}
}
layer {
name: "bn3"
type: "BatchNorm"
bottom: "conv3"
top: "bn3"
}
layer {
name: "scale3"
type: "Scale"
bottom: "bn3"
top: "scale3"
scale_param {
bias_term: true
}
}
layer {
name: "relu3"
type: "ReLU"
bottom: "scale3"
top: "scale3"
relu_param{
negative_slope: 0.1
}
}
layer{
name: "conv4"
type: "Convolution"
bottom: "scale3"
top: "conv4"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 64
kernel_size: 1
pad: 0 #??
stride: 1
bias_term: false
weight_filler {
type: "xavier"
std: 0.01
}
}
}
layer {
name: "bn4"
type: "BatchNorm"
bottom: "conv4"
top: "bn4"
}
layer {
name: "scale4"
type: "Scale"
bottom: "bn4"
top: "scale4"
scale_param {
bias_term: true
}
}
layer {
name: "relu4"
type: "ReLU"
bottom: "scale4"
top: "scale4"
relu_param{
negative_slope: 0.1
}
}
layer{
name: "conv5"
type: "Convolution"
bottom: "scale4"
top: "conv5"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 128
kernel_size: 3
pad: 1
stride: 1
bias_term: false
weight_filler {
type: "xavier"
std: 0.01
}
}
}
layer {
name: "bn5"
type: "BatchNorm"
bottom: "conv5"
top: "bn5"
}
layer {
name: "scale5"
type: "Scale"
bottom: "bn5"
top: "scale5"
scale_param {
bias_term: true
}
}
layer {
name: "relu5"
type: "ReLU"
bottom: "scale5"
top: "scale5"
relu_param{
negative_slope: 0.1
}
}
layer {
name: "pool5"
type: "Pooling"
bottom: "scale5"
top: "pool5"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer{
name: "conv6"
type: "Convolution"
bottom: "pool5"
top: "conv6"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 256
kernel_size: 3
pad: 1
stride: 1
bias_term: false
weight_filler {
type: "xavier"
std: 0.01
}
}
}
layer {
name: "bn6"
type: "BatchNorm"
bottom: "conv6"
top: "bn6"
}
layer {
name: "scale6"
type: "Scale"
bottom: "bn6"
top: "scale6"
scale_param {
bias_term: true
}
}
layer {
name: "relu6"
type: "ReLU"
bottom: "scale6"
top: "scale6"
relu_param{
negative_slope: 0.1
}
}
layer{
name: "conv7"
type: "Convolution"
bottom: "scale6"
top: "conv7"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 128
kernel_size: 1
pad: 0
stride: 1
bias_term: false
weight_filler {
type: "xavier"
std: 0.01
}
}
}
layer {
name: "bn7"
type: "BatchNorm"
bottom: "conv7"
top: "bn7"
}
layer {
name: "scale7"
type: "Scale"
bottom: "bn7"
top: "scale7"
scale_param {
bias_term: true
}
}
layer {
name: "relu7"
type: "ReLU"
bottom: "scale7"
top: "scale7"
relu_param{
negative_slope: 0.1
}
}
layer{
name: "conv8"
type: "Convolution"
bottom: "scale7"
top: "conv8"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 256
kernel_size: 3
pad: 1
stride: 1
bias_term: false
weight_filler {
type: "xavier"
std: 0.01
}
}
}
layer {
name: "bn8"
type: "BatchNorm"
bottom: "conv8"
top: "bn8"
}
layer {
name: "scale8"
type: "Scale"
bottom: "bn8"
top: "scale8"
scale_param {
bias_term: true
}
}
layer {
name: "relu8"
type: "ReLU"
bottom: "scale8"
top: "scale8"
relu_param{
negative_slope: 0.1
}
}
layer {
name: "pool8"
type: "Pooling"
bottom: "scale8"
top: "pool8"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer{
name: "conv9"
type: "Convolution"
bottom: "pool8"
top: "conv9"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 512
kernel_size: 3
pad: 1
stride: 1
bias_term: false
weight_filler {
type: "xavier"
std: 0.01
}
}
}
layer {
name: "bn9"
type: "BatchNorm"
bottom: "conv9"
top: "bn9"
}
layer {
name: "scale9"
type: "Scale"
bottom: "bn9"
top: "scale9"
scale_param {
bias_term: true
}
}
layer {
name: "relu9"
type: "ReLU"
bottom: "scale9"
top: "scale9"
relu_param{
negative_slope: 0.1
}
}
layer{
name: "conv10"
type: "Convolution"
bottom: "scale9"
top: "conv10"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 256
kernel_size: 1
pad: 0
stride: 1
bias_term: false
weight_filler {
type: "xavier"
std: 0.01
}
}
}
layer {
name: "bn10"
type: "BatchNorm"
bottom: "conv10"
top: "bn10"
}
layer {
name: "scale10"
type: "Scale"
bottom: "bn10"
top: "scale10"
scale_param {
bias_term: true
}
}
layer {
name: "relu10"
type: "ReLU"
bottom: "scale10"
top: "scale10"
relu_param{
negative_slope: 0.1
}
}
layer{
name: "conv11"
type: "Convolution"
bottom: "scale10"
top: "conv11"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 512
kernel_size: 3
pad: 1
stride: 1
bias_term: false
weight_filler {
type: "xavier"
std: 0.01
}
}
}
layer {
name: "bn11"
type: "BatchNorm"
bottom: "conv11"
top: "bn11"
}
layer {
name: "scale11"
type: "Scale"
bottom: "bn11"
top: "scale11"
scale_param {
bias_term: true
}
}
layer {
name: "relu11"
type: "ReLU"
bottom: "scale11"
top: "scale11"
relu_param{
negative_slope: 0.1
}
}
layer{
name: "conv12"
type: "Convolution"
bottom: "scale11"
top: "conv12"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 256
kernel_size: 1
pad: 0
stride: 1
bias_term: false
weight_filler {
type: "xavier"
std: 0.01
}
}
}
layer {
name: "bn12"
type: "BatchNorm"
bottom: "conv12"
top: "bn12"
}
layer {
name: "scale12"
type: "Scale"
bottom: "bn12"
top: "scale12"
scale_param {
bias_term: true
}
}
layer {
name: "relu12"
type: "ReLU"
bottom: "scale12"
top: "scale12"
relu_param{
negative_slope: 0.1
}
}
layer{
name: "conv13"
type: "Convolution"
bottom: "scale12"
top: "conv13"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 512
kernel_size: 3
pad: 1
stride: 1
bias_term: false
weight_filler {
type: "xavier"
std: 0.01
}
}
}
layer {
name: "bn13"
type: "BatchNorm"
bottom: "conv13"
top: "bn13"
}
layer {
name: "scale13"
type: "Scale"
bottom: "bn13"
top: "scale13"
scale_param {
bias_term: true
}
}
layer {
name: "relu13"
type: "ReLU"
bottom: "scale13"
top: "scale13"
relu_param{
negative_slope: 0.1
}
}
layer {
name: "pool13"
type: "Pooling"
bottom: "scale13"
top: "pool13"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer{
name: "conv14"
type: "Convolution"
bottom: "pool13"
top: "conv14"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 1024
kernel_size: 3
pad: 1
stride: 1
bias_term: false
weight_filler {
type: "xavier"
std: 0.01
}
}
}
layer {
name: "bn14"
type: "BatchNorm"
bottom: "conv14"
top: "bn14"
}
layer {
name: "scale14"
type: "Scale"
bottom: "bn14"
top: "scale14"
scale_param {
bias_term: true
}
}
layer {
name: "relu14"
type: "ReLU"
bottom: "scale14"
top: "scale14"
relu_param{
negative_slope: 0.1
}
}
layer{
name: "conv15"
type: "Convolution"
bottom: "scale14"
top: "conv15"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 512
kernel_size: 1
pad: 0
stride: 1
bias_term: false
weight_filler {
type: "xavier"
std: 0.01
}
}
}
layer {
name: "bn15"
type: "BatchNorm"
bottom: "conv15"
top: "bn15"
}
layer {
name: "scale15"
type: "Scale"
bottom: "bn15"
top: "scale15"
scale_param {
bias_term: true
}
}
layer {
name: "relu15"
type: "ReLU"
bottom: "scale15"
top: "scale15"
relu_param{
negative_slope: 0.1
}
}
layer{
name: "conv16"
type: "Convolution"
bottom: "scale15"
top: "conv16"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 1024
kernel_size: 3
pad: 1
stride: 1
bias_term: false
weight_filler {
type: "xavier"
std: 0.01
}
}
}
layer {
name: "bn16"
type: "BatchNorm"
bottom: "conv16"
top: "bn16"
}
layer {
name: "scale16"
type: "Scale"
bottom: "bn16"
top: "scale16"
scale_param {
bias_term: true
}
}
layer {
name: "relu16"
type: "ReLU"
bottom: "scale16"
top: "scale16"
relu_param{
negative_slope: 0.1
}
}
layer{
name: "conv17"
type: "Convolution"
bottom: "scale16"
top: "conv17"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 512
kernel_size: 1
pad: 0
stride: 1
bias_term: false
weight_filler {
type: "xavier"
std: 0.01
}
}
}
layer {
name: "bn17"
type: "BatchNorm"
bottom: "conv17"
top: "bn17"
}
layer {
name: "scale17"
type: "Scale"
bottom: "bn17"
top: "scale17"
scale_param {
bias_term: true
}
}
layer {
name: "relu17"
type: "ReLU"
bottom: "scale17"
top: "scale17"
relu_param{
negative_slope: 0.1
}
}
layer{
name: "conv18"
type: "Convolution"
bottom: "scale17"
top: "conv18"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 1024
kernel_size: 3
pad: 1
stride: 1
bias_term: false
weight_filler {
type: "xavier"
std: 0.01
}
}
}
layer {
name: "bn18"
type: "BatchNorm"
bottom: "conv18"
top: "bn18"
}
layer {
name: "scale18"
type: "Scale"
bottom: "bn18"
top: "scale18"
scale_param {
bias_term: true
}
}
layer {
name: "relu18"
type: "ReLU"
bottom: "scale18"
top: "scale18"
relu_param{
negative_slope: 0.1
}
}
layer {
name: "scale19"
type: "InnerProduct"
bottom: "scale18"
top: "scale19"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
# If num_output is left unset, DIGITS automatically sets it to the
# number of classes in the dataset; here it is set explicitly:
num_output: 10
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer{
name: "loss3/loss"
type: "SoftmaxWithLoss"
bottom: "scale19"
bottom: "label"
top: "loss"
loss_weight: 1
exclude { stage: "deploy" }
}
layer{
name: "loss3/top"
type: "Accuracy"
bottom: "scale19"
bottom: "label"
top: "accuracy"
include { stage: "val" }
}
layer{
name: "loss3/top-5"
type: "Accuracy"
bottom: "scale19"
bottom: "label"
top: "accuracy-top5"
include { stage: "val" }
accuracy_param {
top_k: 5
}
}
layer{
name: "softmax"
type: "Softmax"
bottom: "scale19"
top: "softmax"
include { stage: "deploy" }
}
I have met the same problem, exactly like yours. I have no idea where this NVCaffe even is; I only installed DIGITS, CUDA, and cuDNN. Is it possible NVCaffe was compiled along with one of them? Waiting for answers.
This problem seems to be related to NVIDIA/caffe version 0.15. It looks like NVIDIA fixed it in later releases.
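If you are not sure which Caffe build DIGITS is actually picking up (as asked above), a rough sketch for checking is to print the location and version string of the caffe package that Python imports; DIGITS usually locates Caffe through an environment variable such as CAFFE_ROOT or through whatever caffe package is on PYTHONPATH:

import os
import caffe

print(os.path.dirname(caffe.__file__))             # where the imported caffe lives
print(getattr(caffe, '__version__', 'unknown'))    # version string, if the build provides one
print(os.environ.get('CAFFE_ROOT'), os.environ.get('CAFFE_HOME'))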