caffe
caffe copied to clipboard
No Learning for NiN on cifar10
Issue summary
When I define my network following the Network in Network (NiN) architecture without SSL regularization (similar to lenet_train_test.prototxt, which also uses no SSL regularization), the network does not train, even though I have tried different learning rates and weight decays. The loss stays at the same constant value throughout training: Train net output #0: loss = 2.30259 (* 1 = 2.30259 loss). Could you please help me with that?
Steps to reproduce
I am using a Docker container; here is the Dockerfile from which the image was built: `FROM nvidia/cuda:8.0-cudnn6-devel-ubuntu16.04 LABEL maintainer [email protected]
RUN apt-get update && apt-get install -y --no-install-recommends
build-essential
cmake
git
wget
libatlas-base-dev
libboost-all-dev
libgflags-dev
libgoogle-glog-dev
libhdf5-serial-dev
libleveldb-dev
liblmdb-dev
libopencv-dev
libprotobuf-dev
libsnappy-dev
protobuf-compiler
python-dev
python-numpy
python-pip
python-setuptools
python-scipy &&
rm -rf /var/lib/apt/lists/*
ENV CAFFE_ROOT=/opt/caffe WORKDIR $CAFFE_ROOT
ENV CLONE_TAG=1.0
RUN git clone -b scnn --depth 1 https://github.com/wenwei202/caffe.git . &&
pip install --upgrade pip &&
cd python && for req in $(cat requirements.txt) pydot; do pip install $req; done && cd .. &&
git clone https://github.com/NVIDIA/nccl.git && cd nccl && make -j install && cd .. && rm -rf nccl
#mkdir build && cd build &&
#cmake .. && \
RUN cp Makefile.config.example Makefile.config &&
echo 'INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include /usr/include/hdf5/serial/' >>./Makefile.config &&
echo 'LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib /usr/lib/x86_64-linux-gnu/hdf5/serial/' >>./Makefile.config &&
make -j"$(nproc)" &&
make pycaffe -j"$(nproc)"
RUN pip install lmdb
ENV PYCAFFE_ROOT $CAFFE_ROOT/python ENV PYTHONPATH $PYCAFFE_ROOT:$PYTHONPATH ENV PATH $CAFFE_ROOT/build/tools:$PYCAFFE_ROOT:$PATH RUN echo "$CAFFE_ROOT/build/lib" >> /etc/ld.so.conf.d/caffe.conf && ldconfig
WORKDIR /workspace `
Your system configuration
Operating system: Ubuntu 16.04 LTS Compiler: CUDA version (if applicable): V8.0.61 CUDNN version (if applicable): BLAS: I can not find it by grep OPENBLAS_VERSION /usr/local/include/openblas_config.h Python or MATLAB version (for pycaffe and matcaffe respectively): python 2.7
This seems to be an issue related to your net and solver. Copying them here might help.
Thanks for your reply. Here is mnist_NiN.prototxt:
`name: "MNIST_NiN" layer { name: "mnist" type: "Data" top: "data" top: "label" include { phase: TRAIN } transform_param { scale: 0.00390625 } data_param { source: "examples/mnist/mnist_train_lmdb" batch_size: 64 backend: LMDB } }
layer { name: "mnist" type: "Data" top: "data" top: "label" include { phase: TEST } transform_param { scale: 0.00390625 } data_param { source: "examples/mnist/mnist_test_lmdb" batch_size: 100 backend: LMDB } }
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 192
pad: 2
kernel_size: 5
weight_filler {
type: "gaussian"
std: 0.05
}
bias_filler {
type: "constant"
}
} }
layer {
name: "relu1"
type: "ReLU"
bottom: "conv1"
top: "conv1"
}
layer {
name: "cccp1"
type: "Convolution"
bottom: "conv1"
top: "cccp1" param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 160
group: 1
kernel_size: 1
weight_filler { type: "gaussian" std: 0.05 } bias_filler { type: "constant" value: 0 }
}
}
layer {
name: "relu_cccp1"
type: "ReLU"
bottom: "cccp1"
top: "cccp1"
}
layer {
name: "cccp2"
type: "Convolution"
bottom: "cccp1"
top: "cccp2" param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 96
kernel_size: 1
group: 1
weight_filler {
type: "gaussian"
std: 0.05
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu_cccp2"
type: "ReLU"
bottom: "cccp2"
top: "cccp2"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "cccp2"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
engine: CAFFE
}
}
layer {
name: "drop3"
type: "Dropout"
bottom: "pool1"
top: "pool1"
dropout_param {
dropout_ratio: 0.5
}
}
# conv2: 5x5 convolution (192 outputs, pad 2) applied to the pool1 blob.
#
# FIX: this layer previously had no weight_filler / bias_filler. Caffe's
# default filler is type "constant" with value 0, so conv2 started with
# all-zero weights and produced an all-zero output. The in-place ReLU that
# follows passes no gradient where its input is not > 0, so no gradient ever
# reached conv2 (or anything before it), and every later activation -- up to
# and including the pool3 scores -- stayed 0. Softmax over 10 identical scores
# yields loss = ln(10) = 2.30259, i.e. the constant loss seen during training.
# The fillers below match the ones used by the other convolution layers in
# this net, which breaks the symmetry and lets gradients flow.
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  # Weights: base learning rate, regular weight decay.
  param {
    lr_mult: 1
    decay_mult: 1
  }
  # Biases: doubled learning rate, no weight decay.
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 192
    pad: 2
    kernel_size: 5
    weight_filler {
      type: "gaussian"
      std: 0.05
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
name: "relu2_1"
type: "ReLU"
bottom: "conv2"
top: "conv2"
}
layer {
name: "cccp3"
type: "Convolution"
bottom: "conv2"
top: "cccp3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 192
kernel_size: 1
group: 1
weight_filler { type: "gaussian" std: 0.05 } bias_filler { type: "constant" value: 0 }
}
}
layer {
name: "relu_cccp3"
type: "ReLU"
bottom: "cccp3"
top: "cccp3"
}
layer {
name: "cccp4"
type: "Convolution"
bottom: "cccp3"
top: "cccp4"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 192
kernel_size: 1
group: 1
weight_filler {
type: "gaussian"
std: 0.05
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu_cccp4"
type: "ReLU"
bottom: "cccp4"
top: "cccp4"
}
layer {
name: "pool2"
type: "Pooling"
bottom: "cccp4"
top: "pool2"
pooling_param {
pool: AVE
kernel_size: 3
stride: 2
engine: CAFFE
}
}
layer {
name: "drop6"
type: "Dropout"
bottom: "pool2"
top: "pool2"
dropout_param {
dropout_ratio: 0.5
}
}
layer {
name: "conv3"
type: "Convolution"
bottom: "pool2"
top: "conv3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 192
pad: 1
kernel_size: 3
weight_filler {
type: "gaussian"
std: 0.05
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu3"
type: "ReLU"
bottom: "conv3"
top: "conv3"
}
layer {
name: "cccp5"
type: "Convolution"
bottom: "conv3"
top: "cccp5"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 192
kernel_size: 1
group: 1
weight_filler { type: "gaussian" std: 0.05 } bias_filler { type: "constant" value: 0 }
}
}
layer {
name: "relu_cccp5"
type: "ReLU"
bottom: "cccp5"
top: "cccp5"
}
layer {
name: "cccp6"
type: "Convolution"
bottom: "cccp5"
top: "cccp6"
param {
lr_mult: 0.1
decay_mult: 1
}
param {
lr_mult: 0.1
decay_mult: 0
}
convolution_param {
num_output: 10
kernel_size: 1
group:1
weight_filler { type: "gaussian" std: 0.05 } bias_filler { type: "constant" value: 0 }
}
}
layer {
name: "relu_cccp6"
type: "ReLU"
bottom: "cccp6"
top: "cccp6"
}
layer {
name: "pool3"
type: "Pooling"
bottom: "cccp6"
top: "pool3"
pooling_param {
pool: AVE
kernel_size: 7
stride: 1
engine: CAFFE
}
}
layer { name: "accuracy" type: "Accuracy" bottom: "pool3" bottom: "label" top: "accuracy" include { phase: TEST } }
layer { name: "loss" type: "SoftmaxWithLoss" bottom: "pool3" bottom: "label" top: "loss" }
# Solver configuration for the network defined in mnist_NiN.prototxt.
net: "examples/mnist/mnist_NiN.prototxt"

# Evaluation: number of test batches per test pass, and how often
# (in training iterations) a test pass is run.
test_iter: 100
test_interval: 500

# Optimisation hyper-parameters.
base_lr: 0.1
momentum: 0.9
weight_decay: 0.0005

# Learning-rate schedule and its parameters.
lr_policy: "inv"
gamma: 0.0001
power: 0.75

# Logging interval and total number of training iterations.
display: 100
max_iter: 10000

# Snapshot interval and output path prefix.
snapshot: 5000
snapshot_prefix: "examples/mnist/NiN"

# Device used for solving.
solver_mode: GPU