C3D
C3D copied to clipboard
Loss is very high when using SIGMOID_CROSS_ENTROPY_LOSS or EUCLIDEAN_LOSS
Hi,
I want to predict saliency values of a voxel, using V2V. The output and truth have 1 channel, and the truth has labels 0 and 1. The model is finetuned from C3D.
I don't know why the loss is so high when I use SIGMOID_CROSS_ENTROPY_LOSS or (SIGMOID + EUCLIDEAN_LOSS), even when I vary base_lr from 0.5 down to 0.00005. The loss is larger than 10,000. Can you help me?
Regards,
@ltnghia this may be because the prediction size is very large: it is the same as the input size, K x 16 x 112 x 112, which is 200704 x K values (K is the number of channels of your prediction). In https://github.com/facebook/C3D/blob/master/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu#L32, the loss is only normalized by the mini-batch size. To make it more readable, you can further normalize it by dividing by (112 x 112 x 16), which will make the value much smaller.
Thank you for your help.
So should I change https://github.com/facebook/C3D/blob/master/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu#L23 and https://github.com/facebook/C3D/blob/master/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu#L41 to "num = count"?
Even when I change the prediction size and base_lr (from 0.5 down to 0.0000005), the network cannot learn anything. The output is always the same single value for all pixels.
I finetune the network from the C3D weight file.
Can you help me? Regards,
layers {
name: "conv1a"
type: CONVOLUTION3D
bottom: "data"
top: "conv1a"
blobs_lr: 0
blobs_lr: 0
weight_decay: 1
weight_decay: 0
convolution_param {
num_output: 64
kernel_size: 3
kernel_depth: 3
pad: 1
temporal_pad: 1
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layers {
name: "relu1a"
type: RELU
bottom: "conv1a"
top: "conv1a"
}
layers {
name: "pool1"
type: POOLING3D
bottom: "conv1a"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
kernel_depth: 1
stride: 2
temporal_stride: 1
}
}
layers {
name: "conv2a"
type: CONVOLUTION3D
bottom: "pool1"
top: "conv2a"
blobs_lr: 0
blobs_lr: 0
weight_decay: 1
weight_decay: 0
convolution_param {
num_output: 128
kernel_size: 3
kernel_depth: 3
pad: 1
temporal_pad: 1
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 1
}
}
}
layers {
name: "relu2a"
type: RELU
bottom: "conv2a"
top: "conv2a"
}
layers {
name: "pool2"
type: POOLING3D
bottom: "conv2a"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 2
kernel_depth: 2
stride: 2
temporal_stride: 2
}
}
layers {
name: "conv3a"
type: CONVOLUTION3D
bottom: "pool2"
top: "conv3a"
blobs_lr: 0
blobs_lr: 0
weight_decay: 1
weight_decay: 0
convolution_param {
num_output: 256
kernel_size: 3
kernel_depth: 3
pad: 1
temporal_pad: 1
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 1
}
}
}
layers {
name: "relu3a"
type: RELU
bottom: "conv3a"
top: "conv3a"
}
layers {
name: "conv3b"
type: CONVOLUTION3D
bottom: "conv3a"
top: "conv3b"
blobs_lr: 0
blobs_lr: 0
weight_decay: 1
weight_decay: 0
convolution_param {
num_output: 256
kernel_size: 3
kernel_depth: 3
pad: 1
temporal_pad: 1
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 1
}
}
}
layers {
name: "relu3b"
type: RELU
bottom: "conv3b"
top: "conv3b"
}
layers {
name: "pool3"
type: POOLING3D
bottom: "conv3b"
top: "pool3"
pooling_param {
pool: MAX
kernel_size: 2
kernel_depth: 2
stride: 2
temporal_stride: 2
}
}
layers {
name: "conv4a"
type: CONVOLUTION3D
bottom: "pool3"
top: "conv4a"
blobs_lr: 0
blobs_lr: 0
weight_decay: 1
weight_decay: 0
convolution_param {
num_output: 512
kernel_size: 3
kernel_depth: 3
pad: 1
temporal_pad: 1
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 1
}
}
}
layers {
name: "relu4a"
type: RELU
bottom: "conv4a"
top: "conv4a"
}
layers {
name: "conv4b"
type: CONVOLUTION3D
bottom: "conv4a"
top: "conv4b"
blobs_lr: 0
blobs_lr: 0
weight_decay: 1
weight_decay: 0
convolution_param {
num_output: 512
kernel_size: 3
kernel_depth: 3
pad: 1
temporal_pad: 1
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 1
}
}
}
layers {
name: "relu4b"
type: RELU
bottom: "conv4b"
top: "conv4b"
}
layers {
name: "pool4"
type: POOLING3D
bottom: "conv4b"
top: "pool4"
pooling_param {
pool: MAX
kernel_size: 2
kernel_depth: 2
stride: 2
temporal_stride: 2
}
}
layers {
name: "conv5a"
type: CONVOLUTION3D
bottom: "pool4"
top: "conv5a"
blobs_lr: 0
blobs_lr: 0
weight_decay: 1
weight_decay: 0
convolution_param {
num_output: 512
kernel_size: 3
kernel_depth: 3
pad: 1
temporal_pad: 1
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 1
}
}
}
layers {
name: "relu5a"
type: RELU
bottom: "conv5a"
top: "conv5a"
}
layers {
name: "conv5b"
type: CONVOLUTION3D
bottom: "conv5a"
top: "conv5b"
blobs_lr: 1
blobs_lr: 2
weight_decay: 1
weight_decay: 0
convolution_param {
num_output: 512
kernel_size: 3
kernel_depth: 3
pad: 1
temporal_pad: 1
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 1
}
}
}
layers {
name: "relu5b"
type: RELU
bottom: "conv5b"
top: "conv5b"
}
layers {
name: "deconv5b"
type: DECONVOLUTION3D
bottom: "conv5b"
top: "deconv5b"
blobs_lr: 1
blobs_lr: 2
weight_decay: 1
weight_decay: 0
convolution_param {
num_output: 64
kernel_size: 4
kernel_depth: 4
pad: 1
temporal_pad: 1
stride: 2
temporal_stride: 2
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 1
}
}
}
layers {
name: "relu5b-de"
type: RELU
bottom: "deconv5b"
top: "deconv5b"
}
layers {
name: "predict4b"
type: CONVOLUTION3D
bottom: "conv4b"
top: "predict4b"
blobs_lr: 1
blobs_lr: 2
weight_decay: 1
weight_decay: 0
convolution_param {
num_output: 64
kernel_size: 3
kernel_depth: 3
pad: 1
temporal_pad: 1
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 1
}
}
}
layers {
name: "concat1"
type: CONCAT
bottom: "deconv5b"
bottom: "predict4b"
top: "concat1"
concat_param {
concat_dim: 1
}
}
layers {
name: "deconv4b"
type: DECONVOLUTION3D
bottom: "concat1"
top: "deconv4b"
blobs_lr: 1
blobs_lr: 2
weight_decay: 1
weight_decay: 0
convolution_param {
num_output: 64
kernel_size: 4
kernel_depth: 4
pad: 1
temporal_pad: 1
stride: 2
temporal_stride: 2
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 1
}
}
}
layers {
name: "relu4b-de"
type: RELU
bottom: "deconv4b"
top: "deconv4b"
}
layers {
name: "predict3b"
type: CONVOLUTION3D
bottom: "conv3b"
top: "predict3b"
blobs_lr: 1
blobs_lr: 2
weight_decay: 1
weight_decay: 0
convolution_param {
num_output: 64
kernel_size: 3
kernel_depth: 3
pad: 1
temporal_pad: 1
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 1
}
}
}
layers {
name: "concat2"
type: CONCAT
bottom: "deconv4b"
bottom: "predict3b"
top: "concat2"
concat_param {
concat_dim: 1
}
}
layers {
name: "deconv3b"
type: DECONVOLUTION3D
bottom: "concat2"
top: "deconv3b"
blobs_lr: 1
blobs_lr: 2
weight_decay: 1
weight_decay: 0
convolution_param {
num_output: 64
kernel_size: 8
kernel_depth: 4
pad: 2
temporal_pad: 1
stride: 4
temporal_stride: 2
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 1
}
}
}
layers {
name: "relu3b-de"
type: RELU
bottom: "deconv3b"
top: "deconv3b"
}
layers {
name: "predict"
type: CONVOLUTION3D
bottom: "deconv3b"
top: "predict"
blobs_lr: 1
blobs_lr: 2
weight_decay: 1
weight_decay: 0
convolution_param {
num_output: 1
kernel_size: 3
kernel_depth: 3
pad: 1
temporal_pad: 1
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 1
}
}
}
layers {
name: "loss"
type: SIGMOID_CROSS_ENTROPY_LOSS
bottom: "predict"
bottom: "truth"
}
and
train_net: "V2V.prototxt"
base_lr: 0.0005
lr_policy: "step"
gamma: 0.1
stepsize: 10000
momentum: 0.9
weight_decay: 0.005
display: 100
max_iter: 100000
snapshot: 10000
snapshot_prefix: "V2V"
solver_mode: GPU
device_id: 0
I think the problem may be in your video data layer. Can you show your xx.lst file and your data layer definition?