DeepCORAL
Low Accuracy for SVHN ---> MNIST
I got this accuracy for SVHN ---> MNIST:
###Test Source: Epoch: 2460, avg_loss: 0.0003, Accuracy: 73255/73257 (100.00%)
###Test Target: Epoch: 2460, avg_loss: 21.5493, Accuracy: 35242/60000 (58.74%)
That suggests data loading is not what matters for getting good accuracy, because for SVHN ---> MNIST I just use the default data processing.
@deep0learning Good observation. For SVHN ---> MNIST, using LeNet without adaptation the accuracy is 60%.
And using CORAL it should be 79%.
The AlexNet used in this repository (the "one weird trick" variant) is different from the original AlexNet used by CORAL. Also, the better accuracy in the paper is due to an additional mean loss function they used in the Caffe prototxt:
layer { name: "mean_loss" type: "EuclideanLoss" bottom: "mean_source" bottom: "mean_target" top: "mean_loss" loss_weight: 0 include { phase: TRAIN } }
@debasmitdas
How did you find this information?
In D-CORAL they did not use any mean loss / EuclideanLoss.
Check the .prototxt file in https://github.com/VisionLearningGroup/CORAL/tree/master/code after unzipping. I did not find any loss like the one you mentioned. See the .prototxt file:
# labelled source data
layer {
  name: "data"  type: "ImageData"  top: "data"  top: "label"
  include { phase: TRAIN }
  transform_param {
    mirror: true  crop_size: 227
    mean_value: 104.0  mean_value: 116.7  mean_value: 122.7
  }
  image_data_param {
    source: "examples/office31/amazon.txt"
    batch_size: 128  shuffle: true
    new_width: 256  new_height: 256
  }
}
# unlabelled target data
layer {
  name: "data_t"  type: "ImageData"  top: "data_t"  top: "label_t"
  include { phase: TRAIN }
  transform_param {
    mirror: true  crop_size: 227
    mean_value: 104.0  mean_value: 116.7  mean_value: 122.7
  }
  image_data_param {
    source: "examples/office31/webcam.txt"
    batch_size: 128  shuffle: true
    new_width: 256  new_height: 256
  }
}
# add Silence layer to suppress the unused target labels
layer {
  name: "silence_target_label"  type: "Silence"
  bottom: "label_t"
  include { phase: TRAIN }
}
# target data for testing
layer {
  name: "data"  type: "ImageData"  top: "data"  top: "label"
  include { phase: TEST }
  transform_param {
    mirror: true  crop_size: 227
    mean_value: 104.0  mean_value: 116.7  mean_value: 122.7
  }
  image_data_param {
    source: "examples/office31/webcam.txt"
    batch_size: 53   # 53*15 = 795, size of the target dataset
    shuffle: true
    new_width: 256  new_height: 256
  }
}
# conv1
layer {
  name: "conv1"  type: "Convolution"  bottom: "data"  top: "conv1"
  param { name: "sharedweights_conv1" lr_mult: 1.0 decay_mult: 1.0 }
  param { name: "sharedbias_conv1" lr_mult: 2.0 decay_mult: 0.0 }
  convolution_param {
    num_output: 96  pad: 0  kernel_size: 11  group: 1  stride: 4
    weight_filler { type: "gaussian" std: 0.005 }
    bias_filler { type: "constant" value: 0.0 }
  }
}
layer {
  name: "conv1_t"  type: "Convolution"  bottom: "data_t"  top: "conv1_t"
  include { phase: TRAIN }
  param { name: "sharedweights_conv1" lr_mult: 1.0 decay_mult: 1.0 }
  param { name: "sharedbias_conv1" lr_mult: 2.0 decay_mult: 0.0 }
  convolution_param {
    num_output: 96  pad: 0  kernel_size: 11  group: 1  stride: 4
    weight_filler { type: "gaussian" std: 0.005 }
    bias_filler { type: "constant" value: 0.0 }
  }
}
# relu1
layer { name: "relu1"  type: "ReLU"  bottom: "conv1"  top: "conv1" }
layer { name: "relu1_t"  type: "ReLU"  bottom: "conv1_t"  top: "conv1_t"  include { phase: TRAIN } }
# pool1
layer {
  name: "pool1"  type: "Pooling"  bottom: "conv1"  top: "pool1"
  pooling_param { pool: MAX kernel_size: 3 stride: 2 }
}
layer {
  name: "pool1_t"  type: "Pooling"  bottom: "conv1_t"  top: "pool1_t"
  include { phase: TRAIN }
  pooling_param { pool: MAX kernel_size: 3 stride: 2 }
}
# norm1
layer {
  name: "norm1"  type: "LRN"  bottom: "pool1"  top: "norm1"
  lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 }
}
layer {
  name: "norm1_t"  type: "LRN"  bottom: "pool1_t"  top: "norm1_t"
  include { phase: TRAIN }
  lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 }
}
# conv2
layer {
  name: "conv2"  type: "Convolution"  bottom: "norm1"  top: "conv2"
  param { name: "sharedweights_conv2" lr_mult: 1.0 decay_mult: 1.0 }
  param { name: "sharedbias_conv2" lr_mult: 2.0 decay_mult: 0.0 }
  convolution_param {
    num_output: 256  pad: 2  kernel_size: 5  group: 2  stride: 1
    weight_filler { type: "gaussian" std: 0.005 }
    bias_filler { type: "constant" value: 0.0 }
  }
}
layer {
  name: "conv2_t"  type: "Convolution"  bottom: "norm1_t"  top: "conv2_t"
  include { phase: TRAIN }
  param { name: "sharedweights_conv2" lr_mult: 1.0 decay_mult: 1.0 }
  param { name: "sharedbias_conv2" lr_mult: 2.0 decay_mult: 0.0 }
  convolution_param {
    num_output: 256  pad: 2  kernel_size: 5  group: 2  stride: 1
    weight_filler { type: "gaussian" std: 0.005 }
    bias_filler { type: "constant" value: 0.0 }
  }
}
# relu2
layer { name: "relu2"  type: "ReLU"  bottom: "conv2"  top: "conv2" }
layer { name: "relu2_t"  type: "ReLU"  bottom: "conv2_t"  top: "conv2_t"  include { phase: TRAIN } }
# pool2
layer {
  name: "pool2"  type: "Pooling"  bottom: "conv2"  top: "pool2"
  pooling_param { pool: MAX kernel_size: 3 stride: 2 }
}
layer {
  name: "pool2_t"  type: "Pooling"  bottom: "conv2_t"  top: "pool2_t"
  include { phase: TRAIN }
  pooling_param { pool: MAX kernel_size: 3 stride: 2 }
}
# norm2
layer {
  name: "norm2"  type: "LRN"  bottom: "pool2"  top: "norm2"
  lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 }
}
layer {
  name: "norm2_t"  type: "LRN"  bottom: "pool2_t"  top: "norm2_t"
  include { phase: TRAIN }
  lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 }
}
# conv3
layer {
  name: "conv3"  type: "Convolution"  bottom: "norm2"  top: "conv3"
  param { name: "sharedweights_conv3" lr_mult: 1.0 decay_mult: 1.0 }
  param { name: "sharedbias_conv3" lr_mult: 2.0 decay_mult: 0.0 }
  convolution_param {
    num_output: 384  pad: 1  kernel_size: 3  group: 1  stride: 1
    weight_filler { type: "gaussian" std: 0.005 }
    bias_filler { type: "constant" value: 0.0 }
  }
}
layer {
  name: "conv3_t"  type: "Convolution"  bottom: "norm2_t"  top: "conv3_t"
  include { phase: TRAIN }
  param { name: "sharedweights_conv3" lr_mult: 1.0 decay_mult: 1.0 }
  param { name: "sharedbias_conv3" lr_mult: 2.0 decay_mult: 0.0 }
  convolution_param {
    num_output: 384  pad: 1  kernel_size: 3  group: 1  stride: 1
    weight_filler { type: "gaussian" std: 0.005 }
    bias_filler { type: "constant" value: 0.0 }
  }
}
# relu3
layer { name: "relu3"  type: "ReLU"  bottom: "conv3"  top: "conv3" }
layer { name: "relu3_t"  type: "ReLU"  bottom: "conv3_t"  top: "conv3_t"  include { phase: TRAIN } }
# conv4
layer {
  name: "conv4"  type: "Convolution"  bottom: "conv3"  top: "conv4"
  param { name: "sharedweights_conv4" lr_mult: 1.0 decay_mult: 1.0 }
  param { name: "sharedbias_conv4" lr_mult: 2.0 decay_mult: 0.0 }
  convolution_param {
    num_output: 384  pad: 1  kernel_size: 3  group: 2  stride: 1
    weight_filler { type: "gaussian" std: 0.005 }
    bias_filler { type: "constant" value: 0.0 }
  }
}
layer {
  name: "conv4_t"  type: "Convolution"  bottom: "conv3_t"  top: "conv4_t"
  include { phase: TRAIN }
  param { name: "sharedweights_conv4" lr_mult: 1.0 decay_mult: 1.0 }
  param { name: "sharedbias_conv4" lr_mult: 2.0 decay_mult: 0.0 }
  convolution_param {
    num_output: 384  pad: 1  kernel_size: 3  group: 2  stride: 1
    weight_filler { type: "gaussian" std: 0.005 }
    bias_filler { type: "constant" value: 0.0 }
  }
}
# relu4
layer { name: "relu4"  type: "ReLU"  bottom: "conv4"  top: "conv4" }
layer { name: "relu4_t"  type: "ReLU"  bottom: "conv4_t"  top: "conv4_t"  include { phase: TRAIN } }
# conv5
layer {
  name: "conv5"  type: "Convolution"  bottom: "conv4"  top: "conv5"
  param { name: "sharedweights_conv5" lr_mult: 1.0 decay_mult: 1.0 }
  param { name: "sharedbias_conv5" lr_mult: 2.0 decay_mult: 0.0 }
  convolution_param {
    num_output: 256  pad: 1  kernel_size: 3  group: 2  stride: 1
    weight_filler { type: "gaussian" std: 0.005 }
    bias_filler { type: "constant" value: 0.0 }
  }
}
layer {
  name: "conv5_t"  type: "Convolution"  bottom: "conv4_t"  top: "conv5_t"
  include { phase: TRAIN }
  param { name: "sharedweights_conv5" lr_mult: 1.0 decay_mult: 1.0 }
  param { name: "sharedbias_conv5" lr_mult: 2.0 decay_mult: 0.0 }
  convolution_param {
    num_output: 256  pad: 1  kernel_size: 3  group: 2  stride: 1
    weight_filler { type: "gaussian" std: 0.005 }
    bias_filler { type: "constant" value: 0.0 }
  }
}
# relu5
layer { name: "relu5"  type: "ReLU"  bottom: "conv5"  top: "conv5" }
layer { name: "relu5_t"  type: "ReLU"  bottom: "conv5_t"  top: "conv5_t"  include { phase: TRAIN } }
# pool5
layer {
  name: "pool5"  type: "Pooling"  bottom: "conv5"  top: "pool5"
  pooling_param { pool: MAX kernel_size: 3 stride: 2 }
}
layer {
  name: "pool5_t"  type: "Pooling"  bottom: "conv5_t"  top: "pool5_t"
  include { phase: TRAIN }
  pooling_param { pool: MAX kernel_size: 3 stride: 2 }
}
# fc6
layer {
  name: "fc6"  type: "InnerProduct"  bottom: "pool5"  top: "fc6"
  param { name: "sharedweights_fc6" lr_mult: 1.0 decay_mult: 1.0 }
  param { name: "sharedbias_fc6" lr_mult: 2.0 decay_mult: 0.0 }
  inner_product_param {
    num_output: 4096
    weight_filler { type: "gaussian" std: 0.005 }
    bias_filler { type: "constant" value: 0.0 }
  }
}
layer {
  name: "fc6_t"  type: "InnerProduct"  bottom: "pool5_t"  top: "fc6_t"
  include { phase: TRAIN }
  param { name: "sharedweights_fc6" lr_mult: 1.0 decay_mult: 1.0 }
  param { name: "sharedbias_fc6" lr_mult: 2.0 decay_mult: 0.0 }
  inner_product_param {
    num_output: 4096
    weight_filler { type: "gaussian" std: 0.005 }
    bias_filler { type: "constant" value: 0.0 }
  }
}
# relu6
layer { name: "relu6"  type: "ReLU"  bottom: "fc6"  top: "fc6" }
layer { name: "relu6_t"  type: "ReLU"  bottom: "fc6_t"  top: "fc6_t"  include { phase: TRAIN } }
# drop6
layer {
  name: "drop6"  type: "Dropout"  bottom: "fc6"  top: "fc6"
  dropout_param { dropout_ratio: 0.5 }
}
layer {
  name: "drop6_t"  type: "Dropout"  bottom: "fc6_t"  top: "fc6_t"
  include { phase: TRAIN }
  dropout_param { dropout_ratio: 0.5 }
}
# fc7
layer {
  name: "fc7"  type: "InnerProduct"  bottom: "fc6"  top: "fc7"
  param { name: "sharedweights_fc7" lr_mult: 1.0 decay_mult: 1.0 }
  param { name: "sharedbias_fc7" lr_mult: 2.0 decay_mult: 0.0 }
  inner_product_param {
    num_output: 4096
    weight_filler { type: "gaussian" std: 0.005 }
    bias_filler { type: "constant" value: 0.0 }
  }
}
layer {
  name: "fc7_t"  type: "InnerProduct"  bottom: "fc6_t"  top: "fc7_t"
  include { phase: TRAIN }
  param { name: "sharedweights_fc7" lr_mult: 1.0 decay_mult: 1.0 }
  param { name: "sharedbias_fc7" lr_mult: 2.0 decay_mult: 0.0 }
  inner_product_param {
    num_output: 4096
    weight_filler { type: "gaussian" std: 0.005 }
    bias_filler { type: "constant" value: 0.0 }
  }
}
# relu7
layer { name: "relu7"  type: "ReLU"  bottom: "fc7"  top: "fc7" }
layer { name: "relu7_t"  type: "ReLU"  bottom: "fc7_t"  top: "fc7_t"  include { phase: TRAIN } }
# drop7
layer {
  name: "drop7"  type: "Dropout"  bottom: "fc7"  top: "fc7"
  dropout_param { dropout_ratio: 0.5 }
}
layer {
  name: "drop7_t"  type: "Dropout"  bottom: "fc7_t"  top: "fc7_t"
  include { phase: TRAIN }
  dropout_param { dropout_ratio: 0.5 }
}
# fc8
layer {
  name: "fc8_office"  type: "InnerProduct"  bottom: "fc7"  top: "fc8_office"
  param { name: "sharedweights_fc8" lr_mult: 10.0 decay_mult: 1.0 }
  param { name: "sharedbias_fc8" lr_mult: 20.0 decay_mult: 0.0 }
  inner_product_param {
    num_output: 31
    weight_filler { type: "gaussian" std: 0.005 }
    bias_filler { type: "constant" value: 0.0 }
  }
}
layer {
  name: "fc8_office_t"  type: "InnerProduct"  bottom: "fc7_t"  top: "fc8_office_t"
  include { phase: TRAIN }
  param { name: "sharedweights_fc8" lr_mult: 10.0 decay_mult: 1.0 }
  param { name: "sharedbias_fc8" lr_mult: 20.0 decay_mult: 0.0 }
  inner_product_param {
    num_output: 31
    weight_filler { type: "gaussian" std: 0.005 }
    bias_filler { type: "constant" value: 0.0 }
  }
}
layer {
  name: "coral_fc8"  type: "CORALLoss"
  bottom: "fc8_office"  bottom: "fc8_office_t"
  top: "coral_fc8"
  include { phase: TRAIN }
  loss_weight: 0.8
}
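For reference, the CORAL loss from the paper matches the second-order statistics of source and target activations: the squared Frobenius distance between their feature covariances, scaled by 1/(4*d^2). A minimal PyTorch sketch (not taken from either repository) could be:

```python
import torch

def coral_loss(source: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
    """CORAL loss: ||C_S - C_T||_F^2 / (4 * d^2).

    source: (n_s, d) activations, e.g. the fc8_office outputs
    target: (n_t, d) activations, e.g. the fc8_office_t outputs
    """
    d = source.size(1)
    source_c = source - source.mean(dim=0, keepdim=True)
    target_c = target - target.mean(dim=0, keepdim=True)
    cov_s = source_c.t() @ source_c / (source.size(0) - 1)
    cov_t = target_c.t() @ target_c / (target.size(0) - 1)
    return ((cov_s - cov_t) ** 2).sum() / (4 * d * d)
```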
# add silence to suppress output of fc8_office_t
layer {
  name: "silence_target_fc8"  type: "Silence"
  bottom: "fc8_office_t"
  include { phase: TRAIN }
}
# Classification Loss
layer {
  name: "classification_loss"  type: "SoftmaxWithLoss"
  bottom: "fc8_office"  bottom: "label"
  top: "classification_loss"
  loss_weight: 1
  include { phase: TRAIN }
}
layer {
  name: "accuracy"  type: "Accuracy"
  bottom: "fc8_office"  bottom: "label"
  top: "accuracy"
}