UWSOD icon indicating copy to clipboard operation
UWSOD copied to clipboard

How to load backbone checkpoint without detectron?

Open vadimkantorov opened this issue 2 years ago • 1 comment

How do I map pickle checkpoint keys to PyTorch state dict keys?

It seems that the norm layers have a different set of parameters. Was something fused?

I have loaded the checkpoint with state_dict = pickle.load(open('data/common/resnet50_ws_model_120_d2.pk', 'rb')). It has the following keys:

dict_keys(['stem_conv1_w',
'stem_conv1_bn_s',
'stem_conv1_bn_b',
'stem_conv2_w',
'stem_conv2_bn_s',
'stem_conv2_bn_b',
'stem_conv3_w',
'stem_conv3_bn_s',
'stem_conv3_bn_b',
'res2_0_branch2a_w',
'res2_0_branch2a_bn_s',
'res2_0_branch2a_bn_b',
'res2_0_branch2b_w',
'res2_0_branch2b_bn_s',
'res2_0_branch2b_bn_b',
'res2_0_branch2c_w',
'res2_0_branch2c_bn_s',
'res2_0_branch2c_bn_b',
'res2_0_branch1_w',
'res2_0_branch1_bn_s',
'res2_0_branch1_bn_b',
'res2_1_branch2a_w',
'res2_1_branch2a_bn_s',
'res2_1_branch2a_bn_b',
'res2_1_branch2b_w',
'res2_1_branch2b_bn_s',
'res2_1_branch2b_bn_b',
'res2_1_branch2c_w',
'res2_1_branch2c_bn_s',
'res2_1_branch2c_bn_b',
'res2_2_branch2a_w',
'res2_2_branch2a_bn_s',
'res2_2_branch2a_bn_b',
'res2_2_branch2b_w',
'res2_2_branch2b_bn_s',
'res2_2_branch2b_bn_b',
'res2_2_branch2c_w',
'res2_2_branch2c_bn_s',
'res2_2_branch2c_bn_b',
'res3_0_branch2a_w',
'res3_0_branch2a_bn_s',
'res3_0_branch2a_bn_b',
'res3_0_branch2b_w',
'res3_0_branch2b_bn_s',
'res3_0_branch2b_bn_b',
'res3_0_branch2c_w',
'res3_0_branch2c_bn_s',
'res3_0_branch2c_bn_b',
'res3_0_branch1_w',
'res3_0_branch1_bn_s',
'res3_0_branch1_bn_b',
'res3_1_branch2a_w',
'res3_1_branch2a_bn_s',
'res3_1_branch2a_bn_b',
'res3_1_branch2b_w',
'res3_1_branch2b_bn_s',
'res3_1_branch2b_bn_b',
'res3_1_branch2c_w',
'res3_1_branch2c_bn_s',
'res3_1_branch2c_bn_b',
'res3_2_branch2a_w',
'res3_2_branch2a_bn_s',
'res3_2_branch2a_bn_b',
'res3_2_branch2b_w',
'res3_2_branch2b_bn_s',
'res3_2_branch2b_bn_b',
'res3_2_branch2c_w',
'res3_2_branch2c_bn_s',
'res3_2_branch2c_bn_b',
'res3_3_branch2a_w',
'res3_3_branch2a_bn_s',
'res3_3_branch2a_bn_b',
'res3_3_branch2b_w',
'res3_3_branch2b_bn_s',
'res3_3_branch2b_bn_b',
'res3_3_branch2c_w',
'res3_3_branch2c_bn_s',
'res3_3_branch2c_bn_b',
'res4_0_branch2a_w',
'res4_0_branch2a_bn_s',
'res4_0_branch2a_bn_b',
'res4_0_branch2b_w',
'res4_0_branch2b_bn_s',
'res4_0_branch2b_bn_b',
'res4_0_branch2c_w',
'res4_0_branch2c_bn_s',
'res4_0_branch2c_bn_b',
'res4_0_branch1_w',
'res4_0_branch1_bn_s',
'res4_0_branch1_bn_b',
'res4_1_branch2a_w',
'res4_1_branch2a_bn_s',
'res4_1_branch2a_bn_b',
'res4_1_branch2b_w',
'res4_1_branch2b_bn_s',
'res4_1_branch2b_bn_b',
'res4_1_branch2c_w',
'res4_1_branch2c_bn_s',
'res4_1_branch2c_bn_b',
'res4_2_branch2a_w',
'res4_2_branch2a_bn_s',
'res4_2_branch2a_bn_b',
'res4_2_branch2b_w',
'res4_2_branch2b_bn_s',
'res4_2_branch2b_bn_b',
'res4_2_branch2c_w',
'res4_2_branch2c_bn_s',
'res4_2_branch2c_bn_b',
'res4_3_branch2a_w',
'res4_3_branch2a_bn_s',
'res4_3_branch2a_bn_b',
'res4_3_branch2b_w',
'res4_3_branch2b_bn_s',
'res4_3_branch2b_bn_b',
'res4_3_branch2c_w',
'res4_3_branch2c_bn_s',
'res4_3_branch2c_bn_b',
'res4_4_branch2a_w',
'res4_4_branch2a_bn_s',
'res4_4_branch2a_bn_b',
'res4_4_branch2b_w',
'res4_4_branch2b_bn_s',
'res4_4_branch2b_bn_b',
'res4_4_branch2c_w',
'res4_4_branch2c_bn_s',
'res4_4_branch2c_bn_b',
'res4_5_branch2a_w',
'res4_5_branch2a_bn_s',
'res4_5_branch2a_bn_b',
'res4_5_branch2b_w',
'res4_5_branch2b_bn_s',
'res4_5_branch2b_bn_b',
'res4_5_branch2c_w',
'res4_5_branch2c_bn_s',
'res4_5_branch2c_bn_b',
'res5_0_branch2a_w',
'res5_0_branch2a_bn_s',
'res5_0_branch2a_bn_b',
'res5_0_branch2b_w',
'res5_0_branch2b_bn_s',
'res5_0_branch2b_bn_b',
'res5_0_branch2c_w',
'res5_0_branch2c_bn_s',
'res5_0_branch2c_bn_b',
'res5_0_branch1_w',
'res5_0_branch1_bn_s',
'res5_0_branch1_bn_b',
'res5_1_branch2a_w',
'res5_1_branch2a_bn_s',
'res5_1_branch2a_bn_b',
'res5_1_branch2b_w',
'res5_1_branch2b_bn_s',
'res5_1_branch2b_bn_b',
'res5_1_branch2c_w',
'res5_1_branch2c_bn_s',
'res5_1_branch2c_bn_b',
'res5_2_branch2a_w',
'res5_2_branch2a_bn_s',
'res5_2_branch2a_bn_b',
'res5_2_branch2b_w',
'res5_2_branch2b_bn_s',
'res5_2_branch2b_bn_b',
'res5_2_branch2c_w',
'res5_2_branch2c_bn_s',
'res5_2_branch2c_bn_b',
'fc1_w',
'fc1_b',
'fc2_w',
'fc2_b',
'last_out_L1000_w',
'last_out_L1000_b'])

My resnet50_ws model has the following keys:

dict_keys([
'stem.conv1.weight',
'stem.conv1.norm.weight',
'stem.conv1.norm.bias',
'stem.conv1.norm.running_mean',
'stem.conv1.norm.running_var',
'stem.conv2.weight',
'stem.conv2.norm.weight',
'stem.conv2.norm.bias',
'stem.conv2.norm.running_mean',
'stem.conv2.norm.running_var',
'stem.conv3.weight',
'stem.conv3.norm.weight',
'stem.conv3.norm.bias',
'stem.conv3.norm.running_mean',
'stem.conv3.norm.running_var',
'res2.0.shortcut.weight',
'res2.0.shortcut.norm.weight',
'res2.0.shortcut.norm.bias',
'res2.0.shortcut.norm.running_mean',
'res2.0.shortcut.norm.running_var',
'res2.0.conv1.weight',
'res2.0.conv1.norm.weight',
'res2.0.conv1.norm.bias',
'res2.0.conv1.norm.running_mean',
'res2.0.conv1.norm.running_var',
'res2.0.conv2.weight',
'res2.0.conv2.norm.weight',
'res2.0.conv2.norm.bias',
'res2.0.conv2.norm.running_mean',
'res2.0.conv2.norm.running_var',
'res2.0.conv3.weight',
'res2.0.conv3.norm.weight',
'res2.0.conv3.norm.bias',
'res2.0.conv3.norm.running_mean',
'res2.0.conv3.norm.running_var',
'res2.1.conv1.weight',
'res2.1.conv1.norm.weight',
'res2.1.conv1.norm.bias',
'res2.1.conv1.norm.running_mean',
'res2.1.conv1.norm.running_var',
'res2.1.conv2.weight',
'res2.1.conv2.norm.weight',
'res2.1.conv2.norm.bias',
'res2.1.conv2.norm.running_mean',
'res2.1.conv2.norm.running_var',
'res2.1.conv3.weight',
'res2.1.conv3.norm.weight',
'res2.1.conv3.norm.bias',
'res2.1.conv3.norm.running_mean',
'res2.1.conv3.norm.running_var',
'res2.2.conv1.weight',
'res2.2.conv1.norm.weight',
'res2.2.conv1.norm.bias',
'res2.2.conv1.norm.running_mean',
'res2.2.conv1.norm.running_var',
'res2.2.conv2.weight',
'res2.2.conv2.norm.weight',
'res2.2.conv2.norm.bias',
'res2.2.conv2.norm.running_mean',
'res2.2.conv2.norm.running_var',
'res2.2.conv3.weight',
'res2.2.conv3.norm.weight',
'res2.2.conv3.norm.bias',
'res2.2.conv3.norm.running_mean',
'res2.2.conv3.norm.running_var',
'res3.0.shortcut.weight',
'res3.0.shortcut.norm.weight',
'res3.0.shortcut.norm.bias',
'res3.0.shortcut.norm.running_mean',
'res3.0.shortcut.norm.running_var',
'res3.0.conv1.weight',
'res3.0.conv1.norm.weight',
'res3.0.conv1.norm.bias',
'res3.0.conv1.norm.running_mean',
'res3.0.conv1.norm.running_var',
'res3.0.conv2.weight',
'res3.0.conv2.norm.weight',
'res3.0.conv2.norm.bias',
'res3.0.conv2.norm.running_mean',
'res3.0.conv2.norm.running_var',
'res3.0.conv3.weight',
'res3.0.conv3.norm.weight',
'res3.0.conv3.norm.bias',
'res3.0.conv3.norm.running_mean',
'res3.0.conv3.norm.running_var',
'res3.1.conv1.weight',
'res3.1.conv1.norm.weight',
'res3.1.conv1.norm.bias',
'res3.1.conv1.norm.running_mean',
'res3.1.conv1.norm.running_var',
'res3.1.conv2.weight',
'res3.1.conv2.norm.weight',
'res3.1.conv2.norm.bias',
'res3.1.conv2.norm.running_mean',
'res3.1.conv2.norm.running_var',
'res3.1.conv3.weight',
'res3.1.conv3.norm.weight',
'res3.1.conv3.norm.bias',
'res3.1.conv3.norm.running_mean',
'res3.1.conv3.norm.running_var',
'res3.2.conv1.weight',
'res3.2.conv1.norm.weight',
'res3.2.conv1.norm.bias',
'res3.2.conv1.norm.running_mean',
'res3.2.conv1.norm.running_var',
'res3.2.conv2.weight',
'res3.2.conv2.norm.weight',
'res3.2.conv2.norm.bias',
'res3.2.conv2.norm.running_mean',
'res3.2.conv2.norm.running_var',
'res3.2.conv3.weight',
'res3.2.conv3.norm.weight',
'res3.2.conv3.norm.bias',
'res3.2.conv3.norm.running_mean',
'res3.2.conv3.norm.running_var',
'res3.3.conv1.weight',
'res3.3.conv1.norm.weight',
'res3.3.conv1.norm.bias',
'res3.3.conv1.norm.running_mean',
'res3.3.conv1.norm.running_var',
'res3.3.conv2.weight',
'res3.3.conv2.norm.weight',
'res3.3.conv2.norm.bias',
'res3.3.conv2.norm.running_mean',
'res3.3.conv2.norm.running_var',
'res3.3.conv3.weight',
'res3.3.conv3.norm.weight',
'res3.3.conv3.norm.bias',
'res3.3.conv3.norm.running_mean',
'res3.3.conv3.norm.running_var',
'res4.0.shortcut.weight',
'res4.0.shortcut.norm.weight',
'res4.0.shortcut.norm.bias',
'res4.0.shortcut.norm.running_mean',
'res4.0.shortcut.norm.running_var',
'res4.0.conv1.weight',
'res4.0.conv1.norm.weight',
'res4.0.conv1.norm.bias',
'res4.0.conv1.norm.running_mean',
'res4.0.conv1.norm.running_var',
'res4.0.conv2.weight',
'res4.0.conv2.norm.weight',
'res4.0.conv2.norm.bias',
'res4.0.conv2.norm.running_mean',
'res4.0.conv2.norm.running_var',
'res4.0.conv3.weight',
'res4.0.conv3.norm.weight',
'res4.0.conv3.norm.bias',
'res4.0.conv3.norm.running_mean',
'res4.0.conv3.norm.running_var',
'res4.1.conv1.weight',
'res4.1.conv1.norm.weight',
'res4.1.conv1.norm.bias',
'res4.1.conv1.norm.running_mean',
'res4.1.conv1.norm.running_var',
'res4.1.conv2.weight',
'res4.1.conv2.norm.weight',
'res4.1.conv2.norm.bias',
'res4.1.conv2.norm.running_mean',
'res4.1.conv2.norm.running_var',
'res4.1.conv3.weight',
'res4.1.conv3.norm.weight',
'res4.1.conv3.norm.bias',
'res4.1.conv3.norm.running_mean',
'res4.1.conv3.norm.running_var',
'res4.2.conv1.weight',
'res4.2.conv1.norm.weight',
'res4.2.conv1.norm.bias',
'res4.2.conv1.norm.running_mean',
'res4.2.conv1.norm.running_var',
'res4.2.conv2.weight',
'res4.2.conv2.norm.weight',
'res4.2.conv2.norm.bias',
'res4.2.conv2.norm.running_mean',
'res4.2.conv2.norm.running_var',
'res4.2.conv3.weight',
'res4.2.conv3.norm.weight',
'res4.2.conv3.norm.bias',
'res4.2.conv3.norm.running_mean',
'res4.2.conv3.norm.running_var',
'res4.3.conv1.weight',
'res4.3.conv1.norm.weight',
'res4.3.conv1.norm.bias',
'res4.3.conv1.norm.running_mean',
'res4.3.conv1.norm.running_var',
'res4.3.conv2.weight',
'res4.3.conv2.norm.weight',
'res4.3.conv2.norm.bias',
'res4.3.conv2.norm.running_mean',
'res4.3.conv2.norm.running_var',
'res4.3.conv3.weight',
'res4.3.conv3.norm.weight',
'res4.3.conv3.norm.bias',
'res4.3.conv3.norm.running_mean',
'res4.3.conv3.norm.running_var',
'res4.4.conv1.weight',
'res4.4.conv1.norm.weight',
'res4.4.conv1.norm.bias',
'res4.4.conv1.norm.running_mean',
'res4.4.conv1.norm.running_var',
'res4.4.conv2.weight',
'res4.4.conv2.norm.weight',
'res4.4.conv2.norm.bias',
'res4.4.conv2.norm.running_mean',
'res4.4.conv2.norm.running_var',
'res4.4.conv3.weight',
'res4.4.conv3.norm.weight',
'res4.4.conv3.norm.bias',
'res4.4.conv3.norm.running_mean',
'res4.4.conv3.norm.running_var',
'res4.5.conv1.weight',
'res4.5.conv1.norm.weight',
'res4.5.conv1.norm.bias',
'res4.5.conv1.norm.running_mean',
'res4.5.conv1.norm.running_var',
'res4.5.conv2.weight',
'res4.5.conv2.norm.weight',
'res4.5.conv2.norm.bias',
'res4.5.conv2.norm.running_mean',
'res4.5.conv2.norm.running_var',
'res4.5.conv3.weight',
'res4.5.conv3.norm.weight',
'res4.5.conv3.norm.bias',
'res4.5.conv3.norm.running_mean',
'res4.5.conv3.norm.running_var',
'res5.0.shortcut.weight',
'res5.0.shortcut.norm.weight',
'res5.0.shortcut.norm.bias',
'res5.0.shortcut.norm.running_mean',
'res5.0.shortcut.norm.running_var',
'res5.0.conv1.weight',
'res5.0.conv1.norm.weight',
'res5.0.conv1.norm.bias',
'res5.0.conv1.norm.running_mean',
'res5.0.conv1.norm.running_var',
'res5.0.conv2.weight',
'res5.0.conv2.norm.weight',
'res5.0.conv2.norm.bias',
'res5.0.conv2.norm.running_mean',
'res5.0.conv2.norm.running_var',
'res5.0.conv3.weight',
'res5.0.conv3.norm.weight',
'res5.0.conv3.norm.bias',
'res5.0.conv3.norm.running_mean',
'res5.0.conv3.norm.running_var',
'res5.1.conv1.weight',
'res5.1.conv1.norm.weight',
'res5.1.conv1.norm.bias',
'res5.1.conv1.norm.running_mean',
'res5.1.conv1.norm.running_var',
'res5.1.conv2.weight',
'res5.1.conv2.norm.weight',
'res5.1.conv2.norm.bias',
'res5.1.conv2.norm.running_mean',
'res5.1.conv2.norm.running_var',
'res5.1.conv3.weight',
'res5.1.conv3.norm.weight',
'res5.1.conv3.norm.bias',
'res5.1.conv3.norm.running_mean',
'res5.1.conv3.norm.running_var',
'res5.2.conv1.weight',
'res5.2.conv1.norm.weight',
'res5.2.conv1.norm.bias',
'res5.2.conv1.norm.running_mean',
'res5.2.conv1.norm.running_var',
'res5.2.conv2.weight',
'res5.2.conv2.norm.weight',
'res5.2.conv2.norm.bias',
'res5.2.conv2.norm.running_mean',
'res5.2.conv2.norm.running_var',
'res5.2.conv3.weight',
'res5.2.conv3.norm.weight',
'res5.2.conv3.norm.bias',
'res5.2.conv3.norm.running_mean',
'res5.2.conv3.norm.running_var'])

vadimkantorov avatar Oct 29 '21 15:10 vadimkantorov

Yes, some layers in the Caffe2 model are merged when converting to PyTorch weights, following the approach here.

We can use the matching heuristics in d2 to map the keys.

shenyunhang avatar Mar 25 '22 08:03 shenyunhang