icevision
icevision copied to clipboard
`Learner.freeze` doesn't work as expected in fastai
🐛 Bug
Describe the bug
Learner.freeze_to doesn't actually freeze the layer groups
This has been discussed in more depth on the Discord forums here
To Reproduce
# Imports
import torchvision.models as models
from fastai.vision.all import *
from torchvision.models.detection.backbone_utils import BackboneWithFPN
from icevision.backbones.mobilenet import mobilenet_param_groups
# Define parameter groups. 5 groups are created for what will become
# `Learner.model.backbone`
def mobilenet_fpn_param_groups(model: nn.Module) -> List[List[nn.Parameter]]:
    """Split a MobileNet + FPN backbone into five parameter groups.

    The first four groups cover consecutive slices of ``model.body``
    (child ``0``, children ``1-2``, children ``3-11`` and children ``12``
    through the last one); the fifth group holds the parameters of
    ``model.fpn``.

    Args:
        model: Backbone exposing a ``body`` attribute (whose children are
            reachable by their stringified index) and an ``fpn`` attribute.

    Returns:
        One list of parameters per group, in the order described above.
    """
    body = model.body
    # Half-open (start, stop) index ranges over the children of `body`.
    stage_bounds = [(0, 1), (1, 3), (3, 12), (12, len(body))]
    stages = [
        nn.Sequential(*(getattr(body, str(idx)) for idx in range(start, stop)))
        for start, stop in stage_bounds
    ]
    stages.append(model.fpn)

    _param_groups = [list(stage.parameters()) for stage in stages]
    # Helper defined elsewhere; presumably verifies that every parameter of
    # `model` ended up in one of the groups -- TODO confirm.
    check_all_model_params_in_groups2(model, _param_groups)
    return _param_groups
# Create mobilenet FPN and assign param groups
body = models.mobilenet_v2(pretrained=True).features
body.out_channels = 1280
# Maps names of `body` children (keys) to the names given to the feature
# maps that are handed to the FPN (values).
fpn_layer_map_mobilenet = {
    '3': '0',
    '6': '1',
    '12': '2',
    '18': '3',
}
backbone = BackboneWithFPN(body, fpn_layer_map_mobilenet, [24, 32, 96, 1280], 256)
# Bind the grouping function as a method of this backbone instance.
# `mobilenet_fpn_param_groups` only accepts `model`, so it is bound directly:
# wrapping it in `partial(..., freeze_body_bn=True)` would raise a TypeError
# (unexpected keyword argument) as soon as `backbone.param_groups()` is called.
backbone.param_groups = MethodType(mobilenet_fpn_param_groups, backbone)
Now, when you call Learner.freeze_to, it doesn't actually freeze the model.
# Build the fastai learner for Mask R-CNN.
# NOTE(review): `train_dl`, `valid_dl` and `model` are not defined in this
# snippet; presumably `model` is a Mask R-CNN built on the `backbone` above.
learn = mask_rcnn.fastai.learner(dls=[train_dl, valid_dl], model=model)
# Ask the learner to freeze the first 4 parameter groups.
learn.freeze_to(4)
I've modified a helper function borrowed from the PyTorch forums to test this
import copy
# Snapshot the model before training so its weights can be compared with the
# trained model afterwards.
untrained_model = copy.deepcopy(learn.model)
def models_equal(m1: nn.Module,
                 m2: nn.Module,
                 verbose: bool = True) -> bool:
    """Check if `m1` is identical to `m2`, layer by layer, weight by weight.

    Args:
        m1: First model.
        m2: Second model.
        verbose: When true, print every mismatching ``state_dict`` entry and
            a summary message.

    Returns:
        ``True`` if every tensor in the two ``state_dict``s is equal,
        ``False`` if at least one differs.

    Raises:
        ValueError: If the two ``state_dict``s do not contain the same keys
            in the same order, i.e. the models have different architectures.
    """
    state_1, state_2 = m1.state_dict(), m2.state_dict()

    # Compare the key sequences up front. Zipping the two dicts would silently
    # stop at the shorter one and would only notice differing keys when the
    # paired tensors also happened to differ.
    if list(state_1) != list(state_2):
        message = 'Models being compared have different architectures'
        if verbose: print(message)
        raise ValueError(message)

    models_differ = 0
    for key, tensor_1 in state_1.items():
        if not torch.equal(tensor_1, state_2[key]):
            models_differ += 1
            if verbose: print(f'Mismtach found at {key}')

    if models_differ == 0:
        if verbose: print('Models match perfectly')
        return True
    return False
Train & Compare -- This test fails, and the names of all the layers whose weights don't match are printed out
# Train, then compare the backbone body of the trained model (moved to the
# CPU) against the snapshot taken before training.
learn.fit(1)
models_equal(learn.model.cpu().backbone.body,
untrained_model.backbone.body)
Expected behavior: The first 4 parameter groups should have been frozen, so the weights in `backbone.body` should be unchanged after training.
Desktop (please complete the following information):
- OS: Ubuntu 18.04
@potipot have you ever noticed this behavior?