icevision icon indicating copy to clipboard operation
icevision copied to clipboard

`Learner.freeze` doesn't work as expected in fastai

Open rsomani95 opened this issue 5 years ago • 1 comments

🐛 Bug

Describe the bug: `Learner.freeze_to` doesn't actually freeze the layer groups. This has been discussed in more depth on the Discord forums here.

To Reproduce

# Imports
import torchvision.models as models
from fastai.vision.all import *
from torchvision.models.detection.backbone_utils import BackboneWithFPN
from icevision.backbones.mobilenet import mobilenet_param_groups

# Define parameter groups. 5 groups are created for what will become
# `Learner.model.backbone`
def mobilenet_fpn_param_groups(model: nn.Module) -> List[List[nn.Parameter]]:
    """
    Split `model` into five parameter groups.

    The first four groups are consecutive slices of `model.body`, whose
    children are looked up by their stringified index: child 0, children
    1-2, children 3-11, and children 12 up to `len(model.body)`. The fifth
    group is `model.fpn`.

    Returns one list of parameters per group, in that order. The groups are
    passed to `check_all_model_params_in_groups2` together with `model`
    before being returned.
    """
    body = model.body

    def _stage(start, stop):
        # Wrap the children `start..stop-1` of `body` in a single container
        # so their parameters can be collected together.
        return nn.Sequential(*(getattr(body, str(idx)) for idx in range(start, stop)))

    stages = [
        _stage(0, 1),
        _stage(1, 3),
        _stage(3, 12),
        _stage(12, len(body)),
        model.fpn,
    ]

    _param_groups = [list(stage.parameters()) for stage in stages]
    check_all_model_params_in_groups2(model, _param_groups)

    return _param_groups

# Create mobilenet FPN and assign param groups
body = models.mobilenet_v2(pretrained=True).features
body.out_channels = 1280
# Names of the `body` children whose outputs are handed to the FPN, mapped to
# the names given to those outputs.
fpn_layer_map_mobilenet = {'3' : '0',
                           '6' : '1',
                           '12': '2',
                           '18': '3'}
backbone = BackboneWithFPN(body, fpn_layer_map_mobilenet, [24, 32, 96, 1280], 256)
# Bind the grouping function as a method so `backbone.param_groups()` passes
# `backbone` as `model`. The function takes no `freeze_body_bn` argument, so it
# is bound directly; wrapping it in `partial(..., freeze_body_bn=True)` would
# raise a TypeError as soon as `param_groups()` is called.
backbone.param_groups = MethodType(mobilenet_fpn_param_groups, backbone)

Now, when you call Learner.freeze_to, it doesn't actually freeze the model.

# NOTE(review): `mask_rcnn`, `model`, `train_dl` and `valid_dl` are not defined
# in the snippet shown here; presumably `model` is built around `backbone` -- confirm.
learn = mask_rcnn.fastai.learner(dls=[train_dl, valid_dl], model=model)
# The call this report is about: it is expected to freeze the first 4 parameter groups.
learn.freeze_to(4)

I've modified a helper function borrowed from the PyTorch forums to test this

import copy
# Keep an independent copy of the model as it is before `learn.fit` runs, so
# the trained weights can later be compared against it.
untrained_model = copy.deepcopy(learn.model)

def models_equal(m1: nn.Module,
                 m2: nn.Module,
                 verbose: bool = True) -> bool:
    """Check if `m1` is identical to `m2`, layer by layer, weight by weight.

    The comparison walks both `state_dict()`s entry by entry and uses
    `torch.equal` on each pair of tensors.

    Note that `state_dict()` also contains persistent buffers (for example
    BatchNorm running statistics), so a mismatch reported here does not by
    itself show that a frozen parameter was updated.

    Args:
        m1: First model.
        m2: Second model.
        verbose: When true, print the name of every mismatching entry, or a
            confirmation message when everything matches.

    Returns:
        True when every entry matches, False otherwise.

    Raises:
        ValueError: If the two state dicts do not have the same entry names
            in the same order, i.e. the models have different architectures.
    """
    state_1, state_2 = m1.state_dict(), m2.state_dict()

    # Compare the structure up front. Zipping the two dicts would silently
    # stop at the shorter one and only notice renamed entries when their
    # tensors also happened to differ.
    if list(state_1) != list(state_2):
        raise ValueError('Models being compared have different architectures')

    models_differ = 0
    for name, tensor_1 in state_1.items():
        if not torch.equal(tensor_1, state_2[name]):
            models_differ += 1
            if verbose: print(f'Mismtach found at {name}')

    if models_differ == 0:
        if verbose: print('Models match perfectly')
        return True
    return False

Train & Compare -- This test fails, and the names of all the layers whose weights don't match are printed out.

# Train with `fit(1)`, then compare the backbone body of the trained model
# against the copy taken before training.
learn.fit(1)
# `.cpu()` is called on the trained model first -- presumably so both sets of
# tensors are on the same device for `torch.equal`; confirm where
# `untrained_model` lives.
models_equal(learn.model.cpu().backbone.body,
             untrained_model.backbone.body)

Expected behavior: The first 4 parameter groups should have been frozen.

Desktop (please complete the following information):

  • OS: Ubuntu 18.04

rsomani95 avatar Dec 09 '20 05:12 rsomani95

@potipot have you ever noticed this behavior?

FraPochetti avatar Dec 20 '21 22:12 FraPochetti