person_reID_DualNorm icon indicating copy to clipboard operation
person_reID_DualNorm copied to clipboard

Loading the pretrained model

Open jkoubele opened this issue 4 years ago • 4 comments

Hi, thanks for sharing the code and pretrained model! I have a question regarding the usage of the pretrained model. It seems that the structure of the model in net_MobileNet_IFN.pth is different from the expected structure given by class MobileNetV2_IFN(nn.Module). When running

model_structure = models.init_model(name='mobilenet_ifn', num_classes=18530, training=False, use_gpu=use_gpu) model_structure = model_structure.cuda() model = load_network(model_structure)

I got the following error:

RuntimeError: Error(s) in loading state_dict for MobileNetV2_IFN: Missing key(s) in state_dict: "fn.weight", "fn.bias", "fn.running_mean", "fn.running_var". Unexpected key(s) in state_dict: "bnneck.weight", "bnneck.bias", "bnneck.running_mean", "bnneck.running_var", "bnneck.num_batches_tracked".

Can you please clarify how to use the pretrained model? Thanks, -Jakub

jkoubele avatar Mar 10 '20 11:03 jkoubele

Ok, I solved that by dropping the unexpected items from the saved model and modifying the model class such that it produces the 1280-d vector from the pooling layer, thus the missing weights are not needed.

jkoubele avatar Mar 11 '20 08:03 jkoubele

Hi @jkoubele,

Can you show me your modified script? I have the same error. Thanks.

pribadihcr avatar Jun 07 '20 10:06 pribadihcr

Hi @jkoubele,

Can you show me your modified script? I have the same error. Thanks.

Hi, here you go:

import math
import os
from typing import List

import cv2
import numpy as np
import torch
from PIL import Image
from torchvision import transforms



class DualNormNet():
    """
    Model embedding the bounding box with person to 1280-dimensional space. The model is described by
    Jia, J., Ruan, Q., & Hospedales, T. M. (2019): Frustratingly easy person re-identification: Generalizing person
    re-id in practice (https://arxiv.org/abs/1905.03422).
    The implementation and trained models are taken from https://github.com/BJTUJia/person_reID_DualNorm
    """

    def __init__(self, model_file_path: str = 'dual_norm_mobile_net_ifp.pth'):
        """
        :param model_file_path: path to the pretrained MobileNetV2-IFN checkpoint.
        """
        self._gpu_available = torch.cuda.is_available()
        self.model = MobileNetV2IFN()
        self.model.eval()
        if self._gpu_available:
            self.model.cuda()
        self.model.load_weights(model_file_path)

        # Standard ImageNet normalization; interpolation=3 is PIL bicubic
        # (NOTE(review): newer torchvision prefers InterpolationMode.BICUBIC).
        self.data_transforms = transforms.Compose([
            transforms.Resize((256, 128), interpolation=3),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

    @staticmethod
    def horizontal_flip(img: torch.Tensor) -> torch.Tensor:
        """
        Performs horizontal flip of a batch of images in NCHW layout.
        """
        inv_idx = torch.arange(img.size(3) - 1, -1, -1).long()
        return img.index_select(3, inv_idx)

    def encode(self, image: np.ndarray) -> np.ndarray:
        """
        Computes an L2-normalized 1280-d embedding of the given person crop.

        The embedding is the sum of the model features of the image and of its
        horizontal flip, scaled to unit L2 norm.

        :param image: BGR image as a numpy array (as returned by cv2.imread).
        :return: numpy array of shape (1, 1280) with unit L2 norm.
        """
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        img = self.data_transforms(Image.fromarray(image_rgb))
        # TODO: right now using batch of size 1, can be extended to larger batch size
        batch = torch.reshape(img, (1, 3, 256, 128))

        embedding = torch.zeros(1, 1280)
        # Inference only: no_grad avoids building the autograd graph. The
        # original code re-wrapped the batch with torch.tensor(), which copies
        # an existing tensor and triggers a UserWarning.
        with torch.no_grad():
            for i in range(2):
                if i == 1:
                    batch = self.horizontal_flip(batch)
                input_batch = batch.cuda() if self._gpu_available else batch
                output = self.model(input_batch)
                embedding += output.cpu()

        fnorm = torch.norm(embedding, p=2, dim=1, keepdim=True)
        return embedding.div(fnorm.expand_as(embedding)).numpy()
    


class MobileNetV2IFN(torch.nn.Module):
    """
    MobileNetV2 feature extractor with instance normalization on the early
    blocks (the IFN variant). Architecture follows Sandler et al.,
    "MobileNetV2: Inverted Residuals and Linear Bottlenecks", CVPR 2018.
    Forward pass returns a 1280-d feature vector per image.
    """

    def __init__(self) -> None:
        super(MobileNetV2IFN, self).__init__()
        # Kept for compatibility with the original training code.
        self.loss = {'xent'}

        self.conv1 = torch.nn.Conv2d(3, 32, 3, stride=2, padding=1)
        self.in1 = torch.nn.InstanceNorm2d(32, affine=True)

        # Early blocks use instance normalization on their outputs; the
        # deeper blocks (7, 8) keep plain batch normalization.
        self.block2 = Bottleneck(32, 16, 1, 1, instance_normalization=True)
        self.block3 = torch.nn.Sequential(
            Bottleneck(16, 24, 6, 2, instance_normalization=True),
            Bottleneck(24, 24, 6, 1, instance_normalization=True),
        )
        self.block4 = torch.nn.Sequential(
            Bottleneck(24, 32, 6, 2, instance_normalization=True),
            Bottleneck(32, 32, 6, 1, instance_normalization=True),
            Bottleneck(32, 32, 6, 1, instance_normalization=True),
        )
        self.block5 = torch.nn.Sequential(
            Bottleneck(32, 64, 6, 2, instance_normalization=True),
            Bottleneck(64, 64, 6, 1, instance_normalization=True),
            Bottleneck(64, 64, 6, 1, instance_normalization=True),
            Bottleneck(64, 64, 6, 1, instance_normalization=True),
        )
        self.block6 = torch.nn.Sequential(
            Bottleneck(64, 96, 6, 1, instance_normalization=True),
            Bottleneck(96, 96, 6, 1, instance_normalization=True),
            Bottleneck(96, 96, 6, 1, instance_normalization=True),
        )
        self.block7 = torch.nn.Sequential(
            Bottleneck(96, 160, 6, 2),
            Bottleneck(160, 160, 6, 1),
            Bottleneck(160, 160, 6, 1),
        )
        self.block8 = Bottleneck(160, 320, 6, 1)
        self.conv9 = ConvBlock(320, 1280, 1)

        self.global_avgpool = torch.nn.AdaptiveAvgPool2d(1)

    def forward(self, x):
        """Maps an NCHW image batch to an (N, 1280) feature matrix."""
        x = torch.nn.functional.relu6(self.in1(self.conv1(x)))
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x)
        x = self.block5(x)
        x = self.block6(x)
        x = self.block7(x)
        x = self.block8(x)
        x = self.conv9(x)
        x = self.global_avgpool(x)
        x = x.view(x.size(0), -1)
        return x

    def load_weights(self, model_path: str) -> None:
        """
        Loads pretrained weights, dropping the classification head that is
        not part of this feature-extraction model.

        :param model_path: path to the .pth checkpoint file.
        """
        # map_location makes a GPU-saved checkpoint loadable on a CPU-only
        # machine (plain torch.load would raise in that case).
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        checkpoint = torch.load(model_path, map_location=device)

        # The pretrained model from https://github.com/BJTUJia/person_reID_DualNorm contains unused weights
        # which cause an error in load_state_dict(). pop() (instead of del)
        # also tolerates checkpoints that already lack these keys.
        for k in ["bnneck.weight", "bnneck.bias", "bnneck.running_mean", "bnneck.running_var",
                  "bnneck.num_batches_tracked", "classifier.weight"]:
            checkpoint.pop(k, None)
        self.load_state_dict(checkpoint)
        
    


class ConvBlock(torch.nn.Module):
    """
    Convolution -> batch normalization -> ReLU6.

    The convolution bias is disabled, since it would be redundant before
    batch normalization.

    :param in_c: number of input channels.
    :param out_c: number of output channels.
    :param k: kernel size (int or tuple).
    :param s: stride (int or tuple), default 1.
    :param p: padding (int or tuple), default 0.
    :param g: number of groups for the convolution, default 1
              (g == channels yields a depthwise convolution).
    """

    def __init__(self, in_c, out_c, k, s=1, p=0, g=1):
        super(ConvBlock, self).__init__()
        self.conv = torch.nn.Conv2d(in_c, out_c, k, stride=s, padding=p, bias=False, groups=g)
        self.bn = torch.nn.BatchNorm2d(out_c)

    def forward(self, x):
        y = self.conv(x)
        y = self.bn(y)
        return torch.nn.functional.relu6(y)


class Bottleneck(torch.nn.Module):
    """
    MobileNetV2 inverted-residual bottleneck: pointwise expansion, depthwise
    convolution, and a linear (activation-free) pointwise projection, with an
    optional instance normalization applied to the block output.
    """

    def __init__(self, in_channels, out_channels, expansion_factor, stride, instance_normalization=False):
        super(Bottleneck, self).__init__()
        expanded = in_channels * expansion_factor
        # A skip connection is only valid when the block changes neither the
        # spatial resolution nor the channel count.
        self.use_residual = (stride == 1 and in_channels == out_channels)

        self.conv1 = ConvBlock(in_channels, expanded, 1)
        self.dwconv2 = ConvBlock(expanded, expanded, 3, stride, 1, g=expanded)
        # Projection back to out_channels without an activation ("linear
        # bottleneck").
        self.conv3 = torch.nn.Sequential(
            torch.nn.Conv2d(expanded, out_channels, 1, bias=False),
            torch.nn.BatchNorm2d(out_channels)
        )

        self.IN = torch.nn.InstanceNorm2d(out_channels, affine=True) if instance_normalization else None

    def forward(self, x):
        out = self.conv3(self.dwconv2(self.conv1(x)))
        if self.use_residual:
            out = x + out
        return out if self.IN is None else self.IN(out)
        
if __name__ == "__main__":
    # Minimal usage example: embed a single person crop from disk.
    net = DualNormNet()
    embedding = net.encode(cv2.imread("some_image.png"))

jkoubele avatar Jun 07 '20 12:06 jkoubele

Thanks.

On Sun, Jun 7, 2020, 8:19 PM jkoubele [email protected] wrote:

Hi @jkoubele https://github.com/jkoubele,

Can you show me your modified script?. I have the same error. Thanks

Hi, here you go:

import math import os from typing import List

import cv2 import numpy as np import torch from PIL import Image from torchvision import transforms

class DualNormNet(): """ Model embedding the bounding box with person to 1280-dimensional space. The model is described by Jia, J., Ruan, Q., & Hospedales, T. M. (2019): Frustratingly easy person re-identification: Generalizing person re-id in practice (https://arxiv.org/abs/1905.03422). The implementation and trained models are taken from https://github.com/BJTUJia/person_reID_DualNorm """

def __init__(self, model_file_path: str = 'dual_norm_mobile_net_ifp.pth'):
    self._gpu_available = torch.cuda.is_available()
    self.model = MobileNetV2IFN()
    self.model.eval()
    if self._gpu_available:
        self.model.cuda()
    self.model.load_weights(model_file_path)

    self.data_transforms = transforms.Compose([
        transforms.Resize((256, 128), interpolation=3),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])


@staticmethod
def horizontal_flip(img: torch.Tensor) -> torch.Tensor:
    """
    Performs horizontal flip of the input image.
    """
    inv_idx = torch.arange(img.size(3) - 1, -1, -1).long()
    img_flip = img.index_select(3, inv_idx)
    return img_flip

def encode(self, image: np.ndarray) -> np.ndarray:
    """
    :param frame: BGR image as a numpy array
    """
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    batch = torch.zeros(1, 3, 256, 128)
    img = Image.fromarray(image_rgb)
    img = self.data_transforms(img)
    img = torch.reshape(img, (1, 3, 256, 128))
    # TODO: right now using batch of size 1, can be extended to larger batch size
    batch[0, :, :, :] = img

    embedding = torch.FloatTensor(1, 1280).zero_()

    for i in range(2):
        if i == 1:
            batch = self.horizontal_flip(batch)
        input_batch = torch.tensor(batch)
        if self._gpu_available:
            input_batch = input_batch.cuda()
        output = self.model(input_batch)
        embedding += output.data.cpu()

    fnorm = torch.norm(embedding, p=2, dim=1, keepdim=True)
    embedding = embedding.div(fnorm.expand_as(embedding)).numpy()
    return embedding

class MobileNetV2IFN(torch.nn.Module): """ MobileNetV2 described by Sandler et al. MobileNetV2: Inverted Residuals and Linear Bottlenecks. CVPR 2018. """

def __init__(self) -> None:
    super(MobileNetV2IFN, self).__init__()
    self.loss = {'xent'}

    self.conv1 = torch.nn.Conv2d(3, 32, 3, stride=2, padding=1)
    self.in1 = torch.nn.InstanceNorm2d(32, affine=True)

    self.block2 = Bottleneck(32, 16, 1, 1, instance_normalization=True)
    self.block3 = torch.nn.Sequential(
        Bottleneck(16, 24, 6, 2, instance_normalization=True),
        Bottleneck(24, 24, 6, 1, instance_normalization=True),
    )
    self.block4 = torch.nn.Sequential(
        Bottleneck(24, 32, 6, 2, instance_normalization=True),
        Bottleneck(32, 32, 6, 1, instance_normalization=True),
        Bottleneck(32, 32, 6, 1, instance_normalization=True),
    )
    self.block5 = torch.nn.Sequential(
        Bottleneck(32, 64, 6, 2, instance_normalization=True),
        Bottleneck(64, 64, 6, 1, instance_normalization=True),
        Bottleneck(64, 64, 6, 1, instance_normalization=True),
        Bottleneck(64, 64, 6, 1, instance_normalization=True),
    )
    self.block6 = torch.nn.Sequential(
        Bottleneck(64, 96, 6, 1, instance_normalization=True),
        Bottleneck(96, 96, 6, 1, instance_normalization=True),
        Bottleneck(96, 96, 6, 1, instance_normalization=True),
    )
    self.block7 = torch.nn.Sequential(
        Bottleneck(96, 160, 6, 2),
        Bottleneck(160, 160, 6, 1),
        Bottleneck(160, 160, 6, 1),
    )
    self.block8 = Bottleneck(160, 320, 6, 1)
    self.conv9 = ConvBlock(320, 1280, 1)

    self.global_avgpool = torch.nn.AdaptiveAvgPool2d(1)

def forward(self, x):
    x = torch.nn.functional.relu6(self.in1(self.conv1(x)))
    x = self.block2(x)
    x = self.block3(x)
    x = self.block4(x)
    x = self.block5(x)
    x = self.block6(x)
    x = self.block7(x)
    x = self.block8(x)
    x = self.conv9(x)
    x = self.global_avgpool(x)
    x = x.view(x.size(0), -1)
    return x

def load_weights(self, model_path: str) -> None:
    checkpoint = torch.load(model_path)

    # The pretrained model from https://github.com/BJTUJia/person_reID_DualNorm contains unused weights
    # which cause error in load_state_dict(), we delete them to avoid the error.
    for k in ["bnneck.weight", "bnneck.bias", "bnneck.running_mean", "bnneck.running_var",
              "bnneck.num_batches_tracked", "classifier.weight"]:
        del checkpoint[k]
    self.load_state_dict(checkpoint)

class ConvBlock(torch.nn.Module): """Basic convolutional block: convolution (bias discarded) + batch normalization + relu6.

Args (following http://pytorch.org/docs/master/nn.html#torch.nn.Conv2d):
    in_c (int): number of input channels.
    out_c (int): number of output channels.
    k (int or tuple): kernel size.
    s (int or tuple): stride.
    p (int or tuple): padding.
    g (int): number of blocked connections from input channels
             to output channels (default: 1).
"""

def __init__(self, in_c, out_c, k, s=1, p=0, g=1):
    super(ConvBlock, self).__init__()
    self.conv = torch.nn.Conv2d(in_c, out_c, k, stride=s, padding=p, bias=False, groups=g)
    self.bn = torch.nn.BatchNorm2d(out_c)

def forward(self, x):
    return torch.nn.functional.relu6(self.bn(self.conv(x)))

class Bottleneck(torch.nn.Module): def init(self, in_channels, out_channels, expansion_factor, stride, instance_normalization=False): super(Bottleneck, self).init() mid_channels = in_channels * expansion_factor self.use_residual = stride == 1 and in_channels == out_channels self.conv1 = ConvBlock(in_channels, mid_channels, 1) self.dwconv2 = ConvBlock(mid_channels, mid_channels, 3, stride, 1, g=mid_channels)

    self.conv3 = torch.nn.Sequential(
        torch.nn.Conv2d(mid_channels, out_channels, 1, bias=False),
        torch.nn.BatchNorm2d(out_channels)
    )

    self.IN = None

    if instance_normalization:
        self.IN = torch.nn.InstanceNorm2d(out_channels, affine=True)

def forward(self, x):
    m = self.conv1(x)
    m = self.dwconv2(m)
    m = self.conv3(m)

    if self.use_residual:
        out = x + m
    else:
        out = m

    if self.IN is not None:
        return self.IN(out)
    else:
        return out

if name == "main": dual_norm_net = DualNormNet() img = cv2.imread("some_image.png") embedding = dual_norm_net.encode(img)

— You are receiving this because you commented. Reply to this email directly, view it on GitHub https://github.com/BJTUJia/person_reID_DualNorm/issues/7#issuecomment-640209753, or unsubscribe https://github.com/notifications/unsubscribe-auth/ADDWP7T2E6D4YQ6MUZUHAOLRVOAVBANCNFSM4LE4HRMQ .

pribadihcr avatar Jun 08 '20 00:06 pribadihcr