Yolo-Fastest icon indicating copy to clipboard operation
Yolo-Fastest copied to clipboard

How do I load your pretrained model in PyTorch?

Open jajajajaja121 opened this issue 4 years ago • 0 comments

I am trying to convert your darknet model to PyTorch, but I find that the size of your weights does not match my model. Here is my conversion code:

def load_darknet_weights(self, weights_path):
    """Parse a darknet ``.weights`` file and copy its tensors into this model.

    The darknet binary layout is: a header of 5 int32 values (major, minor,
    revision version numbers, then the 64-bit "images seen" counter occupying
    two int32 slots), followed by every layer's parameters as raw float32 in
    layer order.  For a conv layer with batch-norm the order is
    bn.bias, bn.weight, bn.running_mean, bn.running_var, conv.weight;
    without batch-norm it is conv.bias, conv.weight.

    Args:
        weights_path: path to the darknet ``.weights`` (or ``.conv.NN``) file.

    Raises:
        ValueError: if the file runs out of values before a layer is filled,
            i.e. the model's layer shapes do not match the file.
    """
    # Read header + the flat float32 payload.
    with open(weights_path, "rb") as f:
        header = np.fromfile(f, dtype=np.int32, count=5)
        self.header_info = header  # kept so the header can be re-written on save
        self.seen = header[3]  # images seen during training (low 32 bits)
        weights = np.fromfile(f, dtype=np.float32)  # the rest are weights

    # Backbone-only files stop before the detection head.
    cutoff = None
    if "darknet53.conv.74" in weights_path:
        cutoff = 75

    ptr = 0

    def _copy_into(param):
        """Copy the next param.numel() floats into `param` and advance ptr."""
        nonlocal ptr
        n = param.numel()
        if ptr + n > weights.size:
            raise ValueError(
                f"weight file exhausted at module {i}: need {n} values but only "
                f"{weights.size - ptr} remain -- layer shapes do not match the file"
            )
        param.data.copy_(torch.from_numpy(weights[ptr:ptr + n]).view_as(param))
        ptr += n

    for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):
        if i == cutoff:
            break
        if module_def["type"] in ["convolutional", "normal_convolutional"]:
            conv_layer = module[0]
            if int(module_def["batch_normalize"]) == 1:
                bn_layer = module[1]
                # darknet BN order: bias, weight (gamma), running mean, running var
                for param in (bn_layer.bias, bn_layer.weight,
                              bn_layer.running_mean, bn_layer.running_var):
                    _copy_into(param)
                _copy_into(conv_layer.weight)
            else:
                if ".weights" in weights_path:
                    # For yolov3.weights the only conv layers without BN are the
                    # ones right before a YOLO head; skip them instead of loading
                    # (255 = 3 anchors * (80 classes + 5)).
                    ptr += 255
                    ptr += int(self.module_defs[i - 1]["filters"]) * 255
                else:
                    _copy_into(conv_layer.bias)
                    _copy_into(conv_layer.weight)

    # Leftover values mean the model is smaller than the file describes --
    # surface it instead of failing silently (this is the usual "mismatch" cause).
    if ptr != weights.size:
        print(f"Warning: {weights.size - ptr} values in '{weights_path}' "
              f"were not consumed; model and weight file disagree.")

I guess my convolution layer is different from yours. Here is my convolution layer:

elif module_def["type"] == "normal_convolutional":
            # Build one conv block from a darknet cfg section:
            # Conv2d (possibly grouped/depthwise) [+ BatchNorm2d] [+ ReLU6].
            # print("==============input filtrs:",output_filters[-1],"=================")
            # print('convolution')
            bn = int(module_def["batch_normalize"])
            filters = int(module_def["filters"])
            kernel_size = int(module_def["size"])
            stride = int(module_def['stride'])
            pad = int(module_def['pad'])
            # NOTE(review): in darknet cfg, `pad=1` is a *flag* meaning
            # padding = size // 2, not a literal padding amount; passing it
            # straight to Conv2d only coincides for 3x3 kernels -- confirm
            # against the darknet parser before trusting larger kernels.
            groups=int(module_def['groups'])
            # NOTE(review): with groups > 1 the conv weight has
            # out_channels * (in_channels // groups) * k * k elements; if the
            # cfg's groups value is mishandled here the weight count will not
            # match the .weights file -- a likely cause of the size mismatch.
            modules.add_module(
                # NOTE(review): name prefixes "conv_3", "batch_norm_1" and
                # "leaky_" (for ReLU6) look inconsistent with the other
                # branches -- verify state_dict key expectations.
                f"conv_3{module_i}",
                nn.Conv2d(
                    in_channels=output_filters[-1],
                    out_channels=filters,
                    kernel_size=kernel_size,
                    stride=stride,
                    padding=pad,
                    bias=not bn,  # darknet convs carry a bias only when BN is absent
                    groups=groups
                    ),
                )
            # modules.add_module(f"batch_norm_{module_i}", nn.BatchNorm2d(filters, momentum=0.9, eps=1e-5))
            if bn:
                modules.add_module(f"batch_norm_1{module_i}", nn.BatchNorm2d(filters))
            if module_def["activation"] == "relu6":
                modules.add_module(f"leaky_{module_i}", nn.ReLU6(inplace=True))

I don't know where the difference is; if you know, could you point it out? Thanks a lot.

jajajajaja121 avatar Nov 04 '20 01:11 jajajajaja121