convert to ONNX
Hi, I'm trying to convert the output model of ScaledYOLOv4 to ONNX, but I ran into this error:

```
. . . . ) # /yolov4/models/yolo.py:38:0
%603 : Float(1, 3, 18, 18, 12, strides=[11664, 3888, 216, 12, 1], requires_grad=1, device=cpu) = onnx::Transpose[perm=[0, 1, 3, 4, 2]] # /yolov4/models/yolo.py:38:0
return (%output, %583, %603)
```

I used the models/export.py script:
```python
# Input
img = torch.randn((opt.batch_size, 3, *opt.img_size))  # image size(1,3,320,192) iDetection

# Load PyTorch model
attempt_download(opt.weights)
model = torch.load(opt.weights, map_location=torch.device('cpu'))['model'].float()

from onnxsim import simplify

# Update model
for k, m in model.named_modules():
    m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility
    if isinstance(m, models.common.Conv) and isinstance(m.act, models.common.Mish):
        m.act = Mish()  # assign activation
    if isinstance(m, models.common.BottleneckCSP) or isinstance(m, models.common.BottleneckCSP2) \
            or isinstance(m, models.common.SPPCSP):
        if isinstance(m.bn, nn.SyncBatchNorm):
            # replace SyncBatchNorm with a plain BatchNorm2d for export
            bn = nn.BatchNorm2d(m.bn.num_features, eps=m.bn.eps, momentum=m.bn.momentum)
            bn.training = False
            bn._buffers = m.bn._buffers
            bn._non_persistent_buffers_set = set()
            m.bn = bn
        if isinstance(m.act, models.common.Mish):
            m.act = Mish()  # assign activation
    # if isinstance(m, models.yolo.Detect):
    #     m.forward = m.forward_export  # assign forward (optional)
model.eval()
model.model[-1].export = True  # set Detect() layer export=True
# y = model(img)  # dry run

import onnx
import onnxruntime

print('\nStarting ONNX export with onnx %s...' % onnx.__version__)
f = opt.weights.replace('.pt', '.onnx')  # filename
try:
    model.fuse()  # only for ONNX
    torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['input'])
    # Checks
    onnx_model = onnx.load(f)  # load onnx model
    model_simp, check = simplify(onnx_model)
    assert check, "Simplified ONNX model could not be validated"
    print('ONNX export success, saved as %s' % f)
except Exception as e:
    print('ONNX export failure: %s' % e)
```
@siriasadeddin I noticed a performance degradation when converting nn.SyncBatchNorm this way, not sure why. The output values of the PyTorch model and the ONNX model also differ.
Hi! Can you tell me how you tested it so I can also try? I ran, for example:

```
python test.py --img-size 1536 --weights ./yolov4-p7.pt
```

and compared two copies of the model with this script:
```python
import sys
sys.path.append('./')  # to run '$ python *.py' files in subdirectories
import argparse
import torch
import torch.nn as nn
import models
from models.experimental import attempt_load
from utils.activations import Mish
import numpy as np
from utils.google_utils import attempt_download

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default='./yolov4-p5.pt', help='weights path')
    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size')
    parser.add_argument('--batch-size', type=int, default=1, help='batch size')
    opt = parser.parse_args()
    opt.img_size *= 2 if len(opt.img_size) == 1 else 1  # expand
    print(opt)

    # Input
    img = torch.randn((opt.batch_size, 3, *opt.img_size))  # image size(1,3,320,192) iDetection

    # Load PyTorch model
    attempt_download(opt.weights)
    model = torch.load(opt.weights, map_location=torch.device('cpu'))['model'].float()
    model1 = model  # note: this aliases the same module object, it is not a copy

    # Update model
    for k, m in model.named_modules():
        m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility
        if isinstance(m, models.common.Conv) and isinstance(m.act, models.common.Mish):
            m.act = Mish()  # assign activation
        if isinstance(m, models.common.BottleneckCSP) or isinstance(m, models.common.BottleneckCSP2) \
                or isinstance(m, models.common.SPPCSP):
            if isinstance(m.bn, nn.SyncBatchNorm):
                bn = nn.BatchNorm2d(m.bn.num_features, eps=m.bn.eps, momentum=m.bn.momentum)
                bn.training = False
                bn._buffers = m.bn._buffers
                bn._non_persistent_buffers_set = set()
                m.bn = bn
            if isinstance(m.act, models.common.Mish):
                m.act = Mish()  # assign activation
        # if isinstance(m, models.yolo.Detect):
        #     m.forward = m.forward_export  # assign forward (optional)
    model.eval()
    model1.eval()

    def to_numpy(tensor):
        return tensor.detach().numpy()

    torch_out_rand = model(img)
    torch_out_rand = [x for x in torch_out_rand]
    torch_out_rand1 = model1(img)
    torch_out_rand1 = [x for x in torch_out_rand1]
    print(torch_out_rand[0])
    print(torch_out_rand1[0])
    np.testing.assert_allclose(to_numpy(torch_out_rand[0]), to_numpy(torch_out_rand1[0]), rtol=5e-03, atol=1e-04)
    print(np.max(np.abs(to_numpy(torch_out_rand[0]) - to_numpy(torch_out_rand1[0]))))
```
And it seems that model1 and model give the same results. I think your differences come from somewhere else.
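One way to locate the difference is to compare the PyTorch model against the exported file directly with onnxruntime. A minimal sketch, assuming `model` is the export-prepared model from the script above and `'yolov4-p5.onnx'` is the exported file (both names are placeholders), and that the export-mode Detect returns the per-layer head maps as in the three-output graph quoted at the top:

```python
import numpy as np
import onnxruntime
import torch

sess = onnxruntime.InferenceSession('yolov4-p5.onnx')  # placeholder path to the exported model
img = torch.randn(1, 3, 640, 640)

with torch.no_grad():
    torch_out = model(img)  # list of per-layer head outputs in export mode

ort_out = sess.run(None, {sess.get_inputs()[0].name: img.numpy()})

# Compare the first head; a large max-abs diff here points at the export itself,
# not at the SyncBatchNorm replacement.
print('max abs diff:', np.abs(torch_out[0].numpy() - ort_out[0]).max())
np.testing.assert_allclose(torch_out[0].numpy(), ort_out[0], rtol=5e-3, atol=1e-4)
```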
@RohitKeshari You are right. I use PyTorch 1.7 and can reproduce your problem. The export.py comes from here: https://github.com/linghu8812/tensorrt_inference/blob/master/ScaledYOLOv4/export_onnx.py
Actually, it seems that this code no longer works on the newest branch. Here is my solution:
```python
import argparse
import torch
import torch.nn as nn
import models
from models.experimental import attempt_load
from utils.activations import Mish
from onnxsim import simplify

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default='./weights/yolov4-p5.pt', help='weights path')  # from yolov5/models/
    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size')  # height, width
    parser.add_argument('--batch-size', type=int, default=1, help='batch size')
    opt = parser.parse_args()
    opt.img_size *= 2 if len(opt.img_size) == 1 else 1  # expand
    print(opt)

    # Input
    img = torch.zeros((opt.batch_size, 3, *opt.img_size))  # image size(1,3,320,192) iDetection

    # Load PyTorch model
    model = attempt_load(opt.weights, map_location=torch.device('cuda:0'))  # load FP32 model
    img = torch.zeros((1, 3, 640, 640), device=torch.device('cuda:0'))  # overrides the CPU input above
    # model = attempt_load(opt.weights, map_location=torch.device('cpu'))  # load FP32 model
    # img = torch.zeros((1, 3, 640, 640), device=torch.device('cpu'))

    # Update model
    for k, m in model.named_modules():
        m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility
        if isinstance(m, models.common.Conv) and isinstance(m.act, models.common.Mish):
            m.act = Mish()  # assign activation
        if isinstance(m, models.common.BottleneckCSP) or isinstance(m, models.common.BottleneckCSP2) \
                or isinstance(m, models.common.SPPCSP):
            # if isinstance(m.bn, nn.SyncBatchNorm):
            #     bn = nn.BatchNorm2d(m.bn.num_features, eps=m.bn.eps, momentum=m.bn.momentum)
            #     bn.training = False
            #     bn._buffers = m.bn._buffers
            #     bn._non_persistent_buffers_set = set()
            #     m.bn = bn
            if isinstance(m.act, models.common.Mish):
                m.act = Mish()  # assign activation
        # if isinstance(m, models.yolo.Detect):
        #     m.forward = m.forward_export  # assign forward (optional)
    # y = model(img)  # dry run
    # print(y[0])
    model.eval()
    model.model[-1].export = True  # set Detect() layer export=True
    y = model(img)  # dry run
    print(y)
    print(y.shape)

    # ONNX export
    try:
        import onnx

        print('\nStarting ONNX export with onnx %s...' % onnx.__version__)
        f = opt.weights.replace('.pt', '.onnx')  # filename
        torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'],
                          output_names=['output'])
        # Checks
        onnx_model = onnx.load(f)  # load onnx model
        model_simp, check = simplify(onnx_model)
        assert check, "Simplified ONNX model could not be validated"
        onnx.save(model_simp, f)
        # print(onnx.helper.printable_graph(onnx_model.graph))  # print a human readable model
        print('ONNX export success, saved as %s' % f)
    except Exception as e:
        print('ONNX export failure: %s' % e)

    import onnxruntime

    ort_session = onnxruntime.InferenceSession(f)

    def to_numpy(tensor):
        return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()

    img = torch.zeros((opt.batch_size, 3, *opt.img_size))
    # compute ONNX Runtime output prediction
    ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(img)}
    ort_outs = ort_session.run(None, ort_inputs)
    print(ort_outs)

    # Finish
    print('\nExport complete. Visualize with https://github.com/lutzroeder/netron.')
```
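For reference, assuming you save the script above as export_onnx.py (the file name and paths here are just illustrative), it would be invoked like this:

```
python export_onnx.py --weights ./weights/yolov4-p5.pt --img-size 640 640
```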
And you also need to change the Detect class in yolo.py after you have finished the training process:
```python
class Detect(nn.Module):
    def __init__(self, nc=80, anchors=(), ch=()):  # detection layer
        super(Detect, self).__init__()
        self.stride = None  # strides computed during build
        self.nc = nc  # number of classes
        self.no = nc + 5  # number of outputs per anchor
        self.nl = len(anchors)  # number of detection layers
        self.na = len(anchors[0]) // 2  # number of anchors
        self.grid = [torch.zeros(1)] * self.nl  # init grid
        a = torch.tensor(anchors).float().view(self.nl, -1, 2)
        self.register_buffer('anchors', a)  # shape(nl,na,2)
        self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2))  # shape(nl,1,na,1,1,2)
        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv
        self.export = False  # onnx export

    def forward(self, x):
        # x = x.copy()  # for profiling
        z = []  # inference output
        self.training |= self.export
        for i in range(self.nl):
            x[i] = self.m[i](x[i])  # conv
            bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
            y = x[i].sigmoid()
            if not self.training:  # inference: decode boxes to pixel coordinates
                if self.grid[i].shape[2:4] != x[i].shape[2:4]:
                    self.grid[i] = self._make_grid(nx, ny).to(x[i].device)
                y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i]  # xy
                y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
            z.append(y.view(bs, -1, self.no))  # export path keeps the raw sigmoid outputs
        return torch.cat(z, 1) if self.training else (torch.cat(z, 1), x)

    @staticmethod
    def _make_grid(nx=20, ny=20):
        yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
        return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
```
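To sanity-check what that decode does, here is a tiny sketch (hedged: stride 32 and a 10x13 anchor are made-up numbers, not the repo's) that pushes an all-zero head output through the same formulas:

```python
import torch

# All-zero logits -> sigmoid of 0.5 everywhere; shape is (bs, na, ny, nx, no) with no=4 (xywh only).
y = torch.sigmoid(torch.zeros(1, 1, 2, 2, 4))
grid = Detect._make_grid(nx=2, ny=2)                      # (1, 1, 2, 2, 2) cell offsets
xy = (y[..., 0:2] * 2. - 0.5 + grid) * 32.0               # (0.5*2 - 0.5 + cell) * stride
wh = (y[..., 2:4] * 2) ** 2 * torch.tensor([10.0, 13.0])  # (0.5*2)^2 * anchor = the anchor itself
print(xy[0, 0, 0, 0], wh[0, 0, 0, 0])  # tensor([16., 16.]) tensor([10., 13.])
```

So a cell with neutral logits predicts a box centered half a stride into its cell, sized exactly at its anchor.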
And you can see the same results in both PyTorch and onnxruntime. Now you can use https://github.com/linghu8812/tensorrt_inference/blob/master/ScaledYOLOv4/ to generate the TensorRT engine and run inference.
If you only want to use the ONNX model, you can add the postprocessing into the ONNX model as below. But it will generate some ScatterND operations, which cause problems in TensorRT.
```python
class Detect(nn.Module):
    def __init__(self, nc=80, anchors=(), ch=()):  # detection layer
        super(Detect, self).__init__()
        self.stride = None  # strides computed during build
        self.nc = nc  # number of classes
        self.no = nc + 5  # number of outputs per anchor
        self.nl = len(anchors)  # number of detection layers
        self.na = len(anchors[0]) // 2  # number of anchors
        self.grid = [torch.zeros(1)] * self.nl  # init grid
        a = torch.tensor(anchors).float().view(self.nl, -1, 2)
        self.register_buffer('anchors', a)  # shape(nl,na,2)
        self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2))  # shape(nl,1,na,1,1,2)
        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv
        self.export = False  # onnx export

    def forward(self, x):
        # x = x.copy()  # for profiling
        z = []  # inference output
        self.training |= self.export
        for i in range(self.nl):
            x[i] = self.m[i](x[i])  # conv
            bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
            if self.grid[i].shape[2:4] != x[i].shape[2:4]:
                self.grid[i] = self._make_grid(nx, ny).to(x[i].device)
            y = x[i].sigmoid()
            # these in-place slice assignments are what become ScatterND ops in the ONNX graph
            y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i]  # xy
            y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
            z.append(y.view(bs, -1, self.no))
        return torch.cat(z, 1) if self.training else (torch.cat(z, 1), x)

    # _make_grid is unchanged from the version above
```
Now your ONNX output shape is 1 × num_bboxes × (class_num + 5); just take the output boxes and run NMS on them to get the final detection result.
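To make that last step concrete, here is a minimal NMS sketch in NumPy over that 1 × num_bboxes × (class_num + 5) output (a hedged, class-agnostic version; the thresholds are illustrative, not the repo's defaults):

```python
import numpy as np

def nms_xywh(pred, conf_thres=0.4, iou_thres=0.5):
    """pred: (num_boxes, nc + 5) array of [x, y, w, h, obj, cls...] rows in pixels."""
    scores = pred[:, 4] * pred[:, 5:].max(1)  # objectness * best class prob
    classes = pred[:, 5:].argmax(1)
    keep_mask = scores > conf_thres
    pred, scores, classes = pred[keep_mask], scores[keep_mask], classes[keep_mask]

    # xywh (center) -> xyxy (corners)
    boxes = np.empty((len(pred), 4))
    boxes[:, 0] = pred[:, 0] - pred[:, 2] / 2
    boxes[:, 1] = pred[:, 1] - pred[:, 3] / 2
    boxes[:, 2] = pred[:, 0] + pred[:, 2] / 2
    boxes[:, 3] = pred[:, 1] + pred[:, 3] / 2

    keep = []
    order = scores.argsort()[::-1]  # highest score first
    while order.size:
        i = order[0]
        keep.append(i)
        # IoU of the best remaining box against the rest
        xx1 = np.maximum(boxes[i, 0], boxes[order[1:], 0])
        yy1 = np.maximum(boxes[i, 1], boxes[order[1:], 1])
        xx2 = np.minimum(boxes[i, 2], boxes[order[1:], 2])
        yy2 = np.minimum(boxes[i, 3], boxes[order[1:], 3])
        inter = np.clip(xx2 - xx1, 0, None) * np.clip(yy2 - yy1, 0, None)
        area_i = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
        area_o = (boxes[order[1:], 2] - boxes[order[1:], 0]) * (boxes[order[1:], 3] - boxes[order[1:], 1])
        iou = inter / (area_i + area_o - inter + 1e-9)
        order = order[1:][iou < iou_thres]  # drop overlapping boxes
    return boxes[keep], scores[keep], classes[keep]

# e.g. boxes, scores, classes = nms_xywh(ort_outs[0][0]) for a (1, num_boxes, nc + 5) session output
```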