RT-DETR
RT-DETR copied to clipboard
(pytorch) onnx is slower than pytorch
Describe the bug: I trained on my custom dataset with rtdetr_r101vd_6x_coco_custom.yml. However, I found that ONNX inference is about three times slower than PyTorch. I just ran export_onnx.py from the GitHub repository and saved model.onnx. Please review my inference code, which is based on code from another issue.
result
- onnx
python ./tools/predict_onnx.py -i ./images/D16030_196_Add00407.jpg
torch.Size([1, 3, 640, 640])
Inferece time = 0.421980619430542 s
FPS = 2.3697770796902677
- pytorch
`python ./tools/predict_pytorch.py -c ./configs/rtdetr/rtdetr_r101vd_6x_coco_custom.yml -w ../output/rtdetr_r101vd_6x_coco_custom/checkpoint0004.pth -i ./images/D16030_196_Add00407.jpg
Load PResNet101 state_dict
Inferece time = 0.15229344367980957 s
FPS = 6.566270850782369
- pytorch inference code
import argparse
from pathlib import Path
import time
class ImageReader:
    """Load an image file as a resized RGB tensor with a leading batch axis.

    The most recently loaded (already resized) PIL image is kept on
    ``self.pil_img`` so that callers can draw on it afterwards.
    """

    def __init__(self, resize=224, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
        """Configure the reader.

        Args:
            resize: Target size. An int produces a square image; a
                2-sequence is passed to PIL as ``(width, height)``.
            mean: Accepted for API compatibility only; normalization is
                not applied by this reader.
            std: Accepted for API compatibility only (see ``mean``).
        """
        # Defaults are tuples rather than lists so they cannot be mutated
        # and shared between instances.
        self.transform = transforms.Compose([
            transforms.ToTensor(),
        ])
        self.resize = resize
        self.pil_img = None

    def __call__(self, image_path, *args, **kwargs):
        """Read ``image_path`` and return the transformed image as a batch of one."""
        if isinstance(self.resize, int):
            target_size = (self.resize, self.resize)
        else:
            target_size = (self.resize[0], self.resize[1])
        # The context manager closes the underlying file once the pixel data
        # has been converted into a new in-memory image.
        with Image.open(image_path) as source:
            self.pil_img = source.convert('RGB').resize(target_size)
        return self.transform(self.pil_img).unsqueeze(0)
class Model(nn.Module):
    """Bundle the configured detector and its postprocessor in deploy mode."""

    def __init__(self, confg=None, ckpt="") -> None:
        """Build the model from a YAML config and load weights from ``ckpt``.

        Raises:
            AttributeError: If no checkpoint path is given.
        """
        super().__init__()
        self.cfg = YAMLConfig(confg, resume=ckpt)

        if not ckpt:
            raise AttributeError('only support resume to load model.state_dict by now.')

        loaded = torch.load(ckpt, map_location='cpu')
        # Use the EMA weights when the checkpoint has them, else the plain model weights.
        weights = loaded['ema']['module'] if 'ema' in loaded else loaded['model']

        # NOTE: the state dict is loaded into the train-mode model first,
        # and only then is the model converted to deploy mode.
        self.cfg.model.load_state_dict(weights)
        self.model = self.cfg.model.deploy()
        self.postprocessor = self.cfg.postprocessor.deploy()

    def forward(self, images, orig_target_sizes):
        """Run the detector on ``images`` and postprocess with ``orig_target_sizes``."""
        raw_outputs = self.model(images)
        return self.postprocessor(raw_outputs, orig_target_sizes)
def get_argparser():
    """Build the command-line parser for the PyTorch prediction script.

    Options: ``--config/-c``, ``--ckpt/-w`` (.pth checkpoint), ``--image/-i``
    and ``--device`` (defaults to ``"cuda:1"``).
    """
    cli = argparse.ArgumentParser()
    # The three path-like options are all plain, optional strings.
    string_options = (
        ("--config", "-c"),
        ("--ckpt", "-w"),
        ("--image", "-i"),
    )
    for long_flag, short_flag in string_options:
        cli.add_argument(long_flag, short_flag, type=str)
    cli.add_argument("--device", default="cuda:1")
    return cli
def main(args):
    """Run one image through the deployed model, time it, and save an annotated copy.

    Args:
        args: Parsed command-line namespace providing ``image``, ``device``,
            ``config`` and ``ckpt``.

    The annotated image is written next to the input with ``_torch`` appended
    to the file stem.
    """
    img_path = Path(args.image)
    device = torch.device(args.device)
    reader = ImageReader(resize=640)
    model = Model(confg=args.config, ckpt=args.ckpt)
    model.to(device=device)

    img = reader(img_path).to(device)
    size = torch.tensor([[img.shape[2], img.shape[3]]]).to(device)

    # Inference needs no autograd graph. CUDA kernels are launched
    # asynchronously, so synchronize before reading the clock on a GPU.
    with torch.no_grad():
        if device.type == 'cuda':
            torch.cuda.synchronize(device)
        start_time = time.perf_counter()
        output = model(img, size)
        if device.type == 'cuda':
            torch.cuda.synchronize(device)
        inf_time = time.perf_counter() - start_time

    fps = float(1 / inf_time)
    # The precision belongs inside the replacement field; passing it as an
    # extra positional argument to format() is silently ignored.
    print("Inferece time = {:.4f} s".format(inf_time))
    print("FPS = {:.1f} ".format(fps))

    labels, boxes, scores = output
    im = reader.pil_img
    draw = ImageDraw.Draw(im)
    thrh = 0.6
    for i in range(img.shape[0]):
        keep = scores[i] > thrh
        # Pair each kept box with its own label; indexing labels by the image
        # index would repeat one label for every box.
        for label, b in zip(labels[i][keep], boxes[i][keep]):
            coords = b.tolist()
            draw.rectangle(coords, outline='red', )
            draw.text((coords[0], coords[1]), text=str(label.item()), fill='blue', )

    # with_name keeps the parent directory, so a bare filename stays in the
    # current directory instead of becoming '/name'.
    new_file_path = img_path.with_name(img_path.stem + '_torch' + img_path.suffix)
    print('new_file_path: ', new_file_path)
    im.save(new_file_path)


if __name__ == "__main__":
    main(get_argparser().parse_args())
- onnx inference code
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), '..'))
import torch
import onnxruntime as ort
from PIL import Image, ImageDraw, ImageFont
from torchvision.transforms import ToTensor
import argparse
import time
def main(args, ):
    """Run one image through the ONNX model, time it, and save an annotated copy.

    Args:
        args: Parsed command-line namespace providing ``img`` (image path)
            and ``model`` (ONNX file path).

    The annotated image is written next to the input with ``_onnx`` appended
    to the file stem.
    """
    # The context manager closes the file once the pixels have been converted.
    with Image.open(args.img) as source:
        im = source.convert('RGB').resize((640, 640))
    im_data = ToTensor()(im)[None]
    print(im_data.shape)
    # The image has already been resized, so the 640x640 size is passed as-is.
    size = torch.tensor([[640, 640]])

    sess = ort.InferenceSession(args.model)

    start_time = time.perf_counter()
    output = sess.run(
        output_names=None,
        input_feed={'images': im_data.numpy(), "orig_target_sizes": size.numpy()}
    )
    inf_time = time.perf_counter() - start_time
    fps = float(1 / inf_time)
    # The precision belongs inside the replacement field; passing it as an
    # extra positional argument to format() is silently ignored.
    print("Inferece time = {:.4f} s".format(inf_time))
    print("FPS = {:.1f} ".format(fps))

    labels, boxes, scores = output

    draw = ImageDraw.Draw(im)  # draws on the resized image
    thrh = 0.6
    for i in range(im_data.shape[0]):
        keep = scores[i] > thrh
        # Pair each kept box with its own label; indexing labels by the image
        # index would repeat one label for every box.
        for label, b in zip(labels[i][keep], boxes[i][keep]):
            coords = b.tolist()
            draw.rectangle(coords, outline='red',)
            draw.text((coords[0], coords[1]), text=str(label), fill='yellow', )

    # splitext keeps the directory part, so a bare filename stays in the
    # current directory instead of becoming '/name'.
    stem, ext = os.path.splitext(args.img)
    new_file_path = stem + '_onnx' + ext
    print('new_file_path: ', new_file_path)
    im.save(new_file_path)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--img', '-i', type=str, )
    parser.add_argument('--model', '-m', type=str, default='model.onnx')
    args = parser.parse_args()
    main(args)
Please check that your onnxruntime is using GPU.
# pip install onnxruntime-gpu
import onnxruntime as ort
print(ort.get_device())
Thank you for your prompt response. I tried your suggestion, but inference is now much slower than before. Please help me...
- add my code
# Fragment of the ONNX prediction script: relies on `args`, `im_data`, `size`,
# `ort` and `time` being defined by the surrounding code.
# Request the CUDA provider first, with the CPU provider listed after it.
providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
sess_options = ort.SessionOptions()
sess = ort.InferenceSession(args.model, sess_options=sess_options, providers=providers)
# Wall-clock timing of one single sess.run call.
# NOTE(review): being the first call on this session, it presumably includes
# one-time setup cost - confirm by timing several runs.
start_time = time.time()
output = sess.run(
# output_names=['labels', 'boxes', 'scores'],
output_names=None,
input_feed={'images': im_data.data.numpy(), "orig_target_sizes": size.data.numpy()}
)
end_time = time.time()
# inf_time = time.time() - start_time
inf_time = end_time - start_time
# FPS is the reciprocal of that single measurement.
fps = float(1/inf_time)
print("Inferece time = {:.4f} s".format(inf_time))
print("FPS = {:.2f} ".format(fps))
- result
python ./tools/predict_onnx.py -i ./images/D16030_196_Add00407.jpg
ort.get_device() GPU
torch.Size([1, 3, 640, 640])
Inferece time = 19.2355 s
FPS = 0.05
Now I found different providers have an effect on FPS. FPS increased by 2. ( 6.5 -> 8.5 ). But I'm not sure if this is right. How about paddle? paddle onnx is faster than pytorch onnx?
- code
# Fragment of the ONNX prediction script: relies on `args` and `ort` from the
# surrounding code.
# The CUDA provider is given an options dict setting "cudnn_conv_algo_search"
# to "DEFAULT"; the CPU provider is listed after it.
providers = [("CUDAExecutionProvider", {"cudnn_conv_algo_search": "DEFAULT"}), "CPUExecutionProvider"]
sess_options = ort.SessionOptions()
# NOTE(review): profiling is switched on for this session; presumably it adds
# overhead to the timed runs - confirm before trusting the FPS numbers.
sess_options.enable_profiling = True
sess = ort.InferenceSession(args.model, sess_options=sess_options, providers=providers)
- result
python ./tools/predict_onnx.py -i ./images/D16030_196_Add00407.jpg
ort.get_device() GPU
torch.Size([1, 3, 640, 640])
Inferece time = 0.1174 s
FPS = 8.52
start_time = time.time() output = sess.run( # output_names=['labels', 'boxes', 'scores'], output_names=None, input_feed={'images': im_data.data.numpy(), "orig_target_sizes": size.data.numpy()} ) end_time = time.time()
I think you can run this piece of code several times, then compute average time.
tic = time.time()
for _ in range(N):
# code
average_time = (time.time() - tic) / N
Thanks for your advice! I solved the problem. I modified the code, from one img to img directory. pytorch average FPS 23.45, onnx average FPS 28.32 for 10 images!
- pytorch
python ./tools/predict_pytorch.py -c ./configs/rtdetr/rtdetr_r101vd_6x_coco_custom.yml -w ../output/rtdetr_r101vd_6x_coco_custom/checkpoint0004.pth -i ./images/input
img_path: images/input/D16030_196_Add00407.jpg, inf_time: 0.1581, FPS: 6.32
new_file_path: images/output/D16030_196_Add00407_torch.jpg
================================================================================
Load PResNet101 state_dict
img_path: images/input/aihub3.jpg, inf_time: 0.0446, FPS: 22.40
new_file_path: images/output/aihub3_torch.jpg
================================================================================
Load PResNet101 state_dict
img_path: images/input/D16030_196_Add00407_1.jpg, inf_time: 0.0430, FPS: 23.26
new_file_path: images/output/D16030_196_Add00407_1_torch.jpg
================================================================================
.
.
All images count: 10
Average Inferece time = 0.0426 s
Average FPS = 23.45
- onnx
python ./tools/predict_onnx.py -i ./images/input/
img_path: ./images/input//D16030_196_Add00407.jpg, inf_time: 0.1257, FPS: 7.95
new_file_path: images/output/D16030_196_Add00407_onnx.jpg
================================================================================
img_path: ./images/input//aihub3.jpg, inf_time: 0.0415, FPS: 24.09
new_file_path: images/output/aihub3_onnx.jpg
================================================================================
img_path: ./images/input//D16030_196_Add00407_1.jpg, inf_time: 0.0414, FPS: 24.13
new_file_path: images/output/D16030_196_Add00407_1_onnx.jpg
================================================================================
img_path: ./images/input//ytb_SterlingT_Suwon_0_000044_1.jpg, inf_time: 0.0415, FPS: 24.12
new_file_path: images/output/ytb_SterlingT_Suwon_0_000044_1_onnx.jpg
================================================================================
img_path: ./images/input//aihub1.jpg, inf_time: 0.0354, FPS: 28.25
new_file_path: images/output/aihub1_onnx.jpg
================================================================================
img_path: ./images/input//aihub.jpg, inf_time: 0.0352, FPS: 28.44
new_file_path: images/output/aihub_onnx.jpg
.
.
All images count: 10
Average Inferece time = 0.0353 s
Average FPS = 28.32
@SoraJung Could you please send me the complete modified prediction codes again? I may need your codes. Thanks.
@SoraJung Could you please send me the complete modified prediction codes again? I may need your codes. Thanks.
It's final code. Check please ^__^
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), '..'))
import torch
import onnxruntime as ort
from PIL import Image, ImageDraw, ImageFont
from torchvision.transforms import ToTensor
import argparse
import time
from pathlib import Path
def read_img(img_path):
    """Open an image as 640x640 RGB and build the two model inputs.

    Returns:
        A tuple ``(im, im_data, size)``: the resized PIL image, its tensor
        with a leading batch axis, and a ``[[640, 640]]`` size tensor.
    """
    side = 640
    im = Image.open(img_path).convert('RGB').resize((side, side))
    # The size fed to the model is the fixed resized size, not the
    # original image dimensions.
    im_data = ToTensor()(im).unsqueeze(0)
    size = torch.tensor([[side, side]])
    return im, im_data, size
def createDirectory(directory):
    """Create ``directory`` (including missing parents) if it does not exist.

    Failures are reported on stdout instead of being raised, so the caller
    continues on a best-effort basis.
    """
    try:
        # exist_ok=True removes the race between testing os.path.exists and
        # creating the directory.
        os.makedirs(directory, exist_ok=True)
    except OSError:
        print("Error: Failed to create the directory.")
def main(args, ):
    """Run the ONNX model over every image under ``args.img`` and report timings.

    Args:
        args: Parsed command-line namespace providing ``img`` (directory that
            is walked recursively) and ``model`` (ONNX file path).

    For each image the per-run time and FPS are printed and an annotated copy
    is saved to an ``output`` directory that is a sibling of the image's
    directory. A summary with the average time and FPS is printed at the end.
    """
    print("ort.get_device()", ort.get_device())
    providers = [("CUDAExecutionProvider", {"cudnn_conv_algo_search": "DEFAULT"}), "CPUExecutionProvider"]
    sess_options = ort.SessionOptions()
    # NOTE(review): profiling is enabled here; presumably it adds overhead to
    # the timed runs - confirm before trusting the FPS numbers.
    sess_options.enable_profiling = True
    sess = ort.InferenceSession(args.model, sess_options=sess_options, providers=providers)

    # Image file extensions, compared case-insensitively.
    possible_img_extension = {'.jpg', '.jpeg', '.bmp', '.png'}
    img_path_list = []
    for root, _dirs, files in os.walk(args.img):
        for file_name in files:
            if os.path.splitext(file_name)[1].lower() in possible_img_extension:
                img_path_list.append(os.path.join(root, file_name))

    if not img_path_list:
        # Nothing to average; avoid dividing by zero below.
        print('All images count: 0')
        return

    all_inf_time = []
    thrh = 0.6
    for img_path in img_path_list:
        im, im_data, size = read_img(img_path)

        tic = time.perf_counter()
        output = sess.run(
            output_names=None,
            input_feed={'images': im_data.numpy(), "orig_target_sizes": size.numpy()}
        )
        inf_time = time.perf_counter() - tic
        fps = float(1 / inf_time)
        print('img_path: {}, inf_time: {:.4f}, FPS: {:.2f}'.format(img_path, inf_time, fps))
        all_inf_time.append(inf_time)

        labels, boxes, scores = output

        draw = ImageDraw.Draw(im)  # draws on the resized image
        for i in range(im_data.shape[0]):
            keep = scores[i] > thrh
            # Pair each kept box with its own label; indexing labels by the
            # image index would repeat one label for every box.
            for label, b in zip(labels[i][keep], boxes[i][keep]):
                coords = b.tolist()
                draw.rectangle(coords, outline='red',)
                draw.text((coords[0], coords[1]), text=str(label), fill='yellow', )

        # Save the annotated image into <image dir>/../output.
        source_path = Path(img_path)
        file_dir = source_path.parent.parent / 'output'
        createDirectory(file_dir)
        new_file_path = file_dir / (source_path.stem + '_onnx' + source_path.suffix)
        print('new_file_path: ', new_file_path)
        print("================================================================================")
        im.save(new_file_path)

    avr_time = sum(all_inf_time) / len(all_inf_time)
    avr_fps = float(1 / avr_time)
    print('All images count: {}'.format(len(img_path_list)))
    # Report the averages, not the last image's inf_time/fps.
    print("Average Inferece time = {:.4f} s".format(avr_time))
    print("Average FPS = {:.2f} ".format(avr_fps))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--img', '-i', type=str, )  # dir
    parser.add_argument('--model', '-m', type=str, default='model.onnx')
    args = parser.parse_args()
    main(args)
root@a306fea1701c:/RT-DETR/rtdetr_pytorch# python tools/predict_onnx.py -i configs/dataset/opixray/train/train_image/ -m model.onnx
ort.get_device() GPU
img_path: configs/dataset/opixray/train/train_image//009000.jpg, inf_time: 0.4232, FPS: 2.36
img_path: configs/dataset/opixray/train/train_image//009002.jpg, inf_time: 0.1311, FPS: 7.63
img_path: configs/dataset/opixray/train/train_image//009003.jpg, inf_time: 0.0675, FPS: 14.82
img_path: configs/dataset/opixray/train/train_image//009004.jpg, inf_time: 0.1354, FPS: 7.38
img_path: configs/dataset/opixray/train/train_image//009005.jpg, inf_time: 0.0704, FPS: 14.21
img_path: configs/dataset/opixray/train/train_image//009007.jpg, inf_time: 0.1265, FPS: 7.91
img_path: configs/dataset/opixray/train/train_image//009008.jpg, inf_time: 0.1261, FPS: 7.93
img_path: configs/dataset/opixray/train/train_image//009009.jpg, inf_time: 0.0719, FPS: 13.92
img_path: configs/dataset/opixray/train/train_image//009012.jpg, inf_time: 0.1228, FPS: 8.14
img_path: configs/dataset/opixray/train/train_image//009013.jpg, inf_time: 0.1387, FPS: 7.21
img_path: configs/dataset/opixray/train/train_image//009014.jpg, inf_time: 0.0662, FPS: 15.11
img_path: configs/dataset/opixray/train/train_image//009016.jpg, inf_time: 0.1388, FPS: 7.20
img_path: configs/dataset/opixray/train/train_image//009017.jpg, inf_time: 0.1059, FPS: 9.44
img_path: configs/dataset/opixray/train/train_image//009018.jpg, inf_time: 0.1300, FPS: 7.6
Why is it that, when I use predict_onnx.py to calculate the FPS on my dataset, the FPS is unstable and lower than the benchmark?
root@a306fea1701c:/RT-DETR/rtdetr_pytorch# python tools/predict_onnx.py -i configs/dataset/opixray/train/train_image/ -m model.onnx ort.get_device() GPU img_path: configs/dataset/opixray/train/train_image//009000.jpg, inf_time: 0.4232, FPS: 2.36 img_path: configs/dataset/opixray/train/train_image//009002.jpg, inf_time: 0.1311, FPS: 7.63 img_path: configs/dataset/opixray/train/train_image//009003.jpg, inf_time: 0.0675, FPS: 14.82 img_path: configs/dataset/opixray/train/train_image//009004.jpg, inf_time: 0.1354, FPS: 7.38 img_path: configs/dataset/opixray/train/train_image//009005.jpg, inf_time: 0.0704, FPS: 14.21 img_path: configs/dataset/opixray/train/train_image//009007.jpg, inf_time: 0.1265, FPS: 7.91 img_path: configs/dataset/opixray/train/train_image//009008.jpg, inf_time: 0.1261, FPS: 7.93 img_path: configs/dataset/opixray/train/train_image//009009.jpg, inf_time: 0.0719, FPS: 13.92 img_path: configs/dataset/opixray/train/train_image//009012.jpg, inf_time: 0.1228, FPS: 8.14 img_path: configs/dataset/opixray/train/train_image//009013.jpg, inf_time: 0.1387, FPS: 7.21 img_path: configs/dataset/opixray/train/train_image//009014.jpg, inf_time: 0.0662, FPS: 15.11 img_path: configs/dataset/opixray/train/train_image//009016.jpg, inf_time: 0.1388, FPS: 7.20 img_path: configs/dataset/opixray/train/train_image//009017.jpg, inf_time: 0.1059, FPS: 9.44 img_path: configs/dataset/opixray/train/train_image//009018.jpg, inf_time: 0.1300, FPS: 7.6Why I use the predict_onnx.py to cal my fps on my dataset ,the fps is unstable and lower compared with the benchmark ?
@xyb1314 You are using a GPU, which needs a longer warm-up time to load data. When comparing with the benchmark, try to use the same environment and device settings. I would suggest installing Paddle and converting to TRT to get the real FPS. ONNX is always confusing; its internal ops are not optimized.
Hello @SoraJung and @lyuwenyu,
I used the inference_onnx code provided above to calculate the inference time. I used the rtdetrv2_onnxruntime.py as reference and added the inference time calculation part in the file for inference on batch of images in a provided image directory. The code is as follows:
import torch
import torchvision.transforms as T
import numpy as np
import onnxruntime as ort
from PIL import Image, ImageDraw, ImageFont
import time
import os
import glob
# Disable CPU affinity if needed (for ONNX performance on CPU)
os.environ["ORT_DISABLE_CPU_AFFINITY"] = "1"
# Maps the integer class ids looked up by draw() to display names; ids that
# are missing here are rendered as "Unknown(<id>)" by that function.
# NOTE(review): the names look like a drone/traffic dataset label set -
# confirm they match the classes the exported model was trained on.
label_dict = {
1: "pedestrian",
2: "people",
3: "bicycle",
4: "car",
5: "van",
6: "truck",
7: "tricycle",
8: "awning-tricycle",
9: "bus",
10: "motor",
11: "others",
}
def draw(images, labels, boxes, scores, thrh=0.6, output_filename=None):
    """Overlay detections scoring above ``thrh`` on each image.

    Each kept box is outlined in red and captioned with its class name (from
    ``label_dict``) and rounded score. When ``output_filename`` is given,
    every image is saved to that path after it has been annotated.
    """
    for idx, image in enumerate(images):
        canvas = ImageDraw.Draw(image)

        # One mask per image selects the detections above the threshold.
        keep = scores[idx] > thrh
        kept_labels = labels[idx][keep]
        kept_boxes = boxes[idx][keep]
        kept_scores = scores[idx][keep]

        for k, rect in enumerate(kept_boxes):
            class_id = kept_labels[k].item()
            class_name = label_dict.get(class_id, f"Unknown({class_id})")
            canvas.rectangle(list(rect), outline='red')
            canvas.text(
                (rect[0], rect[1]),
                text=f"{class_name}: {round(kept_scores[k].item(), 2)}",
                fill='blue',
            )

        if output_filename:
            image.save(output_filename)
def main(args):
    """Load the ONNX model, run it on every image in a folder, and report timings.

    Args:
        args: Parsed command-line namespace providing ``onnx_file``,
            ``images_folder`` and ``output_dir``.

    For each image the per-run time and FPS are printed and an annotated copy
    is written to ``output_dir``. A summary with the average inference time
    and FPS is printed at the end.
    """
    # Request CUDA explicitly when this onnxruntime build offers it; the CPU
    # provider is kept as the fallback.
    available = ort.get_available_providers()
    providers = [
        name for name in ("CUDAExecutionProvider", "CPUExecutionProvider")
        if name in available
    ]
    sess = ort.InferenceSession(args.onnx_file, providers=providers)
    print("***************Summary*******************")
    print("DEVICE USED: ", ort.get_device())

    # Collect .jpg/.jpeg/.png files (case-insensitive). The earlier glob
    # pattern '*.[jp][pn][g]*' did not match '.jpeg'.
    image_exts = {'.jpg', '.jpeg', '.png'}
    image_files = sorted(
        path for path in glob.glob(os.path.join(args.images_folder, '*'))
        if os.path.splitext(path)[1].lower() in image_exts
    )
    total_images = len(image_files)

    # Ensure the output directory exists
    os.makedirs(args.output_dir, exist_ok=True)

    if total_images == 0:
        # Nothing to average; avoid dividing by zero below.
        print('All images count: 0')
        print("******************************************************")
        return

    # The preprocessing pipeline is the same for every image, so build it once.
    transforms = T.Compose([T.Resize((640, 640)), T.ToTensor()])

    all_inf_time = []
    for image_path in image_files:
        im_pil = Image.open(image_path).convert('RGB')
        w, h = im_pil.size
        orig_size = torch.tensor([w, h])[None]
        im_data = transforms(im_pil)[None]

        # Time only the sess.run call; loading, preprocessing and drawing
        # are excluded.
        tic = time.perf_counter()
        output = sess.run(
            output_names=['labels', 'boxes', 'scores'],
            input_feed={'images': im_data.numpy(), "orig_target_sizes": orig_size.numpy()}
        )
        inf_time = time.perf_counter() - tic
        fps = float(1 / inf_time)
        all_inf_time.append(inf_time)
        print('img_path: {}, inf_time: {:.4f}, FPS: {:.2f}'.format(image_path, inf_time, fps))

        labels, boxes, scores = output

        # Output name is derived from the input file name.
        base_filename = os.path.basename(image_path)
        output_filename = os.path.join(args.output_dir, f'{os.path.splitext(base_filename)[0]}_inference.jpg')

        draw([im_pil], labels, boxes, scores, output_filename=output_filename)

    # Summary of inference
    total_time = sum(all_inf_time)
    avr_time = total_time / total_images
    avr_fps = total_images / total_time
    print('All images count: {}'.format(total_images))
    print("Average Inference time: {:.4f}s".format(avr_time))
    print("Average FPS: {:.2f} ".format(avr_fps))
    print("******************************************************")


if __name__ == '__main__':
    # Parse command-line arguments
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--onnx-file', type=str, required=True, help='Path to ONNX model file')
    parser.add_argument('--images-folder', type=str, required=True, help='Path to folder containing images')
    parser.add_argument('--output-dir', type=str, required=True, help='Path to folder where output images will be saved')
    args = parser.parse_args()
    # Run the main function
    main(args)
It runs sucessfully, However the inference time is higher in milliseconds and FPS is very low. The result snippet is as follows.
Could you please review the code and see what the issue with the code?
Your help is appreciated.
Regards, Bijay
Thanks for your advice! I solved the problem. I modified the code, from one img to img directory. pytorch average FPS 23.45, onnx average FPS 28.32 for 10 images!
- pytorch
python ./tools/predict_pytorch.py -c ./configs/rtdetr/rtdetr_r101vd_6x_coco_custom.yml -w ../output/rtdetr_r101vd_6x_coco_custom/checkpoint0004.pth -i ./images/input
img_path: images/input/D16030_196_Add00407.jpg, inf_time: 0.1581, FPS: 6.32 new_file_path: images/output/D16030_196_Add00407_torch.jpg
Load PResNet101 state_dict img_path: images/input/aihub3.jpg, inf_time: 0.0446, FPS: 22.40 new_file_path: images/output/aihub3_torch.jpg
Load PResNet101 state_dict img_path: images/input/D16030_196_Add00407_1.jpg, inf_time: 0.0430, FPS: 23.26 new_file_path: images/output/D16030_196_Add00407_1_torch.jpg
. . All images count: 10 Average Inferece time = 0.0426 s Average FPS = 23.45 2. onnx
python ./tools/predict_onnx.py -i ./images/input/
img_path: ./images/input//D16030_196_Add00407.jpg, inf_time: 0.1257, FPS: 7.95 new_file_path: images/output/D16030_196_Add00407_onnx.jpg
img_path: ./images/input//aihub3.jpg, inf_time: 0.0415, FPS: 24.09 new_file_path: images/output/aihub3_onnx.jpg
img_path: ./images/input//D16030_196_Add00407_1.jpg, inf_time: 0.0414, FPS: 24.13 new_file_path: images/output/D16030_196_Add00407_1_onnx.jpg
img_path: ./images/input//ytb_SterlingT_Suwon_0_000044_1.jpg, inf_time: 0.0415, FPS: 24.12 new_file_path: images/output/ytb_SterlingT_Suwon_0_000044_1_onnx.jpg
img_path: ./images/input//aihub1.jpg, inf_time: 0.0354, FPS: 28.25 new_file_path: images/output/aihub1_onnx.jpg
img_path: ./images/input//aihub.jpg, inf_time: 0.0352, FPS: 28.44 new_file_path: images/output/aihub_onnx.jpg . . All images count: 10 Average Inferece time = 0.0353 s Average FPS = 28.32
@SoraJung Could you please send me the complete modified prediction codes again? I may need your codes. Thanks.
It's final code. Check please ^__^
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), '..'))
import torch
import onnxruntime as ort
from PIL import Image, ImageDraw, ImageFont
from torchvision.transforms import ToTensor
import argparse
import time
from pathlib import Path
def read_img(img_path):
    """Open an image as 640x640 RGB and build the two model inputs.

    Returns:
        A tuple ``(im, im_data, size)``: the resized PIL image, its tensor
        with a leading batch axis, and a ``[[640, 640]]`` size tensor.
    """
    side = 640
    im = Image.open(img_path).convert('RGB').resize((side, side))
    # The size fed to the model is the fixed resized size, not the
    # original image dimensions.
    im_data = ToTensor()(im).unsqueeze(0)
    size = torch.tensor([[side, side]])
    return im, im_data, size
def createDirectory(directory):
    """Create ``directory`` (including missing parents) if it does not exist.

    Failures are reported on stdout instead of being raised, so the caller
    continues on a best-effort basis.
    """
    try:
        # exist_ok=True removes the race between testing os.path.exists and
        # creating the directory.
        os.makedirs(directory, exist_ok=True)
    except OSError:
        print("Error: Failed to create the directory.")
def main(args, ):
    """Run the ONNX model over every image under ``args.img`` and report timings.

    Args:
        args: Parsed command-line namespace providing ``img`` (directory that
            is walked recursively) and ``model`` (ONNX file path).

    For each image the per-run time and FPS are printed and an annotated copy
    is saved to an ``output`` directory that is a sibling of the image's
    directory. A summary with the average time and FPS is printed at the end.
    """
    print("ort.get_device()", ort.get_device())
    providers = [("CUDAExecutionProvider", {"cudnn_conv_algo_search": "DEFAULT"}), "CPUExecutionProvider"]
    sess_options = ort.SessionOptions()
    # NOTE(review): profiling is enabled here; presumably it adds overhead to
    # the timed runs - confirm before trusting the FPS numbers.
    sess_options.enable_profiling = True
    sess = ort.InferenceSession(args.model, sess_options=sess_options, providers=providers)

    # Image file extensions, compared case-insensitively.
    possible_img_extension = {'.jpg', '.jpeg', '.bmp', '.png'}
    img_path_list = []
    for root, _dirs, files in os.walk(args.img):
        for file_name in files:
            if os.path.splitext(file_name)[1].lower() in possible_img_extension:
                img_path_list.append(os.path.join(root, file_name))

    if not img_path_list:
        # Nothing to average; avoid dividing by zero below.
        print('All images count: 0')
        return

    all_inf_time = []
    thrh = 0.6
    for img_path in img_path_list:
        im, im_data, size = read_img(img_path)

        tic = time.perf_counter()
        output = sess.run(
            output_names=None,
            input_feed={'images': im_data.numpy(), "orig_target_sizes": size.numpy()}
        )
        inf_time = time.perf_counter() - tic
        fps = float(1 / inf_time)
        print('img_path: {}, inf_time: {:.4f}, FPS: {:.2f}'.format(img_path, inf_time, fps))
        all_inf_time.append(inf_time)

        labels, boxes, scores = output

        draw = ImageDraw.Draw(im)  # draws on the resized image
        for i in range(im_data.shape[0]):
            keep = scores[i] > thrh
            # Pair each kept box with its own label; indexing labels by the
            # image index would repeat one label for every box.
            for label, b in zip(labels[i][keep], boxes[i][keep]):
                coords = b.tolist()
                draw.rectangle(coords, outline='red',)
                draw.text((coords[0], coords[1]), text=str(label), fill='yellow', )

        # Save the annotated image into <image dir>/../output.
        source_path = Path(img_path)
        file_dir = source_path.parent.parent / 'output'
        createDirectory(file_dir)
        new_file_path = file_dir / (source_path.stem + '_onnx' + source_path.suffix)
        print('new_file_path: ', new_file_path)
        print("================================================================================")
        im.save(new_file_path)

    avr_time = sum(all_inf_time) / len(all_inf_time)
    avr_fps = float(1 / avr_time)
    print('All images count: {}'.format(len(img_path_list)))
    # Report the averages, not the last image's inf_time/fps.
    print("Average Inferece time = {:.4f} s".format(avr_time))
    print("Average FPS = {:.2f} ".format(avr_fps))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--img', '-i', type=str, )  # dir
    parser.add_argument('--model', '-m', type=str, default='model.onnx')
    args = parser.parse_args()
    main(args)
Can you share the pytorch version infernce code with us?Thanks!
Thanks for your advice! I solved the problem. I modified the code, from one img to img directory. pytorch average FPS 23.45, onnx average FPS 28.32 for 10 images!
- pytorch
python ./tools/predict_pytorch.py -c ./configs/rtdetr/rtdetr_r101vd_6x_coco_custom.yml -w ../output/rtdetr_r101vd_6x_coco_custom/checkpoint0004.pth -i ./images/input
img_path: images/input/D16030_196_Add00407.jpg, inf_time: 0.1581, FPS: 6.32 new_file_path: images/output/D16030_196_Add00407_torch.jpg
Load PResNet101 state_dict img_path: images/input/aihub3.jpg, inf_time: 0.0446, FPS: 22.40 new_file_path: images/output/aihub3_torch.jpg
Load PResNet101 state_dict img_path: images/input/D16030_196_Add00407_1.jpg, inf_time: 0.0430, FPS: 23.26 new_file_path: images/output/D16030_196_Add00407_1_torch.jpg
. . All images count: 10 Average Inferece time = 0.0426 s Average FPS = 23.45 2. onnx
python ./tools/predict_onnx.py -i ./images/input/
img_path: ./images/input//D16030_196_Add00407.jpg, inf_time: 0.1257, FPS: 7.95 new_file_path: images/output/D16030_196_Add00407_onnx.jpg
img_path: ./images/input//aihub3.jpg, inf_time: 0.0415, FPS: 24.09 new_file_path: images/output/aihub3_onnx.jpg
img_path: ./images/input//D16030_196_Add00407_1.jpg, inf_time: 0.0414, FPS: 24.13 new_file_path: images/output/D16030_196_Add00407_1_onnx.jpg
img_path: ./images/input//ytb_SterlingT_Suwon_0_000044_1.jpg, inf_time: 0.0415, FPS: 24.12 new_file_path: images/output/ytb_SterlingT_Suwon_0_000044_1_onnx.jpg
img_path: ./images/input//aihub1.jpg, inf_time: 0.0354, FPS: 28.25 new_file_path: images/output/aihub1_onnx.jpg
img_path: ./images/input//aihub.jpg, inf_time: 0.0352, FPS: 28.44 new_file_path: images/output/aihub_onnx.jpg . . All images count: 10 Average Inferece time = 0.0353 s Average FPS = 28.32
Could you please send me the complete modified prediction codes on pytorch version? I may need your codes. Thanks.