simpledet
simpledet copied to clipboard
Single image detection
How do I run end-to-end (e2e) detection on a single image? Is there a quick example?
I have the same question.
Have you solved it?
Have you solved it? I do single-image inference with the following code:
import os
from core.detection_module import DetModule
from core.detection_input import Loader
from utils.load_model import load_checkpoint
from six.moves import reduce
from six.moves.queue import Queue
from threading import Thread
import argparse
import importlib
import mxnet as mx
import numpy as np
import six.moves.cPickle as pkl
import json
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import time
from collections import namedtuple
import cv2
def parse_args(argv=None):
    """Parse the command line and import the detection config module.

    Args:
        argv: Optional list of argument strings. When ``None`` (the
            default), argparse falls back to ``sys.argv[1:]``.

    Returns:
        A ``(config, test)`` tuple: the module imported from ``--config``
        and the ``--test`` path string exactly as given.
    """
    parser = argparse.ArgumentParser(description='Test Detection')
    # general
    # Both options are mandatory: an omitted --config previously surfaced as
    # an AttributeError on None instead of a usage message.
    parser.add_argument('--config', help='config file path', type=str, required=True)
    parser.add_argument('--test', help='test file path', type=str, required=True)
    args = parser.parse_args(argv)
    # Turn a path such as "config/foo.py" into the dotted name "config.foo".
    # Only a trailing ".py" is dropped, so ".py" appearing elsewhere in the
    # path is left untouched.
    module_path = args.config
    if module_path.endswith('.py'):
        module_path = module_path[:-len('.py')]
    config = importlib.import_module(module_path.replace('/', '.'))
    return config, args.test
def read_img(img_path, short_side=800, long_side=2000):
    """Load an image from disk and resize it for the detector.

    The image is scaled by a single factor chosen so that its shorter side
    does not exceed ``short_side`` and its longer side does not exceed
    ``long_side``.

    Args:
        img_path: Path of the image file to read.
        short_side: Target length of the shorter image side (default 800).
        long_side: Upper bound for the longer image side (default 2000).

    Returns:
        A ``(image_ori, image, scale)`` tuple: the array returned by
        ``cv2.imread``, the resized image with channel order reversed and
        laid out as ``(1, C, H, W)``, and the resize factor that was applied.

    Raises:
        IOError: If ``cv2.imread`` returns ``None`` for ``img_path``.
    """
    image_ori = cv2.imread(img_path, cv2.IMREAD_COLOR)
    if image_ori is None:
        # Fail with the offending path instead of a TypeError on None below.
        raise IOError('cannot read image: %s' % img_path)
    # BGR2RGB
    image = image_ori[:, :, ::-1]
    print(image.shape)
    height, width = image.shape[:2]
    # float() keeps this a true division even where "/" on two ints truncates.
    scale = min(float(short_side) / min(height, width),
                float(long_side) / max(height, width))
    image = cv2.resize(image, None, None, scale, scale, interpolation=cv2.INTER_LINEAR)
    # HWC2CHW
    image = image.transpose((2, 0, 1))
    # Add the leading batch axis.
    image = np.expand_dims(image, axis=0)
    return image_ori, image, scale
def do_nms(all_output, thr):
    """Run per-class score filtering followed by NMS.

    Relies on two module-level names assigned by the ``__main__`` section
    of this script: ``nms`` (a callable applied to an ``(N, 5)`` float32
    array of ``x1, y1, x2, y2, score`` rows) and ``coco`` (an object whose
    ``getCatIds()`` maps a class column index to a category id).

    Args:
        all_output: Mapping with ``'bbox_xyxy'`` (boxes, one row per
            detection) and ``'cls_score'`` (scores, one column per class).
        thr: Scores must be strictly greater than this value to be kept.

    Returns:
        Dict mapping category id to whatever ``nms`` returns for that
        class. Classes with no score above ``thr`` are omitted.
    """
    boxes = all_output['bbox_xyxy']
    scores = all_output['cls_score']
    final_dets = {}
    for cid in range(scores.shape[1]):
        class_scores = scores[:, cid]
        keep = np.where(class_scores > thr)[0]
        if keep.shape[0] == 0:
            # Nothing survives the threshold for this class.
            continue
        score_column = class_scores[keep].reshape(-1, 1)
        det = np.concatenate((boxes[keep], score_column), axis=1).astype(np.float32)
        det = nms(det)
        category_id = coco.getCatIds()[cid]
        final_dets[category_id] = det
    return final_dets
if __name__ == "__main__":
    # Single-image inference script: builds the test symbol from the config,
    # binds a DetModule for batch size 1, then runs every image path listed
    # (one per line) in the --test file and prints the surviving detections.
    os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"] = "0"
    config, testfile = parse_args()
    pGen, pKv, pRpn, pRoi, pBbox, pDataset, pModel, pOpt, pTest, \
        transform, data_name, label_name, metric_list = config.get_config(is_train=False)
    sym = pModel.test_symbol
    sym.save(pTest.model.prefix + "_test.json")
    # NOTE: `coco` and `nms` must remain module-level names because do_nms()
    # reads them as globals.
    coco = COCO(pTest.coco.annotation)
    ctx = mx.gpu(0)
    arg_params, aux_params = load_checkpoint(pTest.model.prefix, pTest.model.epoch)
    mod = DetModule(sym, data_names=['data', 'im_info', 'im_id', 'rec_id'], context=ctx)
    mod.bind(data_shapes=[('data', (1, 3, 600, 899)), ('im_info', (1, 3)),
                          ('im_id', (1,)), ('rec_id', (1,))], for_training=False)
    # The weights are the same for every image, so load them once here
    # rather than on each loop iteration.
    mod.set_params(arg_params, aux_params, allow_extra=False)
    if callable(pTest.nms.type):
        nms = pTest.nms.type(pTest.nms.thr)
    else:
        from operator_py.nms import py_nms_wrapper
        nms = py_nms_wrapper(pTest.nms.thr)
    # Score threshold used both before and after NMS.
    thr = 0.5
    with open(testfile, 'r') as rf:
        for line in rf:
            img_path = line.strip()
            if not img_path:
                # Skip blank lines (e.g. a trailing newline at end of file).
                continue
            all_outputs = []
            im_ori, im_data, scale = read_img(img_path)
            h, w = im_data.shape[-2:]
            print(h, w, scale)
            im_info, im_id, rec_id = [(h, w, scale)], [1], [1]
            data = mx.io.DataBatch(data=[mx.nd.array(im_data), mx.nd.array(im_info),
                                         mx.nd.array(im_id), mx.nd.array(rec_id)])
            mod.forward(data, is_train=False)
            output = [x.asnumpy() for x in mod.get_outputs()]
            # `out_id` instead of `id` so the builtin is not shadowed.
            rid, out_id, info, cls, box = [x.squeeze() for x in output]
            cls = cls[:, 1:]  # remove background
            # Map boxes back to the coordinates of the original image.
            box = box / scale
            output_record = dict(
                rec_id=rid,
                im_id=out_id,
                im_info=info,
                bbox_xyxy=box,
                cls_score=cls
            )
            all_outputs.append(output_record)
            all_outputs = pTest.process_output(all_outputs, None)
            final_result = do_nms(all_outputs[0], thr)
            for cid, bbox in final_result.items():
                idx = np.where(bbox[:, -1] > thr)[0]
                for i in idx:
                    final_box = bbox[i][:4]
                    score = bbox[i][-1]
                    print("cls:%s bbox:%s score:%s" % (cid, final_box, score))
raise MXNetError(py_str(_LIB.MXGetLastError())) mxnet.base.MXNetError: [17:38:51] include/mxnet/tuple.h:202: Check failed: i >= 0 && i < ndim(): index = 0 must be in range [0, -1)
Here is a Detector class I wrote based on the code above:
import os, argparse
import importlib
import json
import time
import cv2
import numpy as np
import mxnet as mx
from core.detection_module import DetModule
from utils.load_model import load_checkpoint
# Class-name lookup table with 80 entries. TDNDetector.__do_nms indexes it
# with the column index of `cls_score` (taken after the background column
# has been dropped), so the order here must match the model's class order.
# NOTE(review): presumably the standard COCO category order - confirm
# against the dataset the checkpoint was trained on.
coco = (
'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
)
class TDNDetector:
    """Single-image detector built from a detection config module.

    Construction imports the config, loads the checkpoint it names, binds a
    ``DetModule`` for batch size 1 and saves the test symbol to
    ``outFolder``. Calling the instance with an image path returns the
    detections whose score exceeds ``threshold``.
    """

    def __init__(self, configFn, ctx, outFolder, threshold):
        """Set up the network.

        Args:
            configFn: Path of the config file; ``.py`` is removed and ``/``
                is turned into ``.`` to obtain an importable module name.
            ctx: MXNet context the module is bound to.
            outFolder: Directory that receives the saved test symbol.
            threshold: Score threshold applied before and after NMS.
        """
        os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"] = "0"
        module_name = configFn.replace('.py', '').replace('/', '.')
        config = importlib.import_module(module_name)
        # get_config returns 13 values; only the model and test sections
        # are used here.
        (_, _, _, _, _, _, p_model, _, p_test, _, _, _, _) = \
            config.get_config(is_train=False)
        self.__pModel = p_model
        self.__pTest = p_test

        # Use the configured NMS factory when it is callable, otherwise
        # fall back to the Python implementation.
        if callable(self.__pTest.nms.type):
            nms_factory = self.__pTest.nms.type
        else:
            from operator_py.nms import py_nms_wrapper
            nms_factory = py_nms_wrapper
        self.__nms = nms_factory(self.__pTest.nms.thr)

        arg_params, aux_params = load_checkpoint(self.__pTest.model.prefix,
                                                 self.__pTest.model.epoch)
        sym = self.__pModel.test_symbol
        input_names = ['data', 'im_info', 'im_id', 'rec_id']
        input_shapes = [('data', (1, 3, 600, 899)),
                        ('im_info', (1, 3)),
                        ('im_id', (1,)),
                        ('rec_id', (1,))]
        self.__mod = DetModule(sym, data_names=input_names, context=ctx)
        self.__mod.bind(data_shapes=input_shapes, for_training=False)
        self.__mod.set_params(arg_params, aux_params, allow_extra=False)

        checkpoint_name = self.__pTest.model.prefix.split('/')[-1]
        self.__saveSymbol(sym, outFolder, checkpoint_name)
        self.__threshold = threshold

    def __call__(self, imgFilename):
        """Detect objects in one image.

        Args:
            imgFilename: Path of the image file.

        Returns:
            ``(detections, img)`` where ``detections`` is a list of dicts
            with keys ``'cls'``, ``'box'`` and ``'score'`` and ``img`` is
            the image as loaded by OpenCV; ``(None, None)`` when the image
            could not be read.
        """
        img, im_data, scale = self.__readImg(imgFilename)
        if img is None:
            return None, None

        h, w = im_data.shape[-2:]
        net_inputs = (im_data, [(h, w, scale)], [1], [1])
        data = mx.io.DataBatch(data=[mx.nd.array(item) for item in net_inputs])
        self.__mod.forward(data, is_train=False)

        # Each output is a per-context list; take the first context.
        outputs = self.__mod.get_outputs(merge_multi_context=False)
        rid, img_id, info, cls, box = [out[0].asnumpy().squeeze() for out in outputs]
        record = dict(rec_id=rid,
                      im_id=img_id,
                      im_info=info,
                      bbox_xyxy=box / scale,    # undo the resize
                      cls_score=cls[:, 1:])     # remove background
        record = self.__pTest.process_output([record], None)[0]
        per_class = self.__do_nms(record)

        # Flatten the per-class arrays into a list of plain records.
        detections = []
        for name, dets in per_class.items():
            for i in np.where(dets[:, -1] > self.__threshold)[0]:
                row = dets[i]
                detections.append({'cls': name, 'box': row[:4], 'score': row[-1]})
        return detections, img

    def __do_nms(self, all_output):
        """Filter each class by score and apply NMS.

        Returns a dict mapping class name (from the module-level ``coco``
        tuple) to the NMS result; classes with no score above the
        threshold are left out.
        """
        boxes = all_output['bbox_xyxy']
        scores = all_output['cls_score']
        final_dets = {}
        for cid in range(scores.shape[1]):
            class_scores = scores[:, cid]
            keep = np.where(class_scores > self.__threshold)[0]
            if keep.shape[0] == 0:
                continue
            score_column = class_scores[keep].reshape(-1, 1)
            det = np.concatenate((boxes[keep], score_column), axis=1).astype(np.float32)
            det = self.__nms(det)
            final_dets[coco[cid]] = det
        return final_dets

    def __readImg(self, imgFilename):
        """Read and resize an image.

        Returns ``(image_ori, image, scale)``, or three ``None`` values
        when OpenCV cannot read the file. ``image`` has its channel order
        reversed and is laid out as ``(1, C, H, W)``.
        """
        image_ori = cv2.imread(imgFilename, cv2.IMREAD_COLOR)
        if image_ori is None:
            return None, None, None
        # BGR2RGB
        image = image_ori[:, :, ::-1]
        target_short, target_long = 800, 2000
        height, width = image.shape[:2]
        scale = min(target_short / min(height, width),
                    target_long / max(height, width))
        image = cv2.resize(image, None, None, scale, scale, interpolation=cv2.INTER_LINEAR)
        # HWC2CHW, then add the batch axis
        image = np.expand_dims(image.transpose((2, 0, 1)), axis=0)
        return image_ori, image, scale

    def __saveSymbol(self, sym, outFolder, fnPrefix):
        """Save ``sym`` as ``<outFolder>/<fnPrefix>_symbol_test.json``."""
        if not os.path.exists(outFolder):
            os.makedirs(outFolder)
        sym.save(os.path.join(outFolder, fnPrefix + "_symbol_test.json"))
Use it as follows:
def parse_args(argv=None):
    """Parse the detector's command-line options.

    Args:
        argv: Optional list of argument strings. When ``None`` (the
            default), argparse falls back to ``sys.argv[1:]``; passing a
            list allows the parser to be driven programmatically.

    Returns:
        An ``argparse.Namespace`` with ``config``, ``ctx``, ``inputs``,
        ``output`` and ``threshold`` attributes.
    """
    parser = argparse.ArgumentParser(description='Test Detection')
    parser.add_argument('--config', type=str, required=True, help='config file path')
    parser.add_argument('--ctx', type=int, default=0, help='GPU index. Set negative value to use CPU')
    parser.add_argument('--inputs', type=str, nargs='+', required=True, help='File(-s) to test')
    parser.add_argument('--output', type=str, default='results', help='Where to store results')
    parser.add_argument('--threshold', type=float, default=0.5, help='Detector threshold')
    return parser.parse_args(argv)
if __name__ == "__main__":
    # Run the detector over every file passed via --inputs.
    args = parse_args()
    # A negative --ctx selects the CPU. The context comes from mxnet; the
    # parsed Namespace has no `cpu` attribute.
    ctx = mx.gpu(args.ctx) if args.ctx >= 0 else mx.cpu()
    imgFilenames = args.inputs
    detector = TDNDetector(args.config, ctx, args.output, args.threshold)
    for imgFilename in imgFilenames:
        # dets is None when the image could not be read.
        dets, img = detector(imgFilename)
I made the above code more robust:
import os, argparse
import importlib
import json
import time
import cv2
import numpy as np
import mxnet as mx
from core.detection_module import DetModule
from utils.load_model import load_checkpoint
from utils.patch_config import patch_config_as_nothrow
# Class-name lookup table with 80 entries. TDNDetector.__do_nms indexes it
# with the column index of `cls_score` (taken after the background column
# has been dropped), so the order here must match the model's class order.
# NOTE(review): presumably the standard COCO category order - confirm
# against the dataset the checkpoint was trained on.
coco = (
'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
)
class TDNDetector:
    """Single-image detector that reuses the config's test-time transforms.

    Construction imports the config, loads the checkpoint it names, folds
    batch-norm via ``merge_bn``, binds a ``DetModule`` for batch size 1 and
    saves the resulting symbol to ``outFolder``. Calling the instance with
    an image path returns the detections whose score exceeds ``threshold``.
    """

    def __init__(self, configFn, ctx, outFolder, threshold):
        """Set up the network.

        Args:
            configFn: Path of the config file; ``.py`` is removed and ``/``
                is turned into ``.`` to obtain an importable module name.
            ctx: MXNet context the module is bound to.
            outFolder: Directory that receives the saved test symbol.
            threshold: Score threshold applied before and after NMS.
        """
        os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"] = "0"
        config = importlib.import_module(configFn.replace('.py', '').replace('/', '.'))
        # get_config returns 13 values; only the model section, the test
        # section and the transform list are used here.
        _, _, _, _, _, _, self.__pModel, _, self.__pTest, self.transform, _, _, _ = \
            config.get_config(is_train=False)
        self.__pModel = patch_config_as_nothrow(self.__pModel)
        self.__pTest = patch_config_as_nothrow(self.__pTest)
        # (short side, long side); used for the bind shape and for the
        # scale estimate in __readImg.
        self.resizeParam = (800, 1200)
        if callable(self.__pTest.nms.type):
            self.__nms = self.__pTest.nms.type(self.__pTest.nms.thr)
        else:
            from operator_py.nms import py_nms_wrapper
            self.__nms = py_nms_wrapper(self.__pTest.nms.thr)
        arg_params, aux_params = load_checkpoint(self.__pTest.model.prefix, self.__pTest.model.epoch)
        sym = self.__pModel.test_symbol
        from utils.graph_optimize import merge_bn
        sym, arg_params, aux_params = merge_bn(sym, arg_params, aux_params)
        self.__mod = DetModule(sym, data_names=['data', 'im_info', 'im_id', 'rec_id'], context=ctx)
        self.__mod.bind(data_shapes=[('data', (1, 3, self.resizeParam[0], self.resizeParam[1])),
                                     ('im_info', (1, 3)),
                                     ('im_id', (1,)),
                                     ('rec_id', (1,))], for_training=False)
        self.__mod.set_params(arg_params, aux_params, allow_extra=False)
        self.__saveSymbol(sym, outFolder, self.__pTest.model.prefix.split('/')[-1])
        self.__threshold = threshold

    def __call__(self, imgFilename):  # detect onto image
        """Detect objects in one image.

        Args:
            imgFilename: Path of the image file.

        Returns:
            ``(detections, None)`` where ``detections`` is a list of dicts
            with keys ``'cls'``, ``'box'`` and ``'score'``;
            ``(None, None)`` when the image could not be read.
        """
        roi_record, scale = self.__readImg(imgFilename)
        if roi_record is None:
            # Unreadable image: report "no result" instead of crashing.
            return None, None
        h, w = roi_record['data'][0].shape
        # Stack the first three channel planes into a (1, 3, h, w) batch.
        im_data = np.concatenate(
            [roi_record['data'][c].reshape(1, 1, h, w) for c in range(3)], axis=1)
        im_info, im_id, rec_id = [(h, w, scale)], [1], [1]
        data = mx.io.DataBatch(data=[mx.nd.array(im_data),
                                     mx.nd.array(im_info),
                                     mx.nd.array(im_id),
                                     mx.nd.array(rec_id)])
        self.__mod.forward(data, is_train=False)
        # extract results (each output is a per-context list; take the first)
        outputs = self.__mod.get_outputs(merge_multi_context=False)
        # `out_id` instead of `id` so the builtin is not shadowed.
        rid, out_id, info, cls, box = [x[0].asnumpy().squeeze() for x in outputs]
        cls = cls[:, 1:]  # remove background
        box = box / scale
        output_record = dict(rec_id=rid, im_id=out_id, im_info=info, bbox_xyxy=box, cls_score=cls)
        output_record = self.__pTest.process_output([output_record], None)[0]
        final_result = self.__do_nms(output_record)
        # obtain representable output
        detections = []
        for cid, bbox in final_result.items():
            idx = np.where(bbox[:, -1] > self.__threshold)[0]
            for i in idx:
                final_box = bbox[i][:4]
                score = bbox[i][-1]
                detections.append({'cls': cid, 'box': final_box, 'score': score})
        return detections, None

    def __do_nms(self, all_output):
        """Filter each class by score and apply NMS.

        Returns a dict mapping class name (from the module-level ``coco``
        tuple) to the NMS result; classes with no score above the
        threshold are left out.
        """
        box = all_output['bbox_xyxy']
        score = all_output['cls_score']
        final_dets = {}
        for cid in range(score.shape[1]):
            score_cls = score[:, cid]
            valid_inds = np.where(score_cls > self.__threshold)[0]
            if valid_inds.shape[0] == 0:
                continue
            box_cls = box[valid_inds]
            score_cls = score_cls[valid_inds]
            det = np.concatenate((box_cls, score_cls.reshape(-1, 1)), axis=1).astype(np.float32)
            det = self.__nms(det)
            cls = coco[cid]
            final_dets[cls] = det
        return final_dets

    def __readImg(self, imgFilename):
        """Build a roi record for the image and run the config transforms.

        Returns ``(roi_record, scale)``, or ``(None, None)`` when OpenCV
        cannot read the file.
        """
        # The file is opened here only to obtain its size for the record.
        # NOTE(review): the transforms presumably load the pixels themselves
        # via 'image_url' - confirm against the transform implementations.
        img = cv2.imread(imgFilename, cv2.IMREAD_COLOR)
        if img is None:
            return None, None
        height, width = img.shape[:2]
        roi_record = {'gt_bbox': np.array([[0., 0., 0., 0.]]), 'gt_class': np.array([0])}
        roi_record['image_url'] = imgFilename
        roi_record['h'] = height
        roi_record['w'] = width
        for trans in self.transform:
            trans.apply(roi_record)
        img_shape = [roi_record['h'], roi_record['w']]
        shorts, longs = min(img_shape), max(img_shape)
        scale = min(self.resizeParam[0] / shorts, self.resizeParam[1] / longs)
        return roi_record, scale

    def __saveSymbol(self, sym, outFolder, fnPrefix):
        """Save ``sym`` as ``<outFolder>/<fnPrefix>_symbol_test.json``."""
        if not os.path.exists(outFolder):
            os.makedirs(outFolder)
        resFilename = os.path.join(outFolder, fnPrefix + "_symbol_test.json")
        sym.save(resFilename)
Use it as follows:
import mxnet as mx
import argparse
from infer import TDNDetector
def parse_args(argv=None):
    """Parse the detector's command-line options.

    Args:
        argv: Optional list of argument strings. When ``None`` (the
            default), argparse falls back to ``sys.argv[1:]``; passing a
            list allows the parser to be driven programmatically.

    Returns:
        An ``argparse.Namespace`` with ``config``, ``ctx``, ``output`` and
        ``threshold`` attributes.
    """
    parser = argparse.ArgumentParser(description='Test Detection')
    parser.add_argument('--config', type=str, default='config/faster_r101v2c4_c5_256roi_1x.py', help='config file path')
    parser.add_argument('--ctx', type=int, default=0, help='GPU index. Set negative value to use CPU')
    #parser.add_argument('--inputs', type=str, nargs='+', required=True, default='', help='File(-s) to test')
    parser.add_argument('--output', type=str, default='results', help='Where to store results')
    parser.add_argument('--threshold', type=float, default=0.5, help='Detector threshold')
    return parser.parse_args(argv)
if __name__ == "__main__":
    # Run the detector over a fixed list of sample images and print the
    # detections for each.
    args = parse_args()
    # A negative --ctx selects the CPU. The context comes from mxnet; the
    # parsed Namespace has no `cpu` attribute.
    ctx = mx.gpu(args.ctx) if args.ctx >= 0 else mx.cpu()
    #imgFilenames = args.inputs
    imgFilenames = ['car.jpg', 'COCO_val2014_000000581929.jpg']
    detector = TDNDetector(args.config, ctx, args.output, args.threshold)
    for imgFilename in imgFilenames:
        print(imgFilename)
        dets, _ = detector(imgFilename)
        print(dets)
@Tveek Hi, thanks for your code! But how do I run a multi-scale test on one image? I want to use tridentnet_r101v2c4_c5_multiscale_addminival_3x_fp16.py for training and testing.
@xujingtju @vedrusss Hi, how do I run a multi-scale test on images? Thank you very much!
How can I load a model directly from the Model Zoo, or any other pretrained model?
@xujingtju, thanks very much for your code; the other code posted in this issue all returns errors in my environment. Based on it, I wrote a batch test demo. Many thanks.