simpledet Single image detection

How do I run end-to-end (e2e) detection on a single image? Is there a quick example?

Feb 02 '19 04:02 wait1988

I have the same question.
Have you solved it?

Apr 17 '19 01:04 scutzhe

Have you solved it? I run single-image inference with the following code:

import os
from core.detection_module import DetModule
from core.detection_input import Loader
from utils.load_model import load_checkpoint
from six.moves import reduce
from six.moves.queue import Queue
from threading import Thread
import argparse
import importlib
import mxnet as mx
import numpy as np
import six.moves.cPickle as pkl
import json
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import time
from collections import namedtuple
import cv2

def parse_args():
    """Parse the command line and import the detection config module.

    Returns:
        tuple: ``(config, test)`` -- the module imported from the ``--config``
        path and the raw value of ``--test``.
    """
    cli = argparse.ArgumentParser(description='Test Detection')
    # Both options are plain strings; register them from one small table.
    for flag, help_text in (('--config', 'config file path'),
                            ('--test', 'test file path')):
        cli.add_argument(flag, help=help_text, type=str)
    options = cli.parse_args()

    # Turn a file path such as "config/foo.py" into the dotted name "config.foo".
    module_name = options.config.replace('.py', '').replace('/', '.')
    return importlib.import_module(module_name), options.test

def read_img(img_path):
    """Load an image and prepare it as a single-image network input.

    The image is read with OpenCV, its channel axis is reversed (BGR -> RGB),
    it is resized by one factor for both axes, and it is laid out as
    (1, C, H, W). The factor brings the short side to 800 unless that would
    push the long side past 2000, in which case the long side becomes 2000.

    Args:
        img_path: Path of the image file to read.

    Returns:
        tuple: ``(image_ori, image, scale)`` -- the untouched array returned
        by ``cv2.imread``, the resized batch array, and the resize factor.

    Raises:
        IOError: If OpenCV cannot read ``img_path``.
    """
    image_ori = cv2.imread(img_path, cv2.IMREAD_COLOR)
    if image_ori is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with the offending path instead of on the slice below.
        raise IOError("cannot read image: %s" % img_path)
    # BGR2RGB
    image = image_ori[:, :, ::-1]
    print(image.shape)

    short_target, long_target = 800, 2000
    short_side = min(image.shape[:2])
    long_side = max(image.shape[:2])
    scale = min(short_target / short_side, long_target / long_side)
    image = cv2.resize(image, None, None, scale, scale, interpolation=cv2.INTER_LINEAR)
    # HWC2CHW, then add the leading batch axis
    image = image.transpose((2, 0, 1))
    image = np.expand_dims(image, axis=0)
    return image_ori, image, scale
    
def do_nms(all_output, thr):
    """Filter one image's output by score and run NMS class by class.

    Relies on the module-level names ``nms`` (callable applied to each
    per-class detection array) and ``coco`` (its ``getCatIds()`` maps a score
    column index to the dictionary key); both are created in the
    ``__main__`` section of this script.

    Args:
        all_output: Mapping with ``'bbox_xyxy'`` (indexed by row) and
            ``'cls_score'`` (2-D, one column per class).
        thr: Scores must be strictly greater than this value to be kept.

    Returns:
        dict: category id -> result of ``nms`` for that class. Classes with no
        score above ``thr`` are absent.
    """
    boxes = all_output['bbox_xyxy']
    scores = all_output['cls_score']
    kept = {}
    for class_idx in range(scores.shape[1]):
        class_scores = scores[:, class_idx]
        selected = np.where(class_scores > thr)[0]
        if selected.shape[0] == 0:
            # nothing above the threshold for this class
            continue
        # Append the score as the last column: one row per candidate box.
        candidates = np.concatenate(
            (boxes[selected], class_scores[selected].reshape(-1, 1)), axis=1
        ).astype(np.float32)
        survivors = nms(candidates)
        category = coco.getCatIds()[class_idx]
        kept[category] = survivors
    return kept

if __name__ == "__main__":
    # Script entry point: run the test symbol on every image listed (one path
    # per line) in the --test file and print each detection scoring above thr.
    os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"] = "0"

    config, testfile = parse_args()

    pGen, pKv, pRpn, pRoi, pBbox, pDataset, pModel, pOpt, pTest, \
    transform, data_name, label_name, metric_list = config.get_config(is_train=False)

    sym = pModel.test_symbol
    sym.save(pTest.model.prefix + "_test.json")
    # `coco` (and `nms` below) are module-level names that do_nms() reads.
    coco = COCO(pTest.coco.annotation)

    ctx = mx.gpu(0)
    arg_params, aux_params = load_checkpoint(pTest.model.prefix, pTest.model.epoch)
    mod = DetModule(sym, data_names=['data', 'im_info', 'im_id', 'rec_id'], context=ctx)
    mod.bind(data_shapes=[('data', (1, 3, 600, 899)), ('im_info', (1, 3)), ('im_id', (1,)), ('rec_id', (1,))], for_training=False)
    # The weights are the same for every image, so load them once here
    # instead of once per loop iteration.
    mod.set_params(arg_params, aux_params, allow_extra=False)

    if callable(pTest.nms.type):
        nms = pTest.nms.type(pTest.nms.thr)
    else:
        from operator_py.nms import py_nms_wrapper
        nms = py_nms_wrapper(pTest.nms.thr)

    thr = 0.5  # score threshold used before NMS and again when printing
    with open(testfile, 'r') as rf:
        for line in rf:
            img_path = line.strip()
            if not img_path:
                # Blank lines (e.g. a trailing newline) are not image paths.
                continue
            im_ori, im_data, scale = read_img(img_path)
            h, w = im_data.shape[-2:]
            print(h, w, scale)
            im_info, im_id, rec_id = [(h, w, scale)], [1], [1]
            data = mx.io.DataBatch(data=[mx.nd.array(im_data),
                                         mx.nd.array(im_info),
                                         mx.nd.array(im_id),
                                         mx.nd.array(rec_id)])
            mod.forward(data, is_train=False)
            output = [x.asnumpy() for x in mod.get_outputs()]
            # The five outputs are unpacked in this fixed order; `out_id`
            # avoids shadowing the builtin `id`.
            rid, out_id, info, cls, box = [x.squeeze() for x in output]
            cls = cls[:, 1:]   # remove background
            # Divide by the resize factor from read_img.
            # NOTE(review): presumably maps boxes back to original-image pixels -- confirm.
            box = box / scale

            output_record = dict(
                rec_id=rid,
                im_id=out_id,
                im_info=info,
                bbox_xyxy=box,
                cls_score=cls,
            )
            all_outputs = pTest.process_output([output_record], None)
            final_result = do_nms(all_outputs[0], thr)
            for cid, bbox in final_result.items():
                idx = np.where(bbox[:, -1] > thr)[0]
                for i in idx:
                    final_box = bbox[i][:4]
                    score = bbox[i][-1]
                    print("cls:%s bbox:%s score:%s" % (cid, final_box, score))

Apr 30 '19 03:04 xujingtju

raise MXNetError(py_str(_LIB.MXGetLastError())) mxnet.base.MXNetError: [17:38:51] include/mxnet/tuple.h:202: Check failed: i >= 0 && i < ndim(): index = 0 must be in range [0, -1)

May 14 '19 09:05 feixiangdekaka

Here is a Detector class I wrote based on the code above:

import os, argparse
import importlib
import json
import time
import cv2
import numpy as np
import mxnet as mx
from   core.detection_module import DetModule
from   utils.load_model      import load_checkpoint

# 80 class names used to label detections. TDNDetector.__do_nms looks a name
# up as coco[cid], where cid is a column index of the class-score matrix after
# TDNDetector.__call__ has dropped the background column.
# NOTE(review): presumably the COCO categories in the model's output order --
# confirm against the config used for training.
coco = (
'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
)

class TDNDetector:
    """Single-image detector wrapped around a bound ``DetModule``.

    Construction imports the config module, restores the checkpoint named by
    the test parameters, binds the test symbol and writes that symbol into
    ``outFolder``. Calling the instance with an image path runs one forward
    pass and returns the detections left after NMS and thresholding.
    """

    def __init__(self, configFn, ctx, outFolder, threshold):
        """Load config and weights, bind the module and export the symbol.

        Args:
            configFn: Config file path; converted to a dotted module name.
            ctx: Context object handed to ``DetModule``.
            outFolder: Directory that receives the exported test symbol.
            threshold: Score threshold applied before NMS and again on the
                final detections.
        """
        os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"] = "0"
        module_name = configFn.replace('.py', '').replace('/', '.')
        config = importlib.import_module(module_name)
        # get_config returns 13 values; only the model and test parameters are used.
        (_gen, _kv, _rpn, _roi, _bbox, _dataset, self.__pModel, _opt,
         self.__pTest, _transform, _data_name, _label_name,
         _metrics) = config.get_config(is_train=False)

        # Use the configured NMS factory when it is callable, else the Python wrapper.
        nms_factory = self.__pTest.nms.type
        if not callable(nms_factory):
            from operator_py.nms import py_nms_wrapper
            nms_factory = py_nms_wrapper
        self.__nms = nms_factory(self.__pTest.nms.thr)

        arg_params, aux_params = load_checkpoint(self.__pTest.model.prefix,
                                                 self.__pTest.model.epoch)
        sym = self.__pModel.test_symbol
        input_shapes = [('data', (1, 3, 600, 899)),
                        ('im_info', (1, 3)),
                        ('im_id', (1,)),
                        ('rec_id', (1,))]
        self.__mod = DetModule(sym,
                               data_names=[name for name, _ in input_shapes],
                               context=ctx)
        self.__mod.bind(data_shapes=input_shapes, for_training=False)
        self.__mod.set_params(arg_params, aux_params, allow_extra=False)

        # The exported file is named after the last path component of the prefix.
        prefix_tail = self.__pTest.model.prefix.rsplit('/', 1)[-1]
        self.__saveSymbol(sym, outFolder, prefix_tail)
        self.__threshold = threshold

    def __call__(self, imgFilename):
        """Detect objects in one image file.

        Args:
            imgFilename: Path of the image to process.

        Returns:
            tuple: ``(detections, img)`` where ``detections`` is a list of
            ``{'cls', 'box', 'score'}`` dicts and ``img`` is the array read by
            OpenCV, or ``(None, None)`` when the file cannot be read.
        """
        original, batch, scale = self.__readImg(imgFilename)
        if original is None:
            return None, None

        height, width = batch.shape[-2:]
        feeds = (batch, [(height, width, scale)], [1], [1])
        data_batch = mx.io.DataBatch(data=[mx.nd.array(feed) for feed in feeds])
        self.__mod.forward(data_batch, is_train=False)

        # Take the first entry of each output and drop singleton axes.
        raw_outputs = self.__mod.get_outputs(merge_multi_context=False)
        rec_ids, im_ids, infos, scores, boxes = [
            out[0].asnumpy().squeeze() for out in raw_outputs
        ]
        scores = scores[:, 1:]   # remove background
        boxes = boxes / scale
        record = {
            'rec_id': rec_ids,
            'im_id': im_ids,
            'im_info': infos,
            'bbox_xyxy': boxes,
            'cls_score': scores,
        }
        record = self.__pTest.process_output([record], None)[0]
        per_class = self.__do_nms(record)

        # Flatten the per-class arrays into one list of detection dicts.
        detections = []
        for label, dets in per_class.items():
            keep = np.where(dets[:, -1] > self.__threshold)[0]
            detections.extend(
                {'cls': label, 'box': dets[i][:4], 'score': dets[i][-1]}
                for i in keep
            )
        return detections, original

    def __do_nms(self, all_output):
        """Threshold scores and apply NMS per class.

        Args:
            all_output: Mapping with ``'bbox_xyxy'`` and ``'cls_score'``.

        Returns:
            dict: class name (from the module-level ``coco`` tuple) -> NMS
            result. Classes with no score above the threshold are absent.
        """
        boxes = all_output['bbox_xyxy']
        scores = all_output['cls_score']
        kept = {}
        for class_idx in range(scores.shape[1]):
            class_scores = scores[:, class_idx]
            selected = np.where(class_scores > self.__threshold)[0]
            if selected.shape[0] == 0:
                continue
            # Append the score as the last column: one row per candidate box.
            candidates = np.concatenate(
                (boxes[selected], class_scores[selected].reshape(-1, 1)), axis=1
            ).astype(np.float32)
            survivors = self.__nms(candidates)
            kept[coco[class_idx]] = survivors
        return kept

    def __readImg(self, imgFilename):
        """Read an image and convert it to a (1, C, H, W) input array.

        Returns:
            tuple: ``(image_ori, image, scale)``, or three ``None`` values
            when OpenCV cannot read the file.
        """
        bgr = cv2.imread(imgFilename, cv2.IMREAD_COLOR)
        if bgr is None:
            return None, None, None
        # BGR2RGB
        rgb = bgr[:, :, ::-1]
        short_target, long_target = 800, 2000
        img_h, img_w = rgb.shape[:2]
        scale = min(short_target / min(img_h, img_w),
                    long_target / max(img_h, img_w))
        resized = cv2.resize(rgb, None, None, scale, scale,
                             interpolation=cv2.INTER_LINEAR)
        # HWC2CHW, then add the batch axis
        batch = np.expand_dims(resized.transpose((2, 0, 1)), axis=0)
        return bgr, batch, scale

    def __saveSymbol(self, sym, outFolder, fnPrefix):
        """Write ``sym`` to ``<outFolder>/<fnPrefix>_symbol_test.json``."""
        if not os.path.exists(outFolder):
            os.makedirs(outFolder)
        sym.save(os.path.join(outFolder, fnPrefix + "_symbol_test.json"))

Use it as follows:

def parse_args():
    """Parse the command-line options of the detection demo.

    Returns:
        argparse.Namespace: with ``config``, ``ctx``, ``inputs``, ``output``
        and ``threshold`` attributes.
    """
    cli = argparse.ArgumentParser(description='Test Detection')
    # (flag, keyword arguments) pairs, registered in this order.
    option_specs = (
        ('--config', dict(type=str, required=True, help='config file path')),
        ('--ctx', dict(type=int, default=0, help='GPU index. Set negative value to use CPU')),
        ('--inputs', dict(type=str, nargs='+', required=True, help='File(-s) to test')),
        ('--output', dict(type=str, default='results', help='Where to store results')),
        ('--threshold', dict(type=float, default=0.5, help='Detector threshold')),
    )
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)
    return cli.parse_args()

if __name__ == "__main__":
    # Demo entry point: build one detector and run it on every input image.
    args = parse_args()
    # A negative --ctx selects the CPU. The context factory is mx.cpu(); the
    # argparse namespace has no cpu() method, so args.cpu() would raise.
    ctx = mx.gpu(args.ctx) if args.ctx >= 0 else mx.cpu()
    detector = TDNDetector(args.config, ctx, args.output, args.threshold)
    for imgFilename in args.inputs:
        # The detector returns (detections, image); both are None when the
        # image cannot be read.
        dets, img = detector(imgFilename)

Jun 08 '19 13:06 vedrusss

I made the above code more robust:

import os, argparse
import importlib
import json
import time
import cv2
import numpy as np
import mxnet as mx
from   core.detection_module import DetModule
from   utils.load_model      import load_checkpoint
from utils.patch_config import patch_config_as_nothrow


# 80 class names used to label detections. TDNDetector.__do_nms looks a name
# up as coco[cid], where cid is a column index of the class-score matrix after
# TDNDetector.__call__ has dropped the background column.
# NOTE(review): presumably the COCO categories in the model's output order --
# confirm against the config used for training.
coco = (
'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
)

class TDNDetector:
    """Single-image detector that preprocesses with the config's transforms.

    Construction imports the config module, restores the checkpoint named by
    the test parameters, passes symbol and weights through ``merge_bn``, binds
    the module and writes the symbol into ``outFolder``. Calling the instance
    with an image path runs one forward pass and returns the detections left
    after NMS and thresholding.

    Attributes set in ``__init__``:
        transform: Iterable of objects whose ``apply(record)`` is called on
            the per-image record, in order.
        resizeParam: ``(800, 1200)``; used for the bound data shape and for
            computing the scale factor.
    """

    def __init__(self, configFn, ctx, outFolder, threshold):
        """Load config and weights, bind the module and export the symbol.

        Args:
            configFn: Config file path; converted to a dotted module name.
            ctx: Context object handed to ``DetModule``.
            outFolder: Directory that receives the exported test symbol.
            threshold: Score threshold applied before NMS and again on the
                final detections.
        """
        os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"] = "0"
        config = importlib.import_module(configFn.replace('.py', '').replace('/', '.'))
        # get_config returns 13 values; the model params, test params and
        # transform list are the only ones used here.
        _,_,_,_,_,_, self.__pModel,_, self.__pTest, self.transform,_,_,_ = config.get_config(is_train=False)
        self.__pModel = patch_config_as_nothrow(self.__pModel)
        self.__pTest = patch_config_as_nothrow(self.__pTest)
        self.resizeParam = (800, 1200)
        # Use the configured NMS factory when it is callable, else the Python wrapper.
        if callable(self.__pTest.nms.type):
            self.__nms = self.__pTest.nms.type(self.__pTest.nms.thr)
        else:
            from operator_py.nms import py_nms_wrapper
            self.__nms = py_nms_wrapper(self.__pTest.nms.thr)
        arg_params, aux_params = load_checkpoint(self.__pTest.model.prefix, self.__pTest.model.epoch)
        sym = self.__pModel.test_symbol
        from utils.graph_optimize import merge_bn
        sym, arg_params, aux_params = merge_bn(sym, arg_params, aux_params)
        self.__mod = DetModule(sym, data_names=['data','im_info','im_id','rec_id'], context=ctx)
        self.__mod.bind(data_shapes=[('data', (1, 3, self.resizeParam[0], self.resizeParam[1])),
                                     ('im_info', (1, 3)),
                                     ('im_id', (1,)),
                                     ('rec_id', (1,))], for_training=False)
        self.__mod.set_params(arg_params, aux_params, allow_extra=False)
        # The exported file is named after the last path component of the prefix.
        self.__saveSymbol(sym, outFolder, self.__pTest.model.prefix.split('/')[-1])
        self.__threshold = threshold

    def __call__(self, imgFilename):
        """Detect objects in one image file.

        Args:
            imgFilename: Path of the image to process.

        Returns:
            tuple: ``(detections, None)`` where ``detections`` is a list of
            ``{'cls', 'box', 'score'}`` dicts, or ``(None, None)`` when the
            image cannot be read.
        """
        roi_record, scale = self.__readImg(imgFilename)
        if roi_record is None:
            # unreadable image: report "no result" instead of crashing
            return None, None
        h, w = roi_record['data'][0].shape

        # Stack the first three (h, w) planes into a (1, 3, h, w) batch.
        planes = [roi_record['data'][c] for c in range(3)]
        im_data = np.stack(planes, axis=0)[np.newaxis]

        im_info, im_id, rec_id = [(h, w, scale)], [1], [1]
        data = mx.io.DataBatch(data=[mx.nd.array(im_data),
                                     mx.nd.array(im_info),
                                     mx.nd.array(im_id),
                                     mx.nd.array(rec_id)])
        self.__mod.forward(data, is_train=False)
        # Take the first entry of each output and drop singleton axes;
        # `out_id` avoids shadowing the builtin `id`.
        outputs = self.__mod.get_outputs(merge_multi_context=False)
        rid, out_id, info, cls, box = [x[0].asnumpy().squeeze() for x in outputs]
        cls = cls[:, 1:]   # remove background
        # NOTE(review): presumably maps boxes back to original-image pixels -- confirm.
        box = box / scale
        output_record = dict(rec_id=rid, im_id=out_id, im_info=info, bbox_xyxy=box, cls_score=cls)
        output_record = self.__pTest.process_output([output_record], None)[0]
        final_result = self.__do_nms(output_record)
        # Flatten the per-class arrays into one list of detection dicts.
        detections = []
        for cid, bbox in final_result.items():
            idx = np.where(bbox[:, -1] > self.__threshold)[0]
            for i in idx:
                detections.append({'cls': cid, 'box': bbox[i][:4], 'score': bbox[i][-1]})
        return detections, None

    def __do_nms(self, all_output):
        """Threshold scores and apply NMS per class.

        Args:
            all_output: Mapping with ``'bbox_xyxy'`` and ``'cls_score'``.

        Returns:
            dict: class name (from the module-level ``coco`` tuple) -> NMS
            result. Classes with no score above the threshold are absent.
        """
        box = all_output['bbox_xyxy']
        score = all_output['cls_score']
        final_dets = {}
        for cid in range(score.shape[1]):
            score_cls = score[:, cid]
            valid_inds = np.where(score_cls > self.__threshold)[0]
            if valid_inds.shape[0] == 0:
                continue
            # Append the score as the last column: one row per candidate box.
            det = np.concatenate(
                (box[valid_inds], score_cls[valid_inds].reshape(-1, 1)), axis=1
            ).astype(np.float32)
            det = self.__nms(det)
            final_dets[coco[cid]] = det
        return final_dets

    def __readImg(self, imgFilename):
        """Build the per-image record and run the config transforms on it.

        The file is opened with OpenCV here only to obtain its height and
        width; the record carries the path under ``'image_url'``.
        NOTE(review): presumably one of the transforms loads the pixels from
        ``'image_url'`` and stores them under ``'data'`` -- confirm.

        Returns:
            tuple: ``(roi_record, scale)``, or ``(None, None)`` when OpenCV
            cannot read the file.
        """
        img = cv2.imread(imgFilename, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread reports failure by returning None rather than raising
            return None, None
        height, width = img.shape[:2]
        # Dummy ground truth: a single all-zero box of class 0.
        roi_record = {'gt_bbox': np.array([[0., 0., 0., 0.]]), 'gt_class': np.array([0])}
        roi_record['image_url'] = imgFilename
        roi_record['h'] = height
        roi_record['w'] = width

        for trans in self.transform:
            trans.apply(roi_record)
        # NOTE(review): 'h'/'w' are re-read after the transforms; whether they
        # still hold the original size depends on the transforms -- confirm.
        img_shape = [roi_record['h'], roi_record['w']]
        shorts, longs = min(img_shape), max(img_shape)
        scale = min(self.resizeParam[0] / shorts, self.resizeParam[1] / longs)

        return roi_record, scale

    def __saveSymbol(self, sym, outFolder, fnPrefix):
        """Write ``sym`` to ``<outFolder>/<fnPrefix>_symbol_test.json``."""
        if not os.path.exists(outFolder):
            os.makedirs(outFolder)
        resFilename = os.path.join(outFolder, fnPrefix + "_symbol_test.json")
        sym.save(resFilename)

Use it as follows:

import mxnet as mx
import argparse
from infer import TDNDetector


def parse_args():
    """Parse the command-line options of the detection demo.

    Every option has a default, so the script can be run without arguments.

    Returns:
        argparse.Namespace: with ``config``, ``ctx``, ``output`` and
        ``threshold`` attributes.
    """
    cli = argparse.ArgumentParser(description='Test Detection')
    # (flag, keyword arguments) pairs, registered in this order.
    # NOTE: an --inputs option is intentionally not registered here; the
    # entry point below uses a fixed list of image files instead.
    option_specs = (
        ('--config', dict(type=str, default='config/faster_r101v2c4_c5_256roi_1x.py', help='config file path')),
        ('--ctx', dict(type=int, default=0, help='GPU index. Set negative value to use CPU')),
        ('--output', dict(type=str, default='results', help='Where to store results')),
        ('--threshold', dict(type=float, default=0.5, help='Detector threshold')),
    )
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)
    return cli.parse_args()

if __name__ == "__main__":
    # Demo entry point: build one detector and print the detections for a
    # fixed list of image files.
    args = parse_args()
    # A negative --ctx selects the CPU. The context factory is mx.cpu(); the
    # argparse namespace has no cpu() method, so args.cpu() would raise.
    ctx = mx.gpu(args.ctx) if args.ctx >= 0 else mx.cpu()
    # parse_args() does not define --inputs, so the file list is hard-coded.
    imgFilenames = ['car.jpg', 'COCO_val2014_000000581929.jpg']
    detector = TDNDetector(args.config, ctx, args.output, args.threshold)
    for imgFilename in imgFilenames:
        print(imgFilename)
        dets, _ = detector(imgFilename)
        print(dets)

Aug 30 '19 12:08 Tveek

@Tveek Hi, thanks for your code! But how do I run a multi-scale test on one image? I want to use tridentnet_r101v2c4_c5_multiscale_addminival_3x_fp16.py for training and testing.

Sep 18 '19 01:09 louielu1027

@xujingtju @vedrusss Hi, how do I run a multi-scale test on images? Thank you very much!

Sep 18 '19 02:09 louielu1027

How can you load a model directly from the Model Zoo? Or any other pretrained model?

Oct 29 '19 20:10 tiberium24

@xujingtju, thanks very much for your code; the other code posted in this issue all returns errors in my environment. Based on yours, I wrote a batch test demo. Many thanks.

Jan 21 '20 08:01 mad-fogs

simpledet simpledet copied to clipboard

Single image detection

simpledet
simpledet copied to clipboard