models face detection耗时太多

face detection耗时太多

Open burness opened this issue 5 years ago • 5 comments

face detection里面的模块，inference的时候耗时太多如下图，单位为秒，有能提升的建议吗？

Feb 26 '19 07:02 burness

@burness 想了解一下，您是如何预测的？使用的是CPU还是GPU？使用的是如下代码吗？

https://github.com/PaddlePaddle/models/blob/82fa5276d552026ddb636fb5b3ea17ec6ef0070f/fluid/PaddleCV/face_detection/widerface_eval.py#L39-L55

这里提供的脚本face_detection/widerface_eval.py采用的是multi-scale方式infer的，实际使用可以不用multi-scale，比如只输入原图即可。

如果不是上面脚本也麻烦说下是如何预测？PyramidBox这个模型backbone是VGG，加上后面FPN,CMP等模块确实比较耗时的。

Feb 26 '19 11:02 qingqing01

是在这个脚本上面改的，我在p40上做inference的，代码如下：

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import time
import numpy as np
import argparse
import functools
from PIL import Image

import paddle.fluid as fluid
import reader
from pyramidbox import PyramidBox
from visualize import draw_bboxes
from utility import add_arguments, print_arguments
import time
# Command-line interface. `add_arg` is `utility.add_arguments` with the parser
# pre-bound, so each call below passes (name, type, default, help text).
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
import cv2


# yapf: disable
add_arg('use_gpu',         bool,  True,                              "Whether use GPU or not.")
add_arg('use_pyramidbox',  bool,  True,                              "Whether use PyramidBox model.")
add_arg('data_dir',        str,   'data/WIDER_val/images/',          "The validation dataset path.")
add_arg('model_dir',       str,   '',                                "The model path.")
add_arg('pred_dir',        str,   'pred',                            "The path to save the evaluation results.")
# NOTE(review): the help text below duplicates data_dir's; the default value
# suggests this is the annotation list file rather than a dataset directory.
add_arg('file_list',       str,   'data/wider_face_split/wider_face_val_bbx_gt.txt', "The validation dataset path.")
add_arg('infer',           bool,  False,                             "Whether do infer or eval.")
add_arg('confs_threshold', float, 0.15,                              "Confidence threshold to draw bbox.")
# Options used only by the video branch of infer().
add_arg('is_video',        bool,  False,                             "Whether is for video infer")
add_arg('video_path',      str,   '',                                 "face detection video")
add_arg('result_dir',      str,    '',                               "detected face save dir")
# yapf: enable


def _detect_multi_scale(image):
    """Run every test-time variant on ``image`` and vote the boxes together.

    This performs several forward passes per image (original scale, flipped,
    shrunk/enlarged scales and the image pyramid), which is why it is slow.
    Keeping only the plain ``detect_face(image, shrink)`` call is the cheap
    alternative when speed matters more than recall.

    Args:
        image (PIL.Image): RGB image to search for faces.

    Returns:
        np.ndarray: voted detections, one row per face laid out as
        (xmin, ymin, xmax, ymax, score).
    """
    shrink, max_shrink = get_shrink(image.size[1], image.size[0])

    det0 = detect_face(image, shrink)
    det1 = flip_test(image, shrink)
    det2, det3 = multi_scale_test(image, max_shrink)
    det4 = multi_scale_test_pyramid(image, max_shrink)
    det = np.vstack((det0, det1, det2, det3, det4))
    return bbox_vote(det)


def infer(args, config):
    """Detect faces in a video, or evaluate on the WIDER FACE file list.

    With ``args.infer`` set, only video input (``args.is_video``) is handled:
    every frame is run through the multi-scale detector and each face scoring
    at least ``args.confs_threshold`` is cropped, resized to 64x64 and saved
    as ``<face index>_<frame number>.jpg`` under ``./<args.result_dir>/``.
    With ``args.infer`` unset, every image yielded by ``reader.test`` is
    detected and the boxes are written with ``save_widerface_bboxes``.

    Args:
        args: parsed command-line namespace. Reads ``model_dir``, ``pred_dir``,
            ``infer``, ``is_video``, ``video_path``, ``result_dir``,
            ``confs_threshold`` and ``file_list``.
        config: reader settings forwarded to ``reader.test`` in evaluation
            mode.

    Raises:
        ValueError: if ``args.model_dir`` does not exist, or if the video at
            ``args.video_path`` cannot be opened.
    """
    model_dir = args.model_dir
    pred_dir = args.pred_dir
    print("enter infer")
    if not os.path.exists(model_dir):
        raise ValueError("The model path [%s] does not exist." % (model_dir))

    if not args.infer:
        test_reader = reader.test(config, args.file_list)
        for image, image_path in test_reader():
            dets = _detect_multi_scale(image)
            save_widerface_bboxes(image_path, dets, pred_dir)

        print("Finish evaluation.")
        return

    if not args.is_video:
        # Only video inference is implemented in this script.
        return

    cap = cv2.VideoCapture(args.video_path)
    if not cap.isOpened():
        raise ValueError("Cannot open video [%s]." % (args.video_path))

    # Create the output directory up front so that saving a crop cannot fail
    # on a missing path.
    out_dir = "./{0}/".format(args.result_dir)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    frame_num = 0
    all_time_start = time.time()
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # End of stream (or a read error): stop instead of passing a
                # None frame on to cvtColor.
                break
            print("processing {0}th frame".format(frame_num))
            each_process_start = time.time()
            frame_num += 1

            # OpenCV yields BGR frames; the detector works on RGB PIL images.
            cv2_im = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(cv2_im)
            if image.mode == 'L':
                image = image.convert('RGB')

            dets = _detect_multi_scale(image)
            keep_index = np.where(dets[:, 4] >= args.confs_threshold)[0]
            dets = dets[keep_index, :]
            print("dets: {0}".format(dets))

            for i, face in enumerate(dets):
                crop_img = image.crop(face[0:4])
                crop_img = crop_img.resize((64, 64), Image.ANTIALIAS)
                crop_path = os.path.join(
                    out_dir, str(i) + "_" + str(frame_num) + ".jpg")
                crop_img.save(crop_path)
                print("save to {0} successfully".format(crop_path))
            print("process {0}th img, take time: {1}".format(
                frame_num, time.time() - each_process_start))
    finally:
        cap.release()
    print("all time detection faces takes {0} seconds".format(
        time.time() - all_time_start))


def save_widerface_bboxes(image_path, bboxes_scores, output_dir):
    """
    Save predicted results, including bbox and score into text file.

    The file is written to ``<output_dir>/<image_class>/<image stem>.txt``,
    where ``image_class`` is the parent directory name taken from
    ``image_path``. The first line is ``<image_class>/<image_name>``, the
    second the number of boxes, followed by one line per box formatted as
    ``xmin ymin width height score``.

    Args:
        image_path (string): '/'-separated path whose last two components are
            the class directory and the file name.
        bboxes_scores (np.array|list): the predicted bboxed and scores, layout
            is (xmin, ymin, xmax, ymax, score)
        output_dir (string): output directory.
    """
    # Accept plain lists as documented; `.shape` below requires an ndarray.
    bboxes_scores = np.asarray(bboxes_scores)

    image_name = image_path.split('/')[-1]
    image_class = image_path.split('/')[-2]

    odir = os.path.join(output_dir, image_class)
    if not os.path.exists(odir):
        os.makedirs(odir)

    # splitext copes with extensions of any length (e.g. ".jpeg"), not only
    # three-letter ones.
    ofname = os.path.join(odir, '%s.txt' % (os.path.splitext(image_name)[0]))
    # The context manager closes the file even if formatting a row fails.
    with open(ofname, 'w') as f:
        f.write('{:s}\n'.format(image_class + '/' + image_name))
        f.write('{:d}\n'.format(bboxes_scores.shape[0]))
        for box_score in bboxes_scores:
            xmin, ymin, xmax, ymax, score = box_score
            # Corners are converted to width/height with the inclusive "+ 1".
            f.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\n'.format(xmin, ymin, (
                xmax - xmin + 1), (ymax - ymin + 1), score))
    print("The predicted result is saved as {}".format(ofname))


def detect_face(image, shrink):
    """Run one forward pass of the detector on ``image`` scaled by ``shrink``.

    Relies on the module-level ``exe``, ``infer_program`` and ``fetches``
    created in the ``__main__`` section.

    Args:
        image (PIL.Image): input image.
        shrink (float): resize factor applied before detection; 1 means the
            image is fed as is.

    Returns:
        np.ndarray: one row per detection laid out as
        (xmin, ymin, xmax, ymax, score). A single all-zero row is returned
        when the network output holds exactly one element.
    """
    width, height = image.size
    if shrink != 1:
        height, width = int(height * shrink), int(width * shrink)
        image = image.resize((width, height), Image.ANTIALIAS)

    # Channel-first layout, per-channel mean subtraction, then scaling.
    channel_mean = np.array([104., 117., 123.])[:, np.newaxis, np.newaxis]
    pixels = reader.to_chw_bgr(np.array(image)).astype('float32')
    pixels -= channel_mean.astype('float32')
    pixels = pixels * 0.007843
    # Wrap in a leading axis so the feed is a batch of one image.
    batch = np.array([pixels])

    detection, = exe.run(infer_program,
                         feed={'image': batch},
                         fetch_list=fetches,
                         return_numpy=False)
    detection = np.array(detection)
    if np.prod(detection.shape) == 1:
        print("No face detected")
        return np.array([[0, 0, 0, 0, 0]])

    # Columns 1..5 are read as score, xmin, ymin, xmax, ymax. Coordinates are
    # multiplied by the fed image size and divided by shrink
    # (presumably normalized to [0, 1] by the network - TODO confirm).
    scores = detection[:, 1]
    xmin = width * detection[:, 2] / shrink
    ymin = height * detection[:, 3] / shrink
    xmax = width * detection[:, 4] / shrink
    ymax = height * detection[:, 5] / shrink
    return np.column_stack((xmin, ymin, xmax, ymax, scores))


def bbox_vote(det):
    """Merge overlapping detections by score-weighted box voting.

    Boxes are processed in descending score order. All boxes whose IoU with
    the current top box is at least 0.3 form one cluster, which is replaced
    by the score-weighted average of its coordinates and the cluster's
    maximum score. A cluster consisting of a single box is discarded, unless
    it is the last box left to process.

    Args:
        det (np.ndarray): array of shape (N, 5) with rows laid out as
            (xmin, ymin, xmax, ymax, score).

    Returns:
        np.ndarray: at most 750 voted rows in the same layout. For an empty
        input a single placeholder row ``[10, 10, 20, 20, 0.002]`` is
        returned.
    """
    order = det[:, 4].ravel().argsort()[::-1]
    det = det[order, :]
    if det.shape[0] == 0:
        return np.array([[10, 10, 20, 20, 0.002]])

    voted = []
    while det.shape[0] > 0:
        # IOU
        area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1)
        xx1 = np.maximum(det[0, 0], det[:, 0])
        yy1 = np.maximum(det[0, 1], det[:, 1])
        xx2 = np.minimum(det[0, 2], det[:, 2])
        yy2 = np.minimum(det[0, 3], det[:, 3])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        o = inter / (area[0] + area[:] - inter)

        # nms
        merge_index = np.where(o >= 0.3)[0]
        if merge_index.shape[0] == 0:
            # A malformed box (e.g. inverted corners) does not overlap even
            # itself; consume it anyway so the loop always makes progress.
            merge_index = np.array([0])
        det_accu = det[merge_index, :]
        det = np.delete(det, merge_index, 0)

        if merge_index.shape[0] <= 1:
            # A lone box is only kept when nothing else is left to process.
            if det.shape[0] == 0:
                voted.append(det_accu)
            continue

        weights = det_accu[:, -1:]
        weight_sum = np.sum(weights)
        det_accu_sum = np.zeros((1, 5))
        if weight_sum > 0:
            det_accu_sum[:, 0:4] = np.sum(det_accu[:, 0:4] * weights,
                                          axis=0) / weight_sum
        else:
            # All scores are zero (e.g. only "no face" dummy rows): use the
            # plain mean instead of producing 0/0 = NaN coordinates.
            det_accu_sum[:, 0:4] = np.mean(det_accu[:, 0:4], axis=0)
        det_accu_sum[:, 4] = np.max(det_accu[:, 4])
        voted.append(det_accu_sum)

    dets = np.vstack(voted)
    return dets[0:750, :]


def flip_test(image, shrink):
    """Detect faces on the horizontally mirrored image.

    Args:
        image (PIL.Image): input image.
        shrink (float): resize factor forwarded to ``detect_face``.

    Returns:
        np.ndarray: detections mapped back to the coordinates of the
        un-flipped image, laid out as (xmin, ymin, xmax, ymax, score).
    """
    # image.size is (width, height)
    width = image.size[0]
    mirrored = image.transpose(Image.FLIP_LEFT_RIGHT)
    det_f = detect_face(mirrored, shrink)

    det_t = np.zeros(det_f.shape)
    # Vertical extent and score are unaffected by a left-right flip.
    det_t[:, [1, 3, 4]] = det_f[:, [1, 3, 4]]
    # The mirrored right edge becomes the left edge and vice versa.
    det_t[:, [0, 2]] = width - det_f[:, [2, 0]]
    return det_t


def multi_scale_test(image, max_shrink):
    """Detect on one shrunk and one or more enlarged versions of ``image``.

    Args:
        image (PIL.Image): input image.
        max_shrink (float): largest resize factor allowed for this image.

    Returns:
        tuple: ``(det_s, det_b)``. ``det_s`` holds detections from the shrunk
        image whose longer side exceeds 30. ``det_b`` holds detections from
        the second pass (and any further enlargements), filtered to a shorter
        side below 100 when the final scale is above 1, and to a longer side
        above 30 otherwise.
    """

    def longer_side(boxes):
        return np.maximum(boxes[:, 2] - boxes[:, 0] + 1,
                          boxes[:, 3] - boxes[:, 1] + 1)

    def shorter_side(boxes):
        return np.minimum(boxes[:, 2] - boxes[:, 0] + 1,
                          boxes[:, 3] - boxes[:, 1] + 1)

    # The shrunk pass is only trusted for big faces.
    if max_shrink >= 0.75:
        st = 0.5
    else:
        st = 0.5 * max_shrink
    det_s = detect_face(image, st)
    det_s = det_s[np.where(longer_side(det_s) > 30)[0], :]

    # Second pass: enlarge when allowed, otherwise use a scale halfway
    # between st and max_shrink.
    if max_shrink > 1:
        bt = min(2, max_shrink)
    else:
        bt = (st + max_shrink) / 2
    passes = [detect_face(image, bt)]

    # Keep doubling the scale for small faces, finishing at max_shrink.
    if max_shrink > 2:
        bt *= 2
        while bt < max_shrink:
            passes.append(detect_face(image, bt))
            bt *= 2
        passes.append(detect_face(image, max_shrink))
    det_b = passes[0] if len(passes) == 1 else np.row_stack(passes)

    if bt > 1:
        # Enlarged passes are only trusted for small faces.
        keep = shorter_side(det_b) < 100
    else:
        # Shrunk passes are only trusted for big faces.
        keep = longer_side(det_b) > 30
    det_b = det_b[np.where(keep)[0], :]
    return det_s, det_b


def multi_scale_test_pyramid(image, max_shrink):
    """Detect on an image pyramid and stack the size-filtered results.

    The image is always tested at scale 0.25, then at each of 0.75, 1.25,
    1.5 and 1.75 that does not exceed ``max_shrink``.

    Args:
        image (PIL.Image): input image.
        max_shrink (float): largest resize factor allowed for this image.

    Returns:
        np.ndarray: stacked detections laid out as
        (xmin, ymin, xmax, ymax, score).
    """

    def filtered(boxes, scale):
        box_w = boxes[:, 2] - boxes[:, 0] + 1
        box_h = boxes[:, 3] - boxes[:, 1] + 1
        if scale > 1:
            # Enlarged images are only used to detect small faces.
            keep = np.minimum(box_w, box_h) < 100
        else:
            # Shrunk images are only used to detect big faces.
            keep = np.maximum(box_w, box_h) > 30
        return boxes[np.where(keep)[0], :]

    det_b = filtered(detect_face(image, 0.25), 0.25)
    for scale in (0.75, 1.25, 1.5, 1.75):
        if scale <= max_shrink:
            det_temp = filtered(detect_face(image, scale), scale)
            det_b = np.row_stack((det_b, det_temp))
    return det_b


def get_shrink(height, width):
    """Compute the resize factors to use for an image of the given size.

    Args:
        height (int): image height.
        width (int): image width.

    Returns:
        tuple: ``(shrink, max_shrink)``. ``max_shrink`` is the largest resize
        factor considered safe for this image after the safety margins below;
        ``shrink`` is ``max_shrink`` capped at 1.
    """
    # Local import: this is the only place in the file that needs decimal.
    from decimal import Decimal, ROUND_DOWN

    # avoid out of memory
    max_shrink_v1 = (0x7fffffff / 577.0 / (height * width))**0.5
    max_shrink_v2 = ((678 * 1024 * 2.0 * 2.0) / (height * width))**0.5

    def get_round(x, loc):
        # Truncate (not round) x to `loc` decimal places. Going through
        # Decimal keeps the digits of str(x) but, unlike slicing that string,
        # also handles values without a '.' and exponent notation such as
        # '1.6e-05'.
        quantum = Decimal(1).scaleb(-loc)
        return float(Decimal(str(x)).quantize(quantum, rounding=ROUND_DOWN))

    max_shrink = get_round(min(max_shrink_v1, max_shrink_v2), 2) - 0.3
    # The larger the factor, the larger the extra safety margin.
    if 1.5 <= max_shrink < 2:
        max_shrink = max_shrink - 0.1
    elif 2 <= max_shrink < 3:
        max_shrink = max_shrink - 0.2
    elif 3 <= max_shrink < 4:
        max_shrink = max_shrink - 0.3
    elif 4 <= max_shrink < 5:
        max_shrink = max_shrink - 0.4
    elif max_shrink >= 5:
        max_shrink = max_shrink - 0.5

    shrink = max_shrink if max_shrink < 1 else 1
    return shrink, max_shrink


if __name__ == '__main__':
    # PaddlePaddle 2.x starts in dynamic-graph mode, where the fluid
    # static-graph API used below fails with "'data()' is only supported in
    # static graph mode". Switch to static mode when the running version
    # offers the switch; older releases have no such function and need none.
    import paddle
    if hasattr(paddle, 'enable_static'):
        paddle.enable_static()

    args = parser.parse_args()
    print_arguments(args)
    config = reader.Settings(data_dir=args.data_dir)

    # These module-level names (exe, infer_program, fetches) are read by
    # detect_face() on every forward pass.
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    main_program = fluid.Program()
    startup_program = fluid.Program()
    image_shape = [3, 1024, 1024]
    with fluid.program_guard(main_program, startup_program):
        network = PyramidBox(
            data_shape=image_shape,
            sub_network=args.use_pyramidbox,
            is_infer=True)
        infer_program, nmsed_out = network.infer(main_program)
        fetches = [nmsed_out]
        # Load the trained weights from model_dir into the inference program.
        fluid.io.load_persistables(
            exe, args.model_dir, main_program=infer_program)
        # save model and program
        #fluid.io.save_inference_model('pyramidbox_model',
        #    ['image'], [nmsed_out], exe, main_program=infer_program,
        #    model_filename='model', params_filename='params')
    infer(args, config)

multi-scale 这个怎么关？只运行其中一个吗？

Feb 27 '19 02:02 burness

下面代码是各种测试，可以只留第一个det0：

                det0 = detect_face(image, shrink)
                det1 = flip_test(image, shrink)
                [det2, det3] = multi_scale_test(image, max_shrink)
                det4 = multi_scale_test_pyramid(image, max_shrink)
                det = np.row_stack((det0, det1, det2, det3, det4))
                dets = bbox_vote(det)

Feb 27 '19 03:02 qingqing01

@qingqing01 多谢

Mar 05 '19 08:03 burness

@burness Hello, I'm trying to add a video mode to the code just like you did. Besides widerface_eval.py, which other files did you modify, and how? When I simply run that script, I get the following error:

Traceback (most recent call last):
  File "widerface_eval_video.py", line 334, in <module>
    is_infer=True)
  File "/mnt/net/i2x256-ai01/hotel/sihwang/live/ppmodels/PaddleCV/face_detection/pyramidbox.py", line 103, in __init__
    self._input()
  File "/mnt/net/i2x256-ai01/hotel/sihwang/live/ppmodels/PaddleCV/face_detection/pyramidbox.py", line 114, in _input
    name='image', shape=self.data_shape, dtype='float32')
  File "", line 2, in data
  File "/home/sihwang/.local/lib/python3.6/site-packages/paddle/fluid/wrapped_decorator.py", line 25, in impl
    return wrapped_func(*args, **kwargs)
  File "/home/sihwang/.local/lib/python3.6/site-packages/paddle/fluid/framework.py", line 235, in impl
    ), "In PaddlePaddle 2.x, we turn on dynamic graph mode by default, and '%s()' is only supported in static graph mode. So if you want to use this api, please call 'paddle.enable_static()' before this api to enter static graph mode." % func.__name__
AssertionError: In PaddlePaddle 2.x, we turn on dynamic graph mode by default, and 'data()' is only supported in static graph mode. So if you want to use this api, please call 'paddle.enable_static()' before this api to enter static graph mode.

thank you so much!!!

Aug 06 '21 17:08 neonin04

models models copied to clipboard

face detection耗时太多

models
models copied to clipboard