PaddleOCR 在ppstructure的版面分析和表格识别中缺乏onnx模型的调用方式

python ppstructure/predict_system.py --use_gpu=False --use_onnx=True --det_model_dir=./inference/det_onnx/det.onnx --rec_model_dir=./inference/rec_onnx/rec.onnx --table_model_dir=./inference/sla_onnx/sla.onnx --layout_model_dir=./inference/pico_onnx/pico.onnx --image_dir=./docs/table/1.png --rec_char_dict_path=../ppocr/utils/ppocr_keys_v1.txt --table_char_dict_path=../ppocr/utils/dict/table_structure_dict_ch.txt --output=../output --vis_font_path=../doc/fonts/simfang.ttf

运行上述命令会报错，经查是 ppstructure/layout/predict_layout.py 中缺少 onnx 模型的调用，临时改成了：

class LayoutPredictor(object):
def __init__(self, args):
    pre_process_list = [{
        'Resize': {
            'size': [800, 608]
        }
    }, {
        'NormalizeImage': {
            'std': [0.229, 0.224, 0.225],
            'mean': [0.485, 0.456, 0.406],
            'scale': '1./255.',
            'order': 'hwc'
        }
    }, {
        'ToCHWImage': None
    }, {
        'KeepKeys': {
            'keep_keys': ['image']
        }
    }]
    postprocess_params = {
        'name': 'PicoDetPostProcess',
        "layout_dict_path": args.layout_dict_path,
        "score_threshold": args.layout_score_threshold,
        "nms_threshold": args.layout_nms_threshold,
    }

    # Turn the preprocessing config list and the post-processing parameters
    # built earlier in this constructor into callable objects.
    self.preprocess_op = create_operators(pre_process_list)
    self.postprocess_op = build_post_process(postprocess_params)
    # Ask the shared utility for the 'layout' model predictor and keep all four
    # results on the instance.
    # NOTE(review): __call__ uses self.predictor.run(self.output_tensors, feed)
    # and self.input_tensor.name, so with --use_onnx these are presumably an
    # ONNX Runtime session plus its input/output descriptors — confirm in
    # utility.create_predictor.
    self.predictor, self.input_tensor, self.output_tensors, self.config = \
        utility.create_predictor(args, 'layout', logger)

def __call__(self, img):
    """Run layout analysis on a single image using the ONNX-style predictor.

    Args:
        img: Input image. It must provide ``.copy()``; an ndarray is
            assumed — TODO confirm against the caller.

    Returns:
        A ``(post_preds, elapse)`` tuple. ``post_preds`` is whatever
        ``self.postprocess_op`` returns and ``elapse`` is the time in seconds
        spent on inference plus post-processing (preprocessing is not
        timed). ``(None, 0)`` is returned when preprocessing yields no image.
    """
    # Keep an untouched copy: the post-processor receives the original image
    # alongside the network input.
    original = img.copy()
    transformed = transform({'image': img}, self.preprocess_op)
    img = transformed[0]
    if img is None:
        return None, 0

    # Prepend the batch axis and hand the predictor its own array.
    img = np.expand_dims(img, axis=0).copy()

    starttime = time.time()

    # ONNX path: feed the batch by input name and fetch every requested output
    # in one run() call. This replaces the Paddle handle-based sequence
    # (input_tensor.copy_from_cpu -> predictor.run ->
    # get_output_handle(name).copy_to_cpu()).
    feed = {self.input_tensor.name: img}
    outputs = self.predictor.run(self.output_tensors, feed)

    # The outputs are split into two halves of equal length. The key names
    # are reproduced exactly: the first half goes under 'boxes' and the second
    # under 'boxes_num' (presumably scores and box predictions respectively,
    # going by the original local names — verify against the post-processor).
    half = len(outputs) // 2
    first_half = [outputs[idx] for idx in range(half)]
    second_half = [outputs[idx + half] for idx in range(half)]
    preds = {'boxes': first_half, 'boxes_num': second_half}

    post_preds = self.postprocess_op(original, img, preds)
    elapse = time.time() - starttime
    return post_preds, elapse

不过这只是我们自己临时用一下，希望官方可以修复这个Bug

Oct 21 '22 07:10 goalya

还有两个地方有问题，一个是 tools/infer/predict_system.py：

def sorted_boxes(dt_boxes):
    """
    Sort text boxes in order from top to bottom, left to right
    args:
        dt_boxes(array):detected text boxes with shape [4, 2]
    return:
        sorted boxes(array) with shape [4, 2]
    """
    num_boxes = dt_boxes.shape[0]
    # print(dt_boxes)
    # print(dt_boxes.shape)
    if abs(num_boxes - 2) < 1e-4:
        sorted_boxes = sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0]))
    else:
        sorted_boxes = sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0]))
    # print('use', sorted_boxes)
    _boxes = list(sorted_boxes)

# Second pass over the already-sorted boxes: neighbours whose first points
# differ by less than 10 in the [0][1] coordinate (the primary sort key, i.e.
# the top-to-bottom axis) are treated as one text line and reordered so the
# smaller [0][0] coordinate (left-to-right axis) comes first.
for i in range(num_boxes - 1):
    # Walk backwards from position i, bubbling the box at j + 1 towards the
    # front while it belongs to the same line and lies further left.
    # NOTE(review): range(i, 0, -1) stops at j == 1, so the pair
    # (_boxes[0], _boxes[1]) is never compared, and i == 0 runs no inner
    # iteration at all. range(i, -1, -1) would include that pair — confirm
    # the intended behaviour.
    for j in range(i, 0, -1):
        if abs(_boxes[j + 1][0][1] - _boxes[j][0][1]) < 10 and \
                (_boxes[j + 1][0][0] < _boxes[j][0][0]):
            # Swap the two neighbours.
            tmp = _boxes[j]
            _boxes[j] = _boxes[j + 1]
            _boxes[j + 1] = tmp
        else:
            # The first neighbour that is already in order ends this walk.
            break
return _boxes

还有就是 ppstructure/table/predict_structure.py：

class TableStructurer(object):
def __init__(self, args):
    pre_process_list = build_pre_process_list(args)
    if args.table_algorithm not in ['TableMaster']:
        postprocess_params = {
            'name': 'TableLabelDecode',
            "character_dict_path": args.table_char_dict_path,
            'merge_no_span_structure': args.merge_no_span_structure
        }
    else:
        postprocess_params = {
            'name': 'TableMasterLabelDecode',
            "character_dict_path": args.table_char_dict_path,
            'box_shape': 'pad',
            'merge_no_span_structure': args.merge_no_span_structure
        }

    # Turn the preprocessing config list and the post-processing parameters
    # chosen earlier in this constructor into callable objects.
    self.preprocess_op = create_operators(pre_process_list)
    self.postprocess_op = build_post_process(postprocess_params)
    # Ask the shared utility for the 'table' model predictor and keep all four
    # results on the instance.
    # NOTE(review): __call__ uses self.predictor.run(self.output_tensors, feed)
    # and self.input_tensor.name, so with --use_onnx these are presumably an
    # ONNX Runtime session plus its input/output descriptors — confirm in
    # utility.create_predictor.
    self.predictor, self.input_tensor, self.output_tensors, self.config = \
        utility.create_predictor(args, 'table', logger)

def __call__(self, img):
    """Predict the table structure of one image using the ONNX-style predictor.

    Args:
        img: Input table image, passed to the preprocessing operators under
            the ``'image'`` key.

    Returns:
        ``((structure_str_list, bbox_list), elapse)``. ``structure_str_list``
        is the decoded structure token list wrapped in
        ``<html><body><table>`` ... ``</table></body></html>`` tokens,
        ``bbox_list`` is the first entry of the post-processor's
        ``'bbox_batch_list'``, and ``elapse`` is the wall-clock time in
        seconds for the whole call. ``(None, 0)`` is returned when
        preprocessing yields no image.
    """
    starttime = time.time()
    # No copy of the raw image is taken here: nothing below reads the
    # original pixels, so the previous per-call copy was wasted work.
    data = transform({'image': img}, self.preprocess_op)
    img = data[0]
    if img is None:
        return None, 0
    # Prepend the batch axis and hand the predictor its own array.
    img = np.expand_dims(img, axis=0).copy()

    # ONNX path: feed the batch by input name and fetch every requested output
    # in one run() call (replaces the Paddle copy_from_cpu / run /
    # copy_to_cpu sequence).
    outputs = self.predictor.run(self.output_tensors,
                                 {self.input_tensor.name: img})

    # Output order as consumed here: index 1 -> structure probabilities,
    # index 0 -> location predictions.
    preds = {
        'structure_probs': outputs[1],
        'loc_preds': outputs[0],
    }

    # The last item produced by preprocessing is forwarded, batched, as the
    # shape information for decoding.
    shape_list = np.expand_dims(data[-1], axis=0)
    post_result = self.postprocess_op(preds, [shape_list])

    # Single-image batch: take entry 0 of each batched result.
    bbox_list = post_result['bbox_batch_list'][0]
    structure_tokens = post_result['structure_batch_list'][0][0]
    structure_str_list = (['<html>', '<body>', '<table>'] + structure_tokens
                          + ['</table>', '</body>', '</html>'])
    elapse = time.time() - starttime
    return (structure_str_list, bbox_list), elapse

Oct 21 '22 10:10 goalya

你好，目前版面分析还没有适配onnx

Oct 21 '22 12:10 littletomatodonkey

PaddleOCR PaddleOCR copied to clipboard

在ppstructure的版面分析和表格识别中缺乏onnx模型的调用方式

PaddleOCR
PaddleOCR copied to clipboard