PaddleOCR
PaddleOCR copied to clipboard
在ppstructure的版面分析和表格识别中缺乏onnx模型的调用方式
# Reproduction command from the issue report:
#   python ppstructure/predict_system.py --use_gpu=False --use_onnx=True \
#       --det_model_dir=./inference/det_onnx/det.onnx \
#       --rec_model_dir=./inference/rec_onnx/rec.onnx \
#       --table_model_dir=./inference/sla_onnx/sla.onnx \
#       --layout_model_dir=./inference/pico_onnx/pico.onnx \
#       --image_dir=./docs/table/1.png \
#       --rec_char_dict_path=../ppocr/utils/ppocr_keys_v1.txt \
#       --table_char_dict_path=../ppocr/utils/dict/table_structure_dict_ch.txt \
#       --output=../output --vis_font_path=../doc/fonts/simfang.ttf
# The command raises an error. The reporter traced it to
# ppstructure/layout/predict_layout.py having no ONNX code path and
# temporarily changed the class to the version below.
class LayoutPredictor(object):
    """Layout predictor: preprocess, run inference, apply PicoDetPostProcess.

    Supports both the ONNX path added by the workaround and the Paddle
    Inference path that the workaround had commented out; the backend is
    selected by ``args.use_onnx``.
    """

    def __init__(self, args):
        """Build the pre/post-processing operators and the predictor.

        args:
            args: parsed options; this method reads ``layout_dict_path``,
                ``layout_score_threshold``, ``layout_nms_threshold`` and
                ``use_onnx``, and forwards ``args`` to
                ``utility.create_predictor``.
        """
        pre_process_list = [{
            'Resize': {
                'size': [800, 608]
            }
        }, {
            'NormalizeImage': {
                'std': [0.229, 0.224, 0.225],
                'mean': [0.485, 0.456, 0.406],
                'scale': '1./255.',
                'order': 'hwc'
            }
        }, {
            'ToCHWImage': None
        }, {
            'KeepKeys': {
                'keep_keys': ['image']
            }
        }]
        postprocess_params = {
            'name': 'PicoDetPostProcess',
            "layout_dict_path": args.layout_dict_path,
            "score_threshold": args.layout_score_threshold,
            "nms_threshold": args.layout_nms_threshold,
        }

        self.preprocess_op = create_operators(pre_process_list)
        self.postprocess_op = build_post_process(postprocess_params)
        self.predictor, self.input_tensor, self.output_tensors, self.config = \
            utility.create_predictor(args, 'layout', logger)
        # Remember the backend so __call__ can pick the matching run API.
        self.use_onnx = args.use_onnx

    def __call__(self, img):
        """Run layout analysis on one image.

        args:
            img: input image; it is copied before preprocessing so the
                post-processor receives the untouched original.
        return:
            (post_preds, elapse): post-processed predictions and the time in
            seconds spent on inference plus post-processing (preprocessing is
            not timed). Returns ``(None, 0)`` when preprocessing yields no
            image.
        """
        ori_im = img.copy()
        data = {'image': img}
        data = transform(data, self.preprocess_op)
        img = data[0]

        if img is None:
            return None, 0

        # Add the batch axis; copy so the backend gets its own buffer.
        img = np.expand_dims(img, axis=0)
        img = img.copy()

        starttime = time.time()

        np_score_list, np_boxes_list = [], []
        if self.use_onnx:
            # NOTE(review): assumes create_predictor returned an ONNX Runtime
            # session and its input node when use_onnx is set -- confirm.
            input_dict = {self.input_tensor.name: img}
            outputs = self.predictor.run(self.output_tensors, input_dict)
            num_outs = int(len(outputs) / 2)
            for out_idx in range(num_outs):
                np_score_list.append(outputs[out_idx])
                np_boxes_list.append(outputs[out_idx + num_outs])
        else:
            # Paddle Inference path, restored from the code the workaround
            # had commented out.
            self.input_tensor.copy_from_cpu(img)
            self.predictor.run()
            output_names = self.predictor.get_output_names()
            num_outs = int(len(output_names) / 2)
            for out_idx in range(num_outs):
                np_score_list.append(
                    self.predictor.get_output_handle(output_names[out_idx])
                    .copy_to_cpu())
                np_boxes_list.append(
                    self.predictor.get_output_handle(output_names[
                        out_idx + num_outs]).copy_to_cpu())

        # First half of the outputs goes under 'boxes', second half under
        # 'boxes_num'; the key names are what the post-processor receives.
        preds = dict(boxes=np_score_list, boxes_num=np_boxes_list)

        post_preds = self.postprocess_op(ori_im, img, preds)
        elapse = time.time() - starttime
        return post_preds, elapse
不过这只是我们自己临时用一下,希望官方可以修复这个Bug
# Issue note: two more places have problems; the first one is
# tools/infer/predict_system.py.
def sorted_boxes(dt_boxes):
    """
    Sort text boxes in order from top to bottom, left to right
    args:
        dt_boxes(array): detected text boxes; each box is a sequence of
            (x, y) points and its first point is used as the sort key
    return:
        list of the same boxes in reading order
    """
    num_boxes = len(dt_boxes)
    # Primary ordering: by the first point's y, then by its x.
    _boxes = sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0]))

    # Boxes whose first points differ by less than 10 in y are treated as the
    # same text line and re-ordered left to right with an insertion pass.
    for i in range(num_boxes - 1):
        # Walk back to index 0 inclusive; stopping at 1 would never compare
        # the first two boxes.
        for j in range(i, -1, -1):
            if abs(_boxes[j + 1][0][1] - _boxes[j][0][1]) < 10 and \
                    (_boxes[j + 1][0][0] < _boxes[j][0][0]):
                _boxes[j], _boxes[j + 1] = _boxes[j + 1], _boxes[j]
            else:
                break
    return _boxes
# Issue note: the other changed file is
# ppstructure/table/predict_structure.py.
class TableStructurer(object):
    """Table-structure predictor: preprocess, run inference, decode labels.

    Supports both the ONNX path added by the workaround and the Paddle
    Inference path that the workaround had commented out; the backend is
    selected by ``args.use_onnx``.
    """

    def __init__(self, args):
        """Build the pre/post-processing operators and the predictor.

        args:
            args: parsed options; this method reads ``table_algorithm``,
                ``table_char_dict_path``, ``merge_no_span_structure`` and
                ``use_onnx``, and forwards ``args`` to
                ``build_pre_process_list`` and ``utility.create_predictor``.
        """
        pre_process_list = build_pre_process_list(args)
        if args.table_algorithm not in ['TableMaster']:
            postprocess_params = {
                'name': 'TableLabelDecode',
                "character_dict_path": args.table_char_dict_path,
                'merge_no_span_structure': args.merge_no_span_structure
            }
        else:
            postprocess_params = {
                'name': 'TableMasterLabelDecode',
                "character_dict_path": args.table_char_dict_path,
                'box_shape': 'pad',
                'merge_no_span_structure': args.merge_no_span_structure
            }

        self.preprocess_op = create_operators(pre_process_list)
        self.postprocess_op = build_post_process(postprocess_params)
        self.predictor, self.input_tensor, self.output_tensors, self.config = \
            utility.create_predictor(args, 'table', logger)
        # Remember the backend so __call__ can pick the matching run API.
        self.use_onnx = args.use_onnx

    def __call__(self, img):
        """Predict the structure tokens and cell boxes of one table image.

        args:
            img: input table image.
        return:
            ((structure_str_list, bbox_list), elapse): the structure tokens
            wrapped in html/body/table tags, the first batch entry of the
            decoded boxes, and the total time in seconds including
            preprocessing. Returns ``(None, 0)`` when preprocessing yields
            no image.
        """
        starttime = time.time()
        data = {'image': img}
        data = transform(data, self.preprocess_op)
        img = data[0]
        if img is None:
            return None, 0

        # Add the batch axis; copy so the backend gets its own buffer.
        img = np.expand_dims(img, axis=0)
        img = img.copy()

        if self.use_onnx:
            # NOTE(review): assumes create_predictor returned an ONNX Runtime
            # session and its input node when use_onnx is set -- confirm.
            input_dict = {self.input_tensor.name: img}
            outputs = self.predictor.run(self.output_tensors, input_dict)
        else:
            # Paddle Inference path, restored from the code the workaround
            # had commented out.
            self.input_tensor.copy_from_cpu(img)
            self.predictor.run()
            outputs = [
                output_tensor.copy_to_cpu()
                for output_tensor in self.output_tensors
            ]

        # Output 1 is passed as the structure probabilities and output 0 as
        # the location predictions.
        preds = {}
        preds['structure_probs'] = outputs[1]
        preds['loc_preds'] = outputs[0]

        # The last preprocessed item is handed to the decoder as a
        # one-element batch.
        shape_list = np.expand_dims(data[-1], axis=0)
        post_result = self.postprocess_op(preds, [shape_list])

        structure_str_list = post_result['structure_batch_list'][0]
        bbox_list = post_result['bbox_batch_list'][0]
        structure_str_list = structure_str_list[0]
        structure_str_list = [
            '<html>', '<body>', '<table>'
        ] + structure_str_list + ['</table>', '</body>', '</html>']
        elapse = time.time() - starttime
        return (structure_str_list, bbox_list), elapse
你好,目前版面分析还没有适配onnx