Pix2Text
Pix2Text copied to clipboard
Is there a simple and unified way to load all the required models from a custom location?
I hope to configure something similar to the following, enabling the loading of all models from the D drive. This way, I can centrally manage various large model files:
# Desired per-model configuration: every model is pointed at a custom on-disk
# location under AIGC_ENTRIES_BASE_DIR instead of the default cache folders.
# NOTE(review): AIGC_ENTRIES_BASE_DIR is presumably a pathlib.Path (it is
# combined with "/") -- confirm against its definition.

# Common prefixes of the model paths below, named once to avoid repetition.
_CNSTD_CNOCR_MODELS = (
    AIGC_ENTRIES_BASE_DIR
    / "huggingface.co"
    / "breezedeus"
    / "cnstd-cnocr-models"
    / "models"
)
_CNOCR_ROOT = _CNSTD_CNOCR_MODELS / "cnocr"
_CNSTD_ROOT = _CNSTD_CNOCR_MODELS / "cnstd"
_LATEX_OCR_DIR = AIGC_ENTRIES_BASE_DIR / "github.com" / "lukas-blecher" / "LaTeX-OCR"

analyzer_config = {
    "model_name": "mfd",
    "model_type": "yolov7_tiny",
    "root": _CNSTD_ROOT,
    "model_backend": "pytorch",
}

clf_config = {
    "base_model_name": "mobilenet_v2",
    "categories": IMAGE_TYPES,
    "transform_configs": {
        "crop_size": [150, 450],
        "resize_size": 160,
        "resize_max_size": 1000,
    },
    "model_dir": _CNSTD_CNOCR_MODELS / "pix2text" / "0.2" / "clf",
    "model_fp": None,
}

# The general (Chinese) and English text configs differ only in model names.
general_config = {
    "rec_model_name": "ch_PP-OCRv3",
    "det_model_name": "ch_PP-OCRv3_det",
    "rec_root": _CNOCR_ROOT,
    "det_root": _CNSTD_ROOT,
    "rec_model_fp": None,
    "det_model_fp": None,
    "rec_model_backend": "onnx",
    "det_model_backend": "onnx",
}

english_config = {
    "rec_model_name": "en_PP-OCRv3",
    "det_model_name": "en_PP-OCRv3_det",
    "rec_root": _CNOCR_ROOT,
    "det_root": _CNSTD_ROOT,
    "rec_model_fp": None,
    "det_model_fp": None,
    "rec_model_backend": "onnx",
    "det_model_backend": "onnx",
}

formula_config = {
    "config": LATEX_CONFIG_FP,
    "mfr_checkpoint": _LATEX_OCR_DIR / "weights.pth",
    "resizer_checkpoint": _LATEX_OCR_DIR / "image_resizer.pth",
    "no_resize": False,
}
Currently, after running `git clone https://huggingface.co/breezedeus/cnstd-cnocr-models`, it is necessary to unzip the contents of `models\cnocr\2.2`, `models\cnstd\1.2`, and `models\pix2text\0.2` into the current folder.
import os
from pix2text import Pix2Text
# Resolve every model location up front.
# NOTE: change these to the paths where your model files are stored.
mfd_model_fp = os.path.expanduser(
    '~/.cnstd/1.2/analysis/mfd-yolov7-epoch224-20230613.pt'
)
formula_model_dir = os.path.expanduser('~/.pix2text/1.0/mfr-pro-onnx')
text_rec_model_fp = os.path.expanduser(
    '~/.cnocr/2.3/doc-densenet_lite_666-gru_large/cnocr-v2.3-doc-densenet_lite_666-gru_large-epoch=005-ft-model.onnx'  # noqa
)

text_formula_config = {
    'languages': ('en', 'ch_sim'),  # languages to recognize
    # Initialization parameters for LayoutAnalyzer.
    'mfd': {
        # Use the YoloV7 model rather than the YoloV7_Tiny model.
        'model_type': 'yolov7',
        'model_fp': mfd_model_fp,
    },
    'formula': {
        'model_name': 'mfr-pro',
        'model_backend': 'onnx',
        'model_dir': formula_model_dir,
    },
    'text': {
        'rec_model_name': 'doc-densenet_lite_666-gru_large',
        'rec_model_backend': 'onnx',
        'rec_model_fp': text_rec_model_fp,
    },
}

# Top-level config handed to Pix2Text: layout settings plus the
# text/formula settings assembled above.
total_config = dict(
    layout=dict(scores_thresh=0.45),
    text_formula=text_formula_config,
)

p2t = Pix2Text.from_config(total_configs=total_config)
This may be what you're looking for. You can change the value of `model_fp` or `model_dir` to customize your model location.
See more @ Usage