MinerU icon indicating copy to clipboard operation
MinerU copied to clipboard

T4显卡 zero-size array to reduction operation maximum which has no identity

Open wen0527zhang opened this issue 1 year ago • 2 comments

Description of the bug | 错误描述

0: 1888x1312 299 embeddings, 1 isolated, 462.2ms
Speed: 20.8ms preprocess, 462.2ms inference, 34.8ms postprocess per image at shape (1, 3, 1888, 1312)
2024-08-03 10:54:25.915 | ERROR | magic_pdf.cli.magicpdf:parse_doc:338 - zero-size array to reduction operation maximum which has no identity
Traceback (most recent call last):

File "/root/anaconda3/envs/MinerU/bin/magic-pdf", line 8, in sys.exit(cli()) │ │ └ <Group cli> │ └ └ <module 'sys' (built-in)> File "/root/anaconda3/envs/MinerU/lib/python3.10/site-packages/click/core.py", line 1157, in call return self.main(*args, **kwargs) │ │ │ └ {} │ │ └ () │ └ <function BaseCommand.main at 0x7ff697df36d0> └ <Group cli> File "/root/anaconda3/envs/MinerU/lib/python3.10/site-packages/click/core.py", line 1078, in main rv = self.invoke(ctx) │ │ └ <click.core.Context object at 0x7ff698050c40> │ └ <function MultiCommand.invoke at 0x7ff697e04700> └ <Group cli> File "/root/anaconda3/envs/MinerU/lib/python3.10/site-packages/click/core.py", line 1688, in invoke return _process_result(sub_ctx.command.invoke(sub_ctx)) │ │ │ │ └ <click.core.Context object at 0x7ff549676320> │ │ │ └ <function Command.invoke at 0x7ff697e041f0> │ │ └ <Command pdf-command> │ └ <click.core.Context object at 0x7ff549676320> └ <function MultiCommand.invoke.._process_result at 0x7ff6982c3d90> File "/root/anaconda3/envs/MinerU/lib/python3.10/site-packages/click/core.py", line 1434, in invoke return ctx.invoke(self.callback, **ctx.params) │ │ │ │ │ └ {'pdf': 'small_ocr.pdf', 'model': None, 'method': 'auto', 'inside_model': True, 'model_mode': 'full'} │ │ │ │ └ <click.core.Context object at 0x7ff549676320> │ │ │ └ <function pdf_command at 0x7ff54968df30> │ │ └ <Command pdf-command> │ └ <function Context.invoke at 0x7ff697df2ef0> └ <click.core.Context object at 0x7ff549676320> File "/root/anaconda3/envs/MinerU/lib/python3.10/site-packages/click/core.py", line 783, in invoke return __callback(*args, **kwargs) │ └ {'pdf': 'small_ocr.pdf', 'model': None, 'method': 'auto', 'inside_model': True, 'model_mode': 'full'} └ () File "/root/anaconda3/envs/MinerU/lib/python3.10/site-packages/magic_pdf/cli/magicpdf.py", line 352, in pdf_command parse_doc(pdf) │ └ 'small_ocr.pdf' └ <function pdf_command..parse_doc at 0x7ff54968dc60>

File "/root/anaconda3/envs/MinerU/lib/python3.10/site-packages/magic_pdf/cli/magicpdf.py", line 330, in parse_doc do_parse( └ <function do_parse at 0x7ff54968d870> File "/root/anaconda3/envs/MinerU/lib/python3.10/site-packages/magic_pdf/cli/magicpdf.py", line 111, in do_parse pipe.pipe_analyze() │ └ <function UNIPipe.pipe_analyze at 0x7ff54968c4c0> └ <magic_pdf.pipe.UNIPipe.UNIPipe object at 0x7ff549676560> File "/root/anaconda3/envs/MinerU/lib/python3.10/site-packages/magic_pdf/pipe/UNIPipe.py", line 31, in pipe_analyze self.model_list = doc_analyze(self.pdf_bytes, ocr=True) │ │ │ │ └ b'%PDF-1.7\r\n%\xa1\xb3\xc5\xd7\r\n1 0 obj\r\n<</Pages 2 0 R /Type/Catalog>>\r\nendobj\r\n2 0 obj\r\n<</Count 8/Kids[ 4 0 R ... │ │ │ └ <magic_pdf.pipe.UNIPipe.UNIPipe object at 0x7ff549676560> │ │ └ <function doc_analyze at 0x7ff696a33400> │ └ [] └ <magic_pdf.pipe.UNIPipe.UNIPipe object at 0x7ff549676560> File "/root/anaconda3/envs/MinerU/lib/python3.10/site-packages/magic_pdf/model/doc_analyze_by_custom_model.py", line 113, in doc_analyze result = custom_model(img) │ └ array([[[255, 255, 255], │ [255, 255, 255], │ [255, 255, 255], │ ..., │ [255, 255, 255], │ [255... └ <magic_pdf.model.pdf_extract_kit.CustomPEKModel object at 0x7ff5490dd510> File "/root/anaconda3/envs/MinerU/lib/python3.10/site-packages/magic_pdf/model/pdf_extract_kit.py", line 166, in call for mf_img in dataloader: │ └ <torch.utils.data.dataloader.DataLoader object at 0x7ff3cb64cfd0> └ tensor([[[[-4.5633, -4.5633, -4.5633, ..., -4.5633, -4.5633, -4.5633], [-4.5633, -4.5633, -4.5633, ..., -4.5633, ... 
File "/root/anaconda3/envs/MinerU/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 631, in next data = self._next_data() │ └ <function _SingleProcessDataLoaderIter._next_data at 0x7ff48f5cd120> └ <torch.utils.data.dataloader._SingleProcessDataLoaderIter object at 0x7ff3cb6ae470> File "/root/anaconda3/envs/MinerU/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 675, in _next_data data = self._dataset_fetcher.fetch(index) # may raise StopIteration │ │ │ └ [192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216,... │ │ └ <function _MapDatasetFetcher.fetch at 0x7ff48f746b00> │ └ <torch.utils.data._utils.fetch._MapDatasetFetcher object at 0x7ff3cb6ad060> └ <torch.utils.data.dataloader._SingleProcessDataLoaderIter object at 0x7ff3cb6ae470> File "/root/anaconda3/envs/MinerU/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 51, in fetch data = [self.dataset[idx] for idx in possibly_batched_index] │ │ └ [192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216,... 
│ └ <magic_pdf.model.pdf_extract_kit.MathDataset object at 0x7ff3cb64cd30> └ <torch.utils.data._utils.fetch._MapDatasetFetcher object at 0x7ff3cb6ad060> File "/root/anaconda3/envs/MinerU/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 51, in data = [self.dataset[idx] for idx in possibly_batched_index] │ │ │ └ 195 │ │ └ 195 │ └ <magic_pdf.model.pdf_extract_kit.MathDataset object at 0x7ff3cb64cd30> └ <torch.utils.data._utils.fetch._MapDatasetFetcher object at 0x7ff3cb6ad060> File "/root/anaconda3/envs/MinerU/lib/python3.10/site-packages/magic_pdf/model/pdf_extract_kit.py", line 73, in getitem image = self.transform(raw_image) │ │ └ <PIL.Image.Image image mode=RGB size=0x1800 at 0x7FF3CB5C8DF0> │ └ Compose( │ <unimernet.processors.formula_processor.FormulaImageEvalProcessor object at 0x7ff440ccff10> │ ) └ <magic_pdf.model.pdf_extract_kit.MathDataset object at 0x7ff3cb64cd30> File "/root/anaconda3/envs/MinerU/lib/python3.10/site-packages/torchvision/transforms/transforms.py", line 95, in call img = t(img) │ └ <PIL.Image.Image image mode=RGB size=0x1800 at 0x7FF3CB5C8DF0> └ <unimernet.processors.formula_processor.FormulaImageEvalProcessor object at 0x7ff440ccff10> File "/root/anaconda3/envs/MinerU/lib/python3.10/site-packages/unimernet/processors/formula_processor.py", line 164, in call image = self.prepare_input(item) │ │ └ <PIL.Image.Image image mode=RGB size=0x1800 at 0x7FF3CB5C8DF0> │ └ <function FormulaImageBaseProcessor.prepare_input at 0x7ff49022cca0> └ <unimernet.processors.formula_processor.FormulaImageEvalProcessor object at 0x7ff440ccff10> File "/root/anaconda3/envs/MinerU/lib/python3.10/site-packages/unimernet/processors/formula_processor.py", line 51, in prepare_input img = self.crop_margin(img.convert("RGB")) │ │ │ └ <function Image.convert at 0x7ff48e3d57e0> │ │ └ <PIL.Image.Image image mode=RGB size=0x1800 at 0x7FF3CB5C8DF0> │ └ <staticmethod(<function FormulaImageBaseProcessor.crop_margin at 0x7ff49022cee0>)> └ 
<unimernet.processors.formula_processor.FormulaImageEvalProcessor object at 0x7ff440ccff10> File "/root/anaconda3/envs/MinerU/lib/python3.10/site-packages/unimernet/processors/formula_processor.py", line 29, in crop_margin max_val = data.max() │ └ <method 'max' of 'numpy.ndarray' objects> └ array([], shape=(1800, 0), dtype=uint8) File "/root/anaconda3/envs/MinerU/lib/python3.10/site-packages/numpy/core/_methods.py", line 41, in _amax return umr_maximum(a, axis, None, out, keepdims, initial, where) │ │ │ │ │ │ └ True │ │ │ │ │ └ │ │ │ │ └ False │ │ │ └ None │ │ └ None │ └ array([], shape=(1800, 0), dtype=uint8) └ <built-in method reduce of numpy.ufunc object at 0x7ff693695d40>

ValueError: zero-size array to reduction operation maximum which has no identity

How to reproduce the bug | 如何复现

magic-pdf pdf-command --pdf small_ocr.pdf

Operating system | 操作系统

Linux

Python version | Python 版本

3.10

Software version | 软件版本 (magic-pdf --version)

0.6.x

Device mode | 设备模式

cuda

wen0527zhang avatar Aug 03 '24 02:08 wen0527zhang

用 CPU 模式有这个问题吗?

myhloli avatar Aug 03 '24 03:08 myhloli

不可以

wen0527zhang avatar Aug 03 '24 14:08 wen0527zhang