pdf2docx
pdf2docx copied to clipboard
RuntimeError: could not parse color space (0 0 R)
[INFO] Start to convert xxxxxxxxxxxxxxxxxxxxxx.pdf
[INFO] [1/4] Opening document...
[INFO] [2/4] Analyzing document...
[WARNING] Ignore image due to inconsistent size of color and mask pixmaps: [18, 17, 1419, 1240, 8, 'ICCBased', '', 'Im18', 'FlateDecode', 0]
[ERROR] Exception on /pdf2word [POST]
Traceback (most recent call last):
File "/usr/local/lib/python3.7/site-packages/flask/app.py", line 2525, in wsgi_app
response = self.full_dispatch_request()
File "/usr/local/lib/python3.7/site-packages/flask/app.py", line 1822, in full_dispatch_request
rv = self.handle_user_exception(e)
File "/usr/local/lib/python3.7/site-packages/flask/app.py", line 1820, in full_dispatch_request
rv = self.dispatch_request()
File "/usr/local/lib/python3.7/site-packages/flask/app.py", line 1796, in dispatch_request
return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args)
File "app.py", line 34, in pdf2word
cv.convert(docx_filename=word_path, start=0, end=None, pages=None)
File "/usr/local/lib/python3.7/site-packages/pdf2docx/converter.py", line 329, in convert
self.parse(start, end, pages, **settings).make_docx(docx_filename, **settings)
File "/usr/local/lib/python3.7/site-packages/pdf2docx/converter.py", line 113, in parse
.parse_document(**kwargs) \
File "/usr/local/lib/python3.7/site-packages/pdf2docx/converter.py", line 153, in parse_document
self._pages.parse(self.fitz_doc, **kwargs)
File "/usr/local/lib/python3.7/site-packages/pdf2docx/page/Pages.py", line 37, in parse
raw_page.restore(**settings)
File "/usr/local/lib/python3.7/site-packages/pdf2docx/common/share.py", line 226, in inner
objects = func(*args, **kwargs)
File "/usr/local/lib/python3.7/site-packages/pdf2docx/page/RawPage.py", line 66, in restore
raw_dict = self.extract_raw_dict(**settings)
File "/usr/local/lib/python3.7/site-packages/pdf2docx/page/RawPageFitz.py", line 33, in extract_raw_dict
image_blocks = self._preprocess_images(**settings)
File "/usr/local/lib/python3.7/site-packages/pdf2docx/page/RawPageFitz.py", line 118, in _preprocess_images
return ImagesExtractor(self.page_engine).extract_images(settings['clip_image_res_ratio'])
File "/usr/local/lib/python3.7/site-packages/pdf2docx/image/ImagesExtractor.py", line 108, in extract_images
rects = self._page.get_image_rects(item)
File "/usr/local/lib/python3.7/site-packages/fitz/utils.py", line 728, in get_image_rects
pix = Pixmap(page.parent, xref) # make pixmap of the image to compute MD5
File "/usr/local/lib/python3.7/site-packages/fitz/fitz.py", line 6823, in __init__
_fitz.Pixmap_swiginit(self, _fitz.new_Pixmap(*args))
RuntimeError: could not parse color space (0 0 R)