MegaParse
MegaParse copied to clipboard
README example gives errors on .pdf files
After running `pip install megaparse` in a clean environment, I tried the README example on a couple of .pdf files, an .xlsx file, and a .doc file. Although I assume the `unstructured_parser` is expected to fail on everything but .pdf files, in all cases I get the following error message:
{
"name": "ImportError",
"message": "libGL.so.1: cannot open shared object file: No such file or directory",
"stack": "---------------------------------------------------------------------------
ImportError Traceback (most recent call last)
Cell In[24], line 1
----> 1 from megaparse import MegaParse
2 from langchain_openai import ChatOpenAI
3 from megaparse.parser.unstructured_parser import UnstructuredParser
File ~/miniconda3/envs/megaflex/lib/python3.10/site-packages/megaparse/__init__.py:1
----> 1 from .Converter import MegaParse
3 __all__ = [\"MegaParse\"]
File ~/miniconda3/envs/megaflex/lib/python3.10/site-packages/megaparse/Converter.py:25
23 from megaparse.markdown_processor import MarkdownProcessor
24 from megaparse.multimodal_convertor.megaparse_vision import MegaParseVision
---> 25 from megaparse.unstructured_convertor import ModelEnum, UnstructuredParser
28 class Converter:
29 def __init__(self) -> None:
File ~/miniconda3/envs/megaflex/lib/python3.10/site-packages/megaparse/unstructured_convertor.py:4
2 import re
3 from langchain_openai import ChatOpenAI
----> 4 from unstructured.partition.pdf import partition_pdf
5 from dotenv import load_dotenv
6 from langchain_core.prompts import ChatPromptTemplate
File ~/miniconda3/envs/megaflex/lib/python3.10/site-packages/unstructured/partition/pdf.py:20
18 from PIL import Image as PILImage
19 from pypdf import PdfReader
---> 20 from unstructured_inference.inference.layout import DocumentLayout
21 from unstructured_inference.inference.layoutelement import LayoutElement
23 from unstructured.chunking import add_chunking_strategy
File ~/miniconda3/envs/megaflex/lib/python3.10/site-packages/unstructured_inference/inference/layout.py:15
10 from PIL import Image, ImageSequence
12 from unstructured_inference.inference.elements import (
13 TextRegion,
14 )
---> 15 from unstructured_inference.inference.layoutelement import LayoutElement, LayoutElements
16 from unstructured_inference.logger import logger
17 from unstructured_inference.models.base import get_model
File ~/miniconda3/envs/megaflex/lib/python3.10/site-packages/unstructured_inference/inference/layoutelement.py:7
4 from typing import Any, Collection, Iterable, List, Optional
6 import numpy as np
----> 7 from layoutparser.elements.layout import TextBlock
8 from pandas import DataFrame
9 from scipy.sparse.csgraph import connected_components
File ~/miniconda3/envs/megaflex/lib/python3.10/site-packages/layoutparser/elements/__init__.py:16
1 # Copyright 2021 The Layout Parser team. All rights reserved.
2 #
3 # Licensed under the Apache License, Version 2.0 (the \"License\");
(...)
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 from .base import BaseCoordElement, BaseLayoutElement
---> 16 from .layout_elements import (
17 Interval,
18 Rectangle,
19 Quadrilateral,
20 TextBlock,
21 ALL_BASECOORD_ELEMENTS,
22 BASECOORD_ELEMENT_NAMEMAP,
23 BASECOORD_ELEMENT_INDEXMAP,
24 )
25 from .layout import Layout
File ~/miniconda3/envs/megaflex/lib/python3.10/site-packages/layoutparser/elements/layout_elements.py:25
23 import pandas as pd
24 from PIL import Image
---> 25 from cv2 import getPerspectiveTransform as _getPerspectiveTransform
26 from cv2 import warpPerspective as _warpPerspective
28 from .base import BaseCoordElement, BaseLayoutElement
ImportError: libGL.so.1: cannot open shared object file: No such file or directory"
}