markitdown
markitdown copied to clipboard
The xls file from test/ is failing; no other file types are
I cannot figure out why this is failing when nothing else is; it's driving me crazy. UPDATE: To be fair, I only tried the xls from tests, but that should work. Images, PDFs, docx ... all work.
LOGS:
INFO Starting markdown conversion... tools.py:134
INFO Type of input content: <class 'pathlib.PosixPath'> (57) tools.py:142
INFO Using existing file: tmp/job_a8a33fc3-604d-41b6-8914-ac26e0c1fddc/original.xls tools.py:152
INFO Starting md.convert() tools.py:165
INFO Using file extension: .xls tools.py:178
[01/15/25 00:05:35] ERROR Exception in ASGI application httptools_impl.py:414
╭───────────────────────────────────────────────────────────────────── Traceback (most recent call last) ──────────────────────────────────────────────────────────────────────╮
│ /Users/me/labs/project/.venv/lib/python3.11/site-packages/anyio/streams/memory.py:111 in receive │
│ │
│ 108 │ async def receive(self) -> T_co: │
│ 109 │ │ await checkpoint() │
│ 110 │ │ try: │
│ ❱ 111 │ │ │ return self.receive_nowait() │
│ 112 │ │ except WouldBlock: │
│ 113 │ │ │ # Add ourselves in the queue │
│ 114 │ │ │ receive_event = Event() │
│ │
│ /Users/me/labs/project/.venv/lib/python3.11/site-packages/anyio/streams/memory.py:106 in receive_nowait │
│ │
│ 103 │ │ elif not self._state.open_send_channels: │
│ 104 │ │ │ raise EndOfStream │
│ 105 │ │ │
│ ❱ 106 │ │ raise WouldBlock │
│ 107 │ │
│ 108 │ async def receive(self) -> T_co: │
│ 109 │ │ await checkpoint() │
╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
WouldBlock
During handling of the above exception, another exception occurred:
╭───────────────────────────────────────────────────────────────────── Traceback (most recent call last) ──────────────────────────────────────────────────────────────────────╮
│ /Users/me/labs/project/.venv/lib/python3.11/site-packages/anyio/streams/memory.py:124 in receive │
│ │
│ 121 │ │ │ │ self._state.waiting_receivers.pop(receive_event, None) │
│ 122 │ │ │ │
│ 123 │ │ │ try: │
│ ❱ 124 │ │ │ │ return receiver.item │
│ 125 │ │ │ except AttributeError: │
│ 126 │ │ │ │ raise EndOfStream │
│ 127 │
╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
AttributeError: 'MemoryObjectItemReceiver' object has no attribute 'item'
During handling of the above exception, another exception occurred:
╭───────────────────────────────────────────────────────────────────── Traceback (most recent call last) ──────────────────────────────────────────────────────────────────────╮
│ /Users/me/labs/project/.venv/lib/python3.11/site-packages/starlette/middleware/base.py:157 in call_next │
│ │
│ 154 │ │ │ task_group.start_soon(coro) │
│ 155 │ │ │ │
│ 156 │ │ │ try: │
│ ❱ 157 │ │ │ │ message = await recv_stream.receive() │
│ 158 │ │ │ │ info = message.get("info", None) │
│ 159 │ │ │ │ if message["type"] == "http.response.debug" and info is not None: │
│ 160 │ │ │ │ │ message = await recv_stream.receive() │
│ │
│ /Users/me/labs/project/.venv/lib/python3.11/site-packages/anyio/streams/memory.py:126 in receive │
│ │
│ 123 │ │ │ try: │
│ 124 │ │ │ │ return receiver.item │
│ 125 │ │ │ except AttributeError: │
│ ❱ 126 │ │ │ │ raise EndOfStream │
│ 127 │ │
│ 128 │ def clone(self) -> MemoryObjectReceiveStream[T_co]: │
│ 129 │ │ """ │
╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
EndOfStream
During handling of the above exception, another exception occurred:
╭───────────────────────────────────────────────────────────────────── Traceback (most recent call last) ──────────────────────────────────────────────────────────────────────╮
│ /Users/me/labs/project/.venv/lib/python3.11/site-packages/starlette/middleware/base.py:187 in __call__ │
│ │
│ 184 │ │ │
│ 185 │ │ with collapse_excgroups(): │
│ 186 │ │ │ async with anyio.create_task_group() as task_group: │
│ ❱ 187 │ │ │ │ response = await self.dispatch_func(request, call_next) │
│ 188 │ │ │ │ await response(scope, wrapped_receive, send) │
│ 189 │ │ │ │ response_sent.set() │
│ 190 │
│ │
│ /Users/me/labs/project/.venv/lib/python3.11/site-packages/slowapi/middleware.py:136 in dispatch │
│ │
│ 133 │ │ if error_response is not None: │
│ 134 │ │ │ return error_response │
│ 135 │ │ │
│ ❱ 136 │ │ response = await call_next(request) │
│ 137 │ │ if should_inject_headers: │
│ 138 │ │ │ response = limiter._inject_headers(response, request.state.view_rate_limit) │
│ 139 │ │ return response │
│ │
│ /Users/me/labs/project/.venv/lib/python3.11/site-packages/starlette/middleware/base.py:164 in call_next │
│ │
│ 161 │ │ │ except anyio.EndOfStream: │
│ 162 │ │ │ │ if app_exc is not None: │
│ 163 │ │ │ │ │ raise app_exc │
│ ❱ 164 │ │ │ │ raise RuntimeError("No response returned.") │
│ 165 │ │ │ │
│ 166 │ │ │ assert message["type"] == "http.response.start" │
│ 167 │
╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
RuntimeError: No response returned.
During handling of the above exception, another exception occurred:
╭───────────────────────────────────────────────────────────────────── Traceback (most recent call last) ──────────────────────────────────────────────────────────────────────╮
│ /Users/me/labs/project/.venv/lib/python3.11/site-packages/uvicorn/protocols/http/httptools_impl.py:409 in │
│ run_asgi │
│ │
│ 406 │ # ASGI exception wrapper │
│ 407 │ async def run_asgi(self, app: ASGI3Application) -> None: │
│ 408 │ │ try: │
│ ❱ 409 │ │ │ result = await app( # type: ignore[func-returns-value] │
│ 410 │ │ │ │ self.scope, self.receive, self.send │
│ 411 │ │ │ ) │
│ 412 │ │ except BaseException as exc: │
│ │
│ /Users/me/labs/project/.venv/lib/python3.11/site-packages/uvicorn/middleware/proxy_headers.py:60 in __call__ │
│ │
│ 57 │ │ │ │ │ port = 0 │
│ 58 │ │ │ │ │ scope["client"] = (host, port) │
│ 59 │ │ │
│ ❱ 60 │ │ return await self.app(scope, receive, send) │
│ 61 │
│ 62 │
│ 63 def _parse_raw_hosts(value: str) -> list[str]: │
│ │
│ /Users/me/labs/project/.venv/lib/python3.11/site-packages/fastapi/applications.py:1054 in __call__ │
│ │
│ 1051 │ async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None: │
│ 1052 │ │ if self.root_path: │
│ 1053 │ │ │ scope["root_path"] = self.root_path │
│ ❱ 1054 │ │ await super().__call__(scope, receive, send) │
│ 1055 │ │
│ 1056 │ def add_api_route( │
│ 1057 │ │ self, │
│ │
│ /Users/me/labs/project/.venv/lib/python3.11/site-packages/starlette/applications.py:113 in __call__ │
│ │
│ 110 │ │ scope["app"] = self │
│ 111 │ │ if self.middleware_stack is None: │
│ 112 │ │ │ self.middleware_stack = self.build_middleware_stack() │
│ ❱ 113 │ │ await self.middleware_stack(scope, receive, send) │
│ 114 │ │
│ 115 │ def on_event(self, event_type: str) -> typing.Callable: # type: ignore[type-arg] │
│ 116 │ │ return self.router.on_event(event_type) # pragma: no cover │
│ │
│ /Users/me/labs/project/.venv/lib/python3.11/site-packages/starlette/middleware/errors.py:165 in __call__ │
│ │
│ 162 │ │ │ await send(message) │
│ 163 │ │ │
│ 164 │ │ try: │
│ ❱ 165 │ │ │ await self.app(scope, receive, _send) │
│ 166 │ │ except Exception as exc: │
│ 167 │ │ │ request = Request(scope) │
│ 168 │ │ │ if self.debug: │
│ │
│ /Users/me/labs/project/.venv/lib/python3.11/site-packages/starlette/middleware/cors.py:93 in __call__ │
│ │
│ 90 │ │ │ await response(scope, receive, send) │
│ 91 │ │ │ return │
│ 92 │ │ │
│ ❱ 93 │ │ await self.simple_response(scope, receive, send, request_headers=headers) │
│ 94 │ │
│ 95 │ def is_allowed_origin(self, origin: str) -> bool: │
│ 96 │ │ if self.allow_all_origins: │
│ │
│ /Users/me/labs/project/.venv/lib/python3.11/site-packages/starlette/middleware/cors.py:144 in simple_response │
│ │
│ 141 │ │
│ 142 │ async def simple_response(self, scope: Scope, receive: Receive, send: Send, │
│ request_headers: Headers) -> None: │
│ 143 │ │ send = functools.partial(self.send, send=send, request_headers=request_headers) │
│ ❱ 144 │ │ await self.app(scope, receive, send) │
│ 145 │ │
│ 146 │ async def send(self, message: Message, send: Send, request_headers: Headers) -> │
│ None: │
│ 147 │ │ if message["type"] != "http.response.start": │
│ │
│ /Users/me/labs/project/.venv/lib/python3.11/site-packages/starlette/middleware/base.py:186 in __call__ │
│ │
│ 183 │ │ │ return response │
│ 184 │ │ │
│ 185 │ │ with collapse_excgroups(): │
│ ❱ 186 │ │ │ async with anyio.create_task_group() as task_group: │
│ 187 │ │ │ │ response = await self.dispatch_func(request, call_next) │
│ 188 │ │ │ │ await response(scope, wrapped_receive, send) │
│ 189 │ │ │ │ response_sent.set() │
│ │
│ /Users/me/labs/project/.venv/lib/python3.11/site-packages/anyio/_backends/_asyncio.py:815 in __aexit__ │
│ │
│ 812 │ │ │ │ │
│ 813 │ │ │ │ self._active = False │
│ 814 │ │ │ │ if self._exceptions: │
│ ❱ 815 │ │ │ │ │ raise BaseExceptionGroup( │
│ 816 │ │ │ │ │ │ "unhandled errors in a TaskGroup", self._exceptions │
│ 817 │ │ │ │ │ ) │
│ 818 │ │ │ │ elif exc_val: │
╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
BaseExceptionGroup: unhandled errors in a TaskGroup (2 sub-exceptions)
(*this is wrapped in FastAPI)