onyx
onyx copied to clipboard
Error while background indexing
Using web connector and Indexing Attempts failed
Traceback (most recent call last):
File "/app/danswer/background/indexing/run_indexing.py", line 212, in _run_indexing
credential_id=db_credential.id,
^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/orm/attributes.py", line 563, in __get__
return self.impl.get(state, dict_) # type: ignore[no-any-return]
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/orm/attributes.py", line 1084, in get
value = self._fire_loader_callables(state, key, passive)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/orm/attributes.py", line 1114, in _fire_loader_callables
return state._load_expired(state, passive)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/orm/state.py", line 798, in _load_expired
self.manager.expired_attribute_loader(self, toload, passive)
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/orm/loading.py", line 1626, in load_scalar_attributes
result = load_on_ident(
^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/orm/loading.py", line 482, in load_on_ident
return load_on_pk_identity(
^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/orm/loading.py", line 679, in load_on_pk_identity
return result.one()
^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/engine/result.py", line 1825, in one
return self._only_one_row(
^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/engine/result.py", line 760, in _only_one_row
row: Optional[_InterimRowType[Any]] = onerow(hard_close=True)
^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/engine/result.py", line 1688, in _fetchone_impl
return self._real_result._fetchone_impl(hard_close=hard_close)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/engine/result.py", line 2280, in _fetchone_impl
row = next(self.iterator, _NO_ROW)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/orm/loading.py", line 191, in chunks
fetch = cursor._raw_all_rows()
^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/engine/result.py", line 549, in _raw_all_rows
return [make_row(row) for row in rows]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/engine/result.py", line 549, in <listcomp>
return [make_row(row) for row in rows]
^^^^^^^^^^^^^
File "lib/sqlalchemy/cyextension/resultproxy.pyx", line 16, in sqlalchemy.cyextension.resultproxy.BaseRow.__init__
File "lib/sqlalchemy/cyextension/resultproxy.pyx", line 73, in sqlalchemy.cyextension.resultproxy._apply_processors
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/sql/type_api.py", line 2160, in process
return fixed_process_value(value, dialect)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/fastapi_users_db_sqlalchemy/generics.py", line 46, in process_result_value
value = uuid.UUID(value)
^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/uuid.py", line 175, in __init__
hex = hex.replace('urn:', '').replace('uuid:', '')
^^^^^^^^^^^
AttributeError: 'int' object has no attribute 'replace'
Another failed task:
Traceback (most recent call last):
File "/app/danswer/background/indexing/run_indexing.py", line 191, in _run_indexing
db_session.refresh(db_connector)
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/orm/session.py", line 3012, in refresh
loading.load_on_ident(
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/orm/loading.py", line 482, in load_on_ident
return load_on_pk_identity(
^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/orm/loading.py", line 679, in load_on_pk_identity
return result.one()
^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/engine/result.py", line 1825, in one
return self._only_one_row(
^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/engine/result.py", line 760, in _only_one_row
row: Optional[_InterimRowType[Any]] = onerow(hard_close=True)
^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/engine/result.py", line 1688, in _fetchone_impl
return self._real_result._fetchone_impl(hard_close=hard_close)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/engine/result.py", line 2280, in _fetchone_impl
row = next(self.iterator, _NO_ROW)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/orm/loading.py", line 191, in chunks
fetch = cursor._raw_all_rows()
^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/engine/result.py", line 546, in _raw_all_rows
make_row = self._row_getter
^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/util/langhelpers.py", line 1257, in __get__
obj.__dict__[self.__name__] = result = self.fget(obj)
^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/engine/result.py", line 478, in _row_getter
key_to_index = metadata._key_to_index
^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/engine/cursor.py", line 1363, in _key_to_index
self._we_dont_return_rows()
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/engine/cursor.py", line 1343, in _we_dont_return_rows
raise exc.ResourceClosedError(
sqlalchemy.exc.ResourceClosedError: This result object does not return rows. It has been closed automatically.
3
Traceback (most recent call last):
File "/app/danswer/background/indexing/run_indexing.py", line 208, in _run_indexing
new_docs, total_batch_chunks = indexing_pipeline(
^^^^^^^^^^^^^^^^^^
File "/app/danswer/utils/timing.py", line 31, in wrapped_func
result = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/app/danswer/indexing/indexing_pipeline.py", line 129, in index_doc_batch
db_docs = get_documents_by_ids(
^^^^^^^^^^^^^^^^^^^^^
File "/app/danswer/db/document.py", line 49, in get_documents_by_ids
documents = db_session.execute(stmt).scalars().all()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/engine/result.py", line 1784, in all
return self._allrows()
^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/engine/result.py", line 557, in _allrows
rows = self._fetchall_impl()
^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/engine/result.py", line 1691, in _fetchall_impl
return self._real_result._fetchall_impl()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/engine/result.py", line 2291, in _fetchall_impl
return list(self.iterator)
^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/orm/loading.py", line 191, in chunks
fetch = cursor._raw_all_rows()
^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/engine/result.py", line 549, in _raw_all_rows
return [make_row(row) for row in rows]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sqlalchemy/engine/result.py", line 549, in <listcomp>
return [make_row(row) for row in rows]
^^^^^^^^^^^^^
File "lib/sqlalchemy/cyextension/resultproxy.pyx", line 16, in sqlalchemy.cyextension.resultproxy.BaseRow.__init__
File "lib/sqlalchemy/cyextension/resultproxy.pyx", line 71, in sqlalchemy.cyextension.resultproxy._apply_processors
IndexError: tuple index out of range
From my own experience, using SQLAlchemy as the broker/backend for background jobs is highly unstable. I had to switch to using Redis as both broker and backend. After that, the background jobs were way more stable.
Hey, how do you switch to Redis? Do you need to modify the code, or can it be done through configuration?
I had to modify the code to force using Redis as the backend and broker. There are currently no configuration options to support switching the backend and broker.
Also, note that for document indexing, Danswer does not rely on Celery; instead, it has its own workers. Celery is only being used for:
- Document set sync.
- Beat (Cron) schedule.
Maybe the dev team didn't want to complicate the architecture by adding another component, instead relying on the existing Postgres instance for running Celery, and maybe they will be able to fix all the issues in the future. But for now, if you're self-hosting Danswer, adding Redis to your own deployment is relatively easy and only requires modifying a few lines of code.
Hey all! We recently put in https://github.com/danswer-ai/danswer/pull/1399/files, which should address most of this! Let me know if you're still running into any issues 🙏