llama-stack
llama-stack copied to clipboard
Chat completion unique id collisions with ollama provider
System Info
Latest llama-stack from main commit 2603f10f95fcd302297158adb709d2a84c9f60af
🐛 Describe the bug
When testing the responses API with the ollama provider, I'm frequently hitting unique id collision errors when attempting to store the chat completions generated as part of my Responses API calls into my local sqlite database. I'm not sure whether Llama Stack or ollama is generating the chat completion ids, but they are not very unique. I'm seeing id values like chatcmpl-373 or chatcmpl-948, which look like short random (or sequential) numbers rather than globally unique identifiers.
The sqlite database enforces a unique constraint on the chat completion id column when storing these chat completions, and the lack of uniqueness in these generated id values violates that constraint.
Error logs
23:45:17.586 [ERROR] Error executing endpoint route='/v1/openai/v1/responses' method='post'
Traceback (most recent call last):
File "/Volumes/SourceCode/llama-stack/venv/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 1963, in _exec_single_context
self.dialect.do_execute(
File "/Volumes/SourceCode/llama-stack/venv/lib/python3.10/site-packages/sqlalchemy/engine/default.py", line 943, in do_execute
cursor.execute(statement, parameters)
File "/Volumes/SourceCode/llama-stack/venv/lib/python3.10/site-packages/sqlalchemy/dialects/sqlite/aiosqlite.py", line 149, in execute
self._adapt_connection._handle_exception(error)
File "/Volumes/SourceCode/llama-stack/venv/lib/python3.10/site-packages/sqlalchemy/dialects/sqlite/aiosqlite.py", line 300, in _handle_exception
raise error
File "/Volumes/SourceCode/llama-stack/venv/lib/python3.10/site-packages/sqlalchemy/dialects/sqlite/aiosqlite.py", line 131, in execute
self.await_(_cursor.execute(operation, parameters))
File "/Volumes/SourceCode/llama-stack/venv/lib/python3.10/site-packages/sqlalchemy/util/_concurrency_py3k.py", line 132, in await_only
return current.parent.switch(awaitable) # type: ignore[no-any-return,attr-defined] # noqa: E501
File "/Volumes/SourceCode/llama-stack/venv/lib/python3.10/site-packages/sqlalchemy/util/_concurrency_py3k.py", line 196, in greenlet_spawn
value = await result
File "/Volumes/SourceCode/llama-stack/venv/lib/python3.10/site-packages/aiosqlite/cursor.py", line 40, in execute
await self._execute(self._cursor.execute, sql, parameters)
File "/Volumes/SourceCode/llama-stack/venv/lib/python3.10/site-packages/aiosqlite/cursor.py", line 32, in _execute
return await self._conn._execute(fn, *args, **kwargs)
File "/Volumes/SourceCode/llama-stack/venv/lib/python3.10/site-packages/aiosqlite/core.py", line 122, in _execute
return await future
File "/Volumes/SourceCode/llama-stack/venv/lib/python3.10/site-packages/aiosqlite/core.py", line 105, in run
result = function()
sqlite3.IntegrityError: UNIQUE constraint failed: chat_completions.id
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/Volumes/SourceCode/llama-stack/llama_stack/distribution/server/server.py", line 235, in route_handler
return await maybe_await(value)
File "/Volumes/SourceCode/llama-stack/llama_stack/distribution/server/server.py", line 172, in maybe_await
return await value
File "/Volumes/SourceCode/llama-stack/llama_stack/providers/inline/agents/meta_reference/agents.py", line 329, in create_openai_response
return await self.openai_responses_impl.create_openai_response(
File "/Volumes/SourceCode/llama-stack/llama_stack/providers/inline/agents/meta_reference/openai_responses.py", line 361, in create_openai_response
inference_result = await self.inference_api.openai_chat_completion(
File "/Volumes/SourceCode/llama-stack/llama_stack/providers/utils/telemetry/trace_protocol.py", line 103, in async_wrapper
result = await method(self, *args, **kwargs)
File "/Volumes/SourceCode/llama-stack/llama_stack/distribution/routers/inference.py", line 546, in openai_chat_completion
await self.store.store_chat_completion(response, messages)
File "/Volumes/SourceCode/llama-stack/llama_stack/providers/utils/inference/inference_store.py", line 50, in store_chat_completion
await self.sql_store.insert(
File "/Volumes/SourceCode/llama-stack/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py", line 88, in insert
await session.execute(self.metadata.tables[table].insert(), data)
File "/Volumes/SourceCode/llama-stack/venv/lib/python3.10/site-packages/sqlalchemy/ext/asyncio/session.py", line 463, in execute
result = await greenlet_spawn(
File "/Volumes/SourceCode/llama-stack/venv/lib/python3.10/site-packages/sqlalchemy/util/_concurrency_py3k.py", line 201, in greenlet_spawn
result = context.throw(*sys.exc_info())
File "/Volumes/SourceCode/llama-stack/venv/lib/python3.10/site-packages/sqlalchemy/orm/session.py", line 2365, in execute
return self._execute_internal(
File "/Volumes/SourceCode/llama-stack/venv/lib/python3.10/site-packages/sqlalchemy/orm/session.py", line 2260, in _execute_internal
result = conn.execute(
File "/Volumes/SourceCode/llama-stack/venv/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 1415, in execute
return meth(
File "/Volumes/SourceCode/llama-stack/venv/lib/python3.10/site-packages/sqlalchemy/sql/elements.py", line 523, in _execute_on_connection
return connection._execute_clauseelement(
File "/Volumes/SourceCode/llama-stack/venv/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 1637, in _execute_clauseelement
ret = self._execute_context(
File "/Volumes/SourceCode/llama-stack/venv/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 1842, in _execute_context
return self._exec_single_context(
File "/Volumes/SourceCode/llama-stack/venv/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 1982, in _exec_single_context
self._handle_dbapi_exception(
File "/Volumes/SourceCode/llama-stack/venv/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 2351, in _handle_dbapi_exception
raise sqlalchemy_exception.with_traceback(exc_info[2]) from e
File "/Volumes/SourceCode/llama-stack/venv/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 1963, in _exec_single_context
self.dialect.do_execute(
File "/Volumes/SourceCode/llama-stack/venv/lib/python3.10/site-packages/sqlalchemy/engine/default.py", line 943, in do_execute
cursor.execute(statement, parameters)
File "/Volumes/SourceCode/llama-stack/venv/lib/python3.10/site-packages/sqlalchemy/dialects/sqlite/aiosqlite.py", line 149, in execute
self._adapt_connection._handle_exception(error)
File "/Volumes/SourceCode/llama-stack/venv/lib/python3.10/site-packages/sqlalchemy/dialects/sqlite/aiosqlite.py", line 300, in _handle_exception
raise error
File "/Volumes/SourceCode/llama-stack/venv/lib/python3.10/site-packages/sqlalchemy/dialects/sqlite/aiosqlite.py", line 131, in execute
self.await_(_cursor.execute(operation, parameters))
File "/Volumes/SourceCode/llama-stack/venv/lib/python3.10/site-packages/sqlalchemy/util/_concurrency_py3k.py", line 132, in await_only
return current.parent.switch(awaitable) # type: ignore[no-any-return,attr-defined] # noqa: E501
File "/Volumes/SourceCode/llama-stack/venv/lib/python3.10/site-packages/sqlalchemy/util/_concurrency_py3k.py", line 196, in greenlet_spawn
value = await result
File "/Volumes/SourceCode/llama-stack/venv/lib/python3.10/site-packages/aiosqlite/cursor.py", line 40, in execute
await self._execute(self._cursor.execute, sql, parameters)
File "/Volumes/SourceCode/llama-stack/venv/lib/python3.10/site-packages/aiosqlite/cursor.py", line 32, in _execute
return await self._conn._execute(fn, *args, **kwargs)
File "/Volumes/SourceCode/llama-stack/venv/lib/python3.10/site-packages/aiosqlite/core.py", line 122, in _execute
return await future
File "/Volumes/SourceCode/llama-stack/venv/lib/python3.10/site-packages/aiosqlite/core.py", line 105, in run
result = function()
sqlalchemy.exc.IntegrityError: (sqlite3.IntegrityError) UNIQUE constraint failed: chat_completions.id
[SQL: INSERT INTO chat_completions (id, created, model, choices, input_messages) VALUES (?, ?, ?, ?, ?)]
[parameters: ('chatcmpl-948', 1748562316, 'llama3.2:3b-instruct-fp16', '[{"finish_reason": "stop", "index": 0, "logprobs": null, "message": {"content": "Lines of code unfold\\nLogic\'s hidden beauty shines\\nMachine\'s sweet dance", "refusal": null, "role": "assistant", "annotations": null, "audio": null, "function_call": null, "tool_calls": null}}]', '[{"role": "user", "content": "Write a haiku about coding.", "name": null}]')]
(Background on this error at: https://sqlalche.me/e/20/gkpj)
Expected behavior
The chat completions should be stored without any error, and each should have a globally unique id — or at least an id unique enough that collisions are vanishingly rare (e.g. a UUID-based suffix rather than a small number).