JSON output throws error for ollama provider
I am trying to run the "Usage - JSON Mode" example from the LiteLLM docs, but it fails with an error: in
json.dumps(function_call["arguments"])
I get KeyError: 'arguments'.
In detail:
I have litellm version 1.48.0 and ollama version 0.3.13. All the other examples work, so the problem does seem to be specific to format="json".
Here is what I am running:
from litellm import completion

response = completion(
    model="ollama/" + my_llm,
    messages=[{"content": "respond in json in 20 words. who are you?", "role": "user"}],
    api_base="http://localhost:11434",
    format="json",
    # response_format={"type": "json_object"},
)
print(response)
And this is the error message:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
File ~/Daten/miniforge3/envs/crawl/lib/python3.11/site-packages/litellm/main.py:2558, in completion(model, messages, timeout, temperature, top_p, n, stream, stream_options, stop, max_completion_tokens, max_tokens, presence_penalty, frequency_penalty, logit_bias, user, response_format, seed, tools, tool_choice, logprobs, top_logprobs, parallel_tool_calls, deployment_id, extra_headers, functions, function_call, base_url, api_version, api_key, model_list, **kwargs)
2557 ## LOGGING
-> 2558 generator = ollama.get_ollama_response(
2559 api_base=api_base,
2560 model=model,
2561 prompt=prompt,
2562 optional_params=optional_params,
2563 logging_obj=logging,
2564 acompletion=acompletion,
2565 model_response=model_response,
2566 encoding=encoding,
2567 )
2568 if acompletion is True or optional_params.get("stream", False) == True:
File ~/Daten/miniforge3/envs/crawl/lib/python3.11/site-packages/litellm/llms/ollama.py:295, in get_ollama_response(model_response, api_base, model, prompt, optional_params, logging_obj, acompletion, encoding)
287 function_call = json.loads(response_json["response"])
288 message = litellm.Message(
289 content=None,
290 tool_calls=[
291 {
292 "id": f"call_{str(uuid.uuid4())}",
293 "function": {
294 "name": function_call["name"],
--> 295 "arguments": json.dumps(function_call["arguments"]),
296 },
297 "type": "function",
298 }
299 ],
300 )
301 model_response.choices[0].message = message # type: ignore
KeyError: 'arguments'
During handling of the above exception, another exception occurred:
APIConnectionError Traceback (most recent call last)
/tmp/ipykernel_12760/1700835460.py in ?()
1 from litellm import completion
2
3 # here we do not set Stream=True
----> 4 response = completion(
5 model="ollama[/](http://localhost:8888/)"+my_llm,
6 messages=[{ "content": "respond in json in 20 words. who are you?","role": "user"}],
7 api_base="http://localhost:11434",
~/Daten/miniforge3/envs/crawl/lib/python3.11/site-packages/litellm/utils.py in ?(*args, **kwargs)
1082 if (
1083 liteDebuggerClient and liteDebuggerClient.dashboard_url != None
1084 ): # make it easy to get to the debugger logs if you've initialized it
1085 e.message += f"\n Check the log in your dashboard - {liteDebuggerClient.dashboard_url}"
-> 1086 raise e
~/Daten/miniforge3/envs/crawl/lib/python3.11/site-packages/litellm/utils.py in ?(*args, **kwargs)
1082 if (
1083 liteDebuggerClient and liteDebuggerClient.dashboard_url != None
1084 ): # make it easy to get to the debugger logs if you've initialized it
1085 e.message += f"\n Check the log in your dashboard - {liteDebuggerClient.dashboard_url}"
-> 1086 raise e
~/Daten/miniforge3/envs/crawl/lib/python3.11/site-packages/litellm/main.py in ?(model, messages, timeout, temperature, top_p, n, stream, stream_options, stop, max_completion_tokens, max_tokens, presence_penalty, frequency_penalty, logit_bias, user, response_format, seed, tools, tool_choice, logprobs, top_logprobs, parallel_tool_calls, deployment_id, extra_headers, functions, function_call, base_url, api_version, api_key, model_list, **kwargs)
2844 )
2845 return response
2846 except Exception as e:
2847 ## Map to OpenAI Exception
-> 2848 raise exception_type(
2849 model=model,
2850 custom_llm_provider=custom_llm_provider,
2851 original_exception=e,
~/Daten/miniforge3/envs/crawl/lib/python3.11/site-packages/litellm/utils.py in ?(model, original_exception, custom_llm_provider, completion_kwargs, extra_kwargs)
8203 threading.Thread(target=get_all_keys, args=(e.llm_provider,)).start()
8204 # don't let an error with mapping interrupt the user from receiving an error from the llm api calls
8205 if exception_mapping_worked:
8206 setattr(e, "litellm_response_headers", litellm_response_headers)
-> 8207 raise e
8208 else:
8209 for error_type in litellm.LITELLM_EXCEPTION_TYPES:
8210 if isinstance(e, error_type):
~/Daten/miniforge3/envs/crawl/lib/python3.11/site-packages/litellm/utils.py in ?(model, original_exception, custom_llm_provider, completion_kwargs, extra_kwargs)
8203 threading.Thread(target=get_all_keys, args=(e.llm_provider,)).start()
8204 # don't let an error with mapping interrupt the user from receiving an error from the llm api calls
8205 if exception_mapping_worked:
8206 setattr(e, "litellm_response_headers", litellm_response_headers)
-> 8207 raise e
8208 else:
8209 for error_type in litellm.LITELLM_EXCEPTION_TYPES:
8210 if isinstance(e, error_type):
APIConnectionError: litellm.APIConnectionError: 'arguments'
Traceback (most recent call last):
File "[/home/rvs/Daten/miniforge3/envs/crawl/lib/python3.11/site-packages/litellm/main.py", line 2558](http://localhost:8888/home/rvs/Daten/miniforge3/envs/crawl/lib/python3.11/site-packages/litellm/main.py#line=2557), in completion
generator = ollama.get_ollama_response(
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "[/home/rvs/Daten/miniforge3/envs/crawl/lib/python3.11/site-packages/litellm/llms/ollama.py", line 295](http://localhost:8888/home/rvs/Daten/miniforge3/envs/crawl/lib/python3.11/site-packages/litellm/llms/ollama.py#line=294), in get_ollama_response
"arguments": json.dumps(function_call["arguments"]),
~~~~~~~~~~~~~^^^^^^^^^^^^^
KeyError: 'arguments'
Since the code I am running is exactly the one provided as an example (just using llama3.2 instead of llama2), I assume it is some compatibility issue. It would be great if that could be fixed.
I hope this info is sufficient, but I'd be happy to provide more if I can.
Following
Hi, I'm experiencing a similar issue while using ollama_chat in JSON mode with llama3.1 and async. The process works correctly when not streaming, but fails during streaming.
I suspect the issue is related to how the ollama_acompletion function in llms/ollama_chat.py handles requests. That function checks both data.get("format", "") == "json" and whether function_name is not None before assuming the request is a tool call.
However, the ollama_async_streaming function only checks data.get("format", "") == "json", so it tries to fit the response of a JSON-mode request into the tool-calling format. A rough standalone sketch of the mismatch follows below.
I hope this helps in diagnosing and resolving the problem. Thanks for your attention to this!
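To illustrate, here is a rough standalone paraphrase of the two guards as I understand them. This is not the actual litellm source; data, function_name, and the reply text are stand-ins, and the surrounding logic is heavily simplified:

import json

def parse_non_streaming(data, function_name, response_text):
    # ollama_acompletion (as described above): only builds a tool call when a
    # function name was actually requested alongside JSON mode.
    if data.get("format", "") == "json" and function_name is not None:
        parsed = json.loads(response_text)
        return {"tool_call": {"name": parsed["name"], "arguments": parsed["arguments"]}}
    return {"content": response_text}

def parse_streaming(data, function_name, response_text):
    # ollama_async_streaming (as described above): checks the format flag alone,
    # so a plain JSON-mode reply is also forced into the tool-calling shape and
    # the "name"/"arguments" lookups can blow up with a KeyError.
    if data.get("format", "") == "json":
        parsed = json.loads(response_text)
        return {"tool_call": {"name": parsed["name"], "arguments": parsed["arguments"]}}
    return {"content": response_text}

reply = '{"name": "llama3.1", "answer": "I am a local model."}'  # JSON mode, no tool call requested
print(parse_non_streaming({"format": "json"}, None, reply))  # returned as plain content, fine
print(parse_streaming({"format": "json"}, None, reply))      # raises KeyError: 'arguments', like this issue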
Following! I'm facing the same issue.
Same issue. Using ollama/llama3.1 (not chat mode)
I am running into this error using DSPy with llama3.1 served by ollama. Setting litellm.set_verbose=True, I can see the response being returned and it looks good; I just don't think it's being parsed correctly.
However, I was able to make my program work and avoid the error by increasing max_tokens.
Not sure if this helps with debugging.
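In plain litellm terms (I actually set the limit through DSPy in my case), the change is roughly the following; the model name and the value 1024 are placeholders to adjust for your own setup:

from litellm import completion

# Placeholder model and limit -- tune for your own setup. Giving the model
# more room to finish its JSON output is what avoided the KeyError for me.
response = completion(
    model="ollama/llama3.1",
    messages=[{"content": "respond in json in 20 words. who are you?", "role": "user"}],
    api_base="http://localhost:11434",
    format="json",
    max_tokens=1024,
)
print(response)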
Also ran into this with DSPy and llama3.2. I haven't had time to do a PR, but I just edited the ollama code on my side (ollama.py in llms, around line 337). It looks like once you set format to json, it assumes the response is a function call.
I added a check: if "name" is not in function_call, skip the function parsing and parse it like a regular response. This has fixed my DSPy setup; if I have some time tomorrow I'll submit a PR.
Total caveat: I have never messed with litellm and was literally just fiddling today trying to get past the error. If someone knows more about what litellm is trying to do here, please shed some light.
if data.get("format", "") == "json":
function_call = json.loads(response_json["response"])
# ADD NEXT 3 LINES AND TAB STUFF IN
if "name" not in function_call:
model_response.choices[0].message.content = response_json["response"] # jusst cause json doesnt mean #function
else:
message = litellm.Message(
content=None,
tool_calls=[
{
"id": f"call_{str(uuid.uuid4())}",
"function": {
"name": function_call["name"],
"arguments": json.dumps(function_call["arguments"]),
},
"type": "function",
}
],
)
model_response.choices[0].message = message # type: ignore
model_response.choices[0].finish_reason = "tool_calls"
else:
model_response.choices[0].message.content = response_json["response"] # type: ignore
My workaround was to have the litellm SDK use ollama's OpenAI-compatible endpoint: instead of using ollama/llama3.2:latest, I prefix the model with openai/ and provide the api_base:
responses = completion(
    model="openai/llama3.2:latest",
    api_base="http://localhost:11434/v1",
    messages=[{"content": "respond in json in 20 words. who are you?", "role": "user"}],
    response_format={"type": "json_object"},
)
I've had to use ollama_chat/ instead of ollama/, which really threw me at first.
My working config:
- model_name: ollama_chat/deepseek-r1:14b
  litellm_params:
    model: ollama_chat/deepseek-r1:14b
    api_base: "http://localhost:11434"
This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs.