Error With Tool Calling
System Info
I am testing tool calling with TGI, but the error below keeps occurring. Could you take a look?
Information
- [X] Docker
- [ ] The CLI directly
Tasks
- [X] An officially supported command
- [ ] My own modifications
Reproduction
from openai import OpenAI
import json
from openai.types.chat import ChatCompletion, ChatCompletionMessageToolCall
from openai.types.chat.chat_completion import ChatCompletionMessage, Choice
from openai.types.completion_usage import CompletionUsage
client = OpenAI(base_url="http://0.0.0.0/v1", api_key="not-used")
MODEL_NAME = "Meta-Llama-3.1-8B-Instruct"
# Define available function
weather_tool = {
    "type": "function",
    "function": {
        "name": "get_current_weather",
        "description": "Get the current weather",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {
                    "type": "string",
                    "description": "The city and state, e.g. San Francisco, CA"
                },
                "format": {
                    "type": "string",
                    "enum": ["celsius", "fahrenheit"],
                    "description": "The temperature unit to use. Infer this from the user's location."
                }
            },
            "required": ["location", "format"]
        }
    }
}
messages = [
    {
        "role": "system",
        "content": f"[AVAILABLE_TOOLS] {json.dumps(weather_tool)} [/AVAILABLE_TOOLS]"
        "You're a helpful assistant! Use tools if necessary, and reply in a JSON format",
    },
    {
        "role": "user",
        "content": "Is it hot in Pittsburgh, PA right now? long answer please"
    }
]
chat_response = client.chat.completions.create(
    model=MODEL_NAME,
    messages=messages,
    tools=[weather_tool],
    tool_choice="auto",
    stream=False
)
assistant_message = chat_response.choices[0].message
messages.append(assistant_message)
# Warning emitted here: Expected `str` but got `dict` - serialized value may not be as expected
# Example output:
# ChatCompletionMessage(content=None, role='assistant', function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_abc123', function=Function(arguments='{"location": "Pittsburgh, PA", "format": "fahrenheit"}', name='get_current_weather'), type='function')])
tool_call_result = 88
tool_call_id = assistant_message.tool_calls[0].id
tool_function_name = assistant_message.tool_calls[0].function.name
messages.append({"role": "tool", "content": str(tool_call_result), "tool_call_id": tool_call_id, "name": tool_function_name})
chat_response = client.chat.completions.create(
    model=MODEL_NAME,
    messages=messages,
    tools=[weather_tool],
    tool_choice="auto",
    stream=False
)
assistant_message = chat_response.choices[0].message
print(chat_response)
# Example output:
# ChatCompletionMessage(content='Based on the current temperature of 88°F (31°C) in Pittsburgh, PA, it is indeed quite hot right now. This temperature is generally considered warm to hot, especially if accompanied by high humidity, which is common in Pittsburgh during summer months.', role='assistant', function_call=None, tool_calls=None)
Expected behavior
The second request (after appending the tool result) should return a normal assistant message, but instead it fails with:
UnprocessableEntityError: Failed to deserialize the JSON body into the target type: messages[2].content: data did not match any variant of untagged enum MessageContent at line 1 column 675
Hi @Archmilio 👋
I edited your original issue a bit to be able to get the code formatting. Hopefully that was okay.
Unfortunately I'm not able to reproduce your issue. When I deploy the model and call it with this code:
import json
from openai import OpenAI
from openai.types.chat import ChatCompletion, ChatCompletionMessageToolCall
from openai.types.chat.chat_completion import ChatCompletionMessage, Choice
from openai.types.completion_usage import CompletionUsage
client = OpenAI(
    base_url="MY_ENDPOINT",
    api_key=""
)
weather_tool = {
    "type": "function",
    "function": {
        "name": "get_current_weather",
        "description": "Get the current weather",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {
                    "type": "string",
                    "description": "The city and state, e.g. San Francisco, CA"
                },
                "format": {
                    "type": "string",
                    "enum": ["celsius", "fahrenheit"],
                    "description": "The temperature unit to use. Infer this from the user's location."
                }
            },
            "required": ["location", "format"]
        }
    }
}
messages = [
    {
        "role": "system",
        "content": f"[AVAILABLE_TOOLS] {json.dumps(weather_tool)} [/AVAILABLE_TOOLS]"
        "You're a helpful assistant! Use tools if necessary, and reply in a JSON format",
    },
    {
        "role": "user",
        "content": "Is it hot in Pittsburgh, PA right now? long answer please"
    }
]
chat_response = client.chat.completions.create(
    model="Meta-Llama-3.1-8B-Instruct",
    messages=messages,
    tools=[weather_tool],
    tool_choice="auto",
    stream=False
)
assistant_message = chat_response.choices[0].message
messages.append(assistant_message)
print(assistant_message)
I get the result:
ChatCompletionMessage(content=None, refusal=None, role='assistant', function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='0', function=Function(arguments={'format': 'fahrenheit', 'location': 'Pittsburgh, PA'}, name='get_current_weather', description=None), type='function')])
Or did I misunderstand your question?
Yes, it works fine up to the part you reproduced. Function calling generally makes two requests to the LLM. As in the code above, the first request works normally, but a JSON formatting error occurs when the assistant message from request 1 and the result of the function execution are appended and the request is sent to the LLM again.
**messages.append(assistant_message)**
ChatCompletionMessage(content=None, role='assistant', function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_abc123', function=Function(arguments='{"location": "Pittsburgh, PA", "format": "fahrenheit"}', name='get_current_weather'), type='function')])
tool_call_result = 88
tool_call_id = assistant_message.tool_calls[0].id
tool_function_name = assistant_message.tool_calls[0].function.name
messages.append({"role": "tool", "content": str(tool_call_result), "tool_call_id": tool_call_id, "name": tool_function_name})
chat_response = client.chat.completions.create(
    model=MODEL_NAME,
    messages=messages,
    tools=[weather_tool],
    tool_choice="auto",
    stream=False
)
assistant_message = chat_response.choices[0].message
print(chat_response)
Expected output: ChatCompletionMessage(content='Based on the current temperature of 88°F (31°C) in Pittsburgh, PA, it is indeed quite hot right now. This temperature is generally considered warm to hot, especially if accompanied by high humidity, which is common in Pittsburgh during summer months.', role='assistant', function_call=None, tool_calls=None)
Actual output: UnprocessableEntityError: Failed to deserialize the JSON body into the target type: messages[2].content: data did not match any variant of untagged enum MessageContent at line 1 column 675
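In case it helps narrow things down, here is a rough sketch of appending the assistant turn as a plain dict with the arguments serialized to a string, instead of appending the ChatCompletionMessage object directly (the field names follow the OpenAI client types; whether TGI accepts this exact shape is an assumption):

```python
import json

# Sketch: rebuild the assistant tool-call turn as a plain dict. The arguments
# are forced to a JSON string, which is what the OpenAI-style schema expects.
assistant_turn = {
    "role": "assistant",
    "content": assistant_message.content or "",
    "tool_calls": [
        {
            "id": tc.id,
            "type": "function",
            "function": {
                "name": tc.function.name,
                # TGI may return `arguments` as a dict; serialize it if so.
                "arguments": tc.function.arguments
                if isinstance(tc.function.arguments, str)
                else json.dumps(tc.function.arguments),
            },
        }
        for tc in (assistant_message.tool_calls or [])
    ],
}
messages.append(assistant_turn)
```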
Hello! I am facing the same issue here. Was anyone able to find a workaround for this by any chance?
I guess this is related to #2480.
As Meta describes, to pass the tool call message back we need to use their new `ipython` role:
https://www.llama.com/docs/model-cards-and-prompt-formats/llama3_1
Maybe that is causing the issue.
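For illustration, the tool-result turn would then look roughly like this (a sketch based on Meta's prompt format docs; whether TGI's chat endpoint actually maps the `ipython` role is an assumption):

```python
# Sketch only: pass the tool result back with Meta's `ipython` role instead of
# the OpenAI-style `tool` role. Assumes TGI's chat template recognizes the role.
messages.append({
    "role": "ipython",
    "content": '{"temperature": 88, "unit": "fahrenheit"}',
    "tool_call_id": tool_call_id,
})
```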
I am encountering the same issue when using a ReAct agent. The agent calls the language model (LLM) twice, passing the tool descriptions in each request. In a basic setup, the expected behavior is that the agent receives a function call in the first response and the final answer in the second response, after the tool's answer is appended to the message list.
However, when TGI detects a function-call descriptor in the request, it enforces grammar-constrained generation and expects to produce a function call. This causes a problem when a function call is not actually needed or intended, leading to one of the following issues:
- Agent Failure: The agent cannot process the response correctly and fails.
- Infinite Loop: TGI continuously returns function call responses, resulting in the agent getting stuck in an endless loop.
When I try to force TGI to bypass the function call and return a normal response (when no tool call is needed), it returns a `notify_error` tool call instead.
Below is an example of the call and the problematic response behavior:
Request
{
  "model": "llama3",
  "messages": [
    {
      "role": "system",
      "content": "You are a helpful assistant.",
      "name": null
    },
    {
      "role": "user",
      "content": "For the following plan:\n1. Find the winner of the 2016 Australia Open\n2. Find the hometown of the winner\n\nYou are tasked with executing step 1, Find the winner of the 2016 Australia Open",
      "name": null
    },
    {
      "role": "assistant",
      "content": "",
      "name": null,
      "tool_calls": [
        {
          "id": 0,
          "type": "function",
          "function": {
            "name": "web_search",
            "arguments": "{\"query\": \"2016 Australia Open winner\"}"
          }
        }
      ]
    },
    {
      "role": "tool",
      "content": "The 2016 Australian Open was a tennis tournament that took place at Melbourne Park between 18 and 31 January 2016.[1] It was the 104th edition of the Australian Open, and the first Grand Slam tournament of the year. The tournament consisted of events for professional players in singles, doubles and mixed doubles play. Junior and wheelchair players competed in singles and doubles tournaments. Novak Djokovic successfully defended the men's singles title and thus won a record-equaling sixth Australian Open title. Serena Williams was the defending champion in the women's singles but failed to defend her title, losing to Angelique Kerber in the final; by winning, Kerber became the first German player of any gender to win a Grand Slam title since Steffi Graf won her last such title at the 1999 French Open.[2]",
      "tool_call_id": "0"
    }
  ],
  "temperature": 0.1,
  "stop": [
    "<|start_header_id|>",
    "<|end_header_id|>",
    "<|reserved_special_token|>",
    "<|eot_id|>"
  ],
  "tools": [
    {
      "type": "function",
      "function": {
        "name": "web_search",
        "description": "A search engine optimized for comprehensive, accurate, and trusted results.\nUseful for when you need to answer questions about current events.\nInput should be a search query.",
        "parameters": {
          "type": "object",
          "properties": {
            "query": {
              "description": "search query to look up",
              "type": "string"
            }
          },
          "required": [
            "query"
          ]
        }
      }
    }
  ],
  "tool_choice": "auto",
  "tool_prompt": "Please respond directly to the question unless using a function call provides significant clarity or concision. In cases where a function call is necessary, provide a JSON object specifying the function name and its required arguments, formatted as {name: 'function_name', parameters: {'argument1': 'value1',...}}. Avoid unnecessary function calls and variable assignments"
}
TGI Response:
{
  "object": "chat.completion",
  "id": "",
  "created": 1727596257,
  "model": "/models/models--meta-llama--Meta-Llama-3.1-70B-instruct/",
  "system_fingerprint": "2.3.1-dev0-sha-169178b",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "tool_calls": [
          {
            "id": "0",
            "type": "function",
            "function": {
              "description": null,
              "name": "notify_error",
              "arguments": {
                "error": "The winner of the 2016 Australia Open is Novak Djokovic for the men"
              }
            }
          }
        ]
      },
      "logprobs": null,
      "finish_reason": "stop"
    }
  ],
  "usage": {
    "prompt_tokens": 443,
    "completion_tokens": 39,
    "total_tokens": 482
  }
}
I am running into the same issue: I can get Llama 3.1 to respond with a tool call using the Messages API, but I cannot seem to make it respond to a tool call result. If I manually convert it to Llama 3.1's template and use the /generate endpoint, it appears to work just fine.
Here is my code with the Messages API, which results in just another tool call response from the model:
import requests

URL = "<tgi_instance>/v1/chat/completions"
headers = {
    "Content-Type": "application/json",
}
data = """{
    "model": "tgi",
    "messages": [
        {
            "role": "user",
            "content": "What is the weather like in New York?"
        },
        {
            "role": "assistant",
            "content": "",
            "tool_calls": [
                {
                    "id": "0",
                    "type": "function",
                    "function": {
                        "arguments": "{'format': 'celsius', 'location': 'New York'}",
                        "name": "get_current_weather"
                    }
                }
            ]
        },
        {
            "role": "ipython",
            "content": "{'temperature': '18', 'unit': 'celsius'}",
            "tool_call_id": "0"
        }
    ],
    "tools": [
        {
            "type": "function",
            "function": {
                "name": "get_current_weather",
                "description": "Get the current weather",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city and state, e.g. San Francisco, CA"
                        },
                        "format": {
                            "type": "string",
                            "enum": [
                                "celsius",
                                "fahrenheit"
                            ],
                            "description": "The temperature unit to use. Infer this from the users location."
                        }
                    },
                    "required": [
                        "location",
                        "format"
                    ]
                }
            }
        }
    ],
    "tool_choice": "auto"
}
"""
response = requests.post(URL, data=data, headers=headers)
print(response.json())
Output:
{'object': 'chat.completion', 'id': '', 'created': 1729618340, 'model': 'meta-llama/Llama-3.1-8B-Instruct', 'system_fingerprint': '2.3.1-sha-a094729', 'choices': [{'index': 0, 'message': {'role': 'assistant', 'tool_calls': [{'id': '0', 'type': 'function', 'function': {'description': None, 'name': 'get_current_weather', 'arguments': {'format': 'celsius', 'location': 'New York'}}}]}, 'logprobs': None, 'finish_reason': 'stop'}], 'usage': {'prompt_tokens': 450, 'completion_tokens': 27, 'total_tokens': 477}}
I was expecting a response paraphrasing the provided tool call result in natural language instead of the same tool call again.
Here is the equivalent code using the /generate endpoint that works as expected:
GENERATE_ENDPOINT = "<tgi_instance>/generate"
prompt = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
Environment: ipython
Tools: brave_search, wolfram_alpha
Cutting Knowledge Date: December 2023
Today Date: 23 July 2024
# Tool Instructions
- Always execute python code in messages that you share.
- When looking for real time information use relevant functions if available else fallback to brave_search
You have access to the following functions:
Use the function 'get_current_weather' to: Get the current weather
{"name": "get_current_weather", "description": "Get the current weather", "parameters": {"location": {"param_type": "str", "description": "The city and state, e.g. San Francisco, CA", "required": true}, "format": {"param_type": "str", "description": "The temperature unit to use. Infer this from the users location.", "required": true}}}
If a you choose to call a function ONLY reply in the following format:
<{start_tag}={function_name}>{parameters}{end_tag}
where
start_tag => `<function`
parameters => a JSON dict with the function argument name as key and function argument value as value.
end_tag => `</function>`
Here is an example,
<function=example_function_name>{"example_name": "example_value"}</function>
Reminder:
- Function calls MUST follow the specified format
- Required parameters MUST be specified
- Only call one function at a time
- Put the entire function call reply on one line
- Always add your sources when using search results to answer the user query
You are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>
What is the weather like in New York?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
<function=get_current_weather>{"location": "New York", "format": "celsius"}</function><|eom_id|><|start_header_id|>ipython<|end_header_id|>
{'temperature': '18', 'unit': 'celsius'}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""
response = requests.post(GENERATE_ENDPOINT, headers=headers, json={"inputs": prompt})
print(response.json())
Output:
{'generated_text': 'The current weather in New York is 18 degrees Celsius.'}
Am I doing it wrong or is this an issue with TGI?
@Simon-Stone the issue is that TGI's v1/chat/completions API responds with an object for arguments:
`'arguments': {'format': 'celsius', 'location': 'New York'}`, while OpenAI's v1/chat/completions responds with a string for arguments: `'arguments': '{\'format\': \'celsius\', \'location\': \'New York\'}'`.
The OpenAI client then JSON-parses the arguments value, which it expects to be a string. That is why you are seeing this error: Expected `str` but got `dict` - serialized value may not be as expected.
Similar issue was reported https://github.com/huggingface/text-generation-inference/issues/2136
Does anyone have a solution to this?
Not a solution but a workaround: I am intercepting the dict coming from the API and converting it back into a string using json.dumps() before passing it to the OpenAI client (which parses the string back into a dict). It's stupid, but it works.
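Roughly, the interception looks like this (a minimal sketch; the helper name is mine, and it assumes the response follows the OpenAI client's ChatCompletion shape):

```python
import json

def stringify_tool_call_arguments(chat_response):
    """Workaround sketch: if TGI returned `arguments` as a dict, dump it back to
    a JSON string so downstream code that json.loads() the arguments keeps working."""
    for choice in chat_response.choices:
        for tool_call in choice.message.tool_calls or []:
            if isinstance(tool_call.function.arguments, dict):
                tool_call.function.arguments = json.dumps(tool_call.function.arguments)
    return chat_response
```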
Is there any news on this with the new 3.0.0 update? I am still getting type errors, because haystack/langchain require strings, not dictionaries, for the tool response.
As of now, the issue still persists. I am stuck in an infinite tool-calling loop.
Still broken in 2025 (openai==1.51.2, text-generation-inference image 3.0.1, model is "mistralai/Mistral-Nemo-Instruct-2407")
Still broken, tested with the HF InferenceClient + Qwen. But using QwenAgent in OpenAI-client mode connecting to TGI works. Here is the Qwen agent alternative:
from huggingface_hub import ChatCompletionOutputToolCall
from typing import Any
import json
from qwen_agent.llm import get_chat_model
tools_qa = [
    {
        "name": "get_current_weather",
        "description": "Get the current weather",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {
                    "type": "string",
                    "description": "The city and state, e.g. San Francisco, CA",
                },
                "format": {
                    "type": "string",
                    "enum": ["celsius", "fahrenheit"],
                    "description": "The temperature unit to use. Infer this from the users location.",
                },
            },
            "required": ["location", "format"],
        },
    },
    {
        "name": "get_n_day_weather_forecast",
        "description": "Get an N-day weather forecast",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {
                    "type": "string",
                    "description": "The city and state, e.g. San Francisco, CA",
                },
                "format": {
                    "type": "string",
                    "enum": ["celsius", "fahrenheit"],
                    "description": "The temperature unit to use. Infer this from the users location.",
                },
                "num_days": {
                    "type": "integer",
                    "description": "The number of days to forecast",
                },
            },
            "required": ["location", "format", "num_days"],
        },
    },
    {
        "name": "calculate_travelling_days",
        "description": "Calculate travelling time in days by dividing distance by speed and calculate time",
        "parameters": {
            "type": "object",
            "properties": {
                "distance": {
                    "type": "number",
                    "description": "The distance in kilometer to travel, e.g. Distance between China and america",
                },
                "speed": {
                    "type": "number",
                    "description": "Speed in kilometer per hour, e.g. speed of an object .",
                },
            },
            "required": ["distance", "speed"],
        },
    },
]
def get_current_weather(location: str, format: str):
    if format == "F":
        return json.dumps({"temp": 50, "format": "F"})
    else:
        return json.dumps({"temp": 30, "format": "C"})

def get_current_weather_forcast(location: str, format: str, num_days: int):
    if format == "F":
        return json.dumps({"temp": 40, "format": "F"})
    else:
        return json.dumps({"temp": 20, "format": "C"})

def calculate_travelling_days(distance: float, speed: float):
    # Constants
    hours_per_day = 24
    # Calculate total hours needed to walk to the Moon
    total_hours_needed = distance / speed
    # Calculate the number of days needed
    total_days_needed = total_hours_needed / hours_per_day
    return total_days_needed

tool_functions = {
    "get_current_weather": get_current_weather,
    "get_current_weather_forcast": get_current_weather_forcast,
    "calculate_travelling_days": calculate_travelling_days,
}

def get_function_by_name(name: str):
    try:
        return tool_functions[name]
    except:
        return ""

def tool_calling_qa(function_name: str, arguments: dict):
    print(function_name)
    print(arguments)
    return get_function_by_name(function_name)(**arguments)
model = get_chat_model(
    {
        "model": "Qwen/Qwen2.5-Coder-32B-Instruct-AWQ",
        "model_server": "http://localhost:8080/v1",
        "api_key": "EMPTY",
    }
)

messages: list[dict[str, Any]] = [
    {
        "role": "system",
        "content": "You're a helpful assistant! Answer the users question best you can.",
    },
    {
        "role": "user",
        "content": "How many days it would take for a man to walk to moon?",
    },
]

responses = model.chat(messages=messages, functions=tools_qa, stream=False)
messages.extend(responses)  # extend conversation with assistant's reply
print(responses)

last_response = messages[-1]
if last_response.get("function_call", None):
    function_name = last_response["function_call"]["name"]
    function_args = json.loads(last_response["function_call"]["arguments"])
    function_response = tool_calling_qa(function_name, function_args)
    print("function_response")
    print(function_response)
    messages.append(
        {
            "role": "function",
            "name": function_name,
            "content": str(function_response),
        }
    )  # extend conversation with function response

    print("# Assistant Response 2:")
    responses = model.chat(messages=messages, functions=tools_qa, stream=False)
    # for responses in model.chat(
    #     messages=messages,
    #     functions=tools_qa,
    #     stream=True,
    # ):  # get a new response from the model where it can see the function response
    #     print(responses)
    print(responses)
👋 I made a PR (#2954) for a potential fix. I'm hoping it fixes the issues you were having; if any of you want to test it with your use cases, I'd love to get some feedback.