glm4 function calling raises an error
System Info
Inference framework: transformers
Running Xinference with Docker?
- [X] docker
- [ ] pip install
- [ ] installation from source
Version info
0.14.0
The command used to start Xinference
docker run
Reproduction
Request payload:
{
    "model": "glm-4-9b-chat",
    "messages": [
        {
            "role": "system",
            "content": "你是一个小助手"
        },
        {
            "role": "user",
            "content": "北京的天气怎么样"
        }
    ],
    "tools": [
        {
            "type": "function",
            "function": {
                "name": "find_sports_shoes",
                "description": "查找商场内可用的运动鞋信息",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "brand": {
                            "type": "string",
                            "description": "鞋子的品牌"
                        },
                        "size": {
                            "type": "string",
                            "description": "鞋子的尺码"
                        }
                    },
                    "required": ["brand"]
                },
                "strict": false
            }
        }
    ],
    "tool_choice": "auto",
    "stream": false
}
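For reference, the same request expressed with the openai Python client against Xinference's OpenAI-compatible endpoint (a minimal sketch; the base URL, port, and api_key are assumptions for a default local deployment):

```python
# Sketch: send the payload above to a local Xinference server.
# Base URL/port and api_key are assumptions; adjust to your deployment.
from openai import OpenAI

client = OpenAI(base_url="http://127.0.0.1:9997/v1", api_key="not-needed")

resp = client.chat.completions.create(
    model="glm-4-9b-chat",
    messages=[
        {"role": "system", "content": "你是一个小助手"},
        {"role": "user", "content": "北京的天气怎么样"},
    ],
    tools=[{
        "type": "function",
        "function": {
            "name": "find_sports_shoes",
            "description": "查找商场内可用的运动鞋信息",
            "parameters": {
                "type": "object",
                "properties": {
                    "brand": {"type": "string", "description": "鞋子的品牌"},
                    "size": {"type": "string", "description": "鞋子的尺码"},
                },
                "required": ["brand"],
            },
        },
    }],
    tool_choice="auto",
    stream=False,
)
print(resp.choices[0].message)
```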
Response:
{
    "detail": "[address=0.0.0.0:45971, pid=242] 'type'"
}
Error log:
2024-08-13 04:04:15,718 xinference.api.restful_api 155 ERROR [address=0.0.0.0:45971, pid=242] 'type'
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/xinference/api/restful_api.py", line 1710, in create_chat_completion
    data = await model.chat(
  File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 231, in send
    return self._process_result_message(result)
  File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 102, in _process_result_message
    raise message.as_instanceof_cause()
  File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 656, in send
    result = await self._run_coro(message.message_id, coro)
  File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 367, in _run_coro
    return await coro
  File "/usr/local/lib/python3.10/dist-packages/xoscar/api.py", line 384, in __on_receive__
    return await super().__on_receive__(message)  # type: ignore
  File "xoscar/core.pyx", line 558, in __on_receive__
    raise ex
  File "xoscar/core.pyx", line 520, in xoscar.core._BaseActor.__on_receive__
    async with self._lock:
  File "xoscar/core.pyx", line 521, in xoscar.core._BaseActor.__on_receive__
    with debug_async_timeout('actor_lock_timeout',
  File "xoscar/core.pyx", line 526, in xoscar.core._BaseActor.__on_receive__
    result = await result
  File "/usr/local/lib/python3.10/dist-packages/xinference/core/utils.py", line 45, in wrapped
    ret = await func(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/xinference/core/model.py", line 90, in wrapped_func
    ret = await fn(self, *args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/xoscar/api.py", line 462, in _wrapper
    r = await func(self, *args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/xinference/core/model.py", line 523, in chat
    response = await self._call_wrapper_json(
  File "/usr/local/lib/python3.10/dist-packages/xinference/core/model.py", line 393, in _call_wrapper_json
    return await self._call_wrapper("json", fn, *args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/xinference/core/model.py", line 114, in _async_wrapper
    return await fn(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/xinference/core/model.py", line 404, in _call_wrapper
    ret = await asyncio.to_thread(fn, *args, **kwargs)
  File "/usr/lib/python3.10/asyncio/threads.py", line 25, in to_thread
    return await loop.run_in_executor(None, func_call)
  File "/usr/lib/python3.10/concurrent/futures/thread.py", line 58, in run
    result = self.fn(*self.args, **self.kwargs)
  File "/usr/local/lib/python3.10/dist-packages/xinference/model/llm/pytorch/chatglm.py", line 556, in chat
    response = chat(self._tokenizer, prompt, chat_history, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "/root/.cache/huggingface/modules/transformers_modules/glm-4-9b-chat/modeling_chatglm.py", line 972, in chat
    inputs = tokenizer.apply_chat_template(history, add_generation_prompt=True, tokenize=True,
  File "/root/.cache/huggingface/modules/transformers_modules/glm-4-9b-chat/tokenization_chatglm.py", line 211, in apply_chat_template
    result = handle_single_conversation(conversation)
  File "/root/.cache/huggingface/modules/transformers_modules/glm-4-9b-chat/tokenization_chatglm.py", line 173, in handle_single_conversation
    if tool["type"] == "function":
KeyError: [address=0.0.0.0:45971, pid=242] 'type'
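The KeyError comes from the bare dict lookup at tokenization_chatglm.py line 173 shown above. A minimal sketch of the failure mode, assuming the tool dict reaching the tokenizer has lost its top-level "type" key somewhere along the way (the dict shapes here are illustrative; only the tool["type"] access is taken from the trace):

```python
# Illustrative reproduction of the old tokenizer's tool check.
tool_ok = {"type": "function", "function": {"name": "find_sports_shoes"}}
tool_stripped = {"function": {"name": "find_sports_shoes"}}  # no "type" key

try:
    for tool in (tool_ok, tool_stripped):
        # Bare lookup: a missing key raises instead of being skipped.
        if tool["type"] == "function":
            print("would format tool:", tool["function"]["name"])
except KeyError as exc:
    print("KeyError:", exc)  # -> KeyError: 'type', as in the trace above
```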
Expected behavior
Tool calls should work normally.
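When tool calling works, the assistant message should come back in the usual OpenAI-compatible shape, roughly like this (a sketch; all field values are illustrative):

```python
# Illustrative shape of a successful tool-call response (values made up).
expected_message = {
    "role": "assistant",
    "content": None,
    "tool_calls": [{
        "id": "call_0",
        "type": "function",
        "function": {
            "name": "find_sports_shoes",
            "arguments": '{"brand": "Nike"}',
        },
    }],
}
```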
I saw this bug back in 0.12, but I don't know why it has come back.
I hit the same error.
TypeError: 'NoneType' object is not subscriptable. This bug is still reproducible, and regular inference from the chat page errors out as well.
Please post the error stack; the stack in the issue above is clearly not from the new version.
I used your POST data (only changing the model name to glm4-chat) and it works fine. Try the latest xinf with the latest built-in model and check whether the problem persists.
> Please post the error stack; the stack in the issue above is clearly not from the new version.
2024-08-20 09:25:02,989 xinference.api.restful_api 614 ERROR [address=0.0.0.0:42749, pid=701] 'type'
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/xinference/api/restful_api.py", line 1752, in create_chat_completion
    data = await model.chat(
  File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 231, in send
    return self._process_result_message(result)
  File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 102, in _process_result_message
    raise message.as_instanceof_cause()
  File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 656, in send
    result = await self._run_coro(message.message_id, coro)
  File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 367, in _run_coro
    return await coro
  File "/usr/local/lib/python3.10/dist-packages/xoscar/api.py", line 384, in __on_receive__
    return await super().__on_receive__(message)  # type: ignore
  File "xoscar/core.pyx", line 558, in __on_receive__
    raise ex
  File "xoscar/core.pyx", line 520, in xoscar.core._BaseActor.__on_receive__
    async with self._lock:
  File "xoscar/core.pyx", line 521, in xoscar.core._BaseActor.__on_receive__
    with debug_async_timeout('actor_lock_timeout',
  File "xoscar/core.pyx", line 526, in xoscar.core._BaseActor.__on_receive__
    result = await result
  File "/usr/local/lib/python3.10/dist-packages/xinference/core/utils.py", line 45, in wrapped
    ret = await func(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/xinference/core/model.py", line 90, in wrapped_func
    ret = await fn(self, *args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/xoscar/api.py", line 462, in _wrapper
    r = await func(self, *args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/xinference/core/model.py", line 528, in chat
    response = await self._call_wrapper_json(
  File "/usr/local/lib/python3.10/dist-packages/xinference/core/model.py", line 398, in _call_wrapper_json
    return await self._call_wrapper("json", fn, *args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/xinference/core/model.py", line 114, in _async_wrapper
    return await fn(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/xinference/core/model.py", line 409, in _call_wrapper
    ret = await asyncio.to_thread(fn, *args, **kwargs)
  File "/usr/lib/python3.10/asyncio/threads.py", line 25, in to_thread
    return await loop.run_in_executor(None, func_call)
  File "/usr/lib/python3.10/concurrent/futures/thread.py", line 58, in run
    result = self.fn(*self.args, **self.kwargs)
  File "/usr/local/lib/python3.10/dist-packages/xinference/model/llm/transformers/chatglm.py", line 547, in chat
    response = self._non_stream_chat(
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py", line 116, in decorate_context
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/xinference/model/llm/transformers/chatglm.py", line 416, in _non_stream_chat
    kwargs, tools = self._get_generate_args(
  File "/usr/local/lib/python3.10/dist-packages/xinference/model/llm/transformers/chatglm.py", line 304, in _get_generate_args
    inputs = tokenizer.apply_chat_template(
  File "/root/.cache/huggingface/modules/transformers_modules/glm-4-9b-chat/tokenization_chatglm.py", line 211, in apply_chat_template
    result = handle_single_conversation(conversation)
  File "/root/.cache/huggingface/modules/transformers_modules/glm-4-9b-chat/tokenization_chatglm.py", line 173, in handle_single_conversation
    if tool["type"] == "function":
KeyError: [address=0.0.0.0:42749, pid=701] 'type'
glm4-chat
OK, xinf is the latest 0.14.2, started via Docker:
sudo docker run -it -e XINFERENCE_MODEL_SRC=modelscope -v /fastdfs/sinhon/llm:/root/llm -e XINFERENCE_HOME=/root/llm -p 9998:9997 --gpus all --security-opt seccomp:unconfined --privileged xprobe/xinference:v0.14.2 /bin/bash
This should be a problem with the model files; the model you are using is too old. The revision pinned by Xinf no longer contains handle_single_conversation: https://huggingface.co/THUDM/glm-4-9b-chat/blob/aae8bd74af5c6dff63a49d7fbdcc89349ebf87aa/tokenization_chatglm.py#L173
> This should be a problem with the model files; the model you are using is too old. The revision pinned by Xinf no longer contains handle_single_conversation: https://huggingface.co/THUDM/glm-4-9b-chat/blob/aae8bd74af5c6dff63a49d7fbdcc89349ebf87aa/tokenization_chatglm.py#L173
You're right. After updating the model files it works fine. Thanks!
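For anyone else who lands here: a sketch of refreshing the cached model files so the local tokenization_chatglm.py matches the current upstream revision (uses huggingface_hub; if you pull from ModelScope, as the XINFERENCE_MODEL_SRC=modelscope docker command above does, use the equivalent ModelScope download instead):

```python
# Sketch: re-download glm-4-9b-chat so stale cached files
# (including tokenization_chatglm.py) are replaced.
from huggingface_hub import snapshot_download

path = snapshot_download(
    repo_id="THUDM/glm-4-9b-chat",
    force_download=True,  # overwrite the stale cached copy
)
print("fresh model files at:", path)
```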