[BUG] While uploading a GraphRAG collection, chat fails with "Leave chat, error: probability tensor contains either `inf`, `nan` or element < 0, elapsed time: 0 s"
Description
2024-10-14 20:35:43,646 xinference.api.restful_api 4982 ERROR Chat completion stream got an error: [address=0.0.0.0:39705, pid=6081] probability tensor contains either
inf, nan or element < 0
Traceback (most recent call last):
File "/root/autodl-tmp/conda-envs/xinference/lib/python3.10/site-packages/xinference/api/restful_api.py", line 1926, in stream_results
async for item in iterator:
File "/root/autodl-tmp/conda-envs/xinference/lib/python3.10/site-packages/xoscar/api.py", line 340, in __anext__
return await self._actor_ref.xoscar_next(self._uid)
File "/root/autodl-tmp/conda-envs/xinference/lib/python3.10/site-packages/xoscar/backends/context.py", line 231, in send
return self._process_result_message(result)
File "/root/autodl-tmp/conda-envs/xinference/lib/python3.10/site-packages/xoscar/backends/context.py", line 102, in _process_result_message
raise message.as_instanceof_cause()
File "/root/autodl-tmp/conda-envs/xinference/lib/python3.10/site-packages/xoscar/backends/pool.py", line 656, in send
result = await self._run_coro(message.message_id, coro)
File "/root/autodl-tmp/conda-envs/xinference/lib/python3.10/site-packages/xoscar/backends/pool.py", line 367, in _run_coro
return await coro
File "/root/autodl-tmp/conda-envs/xinference/lib/python3.10/site-packages/xoscar/api.py", line 384, in on_receive
return await super().on_receive(message) # type: ignore
File "xoscar/core.pyx", line 558, in on_receive
raise ex
File "xoscar/core.pyx", line 520, in xoscar.core._BaseActor.on_receive
async with self._lock:
File "xoscar/core.pyx", line 521, in xoscar.core._BaseActor.on_receive
with debug_async_timeout('actor_lock_timeout',
File "xoscar/core.pyx", line 526, in xoscar.core._BaseActor.on_receive
result = await result
File "/root/autodl-tmp/conda-envs/xinference/lib/python3.10/site-packages/xoscar/api.py", line 431, in xoscar_next
raise e
File "/root/autodl-tmp/conda-envs/xinference/lib/python3.10/site-packages/xoscar/api.py", line 417, in xoscar_next
r = await asyncio.to_thread(_wrapper, gen)
File "/root/autodl-tmp/conda-envs/xinference/lib/python3.10/asyncio/threads.py", line 25, in to_thread
return await loop.run_in_executor(None, func_call)
File "/root/autodl-tmp/conda-envs/xinference/lib/python3.10/concurrent/futures/thread.py", line 58, in run
result = self.fn(*self.args, **self.kwargs)
File "/root/autodl-tmp/conda-envs/xinference/lib/python3.10/site-packages/xoscar/api.py", line 402, in _wrapper
return next(_gen)
File "/root/autodl-tmp/conda-envs/xinference/lib/python3.10/site-packages/xinference/core/model.py", line 358, in _to_generator
for v in gen:
File "/root/autodl-tmp/conda-envs/xinference/lib/python3.10/site-packages/xinference/model/llm/utils.py", line 272, in _to_chat_completion_chunks
for i, chunk in enumerate(chunks):
File "/root/autodl-tmp/conda-envs/xinference/lib/python3.10/site-packages/xinference/model/llm/transformers/core.py", line 356, in generator_wrapper
for completion_chunk, completion_usage in generate_stream(
File "/root/autodl-tmp/conda-envs/xinference/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 57, in generator_context
response = gen.send(request)
File "/root/autodl-tmp/conda-envs/xinference/lib/python3.10/site-packages/xinference/model/llm/transformers/utils.py", line 226, in generate_stream
indices = torch.multinomial(probs, num_samples=2)
RuntimeError: [address=0.0.0.0:39705, pid=6081] probability tensor contains either inf, nan or element < 0
2024-10-14 20:35:43,671 xinference.core.model 6081 ERROR [request d3fd5eb8-8a28-11ef-9291-0242ac110009] Leave chat, error: probability tensor contains either inf, nan or element < 0, elapsed time: 0 s
Traceback (most recent call last):
File "/root/autodl-tmp/conda-envs/xinference/lib/python3.10/site-packages/xinference/core/utils.py", line 69, in wrapped
ret = await func(*args, **kwargs)
File "/root/autodl-tmp/conda-envs/xinference/lib/python3.10/site-packages/xinference/core/model.py", line 586, in chat
response = await self._call_wrapper_json(
File "/root/autodl-tmp/conda-envs/xinference/lib/python3.10/site-packages/xinference/core/model.py", line 433, in _call_wrapper_json
return await self._call_wrapper("json", fn, *args, **kwargs)
File "/root/autodl-tmp/conda-envs/xinference/lib/python3.10/site-packages/xinference/core/model.py", line 121, in _async_wrapper
return await fn(*args, **kwargs)
File "/root/autodl-tmp/conda-envs/xinference/lib/python3.10/site-packages/xinference/core/model.py", line 444, in _call_wrapper
ret = await asyncio.to_thread(fn, *args, **kwargs)
File "/root/autodl-tmp/conda-envs/xinference/lib/python3.10/asyncio/threads.py", line 25, in to_thread
return await loop.run_in_executor(None, func_call)
Reproduction steps
1. Go to '...'
2. Click on '....'
3. Scroll down to '....'
4. See error
Screenshots

Logs
No response
Browsers
No response
OS
No response
Additional information
No response