lightllm
Error when calling
/usr/bin/ld: skipping incompatible /usr/lib32/libcuda.so when searching for -lcuda
/usr/bin/ld: cannot find -lcuda: No such file or directory
/usr/bin/ld: skipping incompatible /usr/lib32/libcuda.so when searching for -lcuda
collect2: error: ld returned 1 exit status
Task exception was never retrieved
future: <Task finished name='Task-5' coro=<RouterManager.loop_for_fwd() done, defined at /home/house365ai/xxm/lightllm/lightllm/server/router/manager.py:84> exception=CalledProcessError(1, ['/usr/bin/gcc', '/tmp/tmpchhdqwt0/main.c', '-O3', '-I/usr/local/cuda-11.8/include', '-I/home/house365ai/.conda/envs/lightllm/include/python3.10', '-I/tmp/tmpchhdqwt0', '-shared', '-fPIC', '-lcuda', '-o', '/tmp/tmpchhdqwt0/_rms_norm_fwd_fused.cpython-310-x86_64-linux-gnu.so', '-L/usr/lib32'])>
Traceback (most recent call last):
File "
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/house365ai/xxm/lightllm/lightllm/server/router/manager.py", line 87, in loop_for_fwd
await self._step()
File "/home/house365ai/xxm/lightllm/lightllm/server/router/manager.py", line 106, in _step
await self._prefill_batch(self.running_batch)
File "/home/house365ai/xxm/lightllm/lightllm/server/router/manager.py", line 139, in _prefill_batch
ans = await asyncio.gather(*rets)
File "/home/house365ai/xxm/lightllm/lightllm/server/router/model_infer/model_rpc.py", line 182, in prefill_batch
ans = self._prefill_batch(batch_id)
File "/home/house365ai/xxm/lightllm/lightllm/utils/infer_utils.py", line 49, in inner_func
result = func(*args, **kwargs)
File "/home/house365ai/xxm/lightllm/lightllm/server/router/model_infer/model_rpc.py", line 67, in exposed_prefill_batch
return self.forward(batch_id, is_prefill=True)
File "/home/house365ai/xxm/lightllm/lightllm/server/router/model_infer/model_rpc.py", line 118, in forward
logits = self.model.forward(**kwargs)
File "/home/house365ai/.conda/envs/lightllm/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/home/house365ai/xxm/lightllm/lightllm/models/llama/layer_infer/model.py", line 103, in forward
predict_logics = self._context_forward(input_ids, infer_state)
File "/home/house365ai/xxm/lightllm/lightllm/models/llama/layer_infer/model.py", line 141, in _context_forward
input_embs = self.layers_infer[i].context_forward(input_embs, infer_state, self.trans_layers_weight[i])
File "/home/house365ai/xxm/lightllm/lightllm/models/llama/layer_infer/transformer_layer_inference.py", line 103, in context_forward
self._context_flash_attention(input_embdings,
File "/home/house365ai/xxm/lightllm/lightllm/utils/infer_utils.py", line 21, in time_func
ans = func(*args, **kwargs)
File "/home/house365ai/xxm/lightllm/lightllm/models/llama/layer_infer/transformer_layer_inference.py", line 49, in context_flash_attention
input1 = rmsnorm_forward(input_embding, weight=layer_weight.input_layernorm, eps=self.layer_norm_eps)
File "/home/house365ai/xxm/lightllm/lightllm/models/llama/triton_kernel/rmsnorm.py", line 59, in rmsnorm_forward
_rms_norm_fwd_fused[(M,)](x_arg, y, weight,
File "/home/house365ai/.conda/envs/lightllm/lib/python3.10/site-packages/triton-2.0.0.dev20221202-py3.10-linux-x86_64.egg/triton/runtime/jit.py", line 106, in launcher
return self.run(*args, grid=grid, **kwargs)
File "
@xxm1668 hello, what GPU are you using, and what does your environment roughly look like? From the log, it looks like some CUDA dependencies are missing.
A100
cuda 11.8, python 3.10, installed via pip install -r requirements.txt
@xxm1668
You can try compiling this code with gcc a.c -o a -lcuda:
#include <stdio.h>
/* declared extern so no CUDA headers are needed; this only tests linking */
extern void cuInit();
int main()
{
    printf("%p\n", (void *)&cuInit);
    return 0;
}
and see whether it compiles and runs without errors. If both work fine, take a look at the output of ldd ./a; it may help you locate the correct libcuda.so. Here is my output:
linux-vdso.so.1 => (0x00007ffe235ef000)
libcuda.so.1 => /lib64/libcuda.so.1 (0x00007f2a5da70000)
libc.so.6 => /lib64/libc.so.6 (0x00007f2a5d6a2000)
libm.so.6 => /lib64/libm.so.6 (0x00007f2a5d3a0000)
libdl.so.2 => /lib64/libdl.so.2 (0x00007f2a5d19c000)
libpthread.so.0 => /lib64/libpthread.so.0 (0x00007f2a5cf80000)
librt.so.1 => /lib64/librt.so.1 (0x00007f2a5cd78000)
/lib64/ld-linux-x86-64.so.2 (0x00007f2a5f72b000)
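If it compiles and runs but the library's location is still unclear, here is a minimal sketch (assuming glibc; the file name locate_cuda.c is made up for illustration) that asks the dynamic loader directly which file provides cuInit:

/* locate_cuda.c - hypothetical name. Build with: gcc locate_cuda.c -o locate_cuda -ldl */
#define _GNU_SOURCE /* for dladdr() */
#include <stdio.h>
#include <dlfcn.h>

int main(void)
{
    /* load the versioned driver library the same way CUDA apps do */
    void *h = dlopen("libcuda.so.1", RTLD_NOW);
    if (!h) {
        fprintf(stderr, "dlopen failed: %s\n", dlerror());
        return 1;
    }
    void *sym = dlsym(h, "cuInit");
    Dl_info info;
    /* dladdr reports the file the symbol was resolved from */
    if (sym && dladdr(sym, &info) && info.dli_fname)
        printf("cuInit resolved from: %s\n", info.dli_fname);
    dlclose(h);
    return 0;
}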
AttributeError: /usr/local/cuda-11.8/lib64/libcudart.so: undefined symbol: cuInit
/usr/local/cuda-11.8/targets/x86_64-linux/lib/stubs/libcuda.so /usr/local/cuda-12.2/targets/x86_64-linux/lib/stubs/libcuda.so /usr/lib32/libcuda.so
/usr/bin/ld: skipping incompatible /usr/lib32/libcuda.so when searching for -lcuda
Task exception was never retrieved
future: <Task finished name='Task-5' coro=<RouterManager.loop_for_fwd() done, defined at /home/house365ai/xxm/lightllm/lightllm/server/router/manager.py:84> exception=RuntimeError('unimplemented code: 34')>
Traceback (most recent call last):
File "
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/house365ai/xxm/lightllm/lightllm/server/router/manager.py", line 87, in loop_for_fwd
await self._step()
File "/home/house365ai/xxm/lightllm/lightllm/server/router/manager.py", line 106, in _step
await self._prefill_batch(self.running_batch)
File "/home/house365ai/xxm/lightllm/lightllm/server/router/manager.py", line 139, in _prefill_batch
ans = await asyncio.gather(*rets)
File "/home/house365ai/xxm/lightllm/lightllm/server/router/model_infer/model_rpc.py", line 182, in prefill_batch
ans = self._prefill_batch(batch_id)
File "/home/house365ai/xxm/lightllm/lightllm/utils/infer_utils.py", line 49, in inner_func
result = func(*args, **kwargs)
File "/home/house365ai/xxm/lightllm/lightllm/server/router/model_infer/model_rpc.py", line 67, in exposed_prefill_batch
return self.forward(batch_id, is_prefill=True)
File "/home/house365ai/xxm/lightllm/lightllm/server/router/model_infer/model_rpc.py", line 118, in forward
logits = self.model.forward(**kwargs)
File "/home/house365ai/.conda/envs/lightllm/lib/python3.10/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
return func(*args, **kwargs)
File "/home/house365ai/xxm/lightllm/lightllm/models/llama/layer_infer/model.py", line 103, in forward
predict_logics = self._context_forward(input_ids, infer_state)
File "/home/house365ai/xxm/lightllm/lightllm/models/llama/layer_infer/model.py", line 141, in _context_forward
input_embs = self.layers_infer[i].context_forward(input_embs, infer_state, self.trans_layers_weight[i])
File "/home/house365ai/xxm/lightllm/lightllm/models/llama/layer_infer/transformer_layer_inference.py", line 103, in context_forward
self._context_flash_attention(input_embdings,
File "/home/house365ai/xxm/lightllm/lightllm/utils/infer_utils.py", line 21, in time_func
ans = func(*args, **kwargs)
File "/home/house365ai/xxm/lightllm/lightllm/models/llama/layer_infer/transformer_layer_inference.py", line 49, in context_flash_attention
input1 = rmsnorm_forward(input_embding, weight=layer_weight.input_layernorm, eps=self.layer_norm_eps)
File "/home/house365ai/xxm/lightllm/lightllm/models/llama/triton_kernel/rmsnorm.py", line 59, in rmsnorm_forward
_rms_norm_fwd_fused[(M,)](x_arg, y, weight,
File "/home/house365ai/.conda/envs/lightllm/lib/python3.10/site-packages/triton-2.0.0.dev20221202-py3.10-linux-x86_64.egg/triton/runtime/jit.py", line 106, in launcher
return self.run(*args, grid=grid, **kwargs)
File "
@llehtahw
gcc a.c -o a -lcuda
#include <stdio.h>
extern void cuInit();
int main()
{
    printf("%p\n", (void *)&cuInit);
    return 0;
}
AttributeError: /usr/local/cuda-11.8/lib64/libcudart.so: undefined symbol: cuInit
With the gcc command line given above, there should be no AttributeError here, and it does not depend on libcudart either. Would you mind sharing how you compiled it?
/usr/local/cuda-11.8/targets/x86_64-linux/lib/stubs/libcuda.so /usr/local/cuda-12.2/targets/x86_64-linux/lib/stubs/libcuda.so /usr/lib32/libcuda.so
None of these libcuda.so files should be usable. I suggest following the method I gave, or some other approach, to find a usable library file on your machine.
https://github.com/ModelTC/lightllm/issues/16#issuecomment-1665233258 ... RuntimeError: unimplemented code: 34
This should be because the stub libcuda.so was forcibly linked, which is not usable; please check your build environment again. Does your nvcc work properly, i.e. can it compile and run CUDA programs?
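As a quick sanity check for that, here is a minimal sketch (the file name check_nvcc.c and the CUDA 11.8 install paths are assumptions; adjust to your setup) that queries the device count through the CUDA runtime API:

/* check_nvcc.c - hypothetical name. Build with either:
 *   nvcc check_nvcc.c -o check_nvcc
 *   gcc check_nvcc.c -o check_nvcc -I/usr/local/cuda-11.8/include \
 *       -L/usr/local/cuda-11.8/lib64 -lcudart
 */
#include <stdio.h>
#include <cuda_runtime.h>

int main(void)
{
    int n = 0;
    cudaError_t err = cudaGetDeviceCount(&n);
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaGetDeviceCount failed: %s\n", cudaGetErrorString(err));
        return 1;
    }
    printf("found %d CUDA device(s)\n", n);
    return 0;
}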
See https://github.com/openai/triton/blob/8650b4d1cbc750d659156e2c17a058736614827b/lib/driver/error.cc#L93 and https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__TYPES.html
CUDA_ERROR_STUB_LIBRARY = 34
This indicates that the CUDA driver that the application has loaded is a stub library. Applications that run with the stub rather than a real driver loaded will result in CUDA API returning this error.
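To see error 34 directly, a minimal sketch (assuming the driver API header under /usr/local/cuda-11.8/include; stub_check.c is a made-up name): call cuInit() and print its return code. Against a real driver it prints 0 (CUDA_SUCCESS); against a stub libcuda.so it prints 34 (CUDA_ERROR_STUB_LIBRARY):

/* stub_check.c - hypothetical name. Build with:
 *   gcc stub_check.c -o stub_check -I/usr/local/cuda-11.8/include -lcuda
 */
#include <stdio.h>
#include <cuda.h>

int main(void)
{
    CUresult res = cuInit(0);
    /* 0 = CUDA_SUCCESS; 34 = CUDA_ERROR_STUB_LIBRARY (stub driver loaded) */
    printf("cuInit returned %d\n", (int)res);
    return res == CUDA_SUCCESS ? 0 : 1;
}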
@xxm1668
I encountered the exact same problem; my code works well on an AWS g5 instance, but it doesn't work on an AWS g4 large instance. I compiled the code:
#include <stdio.h>
extern void cuInit();
int main()
{
    printf("%p\n", (void *)&cuInit);
    return 0;
}
and ran ldd ./a, which shows:
linux-vdso.so.1 (0x00007ffc15be6000)
libcuda.so.1 => /lib64/libcuda.so.1 (0x00007ff6ef52a000)
libc.so.6 => /lib64/libc.so.6 (0x00007ff6ef17d000)
libm.so.6 => /lib64/libm.so.6 (0x00007ff6eee3d000)
libdl.so.2 => /lib64/libdl.so.2 (0x00007ff6eec39000)
libpthread.so.0 => /lib64/libpthread.so.0 (0x00007ff6eea1b000)
librt.so.1 => /lib64/librt.so.1 (0x00007ff6ee813000)
/lib64/ld-linux-x86-64.so.2 (0x00007ff6f120a000)
@llehtahw
https://github.com/ModelTC/lightllm/issues/16#issuecomment-1670362288
May I have your error log please?
- If /usr/bin/ld: cannot find -lcuda: No such file or directory was reported:
  - check if there's any libcuda.so file (or symbolic link, without any suffix such as .1) in one of these paths:
    - /usr/lib/x86_64-linux-gnu
    - /lib32/
    - /lib64/
    - any other path in env var LD_LIBRARY_PATH
  - ensure a symbolic link to /lib64/libcuda.so.1 with file name libcuda.so exists in one of the above paths
- If RuntimeError: unimplemented code: 34 was raised:
  - export LD_PRELOAD=/lib64/libcuda.so.1 before launching lightllm
- If another error showed up, we shall look into the error log
@andy-yang-1
After launching the server:
Using a slow tokenizer. This might cause a significant slowdown. Consider using a fast tokenizer instead.
Using a slow tokenizer. This might cause a significant slowdown. Consider using a fast tokenizer instead.
INFO: Started server process [27287]
INFO: Waiting for application startup.
INFO: Application startup complete.
INFO: Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)
After curl request:
Task exception was never retrieved
future: <Task finished name='Task-8' coro=<RouterManager.loop_for_fwd() done, defined at /home/ec2-user/long/lightllm/lightllm/server/router/manager.py:83> exception=Triton Error [CUDA]: invalid argument
========= Remote Traceback (1) =========
Traceback (most recent call last):
File "<string>", line 21, in _fwd_kernel
KeyError: ('2-.-0-.-0-83ca8b715a9dc5f32dc1110973485f64-2b0c5161c53c71b37ae20a9996ee4bb8-c1f92808b4e4644c1732e8338187ac87-f24b6aa9b101a518b6a4a6bddded372e-12f7ac1ca211e037f62a7c0c323d9990-5c5e32ff210f3b7f56c98ca29917c25e-06f0df2d61979d629033f4a22eff5198-0dd03b0bd512a184b3512b278d9dfa59-d35ab04ae841e2714a253c523530b071', (torch.float16, torch.float16, torch.float16, 'fp32', torch.int32, torch.int32, torch.float32, torch.float16, 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32'), (128, 128, 128), (True, True, True, (False,), True, True, True, True, (True, False), (True, False), (False, True), (True, False), (True, False), (False, True), (True, False), (True, False), (False, True), (True, False), (True, False), (False, True), (True, False), (False, False), (False, True), (False, True)))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/opt/conda/envs/light/lib/python3.9/site-packages/rpyc/core/protocol.py", line 359, in _dispatch_request
res = self._HANDLERS[handler](self, *args)
File "/opt/conda/envs/light/lib/python3.9/site-packages/rpyc/core/protocol.py", line 837, in _handle_call
return obj(*args, **dict(kwargs))
File "/home/ec2-user/long/lightllm/lightllm/utils/infer_utils.py", line 49, in inner_func
result = func(*args, **kwargs)
File "/home/ec2-user/long/lightllm/lightllm/server/router/model_infer/model_rpc.py", line 71, in exposed_prefill_batch
return self.forward(batch_id, is_prefill=True)
File "/home/ec2-user/long/lightllm/lightllm/server/router/model_infer/model_rpc.py", line 122, in forward
logits = self.model.forward(**kwargs)
File "/opt/conda/envs/light/lib/python3.9/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
return func(*args, **kwargs)
File "/home/ec2-user/long/lightllm/lightllm/models/llama2/layer_infer/model.py", line 109, in forward
predict_logics = self._context_forward(input_ids, infer_state)
File "/home/ec2-user/long/lightllm/lightllm/models/llama2/layer_infer/model.py", line 147, in _context_forward
input_embs = self.layers_infer[i].context_forward(input_embs, infer_state, self.trans_layers_weight[i])
File "/home/ec2-user/long/lightllm/lightllm/models/llama2/layer_infer/transformer_layer_inference.py", line 111, in context_forward
self._context_flash_attention(input_embdings,
File "/home/ec2-user/long/lightllm/lightllm/utils/infer_utils.py", line 21, in time_func
ans = func(*args, **kwargs)
File "/home/ec2-user/long/lightllm/lightllm/models/llama2/layer_infer/transformer_layer_inference.py", line 69, in _context_flash_attention
context_attention_fwd(q.view(calcu_shape1),
File "/opt/conda/envs/light/lib/python3.9/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
return func(*args, **kwargs)
File "/home/ec2-user/long/lightllm/lightllm/models/llama2/triton_kernel/context_flashattention_nopad.py", line 234, in context_attention_fwd
_fwd_kernel[grid](
File "/opt/conda/envs/light/lib/python3.9/site-packages/triton/runtime/jit.py", line 106, in launcher
return self.run(*args, grid=grid, **kwargs)
File "<string>", line 43, in _fwd_kernel
RuntimeError: Triton Error [CUDA]: invalid argument
>
Traceback (most recent call last):
File "/home/ec2-user/long/lightllm/lightllm/server/router/manager.py", line 86, in loop_for_fwd
await self._step()
File "/home/ec2-user/long/lightllm/lightllm/server/router/manager.py", line 105, in _step
await self._prefill_batch(self.running_batch)
File "/home/ec2-user/long/lightllm/lightllm/server/router/manager.py", line 138, in _prefill_batch
ans = await asyncio.gather(*rets)
File "/home/ec2-user/long/lightllm/lightllm/server/router/model_infer/model_rpc.py", line 197, in prefill_batch
return await ans
File "/home/ec2-user/long/lightllm/lightllm/server/router/model_infer/model_rpc.py", line 159, in _func
return ans.value
File "/opt/conda/envs/light/lib/python3.9/site-packages/rpyc/core/async_.py", line 108, in value
raise self._obj
_get_exception_class.<locals>.Derived: Triton Error [CUDA]: invalid argument
========= Remote Traceback (1) =========
Traceback (most recent call last):
File "<string>", line 21, in _fwd_kernel
KeyError: ('2-.-0-.-0-83ca8b715a9dc5f32dc1110973485f64-2b0c5161c53c71b37ae20a9996ee4bb8-c1f92808b4e4644c1732e8338187ac87-f24b6aa9b101a518b6a4a6bddded372e-12f7ac1ca211e037f62a7c0c323d9990-5c5e32ff210f3b7f56c98ca29917c25e-06f0df2d61979d629033f4a22eff5198-0dd03b0bd512a184b3512b278d9dfa59-d35ab04ae841e2714a253c523530b071', (torch.float16, torch.float16, torch.float16, 'fp32', torch.int32, torch.int32, torch.float32, torch.float16, 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32'), (128, 128, 128), (True, True, True, (False,), True, True, True, True, (True, False), (True, False), (False, True), (True, False), (True, False), (False, True), (True, False), (True, False), (False, True), (True, False), (True, False), (False, True), (True, False), (False, False), (False, True), (False, True)))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/opt/conda/envs/light/lib/python3.9/site-packages/rpyc/core/protocol.py", line 359, in _dispatch_request
res = self._HANDLERS[handler](self, *args)
File "/opt/conda/envs/light/lib/python3.9/site-packages/rpyc/core/protocol.py", line 837, in _handle_call
return obj(*args, **dict(kwargs))
File "/home/ec2-user/long/lightllm/lightllm/utils/infer_utils.py", line 49, in inner_func
result = func(*args, **kwargs)
File "/home/ec2-user/long/lightllm/lightllm/server/router/model_infer/model_rpc.py", line 71, in exposed_prefill_batch
return self.forward(batch_id, is_prefill=True)
File "/home/ec2-user/long/lightllm/lightllm/server/router/model_infer/model_rpc.py", line 122, in forward
logits = self.model.forward(**kwargs)
File "/opt/conda/envs/light/lib/python3.9/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
return func(*args, **kwargs)
File "/home/ec2-user/long/lightllm/lightllm/models/llama2/layer_infer/model.py", line 109, in forward
predict_logics = self._context_forward(input_ids, infer_state)
File "/home/ec2-user/long/lightllm/lightllm/models/llama2/layer_infer/model.py", line 147, in _context_forward
input_embs = self.layers_infer[i].context_forward(input_embs, infer_state, self.trans_layers_weight[i])
File "/home/ec2-user/long/lightllm/lightllm/models/llama2/layer_infer/transformer_layer_inference.py", line 111, in context_forward
self._context_flash_attention(input_embdings,
File "/home/ec2-user/long/lightllm/lightllm/utils/infer_utils.py", line 21, in time_func
ans = func(*args, **kwargs)
File "/home/ec2-user/long/lightllm/lightllm/models/llama2/layer_infer/transformer_layer_inference.py", line 69, in _context_flash_attention
context_attention_fwd(q.view(calcu_shape1),
File "/opt/conda/envs/light/lib/python3.9/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
return func(*args, **kwargs)
File "/home/ec2-user/long/lightllm/lightllm/models/llama2/triton_kernel/context_flashattention_nopad.py", line 234, in context_attention_fwd
_fwd_kernel[grid](
File "/opt/conda/envs/light/lib/python3.9/site-packages/triton/runtime/jit.py", line 106, in launcher
return self.run(*args, grid=grid, **kwargs)
File "<string>", line 43, in _fwd_kernel
RuntimeError: Triton Error [CUDA]: invalid argument
@llehtahw
RuntimeError: Triton Error [CUDA]: invalid argument
This seems to be another problem.
Could you please open a new issue for it, and post your system, CUDA driver version, the output of pip show triton, and your lightllm commit id? @andy-yang-1
@llehtahw Many thanks for your help! New issue #34