triton
triton copied to clipboard
fused_moe SIGSEGV
Hi, I'm trying to run Mixtral 8x7B on 8x L4 GPUs, and I'm getting this error:
*** SIGSEGV received at time=1724779802 on cpu 38 ***
PC: @ 0x5266a0 (unknown) (unknown)
@ 0x7fe5a8a9e520 (unknown) (unknown)
@ 0x7fe43009a2c0 (unknown) (unknown)
@ 0x95e040 (unknown) (unknown)
[2024-08-27 17:30:02,927 E 442885 442885] logging.cc:440: *** SIGSEGV received at time=1724779802 on cpu 38 ***
[2024-08-27 17:30:02,931 E 442885 442885] logging.cc:440: PC: @ 0x5266a0 (unknown) (unknown)
[2024-08-27 17:30:02,931 E 442885 442885] logging.cc:440: @ 0x7fe5a8a9e520 (unknown) (unknown)
[2024-08-27 17:30:02,936 E 442885 442885] logging.cc:440: @ 0x7fe43009a2c0 (unknown) (unknown)
[2024-08-27 17:30:02,945 E 442885 442885] logging.cc:440: @ 0x95e040 (unknown) (unknown)
Fatal Python error: Segmentation fault
Stack (most recent call first):
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-15317f08-7af2-4341-9fb8-4992e86141e4/lib/python3.11/site-packages/triton/compiler/code_generator.py", line 1234 in ast_to_ttir
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-15317f08-7af2-4341-9fb8-4992e86141e4/lib/python3.11/site-packages/triton/compiler/compiler.py", line 117 in make_ir
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-15317f08-7af2-4341-9fb8-4992e86141e4/lib/python3.11/site-packages/triton/compiler/compiler.py", line 191 in compile
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-15317f08-7af2-4341-9fb8-4992e86141e4/lib/python3.11/site-packages/triton/runtime/jit.py", line 416 in run
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-15317f08-7af2-4341-9fb8-4992e86141e4/lib/python3.11/site-packages/triton/runtime/jit.py", line 167 in <lambda>
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-15317f08-7af2-4341-9fb8-4992e86141e4/lib/python3.11/site-packages/vllm/model_executor/layers/fused_moe/fused_moe.py", line 245 in invoke_fused_moe_kernel
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-15317f08-7af2-4341-9fb8-4992e86141e4/lib/python3.11/site-packages/vllm/model_executor/layers/fused_moe/fused_moe.py", line 427 in fused_experts
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-15317f08-7af2-4341-9fb8-4992e86141e4/lib/python3.11/site-packages/vllm/model_executor/layers/fused_moe/fused_moe.py", line 515 in fused_moe
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-15317f08-7af2-4341-9fb8-4992e86141e4/lib/python3.11/site-packages/vllm/model_executor/models/mixtral.py", line 271 in forward
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-15317f08-7af2-4341-9fb8-4992e86141e4/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1541 in _call_impl
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-15317f08-7af2-4341-9fb8-4992e86141e4/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1532 in _wrapped_call_impl
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-15317f08-7af2-4341-9fb8-4992e86141e4/lib/python3.11/site-packages/vllm/model_executor/models/mixtral.py", line 424 in forward
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-15317f08-7af2-4341-9fb8-4992e86141e4/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1541 in _call_impl
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-15317f08-7af2-4341-9fb8-4992e86141e4/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1532 in _wrapped_call_impl
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-15317f08-7af2-4341-9fb8-4992e86141e4/lib/python3.11/site-packages/vllm/model_executor/models/mixtral.py", line 468 in forward
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-15317f08-7af2-4341-9fb8-4992e86141e4/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1541 in _call_impl
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-15317f08-7af2-4341-9fb8-4992e86141e4/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1532 in _wrapped_call_impl
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-15317f08-7af2-4341-9fb8-4992e86141e4/lib/python3.11/site-packages/vllm/model_executor/models/mixtral.py", line 535 in forward
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-15317f08-7af2-4341-9fb8-4992e86141e4/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1541 in _call_impl
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-15317f08-7af2-4341-9fb8-4992e86141e4/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1532 in _wrapped_call_impl
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-15317f08-7af2-4341-9fb8-4992e86141e4/lib/python3.11/site-packages/vllm/worker/model_runner.py", line 738 in execute_model
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-15317f08-7af2-4341-9fb8-4992e86141e4/lib/python3.11/site-packages/torch/utils/_contextlib.py", line 115 in decorate_context
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-15317f08-7af2-4341-9fb8-4992e86141e4/lib/python3.11/site-packages/vllm/worker/model_runner.py", line 833 in profile_run
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-15317f08-7af2-4341-9fb8-4992e86141e4/lib/python3.11/site-packages/torch/utils/_contextlib.py", line 115 in decorate_context
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-15317f08-7af2-4341-9fb8-4992e86141e4/lib/python3.11/site-packages/vllm/worker/worker.py", line 154 in determine_num_available_blocks
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-15317f08-7af2-4341-9fb8-4992e86141e4/lib/python3.11/site-packages/torch/utils/_contextlib.py", line 115 in decorate_context
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-15317f08-7af2-4341-9fb8-4992e86141e4/lib/python3.11/site-packages/vllm/worker/worker_base.py", line 140 in execute_method
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-15317f08-7af2-4341-9fb8-4992e86141e4/lib/python3.11/site-packages/vllm/executor/ray_gpu_executor.py", line 246 in _run_workers
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-15317f08-7af2-4341-9fb8-4992e86141e4/lib/python3.11/site-packages/vllm/executor/distributed_gpu_executor.py", line 38 in determine_num_available_blocks
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-15317f08-7af2-4341-9fb8-4992e86141e4/lib/python3.11/site-packages/vllm/engine/llm_engine.py", line 313 in _initialize_kv_caches
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-15317f08-7af2-4341-9fb8-4992e86141e4/lib/python3.11/site-packages/vllm/engine/llm_engine.py", line 236 in __init__
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-15317f08-7af2-4341-9fb8-4992e86141e4/lib/python3.11/site-packages/vllm/engine/async_llm_engine.py", line 470 in _init_engine
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-15317f08-7af2-4341-9fb8-4992e86141e4/lib/python3.11/site-packages/vllm/engine/async_llm_engine.py", line 349 in __init__
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-15317f08-7af2-4341-9fb8-4992e86141e4/lib/python3.11/site-packages/vllm/engine/async_llm_engine.py", line 395 in from_engine_args
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-15317f08-7af2-4341-9fb8-4992e86141e4/lib/python3.11/site-packages/vllm/entrypoints/openai/api_server.py", line 196 in <module>
File "<frozen runpy>", line 88 in _run_code
File "<frozen runpy>", line 198 in _run_module_as_main
Extension modules: numpy.core._multiarray_umath, numpy.core._multiarray_tests, numpy.linalg._umath_linalg, numpy.fft._pocketfft_internal, numpy.random._common, numpy.random.bit_generator, numpy.random._bounded_integers, numpy.random._mt19937, numpy.random.mtrand, numpy.random._philox, numpy.random._pcg64, numpy.random._sfc64, numpy.random._generator, torch._C, torch._C._fft, torch._C._linalg, torch._C._nested, torch._C._nn, torch._C._sparse, torch._C._special, _brotli, simplejson._speedups, yaml._yaml, psutil._psutil_linux, psutil._psutil_posix, msgpack._cmsgpack, google._upb._message, setproctitle, uvloop.loop, ray._raylet, pvectorc, sentencepiece._sentencepiece, ujson, regex._regex, scipy._lib._ccallback_c, numba.core.typeconv._typeconv, numba._helperlib, numba._dynfunc, numba._dispatcher, numba.core.runtime._nrt_python, numba.np.ufunc._internal, numba.experimental.jitclass._box, snappy._snappy, lz4._version, lz4.frame._frame, pandas._libs.tslibs.np_datetime, pandas._libs.tslibs.dtypes, pandas._libs.tslibs.base, pandas._libs.tslibs.nattype, pandas._libs.tslibs.timezones, pandas._libs.tslibs.tzconversion, pandas._libs.tslibs.ccalendar, pandas._libs.tslibs.fields, pandas._libs.tslibs.timedeltas, pandas._libs.tslibs.timestamps, pandas._libs.properties, pandas._libs.tslibs.offsets, pandas._libs.tslibs.parsing, pandas._libs.tslibs.conversion, pandas._libs.tslibs.period, pandas._libs.tslibs.vectorized, pandas._libs.ops_dispatch, pandas._libs.missing, pandas._libs.hashtable, pandas._libs.algos, pandas._libs.interval, pandas._libs.tslib, pandas._libs.lib, pandas._libs.hashing, pyarrow.lib, pyarrow._hdfsio, pandas._libs.ops, pyarrow._compute, pandas._libs.arrays, pandas._libs.index, pandas._libs.join, pandas._libs.sparse, pandas._libs.reduction, pandas._libs.indexing, pandas._libs.internals, pandas._libs.writers, pandas._libs.window.aggregations, pandas._libs.window.indexers, pandas._libs.reshape, pandas._libs.tslibs.strptime, pandas._libs.groupby, pandas._libs.testing, 
pandas._libs.parsers, pandas._libs.json, _cffi_backend, pyarrow._parquet, pyarrow._fs, pyarrow._hdfs, pyarrow._gcsfs, pyarrow._s3fs, multidict._multidict, yarl._quoting_c, aiohttp._helpers, aiohttp._http_writer, aiohttp._http_parser, aiohttp._websocket, frozenlist._frozenlist, xxhash._xxhash, pyarrow._json, markupsafe._speedups, PIL._imaging, h5py._errors, h5py.defs, h5py._objects, h5py.h5, h5py.utils, h5py.h5t, h5py.h5s, h5py.h5ac, h5py.h5p, h5py.h5r, h5py._proxy, h5py._conv, h5py.h5z, h5py.h5a, h5py.h5d, h5py.h5ds, h5py.h5g, h5py.h5i, h5py.h5f, h5py.h5fd, h5py.h5pl, h5py.h5o, h5py.h5l, h5py._selector, scipy.sparse._sparsetools, _csparsetools, scipy.sparse._csparsetools, scipy.sparse.linalg._isolve._iterative, scipy.linalg._fblas, scipy.linalg._flapack, scipy.linalg.cython_lapack, scipy.linalg._cythonized_array_utils, scipy.linalg._solve_toeplitz, scipy.linalg._decomp_lu_cython, scipy.linalg._matfuncs_sqrtm_triu, scipy.linalg.cython_blas, scipy.linalg._matfuncs_expm, scipy.linalg._decomp_update, scipy.linalg._flinalg, scipy.sparse.linalg._dsolve._superlu, scipy.sparse.linalg._eigen.arpack._arpack, scipy.sparse.csgraph._tools, scipy.sparse.csgraph._shortest_path, scipy.sparse.csgraph._traversal, scipy.sparse.csgraph._min_spanning_tree, scipy.sparse.csgraph._flow, scipy.sparse.csgraph._matching, scipy.sparse.csgraph._reordering, grpc._cython.cygrpc, cuda_utils (total: 156)