quanto
quanto copied to clipboard
RuntimeError: Error building extension 'quanto_cuda':
When I quantize an Ultralytics YOLO model with quantize(model, weights=qint4) and then run validation on the GPU, it raises: RuntimeError: Error building extension 'quanto_cuda':
code:
from ultralytics import YOLO
from optimum.quanto import quantize, qint8, qint4, qfloat8, Calibration, freeze, quantization_map, requantize
from safetensors.torch import save_file, load_file
model = YOLO('/path/weights/best.pt')
quantize(model, weights=qint4)
freeze(model)
model.val(data="/path/datasets/dataset.yaml",device=[0],batch=1)
error:
/home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/timm/models/layers/__init__.py:48: FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers
warnings.warn(f"Importing from {__name__} is deprecated, please import via timm.layers", FutureWarning)
/home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/timm/models/registry.py:4: FutureWarning: Importing from timm.models.registry is deprecated, please import via timm.models
warnings.warn(f"Importing from {__name__} is deprecated, please import via timm.models", FutureWarning)
Ultralytics YOLOv8.0.197 🚀 Python-3.9.17 torch-2.6.0+cu118 CUDA:0 (NVIDIA GeForce RTX 3090, 24230MiB)
Traceback (most recent call last):
File "/home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/utils/cpp_extension.py", line 2209, in _run_ninja_build
subprocess.run(
File "/home/wmh/miniforge3/envs/yolo/lib/python3.9/subprocess.py", line 528, in run
raise CalledProcessError(retcode, process.args,
subprocess.CalledProcessError: Command '['ninja', '-v']' returned non-zero exit status 1.
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/data4T/wmh/docker_tmp/home/data20T/SpikeYOLO/test_detect_int8.py", line 18, in <module>
model.val(data="/home/data20T/SpikeYOLO/ultralytics-main/ultralytics/cfg/datasets/kg_detect_4.yaml",device=[0],batch=1)
File "/home/data4T/wmh/docker_tmp/home/data20T/SpikeYOLO/ultralytics/engine/model.py", line 285, in val
validator(model=self.model)
File "/home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
File "/home/data4T/wmh/docker_tmp/home/data20T/SpikeYOLO/ultralytics/engine/validator.py", line 125, in __call__
model = AutoBackend(model or self.args.model,
File "/home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
File "/home/data4T/wmh/docker_tmp/home/data20T/SpikeYOLO/ultralytics/nn/autobackend.py", line 114, in __init__
model = weights.to(device)
File "/home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1343, in to
return self._apply(convert)
File "/home/data4T/wmh/docker_tmp/home/data20T/SpikeYOLO/ultralytics/nn/tasks.py", line 216, in _apply
self = super()._apply(fn)
File "/home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/nn/modules/module.py", line 903, in _apply
module._apply(fn)
File "/home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/nn/modules/module.py", line 903, in _apply
module._apply(fn)
File "/home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/nn/modules/module.py", line 903, in _apply
module._apply(fn)
File "/home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/nn/modules/module.py", line 931, in _apply
p_should_use_set_data = compute_should_use_set_data(param, param_applied)
File "/home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/nn/modules/module.py", line 906, in compute_should_use_set_data
if torch._has_compatible_shallow_copy_type(tensor, tensor_applied):
File "/home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/optimum/quanto/tensor/weights/qbits.py", line 272, in __torch_function__
return func(*args, **kwargs)
File "/home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/optimum/quanto/tensor/weights/qbits.py", line 302, in __torch_dispatch__
return qfallback(op, *args, **kwargs)
File "/home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/optimum/quanto/tensor/qtensor.py", line 28, in qfallback
args, kwargs = pytree.tree_map_only(QTensor, lambda x: x.dequantize(), (args, kwargs or {}))
File "/home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/utils/_pytree.py", line 1163, in tree_map_only
return tree_map(map_only(__type_or_types_or_pred)(func), tree, is_leaf=is_leaf)
File "/home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/utils/_pytree.py", line 991, in tree_map
return treespec.unflatten(map(func, *flat_args))
File "/home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/utils/_pytree.py", line 830, in unflatten
leaves = list(leaves)
File "/home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/utils/_pytree.py", line 1109, in wrapped
return func(x)
File "/home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/optimum/quanto/tensor/qtensor.py", line 28, in <lambda>
args, kwargs = pytree.tree_map_only(QTensor, lambda x: x.dequantize(), (args, kwargs or {}))
File "/home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/optimum/quanto/tensor/qbits.py", line 68, in dequantize
return QBitsDequantizer.apply(self)
File "/home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/autograd/function.py", line 575, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "/home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/optimum/quanto/tensor/qbits.py", line 31, in forward
data = t._data.unpack()
File "/home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/optimum/quanto/tensor/packed.py", line 101, in unpack
unpacked_data = torch.ops.quanto.unpack(self._data, self._bits)
File "/home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/_ops.py", line 1123, in __call__
return self._op(*args, **(kwargs or {}))
File "/home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/optimum/quanto/library/extensions/cuda/__init__.py", line 79, in unpack_cuda
return ext.lib.unpack(t, bits)
File "/home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/optimum/quanto/library/extensions/extension.py", line 44, in lib
self._lib = load(
File "/home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/utils/cpp_extension.py", line 1380, in load
return _jit_compile(
File "/home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/utils/cpp_extension.py", line 1798, in _jit_compile
_write_ninja_file_and_build_library(
File "/home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/utils/cpp_extension.py", line 1926, in _write_ninja_file_and_build_library
_run_ninja_build(
File "/home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/utils/cpp_extension.py", line 2225, in _run_ninja_build
raise RuntimeError(message) from e
RuntimeError: Error building extension 'quanto_cuda': [1/9] :/usr/local/cuda:/usr/local/cuda:/usr/local/cuda/bin/nvcc --generate-dependencies-with-compile --dependency-output unpack.cuda.o.d -DTORCH_EXTENSION_NAME=quanto_cuda -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/TH -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/THC -isystem :/usr/local/cuda:/usr/local/cuda:/usr/local/cuda/include -isystem /home/wmh/miniforge3/envs/yolo/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 --compiler-options '-fPIC' --expt-extended-lambda --use_fast_math -DQUANTO_CUDA_ARCH=860 -std=c++17 -c /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/optimum/quanto/library/extensions/cuda/unpack.cu -o unpack.cuda.o
FAILED: unpack.cuda.o
:/usr/local/cuda:/usr/local/cuda:/usr/local/cuda/bin/nvcc --generate-dependencies-with-compile --dependency-output unpack.cuda.o.d -DTORCH_EXTENSION_NAME=quanto_cuda -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/TH -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/THC -isystem :/usr/local/cuda:/usr/local/cuda:/usr/local/cuda/include -isystem /home/wmh/miniforge3/envs/yolo/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 --compiler-options '-fPIC' --expt-extended-lambda --use_fast_math -DQUANTO_CUDA_ARCH=860 -std=c++17 -c /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/optimum/quanto/library/extensions/cuda/unpack.cu -o unpack.cuda.o
/bin/sh: 1: :/usr/local/cuda:/usr/local/cuda:/usr/local/cuda/bin/nvcc: not found
[2/9] :/usr/local/cuda:/usr/local/cuda:/usr/local/cuda/bin/nvcc --generate-dependencies-with-compile --dependency-output gemm_cuda.cuda.o.d -DTORCH_EXTENSION_NAME=quanto_cuda -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/TH -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/THC -isystem :/usr/local/cuda:/usr/local/cuda:/usr/local/cuda/include -isystem /home/wmh/miniforge3/envs/yolo/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 --compiler-options '-fPIC' --expt-extended-lambda --use_fast_math -DQUANTO_CUDA_ARCH=860 -std=c++17 -c /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/optimum/quanto/library/extensions/cuda/awq/v2/gemm_cuda.cu -o gemm_cuda.cuda.o
FAILED: gemm_cuda.cuda.o
:/usr/local/cuda:/usr/local/cuda:/usr/local/cuda/bin/nvcc --generate-dependencies-with-compile --dependency-output gemm_cuda.cuda.o.d -DTORCH_EXTENSION_NAME=quanto_cuda -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/TH -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/THC -isystem :/usr/local/cuda:/usr/local/cuda:/usr/local/cuda/include -isystem /home/wmh/miniforge3/envs/yolo/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 --compiler-options '-fPIC' --expt-extended-lambda --use_fast_math -DQUANTO_CUDA_ARCH=860 -std=c++17 -c /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/optimum/quanto/library/extensions/cuda/awq/v2/gemm_cuda.cu -o gemm_cuda.cuda.o
/bin/sh: 1: :/usr/local/cuda:/usr/local/cuda:/usr/local/cuda/bin/nvcc: not found
[3/9] :/usr/local/cuda:/usr/local/cuda:/usr/local/cuda/bin/nvcc --generate-dependencies-with-compile --dependency-output gemv_cuda.cuda.o.d -DTORCH_EXTENSION_NAME=quanto_cuda -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/TH -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/THC -isystem :/usr/local/cuda:/usr/local/cuda:/usr/local/cuda/include -isystem /home/wmh/miniforge3/envs/yolo/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 --compiler-options '-fPIC' --expt-extended-lambda --use_fast_math -DQUANTO_CUDA_ARCH=860 -std=c++17 -c /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/optimum/quanto/library/extensions/cuda/awq/v2/gemv_cuda.cu -o gemv_cuda.cuda.o
FAILED: gemv_cuda.cuda.o
:/usr/local/cuda:/usr/local/cuda:/usr/local/cuda/bin/nvcc --generate-dependencies-with-compile --dependency-output gemv_cuda.cuda.o.d -DTORCH_EXTENSION_NAME=quanto_cuda -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/TH -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/THC -isystem :/usr/local/cuda:/usr/local/cuda:/usr/local/cuda/include -isystem /home/wmh/miniforge3/envs/yolo/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 --compiler-options '-fPIC' --expt-extended-lambda --use_fast_math -DQUANTO_CUDA_ARCH=860 -std=c++17 -c /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/optimum/quanto/library/extensions/cuda/awq/v2/gemv_cuda.cu -o gemv_cuda.cuda.o
/bin/sh: 1: :/usr/local/cuda:/usr/local/cuda:/usr/local/cuda/bin/nvcc: not found
[4/9] :/usr/local/cuda:/usr/local/cuda:/usr/local/cuda/bin/nvcc --generate-dependencies-with-compile --dependency-output fp8_marlin.cuda.o.d -DTORCH_EXTENSION_NAME=quanto_cuda -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/TH -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/THC -isystem :/usr/local/cuda:/usr/local/cuda:/usr/local/cuda/include -isystem /home/wmh/miniforge3/envs/yolo/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 --compiler-options '-fPIC' --expt-extended-lambda --use_fast_math -DQUANTO_CUDA_ARCH=860 -std=c++17 -c /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/optimum/quanto/library/extensions/cuda/marlin/fp8_marlin.cu -o fp8_marlin.cuda.o
FAILED: fp8_marlin.cuda.o
:/usr/local/cuda:/usr/local/cuda:/usr/local/cuda/bin/nvcc --generate-dependencies-with-compile --dependency-output fp8_marlin.cuda.o.d -DTORCH_EXTENSION_NAME=quanto_cuda -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/TH -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/THC -isystem :/usr/local/cuda:/usr/local/cuda:/usr/local/cuda/include -isystem /home/wmh/miniforge3/envs/yolo/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 --compiler-options '-fPIC' --expt-extended-lambda --use_fast_math -DQUANTO_CUDA_ARCH=860 -std=c++17 -c /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/optimum/quanto/library/extensions/cuda/marlin/fp8_marlin.cu -o fp8_marlin.cuda.o
/bin/sh: 1: :/usr/local/cuda:/usr/local/cuda:/usr/local/cuda/bin/nvcc: not found
[5/9] :/usr/local/cuda:/usr/local/cuda:/usr/local/cuda/bin/nvcc --generate-dependencies-with-compile --dependency-output gptq_marlin_repack.cuda.o.d -DTORCH_EXTENSION_NAME=quanto_cuda -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/TH -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/THC -isystem :/usr/local/cuda:/usr/local/cuda:/usr/local/cuda/include -isystem /home/wmh/miniforge3/envs/yolo/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 --compiler-options '-fPIC' --expt-extended-lambda --use_fast_math -DQUANTO_CUDA_ARCH=860 -std=c++17 -c /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/optimum/quanto/library/extensions/cuda/marlin/gptq_marlin_repack.cu -o gptq_marlin_repack.cuda.o
FAILED: gptq_marlin_repack.cuda.o
:/usr/local/cuda:/usr/local/cuda:/usr/local/cuda/bin/nvcc --generate-dependencies-with-compile --dependency-output gptq_marlin_repack.cuda.o.d -DTORCH_EXTENSION_NAME=quanto_cuda -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/TH -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/THC -isystem :/usr/local/cuda:/usr/local/cuda:/usr/local/cuda/include -isystem /home/wmh/miniforge3/envs/yolo/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 --compiler-options '-fPIC' --expt-extended-lambda --use_fast_math -DQUANTO_CUDA_ARCH=860 -std=c++17 -c /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/optimum/quanto/library/extensions/cuda/marlin/gptq_marlin_repack.cu -o gptq_marlin_repack.cuda.o
/bin/sh: 1: :/usr/local/cuda:/usr/local/cuda:/usr/local/cuda/bin/nvcc: not found
[6/9] :/usr/local/cuda:/usr/local/cuda:/usr/local/cuda/bin/nvcc --generate-dependencies-with-compile --dependency-output marlin_cuda_kernel.cuda.o.d -DTORCH_EXTENSION_NAME=quanto_cuda -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/TH -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/THC -isystem :/usr/local/cuda:/usr/local/cuda:/usr/local/cuda/include -isystem /home/wmh/miniforge3/envs/yolo/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 --compiler-options '-fPIC' --expt-extended-lambda --use_fast_math -DQUANTO_CUDA_ARCH=860 -std=c++17 -c /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/optimum/quanto/library/extensions/cuda/marlin/marlin_cuda_kernel.cu -o marlin_cuda_kernel.cuda.o
FAILED: marlin_cuda_kernel.cuda.o
:/usr/local/cuda:/usr/local/cuda:/usr/local/cuda/bin/nvcc --generate-dependencies-with-compile --dependency-output marlin_cuda_kernel.cuda.o.d -DTORCH_EXTENSION_NAME=quanto_cuda -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/TH -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/THC -isystem :/usr/local/cuda:/usr/local/cuda:/usr/local/cuda/include -isystem /home/wmh/miniforge3/envs/yolo/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 --compiler-options '-fPIC' --expt-extended-lambda --use_fast_math -DQUANTO_CUDA_ARCH=860 -std=c++17 -c /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/optimum/quanto/library/extensions/cuda/marlin/marlin_cuda_kernel.cu -o marlin_cuda_kernel.cuda.o
/bin/sh: 1: :/usr/local/cuda:/usr/local/cuda:/usr/local/cuda/bin/nvcc: not found
[7/9] c++ -MMD -MF marlin_cuda.o.d -DTORCH_EXTENSION_NAME=quanto_cuda -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/TH -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/THC -isystem :/usr/local/cuda:/usr/local/cuda:/usr/local/cuda/include -isystem /home/wmh/miniforge3/envs/yolo/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++17 -g -O3 -c /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/optimum/quanto/library/extensions/cuda/marlin/marlin_cuda.cpp -o marlin_cuda.o
[8/9] c++ -MMD -MF pybind_module.o.d -DTORCH_EXTENSION_NAME=quanto_cuda -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/TH -isystem /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/torch/include/THC -isystem :/usr/local/cuda:/usr/local/cuda:/usr/local/cuda/include -isystem /home/wmh/miniforge3/envs/yolo/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++17 -g -O3 -c /home/wmh/miniforge3/envs/yolo/lib/python3.9/site-packages/optimum/quanto/library/extensions/cuda/pybind_module.cpp -o pybind_module.o
ninja: build stopped: subcommand failed.
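Is there anything wrong with my setup? The build log shows ninja trying to run `:/usr/local/cuda:/usr/local/cuda:/usr/local/cuda/bin/nvcc`, which is not a valid path, so could my CUDA_HOME environment variable be set incorrectly (as a colon-separated list instead of a single directory such as /usr/local/cuda)? Any help would be appreciated!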
This issue is stale because it has been open 30 days with no activity. Remove stale label or comment or this will be closed in 5 days.
This issue was closed because it has been stalled for 5 days with no activity.
I encountered the same problem. Did you manage to solve it?