cutlass
cutlass copied to clipboard
[BUG] Use of undeclared identifier '__hfma2' (3.9.1)
Describe the bug: Compilation fails with an error when building with LLVM Clang.
Steps/Code to reproduce bug: Just include "cutlass/numeric_conversion.h".
Environment details (please complete the following information):
[1369/1739] /usr/lib/llvm/20/bin/clang++ -DAT_PER_OPERATOR_HEADERS
-DGFLAGS_IS_A_DLL=0 -DGLOG_CUSTOM_PREFIX_SUPPORT
-DHAVE_MALLOC_USABLE_SIZE=1 -DHAVE_MMAP=1 -DHAVE_SHM_OPEN=1
-DHAVE_SHM_UNLINK=1 -DMINIZ_DISABLE_ZIP_READER_CRC32_CHECKS
-DONNXIFI_ENABLE_EXT=1 -DONNX_ML=1 -DONNX_NAMESPACE=onnx
-DPROTOBUF_USE_DLLS -DTORCH_CUDA_BUILD_MAIN_LIB -DUSE_CUDA
-DUSE_CUFILE -DUSE_EXTERNAL_MZCRC -D_FILE_OFFSET_BITS=64
-Dtorch_cuda_EXPORTS
-I/var/tmp/portage/sci-ml/caffe2-2.7.0-r8882/work/pytorch-2.7.0_build/aten/src
-I/var/tmp/portage/sci-ml/caffe2-2.7.0-r8882/work/pytorch-2.7.0/aten/src
-I/var/tmp/portage/sci-ml/caffe2-2.7.0-r8882/work/pytorch-2.7.0_build
-I/var/tmp/portage/sci-ml/caffe2-2.7.0-r8882/work/pytorch-2.7.0
-I/var/tmp/portage/sci-ml/caffe2-2.7.0-r8882/work/pytorch-2.7.0/nlohmann
-I/var/tmp/portage/sci-ml/caffe2-2.7.0-r8882/work/pytorch-2.7.0/aten/src/THC
-I/var/tmp/portage/sci-ml/caffe2-2.7.0-r8882/work/pytorch-2.7.0/aten/src/ATen/cuda
-I/var/tmp/portage/sci-ml/caffe2-2.7.0-r8882/work/pytorch-2.7.0/aten/src/ATen/../../../third_party/cutlass/include
-I/var/tmp/portage/sci-ml/caffe2-2.7.0-r8882/work/pytorch-2.7.0/aten/src/ATen/../../../third_party/cutlass/tools/util/include
-I/var/tmp/portage/sci-ml/caffe2-2.7.0-r8882/work/pytorch-2.7.0_build/caffe2/aten/src
-I/var/tmp/portage/sci-ml/caffe2-2.7.0-r8882/work/pytorch-2.7.0/aten/src/ATen/.. -I/var/tmp/portage/sci-ml/caffe2-2.7.0-r8882/work/pytorch-2.7.0/c10/cuda/../.. -I/var/tmp/portage/sci-ml/caffe2-2.7.0-r8882/work/pytorch-2.7.0/c10/.. -I/var/tmp/portage/sci-ml/caffe2-2.7.0-r8882/work/pytorch-2.7.0/torch/csrc/api
-I/var/tmp/portage/sci-ml/caffe2-2.7.0-r8882/work/pytorch-2.7.0/torch/csrc/api/include
-isystem /usr/include/eigen3 -isystem
/var/tmp/portage/sci-ml/caffe2-2.7.0-r8882/work/pytorch-2.7.0/INTERFACE
-isystem
/var/tmp/portage/sci-ml/caffe2-2.7.0-r8882/work/pytorch-2.7.0/third_party/nlohmann/include
-isystem /opt/cuda/include
-DLIBCUDACXX_ENABLE_SIMPLIFIED_COMPLEX_OPERATIONS
-D_GLIBCXX_USE_CXX11_ABI=1 -DCUB_WRAPPED_NAMESPACE=at_cuda_detail
-DCUDA_HAS_FP16=1 -D__CUDA_NO_HALF_OPERATORS__
-D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__
-D__CUDA_NO_BFLOAT16_CONVERSIONS__ -O2 -g -DNDEBUG -std=c++17
--cuda-gpu-arch=sm_52 --cuda-path=/opt/cuda -fPIC -Wall -Wextra
-Wdeprecated -Wno-unused-parameter -Wno-missing-field-initializers
-Wno-array-bounds -Wno-unknown-pragmas -Wno-strict-overflow
-Wno-strict-aliasing -Wunused-function -Wunused-variable
-Wunused-private-field -Wextra-semi -Wno-error=extra-semi -MD -MT
caffe2/CMakeFiles/torch_cuda.dir/__/aten/src/ATen/native/cuda/MixedDtypesLinear.cu.o
-MF
caffe2/CMakeFiles/torch_cuda.dir/__/aten/src/ATen/native/cuda/MixedDtypesLinear.cu.o.d
-x cuda -c
/var/tmp/portage/sci-ml/caffe2-2.7.0-r8882/work/pytorch-2.7.0/aten/src/ATen/native/cuda/MixedDtypesLinear.cu
-o
caffe2/CMakeFiles/torch_cuda.dir/__/aten/src/ATen/native/cuda/MixedDtypesLinear.cu.o
Additional context
In file included from /var/tmp/portage/sci-ml/caffe2-2.7.0-r8882/work/pytorch-2.7.0/aten/src/ATen/native/cuda/MixedDtypesLinear.cu:14:
In file included from /usr/include/cutlass/gemm/device/gemm_universal_base.h:51:
In file included from /usr/include/cutlass/gemm/kernel/gemm_universal.h:45:
In file included from /usr/include/cutlass/gemm/kernel/gemm_universal.hpp:57:
In file included from /usr/include/cutlass/gemm/kernel/sm90_gemm_tma.hpp:38:
In file included from /usr/include/cutlass/epilogue/collective/detail.hpp:38:
In file included from /usr/include/cutlass/epilogue/dispatch_policy.hpp:33:
/usr/include/cutlass/numeric_conversion.h:5562:20: error: use of undeclared identifier '__hfma2'
5562 | fp16x2_val = __hfma2(fp16x2_val,
| ^
/usr/include/cutlass/numeric_conversion.h:5698:20: error: use of undeclared identifier '__hfma2'
5698 | fp16x2_val = __hfma2(fp16x2_val,
| ^
/usr/include/cutlass/numeric_conversion.h:5844:20: error: use of undeclared identifier '__hfma2'
5844 | fp16x2_val = __hfma2(hfma_scale, fp16x2_val, hfma_bias);
| ^
/usr/include/cutlass/numeric_conversion.h:5970:22: error: use of undeclared identifier '__hfma2'
5970 | fp16x2_val = __hfma2(fp16x2_val, reinterpret_cast<const __half2&>(hfma_scale), reinterpret_cast<const __half2&>(hfma_bias));
| ^
@Anoncheg1, please list your full CMake configuration and build commands as well as what changes you made to source. Thanks!
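The issue was caused by targeting the wrong GPU architecture, sm_52: CUDA declares half-precision arithmetic intrinsics such as `__hfma2` only for compute capability 5.3 and newer, so the identifier is undeclared when compiling device code for sm_52.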