Building from source fails
I am trying to build from source with gcc 12 and CUDA toolkit 12.2 on a Linux x86_64 machine. The build fails with the following output:
CMakeFiles/fbgemm_gpu_py.dir/src/intraining_embedding_pruning_ops/intraining_embedding_pruning.cu.o
ccache /home/isuruf/.conda/envs/pytorch-dev/bin/nvcc -forward-unknown-to-host-compiler -Dfbgemm_gpu_py_EXPORTS -I/home/isuruf/git/FBGEMM/fbgemm_gpu/../include -I/home/isuruf/git/FBGEMM/fbgemm_gpu -I/home/isuruf/git/FBGEMM/fbgemm_gpu/include -I/home/isuruf/git/FBGEMM/fbgemm_gpu/../external/asmjit/src -I/home/isuruf/git/FBGEMM/fbgemm_gpu/../external/cpuinfo/include -I/home/isuruf/git/FBGEMM/fbgemm_gpu/../external/cutlass/include -I/home/isuruf/git/FBGEMM/fbgemm_gpu/../external/cutlass/tools/util/include -I/home/isuruf/git/FBGEMM/fbgemm_gpu/../external/json/include -isystem /home/isuruf/git/pytorch2/torch/include -isystem /home/isuruf/git/pytorch2/torch/include/torch/csrc/api/include -isystem /home/isuruf/git/pytorch2/torch/include -isystem /home/isuruf/git/pytorch2/torch/include/torch/csrc/api/include -isystem /home/isuruf/.conda/envs/pytorch-dev/targets/x86_64-linux/include -DONNX_NAMESPACE=onnx_c2 -gencode arch=compute_75,code=sm_75 -Xcudafe --diag_suppress=cc_clobber_ignored,--diag_suppress=field_without_dll_interface,--diag_suppress=base_class_has_different_dll_interface,--diag_suppress=dll_interface_conflict_none_assumed,--diag_suppress=dll_interface_conflict_dllexport_assumed,--diag_suppress=bad_friend_decl --expt-relaxed-constexpr --expt-extended-lambda -O3 -DNDEBUG -std=c++20 -Xcompiler=-fPIC -Wno-deprecated-anon-enum-enum-conversion -Wno-deprecated-declarations -D_GLIBCXX_USE_CXX11_ABI=1 --expt-relaxed-constexpr -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ -MD -MT CMakeFiles/fbgemm_gpu_py.dir/src/intraining_embedding_pruning_ops/intraining_embedding_pruning.cu.o -MF CMakeFiles/fbgemm_gpu_py.dir/src/intraining_embedding_pruning_ops/intraining_embedding_pruning.cu.o.d -x cu -c /home/isuruf/git/FBGEMM/fbgemm_gpu/src/intraining_embedding_pruning_ops/intraining_embedding_pruning.cu -o CMakeFiles/fbgemm_gpu_py.dir/src/intraining_embedding_pruning_ops/intraining_embedding_pruning.cu.o
/home/isuruf/.conda/envs/pytorch-dev/targets/x86_64-linux/include/cuda_bf16.hpp(2185): error: no suitable conversion function from "const __nv_bfloat16" to "unsigned short" exists
const __nv_bfloat16_raw araw = __nv_bfloat16_raw(a);
^
/home/isuruf/.conda/envs/pytorch-dev/targets/x86_64-linux/include/cuda_bf16.hpp(2272): error: no suitable conversion function from "const __nv_bfloat16" to "unsigned short" exists
{ return static_cast<short int>(__nv_bfloat16_raw(h).x); }
^
/home/isuruf/.conda/envs/pytorch-dev/targets/x86_64-linux/include/cuda_bf16.hpp(2280): error: no suitable conversion function from "const __nv_bfloat16" to "unsigned short" exists
{ return __nv_bfloat16_raw(h).x; }
^
/home/isuruf/.conda/envs/pytorch-dev/targets/x86_64-linux/include/cuda_bf16.hpp(4103): error: no suitable conversion function from "const __nv_bfloat16" to "unsigned short" exists
{ __nv_bfloat16 maxval; maxval = (__hge(a, b) || __hisnan(b)) ? a : b; if (__hisnan(maxval)) { maxval = __ushort_as_bfloat16((unsigned short)0x7FFFU); } else if (__heq(a, b)) { __nv_bfloat16_raw ra = __nv_bfloat16_raw(a); __nv_bfloat16_raw rb = __nv_bfloat16_raw(b); maxval = (ra.x > rb.x) ? b : a; } return maxval; }
^
/home/isuruf/.conda/envs/pytorch-dev/targets/x86_64-linux/include/cuda_bf16.hpp(4103): error: no suitable conversion function from "const __nv_bfloat16" to "unsigned short" exists
{ __nv_bfloat16 maxval; maxval = (__hge(a, b) || __hisnan(b)) ? a : b; if (__hisnan(maxval)) { maxval = __ushort_as_bfloat16((unsigned short)0x7FFFU); } else if (__heq(a, b)) { __nv_bfloat16_raw ra = __nv_bfloat16_raw(a); __nv_bfloat16_raw rb = __nv_bfloat16_raw(b); maxval = (ra.x > rb.x) ? b : a; } return maxval; }
^
/home/isuruf/.conda/envs/pytorch-dev/targets/x86_64-linux/include/cuda_bf16.hpp(4132): error: no suitable conversion function from "const __nv_bfloat16" to "unsigned short" exists
{ __nv_bfloat16 minval; minval = (__hle(a, b) || __hisnan(b)) ? a : b; if (__hisnan(minval)) { minval = __ushort_as_bfloat16((unsigned short)0x7FFFU); } else if (__heq(a, b)) { __nv_bfloat16_raw ra = __nv_bfloat16_raw(a); __nv_bfloat16_raw rb = __nv_bfloat16_raw(b); minval = (ra.x > rb.x) ? a : b; } return minval; }
^
/home/isuruf/.conda/envs/pytorch-dev/targets/x86_64-linux/include/cuda_bf16.hpp(4132): error: no suitable conversion function from "const __nv_bfloat16" to "unsigned short" exists
{ __nv_bfloat16 minval; minval = (__hle(a, b) || __hisnan(b)) ? a : b; if (__hisnan(minval)) { minval = __ushort_as_bfloat16((unsigned short)0x7FFFU); } else if (__heq(a, b)) { __nv_bfloat16_raw ra = __nv_bfloat16_raw(a); __nv_bfloat16_raw rb = __nv_bfloat16_raw(b); minval = (ra.x > rb.x) ? a : b; } return minval; }
^
/home/isuruf/.conda/envs/pytorch-dev/x86_64-conda-linux-gnu/include/c++/12.3.0/variant(479): error: function "std::__detail::__variant::_Variadic_union<_First, _Rest...>::~_Variadic_union() [with _First=c10::OperatorName, _Rest=<c10::FunctionSchema>]" (declared at line 400) cannot be referenced -- it is a deleted function
~_Variant_storage()
^
detected during:
instantiation of "std::__detail::__variant::_Variant_storage<false, _Types...>::~_Variant_storage() [with _Types=<c10::OperatorName, c10::FunctionSchema>]" at line 287 of /home/isuruf/.conda/envs/pytorch-dev/x86_64-conda-linux-gnu/include/c++/12.3.0/optional
implicit generation of "std::__detail::__variant::_Copy_ctor_base<<unnamed>, _Types...>::~_Copy_ctor_base() noexcept [with <unnamed>=false, _Types=<c10::OperatorName, c10::FunctionSchema>]" at line 287 of /home/isuruf/.conda/envs/pytorch-dev/x86_64-conda-linux-gnu/include/c++/12.3.0/optional
instantiation of class "std::__detail::__variant::_Copy_ctor_base<<unnamed>, _Types...> [with <unnamed>=false, _Types=<c10::OperatorName, c10::FunctionSchema>]" at line 287 of /home/isuruf/.conda/envs/pytorch-dev/x86_64-conda-linux-gnu/include/c++/12.3.0/optional
implicit generation of "std::__detail::__variant::_Move_ctor_base<<unnamed>, _Types...>::~_Move_ctor_base() noexcept [with <unnamed>=false, _Types=<c10::OperatorName, c10::FunctionSchema>]" at line 287 of /home/isuruf/.conda/envs/pytorch-dev/x86_64-conda-linux-gnu/include/c++/12.3.0/optional
instantiation of class "std::__detail::__variant::_Move_ctor_base<<unnamed>, _Types...> [with <unnamed>=false, _Types=<c10::OperatorName, c10::FunctionSchema>]" at line 287 of /home/isuruf/.conda/envs/pytorch-dev/x86_64-conda-linux-gnu/include/c++/12.3.0/optional
[ 5 instantiation contexts not shown ]
instantiation of class "std::__detail::__variant::_Variant_base<_Types...> [with _Types=<c10::OperatorName, c10::FunctionSchema>]" at line 287 of /home/isuruf/.conda/envs/pytorch-dev/x86_64-conda-linux-gnu/include/c++/12.3.0/optional
instantiation of class "std::variant<_Types...> [with _Types=<c10::OperatorName, c10::FunctionSchema>]" at line 287 of /home/isuruf/.conda/envs/pytorch-dev/x86_64-conda-linux-gnu/include/c++/12.3.0/optional
instantiation of "void std::_Optional_payload_base<_Tp>::_M_destroy() noexcept [with _Tp=std::variant<c10::OperatorName, c10::FunctionSchema>]" at line 318 of /home/isuruf/.conda/envs/pytorch-dev/x86_64-conda-linux-gnu/include/c++/12.3.0/optional
instantiation of "void std::_Optional_payload_base<_Tp>::_M_reset() noexcept [with _Tp=std::variant<c10::OperatorName, c10::FunctionSchema>]" at line 439 of /home/isuruf/.conda/envs/pytorch-dev/x86_64-conda-linux-gnu/include/c++/12.3.0/optional
instantiation of "std::_Optional_payload<_Tp, false, _Copy, _Move>::~_Optional_payload() [with _Tp=std::variant<c10::OperatorName, c10::FunctionSchema>, _Copy=false, _Move=false]" at line 73 of /home/isuruf/git/pytorch2/torch/include/ATen/core/op_registration/op_registration.h
/home/isuruf/.conda/envs/pytorch-dev/x86_64-conda-linux-gnu/include/c++/12.3.0/variant(391): error: function "std::__detail::__variant::_Variadic_union<_First, _Rest...>::~_Variadic_union() [with _First=c10::FunctionSchema, _Rest=<>]" (declared at line 400) cannot be referenced -- it is a deleted function
: _M_rest(in_place_index<_Np-1>, std::forward<_Args>(__args)...)
^
detected during:
instantiation of "std::__detail::__variant::_Variadic_union<_First, _Rest...>::_Variadic_union(std::in_place_index_t<_Np>, _Args &&...) [with _First=c10::OperatorName, _Rest=<c10::FunctionSchema>, _Np=1UL, _Args=<c10::FunctionSchema>]" at line 460
instantiation of "std::__detail::__variant::_Variant_storage<false, _Types...>::_Variant_storage(std::in_place_index_t<_Np>, _Args &&...) [with _Types=<c10::OperatorName, c10::FunctionSchema>, _Np=1UL, _Args=<c10::FunctionSchema>]" at line 747
implicit generation of "std::__detail::__variant::_Copy_ctor_base<false, c10::OperatorName, c10::FunctionSchema>::_Copy_ctor_base(std::in_place_index_t<_Np>, _Args &&...) [with _Np=1UL, _Args=<c10::FunctionSchema>]" at line 747
instantiation of class "std::__detail::__variant::_Copy_ctor_base<<unnamed>, _Types...> [with <unnamed>=false, _Types=<c10::OperatorName, c10::FunctionSchema>]" at line 747
implicit generation of "std::__detail::__variant::_Move_ctor_base<false, c10::OperatorName, c10::FunctionSchema>::_Move_ctor_base(std::in_place_index_t<_Np>, _Args &&...) [with _Np=1UL, _Args=<c10::FunctionSchema>]" at line 747
[ 8 instantiation contexts not shown ]
instantiation of "auto std::construct_at(_Tp *, _Args &&...)->decltype((<expression>)) [with _Tp=std::variant<c10::OperatorName, c10::FunctionSchema>, _Args=<c10::FunctionSchema>]" at line 115 of /home/isuruf/.conda/envs/pytorch-dev/x86_64-conda-linux-gnu/include/c++/12.3.0/bits/stl_construct.h
instantiation of "void std::_Construct(_Tp *, _Args &&...) [with _Tp=std::variant<c10::OperatorName, c10::FunctionSchema>, _Args=<c10::FunctionSchema>]" at line 279 of /home/isuruf/.conda/envs/pytorch-dev/x86_64-conda-linux-gnu/include/c++/12.3.0/optional
instantiation of "void std::_Optional_payload_base<_Tp>::_M_construct(_Args &&...) [with _Tp=std::variant<c10::OperatorName, c10::FunctionSchema>, _Args=<c10::FunctionSchema>]" at line 458 of /home/isuruf/.conda/envs/pytorch-dev/x86_64-conda-linux-gnu/include/c++/12.3.0/optional
instantiation of "void std::_Optional_base_impl<_Tp, _Dp>::_M_construct(_Args &&...) [with _Tp=std::variant<c10::OperatorName, c10::FunctionSchema>, _Dp=std::_Optional_base<std::variant<c10::OperatorName, c10::FunctionSchema>, false, false>, _Args=<c10::FunctionSchema>]" at line 853 of /home/isuruf/.conda/envs/pytorch-dev/x86_64-conda-linux-gnu/include/c++/12.3.0/optional
instantiation of "std::enable_if_t<std::__and_v, std::optional<_Tp> &> std::optional<_Tp>::operator=(_Up &&) [with _Tp=std::variant<c10::OperatorName, c10::FunctionSchema>, _Up=c10::FunctionSchema]" at line 85 of /home/isuruf/git/pytorch2/torch/include/ATen/core/op_registration/op_registration.h
cc @ezyang
One thing I know is bad about the fbgemm build is that the CUDA install you want to use MUST be in /usr/local/cuda; fbgemm's build system does NOT support putting it anywhere else.
Forcing C++17 instead of C++20 makes this error go away.
@ezyang I cannot speak for non-Conda environments, but we have been building and installing fbgemm in Conda environments where the CUDA installations are located in the Conda environment's path.
@isuruf We recommend building FBGEMM_GPU inside a Conda environment. The instructions can be found here.
@q10, I was using a conda environment, but with gcc 12 inside it instead of the recommended gcc 10.