xlstm
xlstm copied to clipboard
error: identifier "__high2bfloat16" is undefined
Hello,
Thanks for sharing your great insight. I tried to install xlstm. Following the instructions, I first created a conda environment from environment_pt220cu121.yaml, and then ran pip install xlstm. However, the following error messages appeared:
/home/chihchieh/.conda/envs/xlstm/lib/python3.11/site-packages/xlstm/blocks/slstm/src/cuda/../util/inline_ops_2bf16.cuh(170): error: no suitable user-defined conversion from "const __nv_bfloat162" to "__half2" exists if (__hbgt2(x, zero)) { ^
/home/chihchieh/.conda/envs/xlstm/lib/python3.11/site-packages/xlstm/blocks/slstm/src/cuda/../util/inline_ops_2bf16.cuh(170): error: no suitable user-defined conversion from "const __nv_bfloat162" to "__half2" exists
if (__hbgt2(x, zero)) { ^
/home/chihchieh/.conda/envs/xlstm/lib/python3.11/site-packages/xlstm/blocks/slstm/src/cuda/../util/inline_ops_2bf16.cuh(172): error: no suitable user-defined conversion from "const __nv_bfloat162" to "__half2" exists
negx = __hneg2(x); ^
/home/chihchieh/.conda/envs/xlstm/lib/python3.11/site-packages/xlstm/blocks/slstm/src/cuda/../util/inline_ops_2bf16.cuh(174): error: no suitable user-defined conversion from "__nv_bfloat16" to "__half" exists
} else if (__hgt(lowhalf, szero)) { ^
/home/chihchieh/.conda/envs/xlstm/lib/python3.11/site-packages/xlstm/blocks/slstm/src/cuda/../util/inline_ops_2bf16.cuh(174): error: no suitable user-defined conversion from "const __nv_bfloat16" to "__half" exists
} else if (__hgt(lowhalf, szero)) { ^
/home/chihchieh/.conda/envs/xlstm/lib/python3.11/site-packages/xlstm/blocks/slstm/src/cuda/../util/inline_ops_2bf16.cuh(175): error: no suitable user-defined conversion from "__nv_bfloat16" to "__half" exists
negx = __halves2bfloat162(__hneg(lowhalf), highhalf); ^
Error limit reached.
100 errors detected in the compilation of "/home/chihchieh/.conda/envs/xlstm/lib/python3.11/site-packages/xlstm/blocks/slstm/src/cuda/slstm_backward_cut.cu".
Compilation terminated. [5/8] /home/chihchieh/.conda/envs/xlstm/bin/nvcc --generate-dependencies-with-compile --dependency-output blas.cuda.o.d -ccbin /home/chihchieh/.conda/envs/xlstm/bin/x86_64-conda-linux-gnu-cc -DTORCH_EXTENSION_NAME=slstm_HS128BS8NH4NS4DBfDRbDWbDGbDSbDAfNG4SA1GRCV0GRC0d0FCV0FC0d0 -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE="gcc" -DPYBIND11_STDLIB="libstdcpp" -DPYBIND11_BUILD_ABI="cxxabi1011" -isystem /home/chihchieh/.conda/envs/xlstm/lib/python3.11/site-packages/torch/include -isystem /home/chihchieh/.conda/envs/xlstm/lib/python3.11/site-packages/torch/include/torch/csrc/api/include -isystem /home/chihchieh/.conda/envs/xlstm/lib/python3.11/site-packages/torch/include/TH -isystem /home/chihchieh/.conda/envs/xlstm/lib/python3.11/site-packages/torch/include/THC -isystem /home/chihchieh/.conda/envs/xlstm/include -isystem /home/chihchieh/.conda/envs/xlstm/include/python3.11 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS -D__CUDA_NO_HALF_CONVERSIONS_ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_70,code=compute_70 -gencode=arch=compute_70,code=sm_70 --compiler-options '-fPIC' -Xptxas="-v" -gencode arch=compute_80,code=compute_80 -res-usage --use_fast_math -O3 -Xptxas -O3 --extra-device-vectorization -DSLSTM_HIDDEN_SIZE=128 -DSLSTM_BATCH_SIZE=8 -DSLSTM_NUM_HEADS=4 -DSLSTM_NUM_STATES=4 -DSLSTM_DTYPE_B=float -DSLSTM_DTYPE_R=nv_bfloat16 -DSLSTM_DTYPE_W=nv_bfloat16 -DSLSTM_DTYPE_G=nv_bfloat16 -DSLSTM_DTYPE_S=nv_bfloat16 -DSLSTM_DTYPE_A=float -DSLSTM_NUM_GATES=4 -DSLSTM_SIMPLE_AGG=true -DSLSTM_GRADIENT_RECURRENT_CLIPVAL_VALID=false -DSLSTM_GRADIENT_RECURRENT_CLIPVAL=0.0 -DSLSTM_FORWARD_CLIPVAL_VALID=false -DSLSTM_FORWARD_CLIPVAL=0.0 -U__CUDA_NO_HALF_OPERATORS -U__CUDA_NO_HALF_CONVERSIONS -U__CUDA_NO_BFLOAT16_OPERATORS -U__CUDA_NO_BFLOAT16_CONVERSIONS -U__CUDA_NO_BFLOAT162_OPERATORS__ -U__CUDA_NO_BFLOAT162_CONVERSIONS__ -std=c++17 -c 
/home/chihchieh/.conda/envs/xlstm/lib/python3.11/site-packages/xlstm/blocks/slstm/src/util/blas.cu -o blas.cuda.o FAILED: blas.cuda.o /home/chihchieh/.conda/envs/xlstm/bin/nvcc --generate-dependencies-with-compile --dependency-output blas.cuda.o.d -ccbin /home/chihchieh/.conda/envs/xlstm/bin/x86_64-conda-linux-gnu-cc -DTORCH_EXTENSION_NAME=slstm_HS128BS8NH4NS4DBfDRbDWbDGbDSbDAfNG4SA1GRCV0GRC0d0FCV0FC0d0 -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE="gcc" -DPYBIND11_STDLIB="libstdcpp" -DPYBIND11_BUILD_ABI="cxxabi1011" -isystem /home/chihchieh/.conda/envs/xlstm/lib/python3.11/site-packages/torch/include -isystem /home/chihchieh/.conda/envs/xlstm/lib/python3.11/site-packages/torch/include/torch/csrc/api/include -isystem /home/chihchieh/.conda/envs/xlstm/lib/python3.11/site-packages/torch/include/TH -isystem /home/chihchieh/.conda/envs/xlstm/lib/python3.11/site-packages/torch/include/THC -isystem /home/chihchieh/.conda/envs/xlstm/include -isystem /home/chihchieh/.conda/envs/xlstm/include/python3.11 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS -D__CUDA_NO_HALF_CONVERSIONS_ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_70,code=compute_70 -gencode=arch=compute_70,code=sm_70 --compiler-options '-fPIC' -Xptxas="-v" -gencode arch=compute_80,code=compute_80 -res-usage --use_fast_math -O3 -Xptxas -O3 --extra-device-vectorization -DSLSTM_HIDDEN_SIZE=128 -DSLSTM_BATCH_SIZE=8 -DSLSTM_NUM_HEADS=4 -DSLSTM_NUM_STATES=4 -DSLSTM_DTYPE_B=float -DSLSTM_DTYPE_R=nv_bfloat16 -DSLSTM_DTYPE_W=nv_bfloat16 -DSLSTM_DTYPE_G=nv_bfloat16 -DSLSTM_DTYPE_S=nv_bfloat16 -DSLSTM_DTYPE_A=float -DSLSTM_NUM_GATES=4 -DSLSTM_SIMPLE_AGG=true -DSLSTM_GRADIENT_RECURRENT_CLIPVAL_VALID=false -DSLSTM_GRADIENT_RECURRENT_CLIPVAL=0.0 -DSLSTM_FORWARD_CLIPVAL_VALID=false -DSLSTM_FORWARD_CLIPVAL=0.0 -U__CUDA_NO_HALF_OPERATORS -U__CUDA_NO_HALF_CONVERSIONS -U__CUDA_NO_BFLOAT16_OPERATORS -U__CUDA_NO_BFLOAT16_CONVERSIONS 
-U__CUDA_NO_BFLOAT162_OPERATORS__ -U__CUDA_NO_BFLOAT162_CONVERSIONS__ -std=c++17 -c /home/chihchieh/.conda/envs/xlstm/lib/python3.11/site-packages/xlstm/blocks/slstm/src/util/blas.cu -o blas.cuda.o /home/chihchieh/.conda/envs/xlstm/lib/python3.11/site-packages/xlstm/blocks/slstm/src/util/inline_print.cuh(49): error: identifier "__low2bfloat16" is undefined __bfloat162float(__low2bfloat16(val)), ^
/home/chihchieh/.conda/envs/xlstm/lib/python3.11/site-packages/xlstm/blocks/slstm/src/util/inline_print.cuh(50): error: identifier "__high2bfloat16" is undefined
__bfloat162float(__high2bfloat16(val))); ^
Am I doing anything wrong? I am working on V100 machines, where bf16 is not supported. Does that mean this package cannot be installed on a V100 machine?