torch-nightly: Build fails due to flatbuffer->absl dependency requiring c++17 (std::string_view)
I'm trying to build horovod with torch-nightly support in a conda environment. During build, I receive the following error:
[ 73%] Building CXX object horovod/torch/CMakeFiles/pytorch.dir//common/process_set.cc.o cd /tmp/pip-install-0ss1puiz/horovod_121ace9320dd4c559a73006874120788/build/temp.linux-x86_64-cpython-311/RelWithDebInfo/horovod/torch && /mnt/qb/work2/goswami0/gkd021/conda/env s/wb_dev/bin/x86_64-conda-linux-gnu-c++ -DEIGEN_MPL2_ONLY=1 -DHAVE_CUDA=1 -DHAVE_GLOO=1 -DHAVE_GPU=1 -DHAVE_MPI=1 -DHAVE_NCCL=1 -DHAVE_NVTX=1 -DHOROVOD_GPU_ALLGATHER=78 -DHOROVOD_GPU_ ALLREDUCE=78 -DHOROVOD_GPU_ALLTOALL=78 -DHOROVOD_GPU_BROADCAST=78 -DHOROVOD_GPU_REDUCESCATTER=78 -DPYTORCH_VERSION=9999999999 -DTORCH_API_INCLUDE_EXTENSION_H=1 -Dpytorch_EXPORTS -I/tm p/pip-install-0ss1puiz/horovod_121ace9320dd4c559a73006874120788/third_party/HTTPRequest/include -I/tmp/pip-install-0ss1puiz/horovod_121ace9320dd4c559a73006874120788/third_party/boost/ assert/include -I/tmp/pip-install-0ss1puiz/horovod_121ace9320dd4c559a73006874120788/third_party/boost/config/include -I/tmp/pip-install-0ss1puiz/horovod_121ace9320dd4c559a730068741207 88/third_party/boost/core/include -I/tmp/pip-install-0ss1puiz/horovod_121ace9320dd4c559a73006874120788/third_party/boost/detail/include -I/tmp/pip-install-0ss1puiz/horovod_121ace9320d d4c559a73006874120788/third_party/boost/iterator/include -I/tmp/pip-install-0ss1puiz/horovod_121ace9320dd4c559a73006874120788/third_party/boost/lockfree/include -I/tmp/pip-install-0ss 1puiz/horovod_121ace9320dd4c559a73006874120788/third_party/boost/mpl/include -I/tmp/pip-install-0ss1puiz/horovod_121ace9320dd4c559a73006874120788/third_party/boost/parameter/include - I/tmp/pip-install-0ss1puiz/horovod_121ace9320dd4c559a73006874120788/third_party/boost/predef/include -I/tmp/pip-install-0ss1puiz/horovod_121ace9320dd4c559a73006874120788/third_party/b oost/preprocessor/include -I/tmp/pip-install-0ss1puiz/horovod_121ace9320dd4c559a73006874120788/third_party/boost/static_assert/include -I/tmp/pip-install-0ss1puiz/horovod_121ace9320dd 4c559a73006874120788/third_party/boost/type_traits/include 
-I/tmp/pip-install-0ss1puiz/horovod_121ace9320dd4c559a73006874120788/third_party/boost/utility/include -I/tmp/pip-install-0s s1puiz/horovod_121ace9320dd4c559a73006874120788/third_party/lbfgs/include -I/tmp/pip-install-0ss1puiz/horovod_121ace9320dd4c559a73006874120788/third_party/gloo -I/tmp/pip-install-0ss1 puiz/horovod_121ace9320dd4c559a73006874120788/third_party/eigen -I/tmp/pip-install-0ss1puiz/horovod_121ace9320dd4c559a73006874120788/third_party/flatbuffers/include -isystem /mnt/qb/w ork2/goswami0/gkd021/conda/envs/wb_dev/lib/python3.11/site-packages/torch/include -isystem /mnt/qb/work2/goswami0/gkd021/conda/envs/wb_dev/lib/python3.11/site-packages/torch/include/t orch/csrc/api/include -isystem /mnt/qb/work2/goswami0/gkd021/conda/envs/wb_dev/lib/python3.11/site-packages/torch/include/TH -isystem /mnt/qb/work2/goswami0/gkd021/conda/envs/wb_dev/l ib/python3.11/site-packages/torch/include/THC -isystem /mnt/qb/work2/goswami0/gkd021/conda/envs/wb_dev/include/python3.11 -D_GLIBCXX_USE_CXX11_ABI=0 -fvisibility-inlines-hidden -fmes sage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /mnt/qb/work2/goswami0/gkd021/conda/envs/wb_ dev/include -pthread -fPIC -Wall -ftree-vectorize -mf16c -mavx -mfma -O3 -g -DNDEBUG -fPIC -std=c++14 -MD -MT horovod/torch/CMakeFiles/pytorch.dir//common/process_set.cc.o -MF CMake Files/pytorch.dir//common/process_set.cc.o.d -o CMakeFiles/pytorch.dir//common/process_set.cc.o -c /tmp/pip-install-0ss1puiz/horovod_121ace9320dd4c559a73006874120788/horovod/commo n/process_set.cc In file included from /tmp/pip-install-0ss1puiz/horovod_121ace9320dd4c559a73006874120788/third_party/flatbuffers/include/flatbuffers/base.h:241, from /tmp/pip-install-0ss1puiz/horovod_121ace9320dd4c559a73006874120788/third_party/flatbuffers/include/flatbuffers/flatbuffers.h:20, from 
/tmp/pip-install-0ss1puiz/horovod_121ace9320dd4c559a73006874120788/horovod/common/wire/message_generated.h:23, from /tmp/pip-install-0ss1puiz/horovod_121ace9320dd4c559a73006874120788/horovod/common/message.cc:23: /mnt/qb/work2/goswami0/gkd021/conda/envs/wb_dev/include/absl/strings/string_view.h:52:26: error: 'string_view' in namespace 'std' does not name a type 52 | using string_view = std::string_view; | ^~~~~~~~~~~ /mnt/qb/work2/goswami0/gkd021/conda/envs/wb_dev/include/absl/strings/string_view.h:52:21: note: 'std::string_view' is only available from C++17 onwards 52 | using string_view = std::string_view; | ^~~ /mnt/qb/work2/goswami0/gkd021/conda/envs/wb_dev/include/absl/strings/string_view.h:686:8: error: 'string_view' does not name a type 686 | inline string_view ClippedSubstr(string_view s, size_t pos, | ^~~~~~~~~~~ /mnt/qb/work2/goswami0/gkd021/conda/envs/wb_dev/include/absl/strings/string_view.h:697:11: error: 'string_view' does not name a type 697 | constexpr string_view NullSafeStringView(const char* p) {
This is evidently caused by horovod building with C++14 while the installed absl version is too recent and requires C++17. For reference, I'm using the following environment.yml file:
channels:
- pytorch-nightly
- nvidia
- anaconda
- conda-forge
- defaults
dependencies:
############# ESSENTIALS ################
# Do not remove any of these
#########################################
- python >= 3.9
- pip
- pip:
- -r requirements.txt
# Cuda & Buildtools
- cudatoolkit = 11.8 # we need to specify this for nccl, because pytorch-cuda and nccl use different cuda packages
- cuda
- cxx-compiler >= 1.5
- cmake >= 3.20
# Torch
- pytorch
- pytorch-cuda = 11.8
- torchvision
- torchaudio
- torchdata >= 0.6
# MPI & NCCL
- openmpi-mpicxx
- mpi4py
- ucx # UCX support for OpenMPI
- nccl >= 2.15.5 # <- might cause cuda incompatibility issues because it does not take into account
# the cuda version. Thus we MUST pin the version
################ OPTIONAL DEPENDENCIES ##################
# Feel free to remove any of these if you don't need them
#########################################################
# Packaging
- sphinx
- pytest
- pre_commit
# Numeric Libs
- numba >= 0.56.4
- xarray >= 2023.3.0
- dask
- einops >= 0.6
# IO
#- hdf5 >= 1.12.1
- h5netcdf >= 1.1.0
# ML and Misc
- jupyter
- wandb >= 0.13
- scikit-learn >= 1.1
# Visualization
- matplotlib >= 3.6
- imageio
- moviepy
- cartopy >= 0.21
- ffmpeg >= 5.1
- anaconda::graphviz
I'm not sure where the flatbuffers / absl dependency comes from, but I suspect it comes from torch.
Please consider raising CMAKE_CXX_STANDARD to 17.
Indeed, pytorch migrated to C++17 half a year ago: https://github.com/pytorch/pytorch/commit/36ac095ff8918bf7c208029bf6ad28418f1620c1
It may therefore assume that downstream projects are built with C++17. I'm still not sure whether the flatbuffer library in my include path stems from torch, but one should consider that this mismatch could potentially create future version conflicts independent of that.
After further investigation, I can confirm that a libabseil (conda) package is likely pulled in by a newer version of libprotobuf. I'm not sure exactly where the protobuf dependency stems from, though.
I'm aware that these exact details are not of interest to, or in the scope of, the horovod team, and are instead more related to conda. However, by sharing them, I want to shed some light on how such version mismatches can easily occur in practice. I hope that this gives some input on what kind of actions could be taken on horovod's side to reduce the chance of such conflicts in the future. There are potentially also other options available besides migrating to C++17.
On a side note: C++17 is the default standard used by conda when building packages.
Hi @sehoffmann, thanks for pointing this out. Yes, Horovod needs to be updated to build with C++17 for recent PyTorch nightlies (similarly to how we switch to C++17 for recent versions of TensorFlow). Contributions are welcome!
Is there any update? The current version failed when using 'pip install horovod[pytorch]'.
I made the changes below to the source, and it builds okay with nightly pytorch.
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2c12719..1a58a26 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -16,7 +16,7 @@ endif()
project(horovod CXX)
-set(CMAKE_CXX_STANDARD 14)
+set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
diff --git a/horovod/common/ops/cuda/CMakeLists.txt b/horovod/common/ops/cuda/CMakeLists.txt
index 383185e..a6e4955 100644
--- a/horovod/common/ops/cuda/CMakeLists.txt
+++ b/horovod/common/ops/cuda/CMakeLists.txt
@@ -6,7 +6,7 @@ MESSAGE(STATUS "HVD_NVCC_COMPILE_FLAGS = ${HVD_NVCC_COMPILE_FLAGS}")
# If we don't set CMAKE_CUDA_STANDARD, it will default to ${CMAKE_CXX_STANDARD} ("14" at this time). nvcc may fail if
# the --std=c++... argument is passed multiple times.
-set(CMAKE_CUDA_STANDARD 11)
+set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
add_library(horovod_cuda_kernels cuda_kernels.cu)
diff --git a/horovod/torch/CMakeLists.txt b/horovod/torch/CMakeLists.txt
index 7234d74..5a20a21 100644
--- a/horovod/torch/CMakeLists.txt
+++ b/horovod/torch/CMakeLists.txt
@@ -64,7 +64,7 @@ parse_version(${Pytorch_VERSION} VERSION_DEC)
add_definitions(-DPYTORCH_VERSION=${VERSION_DEC} -DTORCH_API_INCLUDE_EXTENSION_H=1)
set(Pytorch_CXX11 ${Pytorch_CXX11} PARENT_SCOPE)
if(NOT Pytorch_VERSION VERSION_LESS "1.5.0")
- set(CMAKE_CXX_STANDARD 14)
+ set(CMAKE_CXX_STANDARD 17)
endif()
# PyTorch SOURCES
My build command is below; please adjust it to your CUDA path, version, etc. The wheel will be in the dist folder, and you can install it with pip.
CUDA_HOME=/usr/local/cuda-12 HOROVOD_WITHOUT_GLOO=1 HOROVOD_WITHOUT_MXNET=1 HOROVOD_WITHOUT_TENSORFLOW=1 HOROVOD_CUDA_HOME=/usr/local/cuda-12 HOROVOD_NCCL_LINK=SHARED HOROVOD_WITH_PYTORCH=1 HOROVOD_GPU_OPERATIONS=NCCL python setup.py bdist_wheel
"Pytorch release 2.1 codebase has migrated from the C++14 to the C++17 standard." https://github.com/pytorch/pytorch/pull/100557 Use previous releases of pytorch, such as 2.0.1 instead, and it should be OK.
CUDA 11.7
pip install torch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2
CUDA 11.8
pip install torch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2 --index-url https://download.pytorch.org/whl/cu118