apex
apex copied to clipboard
Getting compiler segfaults?!
/usr/include/c++/10/chrono: In substitution of ‘template<class _Rep, class _Period> template<class _Period2> using __is_harmonic = std::__bool_constant<(std::ratio<((_Period2::num / std::chrono::duration<_Rep, _Period>::_S_gcd(_Period2::num, _Period::num)) * (_Period::den / std::chrono::duration<_Rep, _Period>::_S_gcd(_Period2::den, _Period::den))), ((_Period2::den / std::chrono::duration<_Rep, _Period>::_S_gcd(_Period2::den, _Period::den)) * (_Period::num / std::chrono::duration<_Rep, _Period>::_S_gcd(_Period2::num, _Period::num)))>::den == 1)> [with _Period2 = _Period2; _Rep = _Rep; _Period = _Period]’:
/usr/include/c++/10/chrono:473:154: required from here
/usr/include/c++/10/chrono:428:27: internal compiler error: Segmentation fault
428 | _S_gcd(intmax_t __m, intmax_t __n) noexcept
| ^~~~~~
Please submit a full bug report,
with preprocessed source if appropriate.
See <file:///usr/share/doc/gcc-10/README.Bugs> for instructions.
error: command '/usr/local/cuda-11.1/bin/nvcc' failed with exit code 1
Running setup.py install for apex ... error
ERROR: Command errored out with exit status 1: /home/hugh/anaconda3/envs/gptserv/bin/python -u -c 'import io, os, sys, setuptools, tokenize; sys.argv[0] = '"'"'/tmp/pip-req-build-64_9qhrd/setup.py'"'"'; __file__='"'"'/tmp/pip-req-build-64_9qhrd/setup.py'"'"';f = getattr(tokenize, '"'"'open'"'"', open)(__file__) if os.path.exists(__file__) else io.StringIO('"'"'from setuptools import setup; setup()'"'"');code = f.read().replace('"'"'\r\n'"'"', '"'"'\n'"'"');f.close();exec(compile(code, __file__, '"'"'exec'"'"'))' --cpp_ext --cuda_ext install --record /tmp/pip-record-i56j5n1o/install-record.txt --single-version-externally-managed --compile --install-headers /home/hugh/anaconda3/envs/gptserv/include/python3.9/apex Check the logs for full command output.
I am getting basically the same error, whether I'm trying to install with the official commands in the README or with the simpler
pip install -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" git+https://github.com/NVIDIA/apex.git@e2083df5eb96643c61613b9df48dd4eea6b07690 (no need to manually clone repo)
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "/tmp/pip-req-build-lwwgcoua/setup.py", line 498, in <module>
setup(
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/site-packages/setuptools/__init__.py", line 153, in setup
return distutils.core.setup(**attrs)
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/distutils/core.py", line 148, in setup
dist.run_commands()
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/distutils/dist.py", line 966, in run_commands
self.run_command(cmd)
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/distutils/dist.py", line 985, in run_command
cmd_obj.run()
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/site-packages/setuptools/command/install.py", line 61, in run
return orig.install.run(self)
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/distutils/command/install.py", line 546, in run
self.run_command('build')
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/distutils/cmd.py", line 313, in run_command
self.distribution.run_command(command)
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/distutils/dist.py", line 985, in run_command
cmd_obj.run()
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/distutils/command/build.py", line 135, in run
self.run_command(cmd_name)
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/distutils/cmd.py", line 313, in run_command
self.distribution.run_command(command)
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/distutils/dist.py", line 985, in run_command
cmd_obj.run()
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/site-packages/setuptools/command/build_ext.py", line 79, in run
_build_ext.run(self)
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/distutils/command/build_ext.py", line 340, in run
self.build_extensions()
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/site-packages/torch/utils/cpp_extension.py", line 709, in build_extensions
build_ext.build_extensions(self)
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/distutils/command/build_ext.py", line 449, in build_extensions
self._build_extensions_serial()
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/distutils/command/build_ext.py", line 474, in _build_extensions_serial
self.build_extension(ext)
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/site-packages/setuptools/command/build_ext.py", line 202, in build_extension
_build_ext.build_extension(self, ext)
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/distutils/command/build_ext.py", line 529, in build_extension
objects = self.compiler.compile(sources,
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/site-packages/torch/utils/cpp_extension.py", line 530, in unix_wrap_ninja_compile
_write_ninja_file_and_compile_objects(
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/site-packages/torch/utils/cpp_extension.py", line 1355, in _write_ninja_file_and_compile_objects
_run_ninja_build(
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/site-packages/torch/utils/cpp_extension.py", line 1682, in _run_ninja_build
raise RuntimeError(message) from e
RuntimeError: Error compiling objects for extension
Running setup.py install for apex ... error
ERROR: Command errored out with exit status 1: /home/eino/miniconda3/envs/gpt-neox/bin/python3.9 -u -c 'import io, os, sys, setuptools, tokenize; sys.argv[0] = '"'"'/tmp/pip-req-build-lwwgcoua/setup.py'"'"'; __file__='"'"'/tmp/pip-req-build-lwwgcoua/setup.py'"'"';f = getattr(tokenize, '"'"'open'"'"', open)(__file__) if os.path.exists(__file__) else io.StringIO('"'"'from setuptools import setup; setup()'"'"');code = f.read().replace('"'"'\r\n'"'"', '"'"'\n'"'"');f.close();exec(compile(code, __file__, '"'"'exec'"'"'))' --cpp_ext --cuda_ext install --record /tmp/pip-record-d18djoyp/install-record.txt --single-version-externally-managed --compile --install-headers /home/eino/miniconda3/envs/gpt-neox/include/python3.9/apex Check the logs for full command output.
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "/tmp/pip-req-build-lwwgcoua/setup.py", line 498, in <module>
setup(
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/site-packages/setuptools/__init__.py", line 153, in setup
return distutils.core.setup(**attrs)
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/distutils/core.py", line 148, in setup
dist.run_commands()
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/distutils/dist.py", line 966, in run_commands
self.run_command(cmd)
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/distutils/dist.py", line 985, in run_command
cmd_obj.run()
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/site-packages/setuptools/command/install.py", line 61, in run
return orig.install.run(self)
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/distutils/command/install.py", line 546, in run
self.run_command('build')
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/distutils/cmd.py", line 313, in run_command
self.distribution.run_command(command)
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/distutils/dist.py", line 985, in run_command
cmd_obj.run()
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/distutils/command/build.py", line 135, in run
self.run_command(cmd_name)
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/distutils/cmd.py", line 313, in run_command
self.distribution.run_command(command)
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/distutils/dist.py", line 985, in run_command
cmd_obj.run()
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/site-packages/setuptools/command/build_ext.py", line 79, in run
_build_ext.run(self)
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/distutils/command/build_ext.py", line 340, in run
self.build_extensions()
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/site-packages/torch/utils/cpp_extension.py", line 709, in build_extensions
build_ext.build_extensions(self)
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/distutils/command/build_ext.py", line 449, in build_extensions
self._build_extensions_serial()
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/distutils/command/build_ext.py", line 474, in _build_extensions_serial
self.build_extension(ext)
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/site-packages/setuptools/command/build_ext.py", line 202, in build_extension
_build_ext.build_extension(self, ext)
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/distutils/command/build_ext.py", line 529, in build_extension
objects = self.compiler.compile(sources,
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/site-packages/torch/utils/cpp_extension.py", line 530, in unix_wrap_ninja_compile
_write_ninja_file_and_compile_objects(
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/site-packages/torch/utils/cpp_extension.py", line 1355, in _write_ninja_file_and_compile_objects
_run_ninja_build(
File "/home/eino/miniconda3/envs/gpt-neox/lib/python3.9/site-packages/torch/utils/cpp_extension.py", line 1682, in _run_ninja_build
raise RuntimeError(message) from e
RuntimeError: Error compiling objects for extension
Running setup.py install for apex ... error
ERROR: Command errored out with exit status 1: /home/eino/miniconda3/envs/gpt-neox/bin/python3.9 -u -c 'import io, os, sys, setuptools, tokenize; sys.argv[0] = '"'"'/tmp/pip-req-build-lwwgcoua/setup.py'"'"'; __file__='"'"'/tmp/pip-req-build-lwwgcoua/setup.py'"'"';f = getattr(tokenize, '"'"'open'"'"', open)(__file__) if os.path.exists(__file__) else io.StringIO('"'"'from setuptools import setup; setup()'"'"');code = f.read().replace('"'"'\r\n'"'"', '"'"'\n'"'"');f.close();exec(compile(code, __file__, '"'"'exec'"'"'))' --cpp_ext --cuda_ext install --record /tmp/pip-record-d18djoyp/install-record.txt --single-version-externally-managed --compile --install-headers /home/eino/miniconda3/envs/gpt-neox/include/python3.9/apex Check the logs for full command output.
For others who encounter the same issue:
It's a compatibility problem between CUDA 11.1 and GCC 10: nvcc hits an internal compiler error (segmentation fault) while processing GCC 10's <chrono> header.
You need to downgrade GCC to version 9.3 to solve this problem.
Thank you