When I load the 4-bit LLaMA model and call loss.backward(), the following RuntimeError is raised:
Traceback (most recent call last):
File "/home/zzy/anaconda3/envs/gptq/lib/python3.9/runpy.py", line 197, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/home/zzy/anaconda3/envs/gptq/lib/python3.9/runpy.py", line 87, in _run_code
exec(code, run_globals)
File "/home/zzy/.vscode-server/extensions/ms-python.python-2023.6.1/pythonFiles/lib/python/debugpy/adapter/../../debugpy/launcher/../../debugpy/__main__.py", line 39, in <module>
cli.main()
File "/home/zzy/.vscode-server/extensions/ms-python.python-2023.6.1/pythonFiles/lib/python/debugpy/adapter/../../debugpy/launcher/../../debugpy/../debugpy/server/cli.py", line 430, in main
run()
File "/home/zzy/.vscode-server/extensions/ms-python.python-2023.6.1/pythonFiles/lib/python/debugpy/adapter/../../debugpy/launcher/../../debugpy/../debugpy/server/cli.py", line 284, in run_file
runpy.run_path(target, run_name="__main__")
File "/home/zzy/.vscode-server/extensions/ms-python.python-2023.6.1/pythonFiles/lib/python/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_runpy.py", line 321, in run_path
return _run_module_code(code, init_globals, run_name,
File "/home/zzy/.vscode-server/extensions/ms-python.python-2023.6.1/pythonFiles/lib/python/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_runpy.py", line 135, in _run_module_code
_run_code(code, mod_globals, init_globals,
File "/home/zzy/.vscode-server/extensions/ms-python.python-2023.6.1/pythonFiles/lib/python/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_runpy.py", line 124, in _run_code
exec(code, run_globals)
File "/home/zzy/workplace/ICA/llama/GPTQ-for-LLaMa/llama_ICA.py", line 300, in <module>
loss.backward(retain_graph=True)
File "/home/zzy/anaconda3/envs/gptq/lib/python3.9/site-packages/torch/_tensor.py", line 487, in backward
torch.autograd.backward(
File "/home/zzy/anaconda3/envs/gptq/lib/python3.9/site-packages/torch/autograd/__init__.py", line 200, in backward
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
RuntimeError: Expected is_sm80 to be true, but got false. (Could this error message be improved? If so, please report an enhancement request to PyTorch.)
I am wondering whether it is possible to compute gradients when using the 4-bit model.