OpenChatKit
OpenChatKit copied to clipboard
Training on BookSum
output_and_error.log

**Describe the bug**
A clear and concise description of what the bug is.
**To Reproduce**
Steps to reproduce the behavior:
- Go to '...'
- Click on '....'
- Scroll down to '....'
- See error
**Expected behavior**
A clear and concise description of what you expected to happen.
**Screenshots**
If applicable, add screenshots to help explain your problem.
**Desktop (please complete the following information):**
- OS: [e.g. iOS]
- Browser [e.g. chrome, safari]
- Version [e.g. 22]
**Smartphone (please complete the following information):**
- Device: [e.g. iPhone6]
- OS: [e.g. iOS8.1]
- Browser [e.g. stock browser, safari]
- Version [e.g. 22]
**Additional context**
Add any other context about the problem here.
File output
Traceback (most recent call last):
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/dist_clm_train.py", line 478, in <module>
main()
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/dist_clm_train.py", line 397, in main
init_communicators(args)
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/comm/comm_utils.py", line 103, in init_communicators
_PIPELINE_PARALLEL_COMM = NCCLCommunicator(_PIPELINE_PARALLEL_RANK, args.cuda_id, args.pipeline_group_size,
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/comm/nccl_backend.py", line 31, in __init__
cupy.cuda.Device(cuda_id).use()
File "cupy/cuda/device.pyx", line 192, in cupy.cuda.device.Device.use
File "cupy/cuda/device.pyx", line 198, in cupy.cuda.device.Device.use
File "cupy_backends/cuda/api/runtime.pyx", line 375, in cupy_backends.cuda.api.runtime.setDevice
File "cupy_backends/cuda/api/runtime.pyx", line 144, in cupy_backends.cuda.api.runtime.check_status
cupy_backends.cuda.api.runtime.CUDARuntimeError: cudaErrorInvalidDevice: invalid device ordinal
Traceback (most recent call last):
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/dist_clm_train.py", line 478, in <module>
main()
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/dist_clm_train.py", line 397, in main
init_communicators(args)
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/comm/comm_utils.py", line 103, in init_communicators
_PIPELINE_PARALLEL_COMM = NCCLCommunicator(_PIPELINE_PARALLEL_RANK, args.cuda_id, args.pipeline_group_size,
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/comm/nccl_backend.py", line 31, in __init__
cupy.cuda.Device(cuda_id).use()
File "cupy/cuda/device.pyx", line 192, in cupy.cuda.device.Device.use
File "cupy/cuda/device.pyx", line 198, in cupy.cuda.device.Device.use
File "cupy_backends/cuda/api/runtime.pyx", line 375, in cupy_backends.cuda.api.runtime.setDevice
File "cupy_backends/cuda/api/runtime.pyx", line 144, in cupy_backends.cuda.api.runtime.check_status
cupy_backends.cuda.api.runtime.CUDARuntimeError: cudaErrorInvalidDevice: invalid device ordinal
Traceback (most recent call last):
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/dist_clm_train.py", line 478, in <module>
main()
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/dist_clm_train.py", line 397, in main
init_communicators(args)
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/comm/comm_utils.py", line 103, in init_communicators
_PIPELINE_PARALLEL_COMM = NCCLCommunicator(_PIPELINE_PARALLEL_RANK, args.cuda_id, args.pipeline_group_size,
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/comm/nccl_backend.py", line 31, in __init__
cupy.cuda.Device(cuda_id).use()
File "cupy/cuda/device.pyx", line 192, in cupy.cuda.device.Device.use
File "cupy/cuda/device.pyx", line 198, in cupy.cuda.device.Device.use
File "cupy_backends/cuda/api/runtime.pyx", line 375, in cupy_backends.cuda.api.runtime.setDevice
File "cupy_backends/cuda/api/runtime.pyx", line 144, in cupy_backends.cuda.api.runtime.check_status
cupy_backends.cuda.api.runtime.CUDARuntimeError: cudaErrorInvalidDevice: invalid device ordinal
Traceback (most recent call last):
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/dist_clm_train.py", line 478, in <module>
main()
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/dist_clm_train.py", line 397, in main
init_communicators(args)
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/comm/comm_utils.py", line 103, in init_communicators
_PIPELINE_PARALLEL_COMM = NCCLCommunicator(_PIPELINE_PARALLEL_RANK, args.cuda_id, args.pipeline_group_size,
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/comm/nccl_backend.py", line 31, in __init__
cupy.cuda.Device(cuda_id).use()
File "cupy/cuda/device.pyx", line 192, in cupy.cuda.device.Device.use
File "cupy/cuda/device.pyx", line 198, in cupy.cuda.device.Device.use
File "cupy_backends/cuda/api/runtime.pyx", line 375, in cupy_backends.cuda.api.runtime.setDevice
File "cupy_backends/cuda/api/runtime.pyx", line 144, in cupy_backends.cuda.api.runtime.check_status
cupy_backends.cuda.api.runtime.CUDARuntimeError: cudaErrorInvalidDevice: invalid device ordinal
Traceback (most recent call last):
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/dist_clm_train.py", line 478, in <module>
main()
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/dist_clm_train.py", line 397, in main
init_communicators(args)
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/comm/comm_utils.py", line 103, in init_communicators
_PIPELINE_PARALLEL_COMM = NCCLCommunicator(_PIPELINE_PARALLEL_RANK, args.cuda_id, args.pipeline_group_size,
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/comm/nccl_backend.py", line 31, in __init__
cupy.cuda.Device(cuda_id).use()
File "cupy/cuda/device.pyx", line 192, in cupy.cuda.device.Device.use
File "cupy/cuda/device.pyx", line 198, in cupy.cuda.device.Device.use
File "cupy_backends/cuda/api/runtime.pyx", line 375, in cupy_backends.cuda.api.runtime.setDevice
File "cupy_backends/cuda/api/runtime.pyx", line 144, in cupy_backends.cuda.api.runtime.check_status
cupy_backends.cuda.api.runtime.CUDARuntimeError: cudaErrorInvalidDevice: invalid device ordinal
Traceback (most recent call last):
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/dist_clm_train.py", line 478, in <module>
main()
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/dist_clm_train.py", line 397, in main
init_communicators(args)
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/comm/comm_utils.py", line 103, in init_communicators
_PIPELINE_PARALLEL_COMM = NCCLCommunicator(_PIPELINE_PARALLEL_RANK, args.cuda_id, args.pipeline_group_size,
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/comm/nccl_backend.py", line 31, in __init__
cupy.cuda.Device(cuda_id).use()
File "cupy/cuda/device.pyx", line 192, in cupy.cuda.device.Device.use
File "cupy/cuda/device.pyx", line 198, in cupy.cuda.device.Device.use
File "cupy_backends/cuda/api/runtime.pyx", line 375, in cupy_backends.cuda.api.runtime.setDevice
File "cupy_backends/cuda/api/runtime.pyx", line 144, in cupy_backends.cuda.api.runtime.check_status
cupy_backends.cuda.api.runtime.CUDARuntimeError: cudaErrorInvalidDevice: invalid device ordinal