RuntimeError: Default process group has not been initialized, please make sure to call init_process_group
Traceback (most recent call last):
File "/home/software/pycharm-2018.3.1/helpers/pydev/pydevd.py", line 1741, in
Traceback (most recent call last):
  File "/home/software/pycharm-2018.3.1/helpers/pydev/pydevd.py", line 1741, in <module>
    main()
  File "/home/software/pycharm-2018.3.1/helpers/pydev/pydevd.py", line 1735, in main
    globals = debugger.run(setup['file'], None, None, is_module)
  File "/home/software/pycharm-2018.3.1/helpers/pydev/pydevd.py", line 1135, in run
    pydev_imports.execfile(file, globals, locals)  # execute the script
  File "/home/software/pycharm-2018.3.1/helpers/pydev/_pydev_imps/_pydev_execfile.py", line 18, in execfile
    exec(compile(contents+"\n", file, 'exec'), glob, loc)
  File "/home/PycharmProjects/finetune/ViT-Adapter-main/detection/train.py", line 195, in <module>
    main()
  File "/home/PycharmProjects/finetune/ViT-Adapter-main/detection/train.py", line 191, in main
    meta=meta)
  File "/home/anaconda/envs/vit_pytorch/lib/python3.7/site-packages/mmdet/apis/train.py", line 208, in train_detector
    runner.run(data_loaders, cfg.workflow)
  File "/home/anaconda/envs/vit_pytorch/lib/python3.7/site-packages/mmcv/runner/epoch_based_runner.py", line 127, in run
    epoch_runner(data_loaders[i], **kwargs)
  File "/home/anaconda/envs/vit_pytorch/lib/python3.7/site-packages/mmcv/runner/epoch_based_runner.py", line 50, in train
    self.run_iter(data_batch, train_mode=True, **kwargs)
  File "/home/anaconda/envs/vit_pytorch/lib/python3.7/site-packages/mmcv/runner/epoch_based_runner.py", line 30, in run_iter
    **kwargs)
  File "/home/anaconda/envs/vit_pytorch/lib/python3.7/site-packages/mmcv/parallel/data_parallel.py", line 75, in train_step
    return self.module.train_step(*inputs[0], **kwargs[0])
  File "/home/anaconda/envs/vit_pytorch/lib/python3.7/site-packages/mmdet/models/detectors/base.py", line 248, in train_step
    losses = self(**data)
  File "/home/anaconda/envs/vit_pytorch/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/anaconda/envs/vit_pytorch/lib/python3.7/site-packages/mmcv/runner/fp16_utils.py", line 128, in new_func
    output = old_func(*new_args, **new_kwargs)
  File "/home/anaconda/envs/vit_pytorch/lib/python3.7/site-packages/mmdet/models/detectors/base.py", line 172, in forward
    return self.forward_train(img, img_metas, **kwargs)
  File "/home/anaconda/envs/vit_pytorch/lib/python3.7/site-packages/mmdet/models/detectors/two_stage.py", line 127, in forward_train
    x = self.extract_feat(img)
  File "/home/anaconda/envs/vit_pytorch/lib/python3.7/site-packages/mmdet/models/detectors/two_stage.py", line 67, in extract_feat
    x = self.backbone(img)
  File "/home/anaconda/envs/vit_pytorch/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/PycharmProjects/finetune/ViT-Adapter-main/detection/mmdet_custom/models/backbones/vit_adapter.py", line 94, in forward
    c1, c2, c3, c4 = self.spm(x)
  File "/home/anaconda/envs/vit_pytorch/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/PycharmProjects/finetune/ViT-Adapter-main/detection/mmdet_custom/models/backbones/adapter_modules.py", line 207, in forward
    c1 = self.stem(x)
  File "/home/anaconda/envs/vit_pytorch/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/anaconda/envs/vit_pytorch/lib/python3.7/site-packages/torch/nn/modules/container.py", line 139, in forward
    input = module(input)
  File "/home/anaconda/envs/vit_pytorch/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/anaconda/envs/vit_pytorch/lib/python3.7/site-packages/torch/nn/modules/batchnorm.py", line 731, in forward
    world_size = torch.distributed.get_world_size(process_group)
  File "/home/anaconda/envs/vit_pytorch/lib/python3.7/site-packages/torch/distributed/distributed_c10d.py", line 748, in get_world_size
    return _get_group_size(group)
  File "/home/anaconda/envs/vit_pytorch/lib/python3.7/site-packages/torch/distributed/distributed_c10d.py", line 274, in _get_group_size
    default_pg = _get_default_group()
  File "/home/anaconda/envs/vit_pytorch/lib/python3.7/site-packages/torch/distributed/distributed_c10d.py", line 358, in _get_default_group
    raise RuntimeError("Default process group has not been initialized, "
RuntimeError: Default process group has not been initialized, please make sure to call init_process_group.
Hi! Your problem seems to be related to distributed launching. Please check whether your program calls torch.distributed.init_process_group() before training starts.
I got the same issue. Can you be more specific about your solution?
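Hi, when training the model on a single GPU you need to replace nn.SyncBatchNorm with nn.BatchNorm2d in adapter_modules.py. As the traceback shows, the nn.SyncBatchNorm forward pass calls torch.distributed.get_world_size(), which raises this error when no process group has been initialized.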