Hi author,
When I run with the above environment, an error happened:
terminate called after throwing an instance of 'std::runtime_error'
what(): NCCL Error 1: unhandled cuda error
Traceback (most recent call last):
File "main.py", line 239, in <module>
main(args)
File "main.py", line 207, in main
test_stats, coco_evaluator = evaluate(model, criterion, postprocessors, data_loader_val, base_ds, device, args)
File "/home/vsw/miniconda3/envs/detr/lib/python3.8/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
return func(*args, **kwargs)
File "/home/vsw/Desktop/RelTR/engine.py", line 114, in evaluate
loss_dict = criterion(outputs, targets)
File "/home/vsw/miniconda3/envs/detr/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/home/vsw/Desktop/RelTR/models/reltr.py", line 298, in forward
indices = self.matcher(outputs_without_aux, targets)
File "/home/vsw/miniconda3/envs/detr/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/home/vsw/miniconda3/envs/detr/lib/python3.8/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
return func(*args, **kwargs)
File "/home/vsw/Desktop/RelTR/models/matcher.py", line 92, in forward
indices = [linear_sum_assignment(c[i]) for i, c in enumerate(C.split(sizes, -1))]
File "/home/vsw/Desktop/RelTR/models/matcher.py", line 92, in <listcomp>
indices = [linear_sum_assignment(c[i]) for i, c in enumerate(C.split(sizes, -1))]
ValueError: matrix contains invalid numeric entries