Fine_tune_RT_DETR_on_a_custom_dataset: RuntimeError from trainer.train() on a multi-GPU machine

Running the Fine_tune_RT_DETR_on_a_custom_dataset notebook on a machine with more than one GPU, trainer.train() fails with the traceback below:
RuntimeError                              Traceback (most recent call last)
Cell In[17], line 13
      1 from transformers import Trainer
      3 trainer = Trainer(
      4     model=model,
      5     args=training_args,
   (...)
     10     compute_metrics=eval_compute_metrics_fn,
     11 )
---> 13 trainer.train()

File ~/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/transformers/trainer.py:2052, in Trainer.train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
   2050     hf_hub_utils.enable_progress_bars()
   2051 else:
-> 2052     return inner_training_loop(
   2053         args=args,
   2054         resume_from_checkpoint=resume_from_checkpoint,
   2055         trial=trial,
   2056         ignore_keys_for_eval=ignore_keys_for_eval,
   2057     )

File ~/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/transformers/trainer.py:2388, in Trainer._inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
   2385 self.control = self.callback_handler.on_step_begin(args, self.state, self.control)
   2387 with self.accelerator.accumulate(model):
-> 2388     tr_loss_step = self.training_step(model, inputs)
   2390 if (
   2391     args.logging_nan_inf_filter
   2392     and not is_torch_xla_available()
   2393     and (torch.isnan(tr_loss_step) or torch.isinf(tr_loss_step))
   2394 ):
   2395     # if loss is nan or inf simply add the average of previous logged losses
   2396     tr_loss = tr_loss + tr_loss / (1 + self.state.global_step - self._globalstep_last_logged)

File ~/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/transformers/trainer.py:3485, in Trainer.training_step(self, model, inputs)
   3482     return loss_mb.reduce_mean().detach().to(self.args.device)
   3484 with self.compute_loss_context_manager():
-> 3485     loss = self.compute_loss(model, inputs)
   3487 del inputs
   3488 if (
   3489     self.args.torch_empty_cache_steps is not None
   3490     and self.state.global_step % self.args.torch_empty_cache_steps == 0
   3491 ):

File ~/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/transformers/trainer.py:3532, in Trainer.compute_loss(self, model, inputs, return_outputs)
   3530 else:
   3531     labels = None
-> 3532 outputs = model(**inputs)
   3533 # Save past state if it exists
   3534 # TODO: this needs to be fixed and made cleaner later.
   3535 if self.args.past_index >= 0:

File ~/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/torch/nn/modules/module.py:1553, in Module._wrapped_call_impl(self, *args, **kwargs)
   1551     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1552 else:
-> 1553     return self._call_impl(*args, **kwargs)

File ~/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/torch/nn/modules/module.py:1562, in Module._call_impl(self, *args, **kwargs)
   1557 # If we don't have any hooks, we want to skip the rest of the logic in
   1558 # this function, and just call forward.
   1559 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1560         or _global_backward_pre_hooks or _global_backward_hooks
   1561         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1562     return forward_call(*args, **kwargs)
   1564 try:
   1565     result = None

File ~/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/torch/nn/parallel/data_parallel.py:186, in DataParallel.forward(self, *inputs, **kwargs)
    184     return self.module(*inputs[0], **module_kwargs[0])
    185 replicas = self.replicate(self.module, self.device_ids[:len(inputs)])
--> 186 outputs = self.parallel_apply(replicas, inputs, module_kwargs)
    187 return self.gather(outputs, self.output_device)

File ~/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/torch/nn/parallel/data_parallel.py:201, in DataParallel.parallel_apply(self, replicas, inputs, kwargs)
    200 def parallel_apply(self, replicas: Sequence[T], inputs: Sequence[Any], kwargs: Any) -> List[Any]:
--> 201     return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)])

File ~/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/torch/nn/parallel/parallel_apply.py:109, in parallel_apply(modules, inputs, kwargs_tup, devices)
    107 output = results[i]
    108 if isinstance(output, ExceptionWrapper):
--> 109     output.reraise()
    110 outputs.append(output)
    111 return outputs

File ~/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/torch/_utils.py:706, in ExceptionWrapper.reraise(self)
    702 except TypeError:
    703     # If the exception takes multiple arguments, don't try to
    704     # instantiate since we don't know how to
    705     raise RuntimeError(msg) from None
--> 706 raise exception

RuntimeError: Caught RuntimeError in replica 0 on device 0.
Original Traceback (most recent call last):
  File "/home/xyz/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/torch/nn/parallel/parallel_apply.py", line 84, in _worker
    output = module(*input, **kwargs)
             ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/xyz/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/xyz/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/xyz/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/transformers/models/rt_detr/modeling_rt_detr.py", line 2659, in forward
    outputs = self.model(
              ^^^^^^^^^^^
  File "/home/xyz/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/xyz/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/xyz/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/transformers/models/rt_detr/modeling_rt_detr.py", line 1892, in forward
    reference_points_unact = torch.concat([denoising_bbox_unact, reference_points_unact], 1)
                             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 16 but got size 8 for tensor number 1 in the list.
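The frames show that Trainer has wrapped the model in torch.nn.DataParallel ("Caught RuntimeError in replica 0 on device 0"), which it does automatically when more than one GPU is visible. One plausible reading of the size mismatch: DataParallel splits the pixel_values tensor along the batch dimension (16 images become 8 per replica), but the labels, a list of per-image annotation dicts, are not scattered the same way, so the denoising queries RT-DETR builds from the labels keep batch size 16 while the image-derived reference points have batch size 8, and the torch.concat at modeling_rt_detr.py:1892 fails. A minimal workaround sketch, assuming the rest of the notebook stays unchanged, is to expose a single GPU so Trainer never engages DataParallel; note the os.environ line must run before torch initializes CUDA, i.e. at the top of the notebook:

# Minimal workaround sketch: restrict training to one GPU so Trainer does
# not wrap the model in torch.nn.DataParallel. Set the environment variable
# before torch/transformers touch the GPUs (first cell of the notebook).
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # expose only GPU 0

from transformers import Trainer

trainer = Trainer(
    model=model,                              # same objects as in the failing cell
    args=training_args,
    # ... remaining arguments exactly as in the original Trainer(...) call ...
    compute_metrics=eval_compute_metrics_fn,
)
trainer.train()

If multi-GPU training is actually wanted, running the same code as a script under DistributedDataParallel instead (for example with torchrun --nproc_per_node=2 train.py, or accelerate launch train.py; train.py is a hypothetical name for the notebook exported as a script) should avoid this particular mismatch: each DDP process loads its own sub-batch together with its own labels, so the two tensors being concatenated agree on the batch dimension.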