When using TrOCR to train on my own dataset, increasing max_target_length to 1024 raises an error
/usr/local/src/pytorch/aten/src/ATen/native/cuda/Indexing.cu:702: indexSelectLargeIndex: block: [291,0,0], thread: [127,0,0] Assertion `srcIndex < srcSelectDimSize` failed.

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
/tmp/ipykernel_33/1936936806.py in <module>

/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1049         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1050                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051             return forward_call(*input, **kwargs)
   1052         # Do not call functions when jit is used
   1053         full_backward_hooks, non_full_backward_hooks = [], []

/opt/conda/lib/python3.7/site-packages/transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py in forward(self, pixel_values, decoder_input_ids, decoder_attention_mask, encoder_outputs, past_key_values, decoder_inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict, **kwargs)
    493             past_key_values=past_key_values,
    494             return_dict=return_dict,
--> 495             **kwargs_decoder,
    496         )
    497

/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1069                 input = bw_hook.setup_input_hook(input)
   1070
-> 1071         result = forward_call(*input, **kwargs)
   1072         if _global_forward_hooks or self._forward_hooks:
   1073             for hook in itertools.chain(

/opt/conda/lib/python3.7/site-packages/transformers/models/trocr/modeling_trocr.py in forward(self, input_ids, attention_mask, encoder_hidden_states, encoder_attention_mask, head_mask, cross_attn_head_mask, past_key_values, inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict)
    924             output_attentions=output_attentions,
    925             output_hidden_states=output_hidden_states,
--> 926             return_dict=return_dict,
    927         )
    928

/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1069                 input = bw_hook.setup_input_hook(input)
   1070
-> 1071         result = forward_call(*input, **kwargs)
   1072         if _global_forward_hooks or self._forward_hooks:
   1073             for hook in itertools.chain(

/opt/conda/lib/python3.7/site-packages/transformers/models/trocr/modeling_trocr.py in forward(self, input_ids, attention_mask, encoder_hidden_states, encoder_attention_mask, head_mask, cross_attn_head_mask, past_key_values, inputs_embeds, use_cache, output_attentions, output_hidden_states, return_dict)
    639
    640         if self.config.use_learned_position_embeddings:
--> 641             embed_pos = self.embed_positions(input_shape, past_key_values_length=past_key_values_length)
    642         else:
    643             embed_pos = self.embed_positions(input_ids, past_key_values_length=past_key_values_length)

/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1049         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1050                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051             return forward_call(*input, **kwargs)
   1052         # Do not call functions when jit is used
   1053         full_backward_hooks, non_full_backward_hooks = [], []

/opt/conda/lib/python3.7/site-packages/transformers/models/trocr/modeling_trocr.py in forward(self, input_ids_shape, past_key_values_length)
     95             past_key_values_length, past_key_values_length + seq_len, dtype=torch.long, device=self.weight.device
     96         )
---> 97         return super().forward(positions + self.offset)
     98
     99

/opt/conda/lib/python3.7/site-packages/torch/nn/modules/sparse.py in forward(self, input)
    158         return F.embedding(
    159             input, self.weight, self.padding_idx, self.max_norm,
--> 160             self.norm_type, self.scale_grad_by_freq, self.sparse)
    161
    162     def extra_repr(self) -> str:

/opt/conda/lib/python3.7/site-packages/torch/nn/functional.py in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)
   2041         # remove once script supports set_grad_enabled
   2042         _no_grad_embedding_renorm_(weight, input, max_norm, norm_type)
-> 2043     return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
   2044
   2045

RuntimeError: CUDA error: device-side assert triggered
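The traceback bottoms out in the decoder's learned position embedding: torch.embedding is handed a position index larger than the embedding table, whose size is fixed by the checkpoint's max_position_embeddings (512 in the released TrOCR configs, plus an offset of 2). A minimal sketch, not TrOCR-specific, that reproduces the same assert with the sizes from this report:

```python
import torch
import torch.nn as nn

# A learned position table with room for 512 positions, mirroring the
# TrOCR decoder config; embedding_dim is arbitrary for this demo.
embed = nn.Embedding(num_embeddings=512, embedding_dim=16).to("cuda")

# Requesting 1024 positions indexes rows 512..1023, which do not exist;
# on CUDA this surfaces as the indexSelectLargeIndex assert above.
positions = torch.arange(1024, device="cuda")
out = embed(positions)  # RuntimeError: CUDA error: device-side assert triggered
```

Running the same forward pass on CPU raises the clearer `IndexError: index out of range in self`, which is often the quickest way to confirm this class of CUDA assert.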
When max_target_length is 512 it works normally, but the maximum label length in my data is 998.
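A quick way to check the label lengths against the decoder limit — a sketch assuming a TrOCRProcessor is used for tokenization, with `texts` as a hypothetical stand-in for the dataset's ground-truth transcriptions:

```python
from transformers import TrOCRProcessor

# Hypothetical: `texts` holds the ground-truth transcriptions of the custom dataset.
processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-stage1")
texts = ["replace with the real label strings"]

lengths = [len(processor.tokenizer(t).input_ids) for t in texts]
print("longest label:", max(lengths), "tokens")  # vs. the 512-position decoder limit
```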
Has this issue been solved?
Not really; you may need to train from scratch.
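A hedged sketch of what that could look like in practice (an assumption, not a confirmed fix): enlarge the decoder's position-embedding table, copy over the pretrained rows, and fine-tune so the new positions get learned. The module path follows the current transformers TrOCR implementation and may differ across versions.

```python
import torch
from transformers import VisionEncoderDecoderModel
from transformers.models.trocr.modeling_trocr import TrOCRLearnedPositionalEmbedding

model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-stage1")
decoder = model.decoder.model.decoder  # TrOCRDecoder that owns embed_positions

old = decoder.embed_positions  # table of 512 positions + offset of 2
new = TrOCRLearnedPositionalEmbedding(1024, old.embedding_dim)

# Copy the pretrained rows; the rows beyond them are randomly initialized and
# have to be learned during fine-tuning, hence the "train from scratch" caveat.
with torch.no_grad():
    new.weight[: old.weight.shape[0]] = old.weight

decoder.embed_positions = new
model.config.decoder.max_position_embeddings = 1024
model.decoder.config.max_position_embeddings = 1024
```

Whether fine-tuning alone recovers quality at these lengths is untested here; the reply above suggests full retraining may be needed.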