
Finetuning 7B codellama: Runtime error

[Open] Kushalamummigatti opened this issue 10 months ago · 10 comments

I am trying to fine-tune CodeLlama with the same approach as Llama 2, using the same fine-tuning script. I am not sure whether this is correct, since neither the repo nor the blog discusses a fine-tuning approach.
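Roughly, the setup looks like this (a minimal sketch adapted from the Llama 2 recipe; the checkpoint id and LoRA hyperparameters here are illustrative rather than the exact values I use):

```python
# Minimal sketch of the fine-tuning setup (illustrative values, mirrors the Llama 2 recipe).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import LoraConfig, get_peft_model

model_id = "codellama/CodeLlama-7b-hf"  # assumed Hugging Face checkpoint

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto",
)

# LoRA adapter, roughly as in the Llama 2 fine-tuning example
lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

# The model is then passed to transformers.Trainer with training_args,
# as in the Llama 2 script, and trainer.train() raises the error below.
```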

I am facing this error: RuntimeError: shape '[-1, 32000]' is invalid for input of size 131073504

RuntimeError                              Traceback (most recent call last)
Cell In[10], line 29
     20 trainer = Trainer(
     21     model=model,
     22     args=training_args,
   (...)
     25     callbacks=[profiler_callback] if enable_profiler else [],
     26 )
     28 # Start training
---> 29 trainer.train()

File /opt/conda/envs/llama_cona_env/lib/python3.8/site-packages/transformers/trainer.py:1662, in Trainer.train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
   1657 self.model_wrapped = self.model
   1659 inner_training_loop = find_executable_batch_size(
   1660     self._inner_training_loop, self._train_batch_size, args.auto_find_batch_size
   1661 )
-> 1662 return inner_training_loop(
   1663     args=args,
   1664     resume_from_checkpoint=resume_from_checkpoint,
   1665     trial=trial,
   1666     ignore_keys_for_eval=ignore_keys_for_eval,
   1667 )

File /opt/conda/envs/llama_cona_env/lib/python3.8/site-packages/transformers/trainer.py:1929, in Trainer._inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
   1927         tr_loss_step = self.training_step(model, inputs)
   1928 else:
-> 1929     tr_loss_step = self.training_step(model, inputs)
   1931 if (
   1932     args.logging_nan_inf_filter
   1933     and not is_torch_tpu_available()
   1934     and (torch.isnan(tr_loss_step) or torch.isinf(tr_loss_step))
   1935 ):
   1936     # if loss is nan or inf simply add the average of previous logged losses
   1937     tr_loss += tr_loss / (1 + self.state.global_step - self._globalstep_last_logged)

File /opt/conda/envs/llama_cona_env/lib/python3.8/site-packages/transformers/trainer.py:2699, in Trainer.training_step(self, model, inputs)
   2696     return loss_mb.reduce_mean().detach().to(self.args.device)
   2698 with self.compute_loss_context_manager():
-> 2699     loss = self.compute_loss(model, inputs)
   2701 if self.args.n_gpu > 1:
   2702     loss = loss.mean()  # mean() to average on multi-gpu parallel training

File /opt/conda/envs/llama_cona_env/lib/python3.8/site-packages/transformers/trainer.py:2731, in Trainer.compute_loss(self, model, inputs, return_outputs)
   2729 else:
   2730     labels = None
-> 2731 outputs = model(**inputs)
   2732 # Save past state if it exists
   2733 # TODO: this needs to be fixed and made cleaner later.
   2734 if self.args.past_index >= 0:

File /opt/conda/envs/llama_cona_env/lib/python3.8/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File /opt/conda/envs/llama_cona_env/lib/python3.8/site-packages/peft/peft_model.py:947, in PeftModelForCausalLM.forward(self, input_ids, attention_mask, inputs_embeds, labels, output_attentions, output_hidden_states, return_dict, **kwargs)
    936     raise AssertionError("forward in MPTForCausalLM does not support inputs_embeds")
    937 return self.base_model(
    938     input_ids=input_ids,
    939     attention_mask=attention_mask,
  (...)
    944     **kwargs,
    945 )
--> 947 return self.base_model(
    948     input_ids=input_ids,
    949     attention_mask=attention_mask,
    950     inputs_embeds=inputs_embeds,
    951     labels=labels,
    952     output_attentions=output_attentions,
    953     output_hidden_states=output_hidden_states,
    954     return_dict=return_dict,
    955     **kwargs,
    956 )
    958 batch_size = input_ids.shape[0]
    959 if attention_mask is not None:
    960     # concat prompt attention mask

File /opt/conda/envs/llama_cona_env/lib/python3.8/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File /opt/conda/envs/llama_cona_env/lib/python3.8/site-packages/accelerate/hooks.py:165, in add_hook_to_module.<locals>.new_forward(*args, **kwargs)
    163     output = old_forward(*args, **kwargs)
    164 else:
--> 165     output = old_forward(*args, **kwargs)
    166 return module._hf_hook.post_forward(module, output)

File /opt/conda/envs/llama_cona_env/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:709, in LlamaForCausalLM.forward(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict)
    707 # Flatten the tokens
    708 loss_fct = CrossEntropyLoss()
--> 709 shift_logits = shift_logits.view(-1, self.config.vocab_size)
    710 shift_labels = shift_labels.view(-1)
    711 # Enable model parallelism

RuntimeError: shape '[-1, 32000]' is invalid for input of size 131073504
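For what it's worth, the numbers in the error look like a vocabulary-size mismatch: 131073504 = 32016 × 4094, so the flattened logits seem to have 32016 classes (CodeLlama's vocabulary), while self.config.vocab_size at loss time is still 32000 (Llama 2's vocabulary), which is why .view(-1, 32000) cannot succeed. The 4094 would be consistent with, e.g., a batch of 2 sequences of 2048 tokens after the one-token shift. A quick check along these lines (a sketch only; it assumes the model and tokenizer objects from the snippet above, run right after from_pretrained and before any PEFT wrapping):

```python
# Compare the vocabulary size reported by the config, the actual lm_head width,
# and the tokenizer. For CodeLlama-7b all three should be 32016; if the config
# says 32000, the loss reshape in LlamaForCausalLM.forward fails as above.
print("config.vocab_size:", model.config.vocab_size)
print("lm_head out dim  :", model.get_output_embeddings().weight.shape[0])
print("tokenizer size   :", len(tokenizer))

# If tokens were added (e.g. a PAD token, as the Llama 2 recipe does), the
# embeddings also need to be resized to match the tokenizer:
# model.resize_token_embeddings(len(tokenizer))
```

This is only a guess from the shapes; it could also point to an older transformers version or a cached config that still carries the Llama 2 vocabulary size.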

Kushalamummigatti · Aug 29 '23 17:08