deep-learning-pytorch-huggingface
Re: fine-tune-llms-in-2024-with-trl.ipynb
from datasets import load_dataset
from random import randint
# Load our test dataset
eval_dataset = load_dataset("json", data_files="test_dataset.json", split="train")
rand_idx = randint(0, len(eval_dataset) - 1)  # randint is inclusive on both ends
# Test on sample
prompt = pipe.tokenizer.apply_chat_template(eval_dataset[rand_idx]["messages"][:2], tokenize=False, add_generation_prompt=True)
outputs = pipe(prompt, max_new_tokens=256, do_sample=False, temperature=0.1, top_k=50, top_p=0.1, eos_token_id=pipe.tokenizer.eos_token_id, pad_token_id=pipe.tokenizer.pad_token_id)
print(f"Query:\n{eval_dataset[rand_idx]['messages'][1]['content']}")
print(f"Original Answer:\n{eval_dataset[rand_idx]['messages'][2]['content']}")
print(f"Generated Answer:\n{outputs[0]['generated_text'][len(prompt):].strip()}")
Running this gave the following error:
---------------------------------------------------------------------------
NotImplementedError Traceback (most recent call last)
Cell In[14], line 11
9 # Test on sample
10 prompt = pipe.tokenizer.apply_chat_template(eval_dataset[rand_idx]["messages"][:2], tokenize=False, add_generation_prompt=True)
---> 11 outputs = pipe(prompt, max_new_tokens=256, do_sample=False, temperature=0.1, top_k=50, top_p=0.1, eos_token_id=pipe.tokenizer.eos_token_id, pad_token_id=pipe.tokenizer.pad_token_id)
13 print(f"Query:\n{eval_dataset[rand_idx]['messages'][1]['content']}")
14 print(f"Original Answer:\n{eval_dataset[rand_idx]['messages'][2]['content']}")
File /usr/local/lib/python3.10/dist-packages/transformers/pipelines/text_generation.py:208, in TextGenerationPipeline.__call__(self, text_inputs, **kwargs)
167 def __call__(self, text_inputs, **kwargs):
168 """
169 Complete the prompt(s) given as inputs.
170
(...)
206 ids of the generated text.
207 """
--> 208 return super().__call__(text_inputs, **kwargs)
File /usr/local/lib/python3.10/dist-packages/transformers/pipelines/base.py:1140, in Pipeline.__call__(self, inputs, num_workers, batch_size, *args, **kwargs)
1132 return next(
1133 iter(
1134 self.get_iterator(
(...)
1137 )
1138 )
1139 else:
-> 1140 return self.run_single(inputs, preprocess_params, forward_params, postprocess_params)
File /usr/local/lib/python3.10/dist-packages/transformers/pipelines/base.py:1147, in Pipeline.run_single(self, inputs, preprocess_params, forward_params, postprocess_params)
1145 def run_single(self, inputs, preprocess_params, forward_params, postprocess_params):
1146 model_inputs = self.preprocess(inputs, **preprocess_params)
-> 1147 model_outputs = self.forward(model_inputs, **forward_params)
1148 outputs = self.postprocess(model_outputs, **postprocess_params)
1149 return outputs
File /usr/local/lib/python3.10/dist-packages/transformers/pipelines/base.py:1046, in Pipeline.forward(self, model_inputs, **forward_params)
1044 with inference_context():
1045 model_inputs = self._ensure_tensor_on_device(model_inputs, device=self.device)
-> 1046 model_outputs = self._forward(model_inputs, **forward_params)
1047 model_outputs = self._ensure_tensor_on_device(model_outputs, device=torch.device("cpu"))
1048 else:
File /usr/local/lib/python3.10/dist-packages/transformers/pipelines/text_generation.py:271, in TextGenerationPipeline._forward(self, model_inputs, **generate_kwargs)
268 generate_kwargs["min_length"] += prefix_length
270 # BS x SL
--> 271 generated_sequence = self.model.generate(input_ids=input_ids, attention_mask=attention_mask, **generate_kwargs)
272 out_b = generated_sequence.shape[0]
273 if self.framework == "pt":
File /usr/local/lib/python3.10/dist-packages/peft/peft_model.py:1140, in PeftModelForCausalLM.generate(self, **kwargs)
1138 self.base_model.generation_config = self.generation_config
1139 try:
-> 1140 outputs = self.base_model.generate(**kwargs)
1141 except:
1142 self.base_model.prepare_inputs_for_generation = self.base_model_prepare_inputs_for_generation
File /usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py:115, in context_decorator.<locals>.decorate_context(*args, **kwargs)
112 @functools.wraps(func)
113 def decorate_context(*args, **kwargs):
114 with ctx_factory():
--> 115 return func(*args, **kwargs)
File /usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py:1718, in GenerationMixin.generate(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, **kwargs)
1701 return self.assisted_decoding(
1702 input_ids,
1703 assistant_model=assistant_model,
(...)
1714 **model_kwargs,
1715 )
1716 if generation_mode == GenerationMode.GREEDY_SEARCH:
1717 # 11. run greedy search
-> 1718 return self.greedy_search(
1719 input_ids,
1720 logits_processor=logits_processor,
1721 stopping_criteria=stopping_criteria,
1722 pad_token_id=generation_config.pad_token_id,
1723 eos_token_id=generation_config.eos_token_id,
1724 output_scores=generation_config.output_scores,
1725 return_dict_in_generate=generation_config.return_dict_in_generate,
1726 synced_gpus=synced_gpus,
1727 streamer=streamer,
1728 **model_kwargs,
1729 )
1731 elif generation_mode == GenerationMode.CONTRASTIVE_SEARCH:
1732 if not model_kwargs["use_cache"]:
File /usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py:2579, in GenerationMixin.greedy_search(self, input_ids, logits_processor, stopping_criteria, max_length, pad_token_id, eos_token_id, output_attentions, output_hidden_states, output_scores, return_dict_in_generate, synced_gpus, streamer, **model_kwargs)
2576 model_inputs = self.prepare_inputs_for_generation(input_ids, **model_kwargs)
2578 # forward pass to get next token
-> 2579 outputs = self(
2580 **model_inputs,
2581 return_dict=True,
2582 output_attentions=output_attentions,
2583 output_hidden_states=output_hidden_states,
2584 )
2586 if synced_gpus and this_peer_finished:
2587 continue # don't waste resources running the code we don't need
File /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1518, in Module._wrapped_call_impl(self, *args, **kwargs)
1516 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1517 else:
-> 1518 return self._call_impl(*args, **kwargs)
File /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1527, in Module._call_impl(self, *args, **kwargs)
1522 # If we don't have any hooks, we want to skip the rest of the logic in
1523 # this function, and just call forward.
1524 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1525 or _global_backward_pre_hooks or _global_backward_hooks
1526 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1527 return forward_call(*args, **kwargs)
1529 try:
1530 result = None
File /usr/local/lib/python3.10/dist-packages/accelerate/hooks.py:165, in add_hook_to_module.<locals>.new_forward(module, *args, **kwargs)
163 output = module._old_forward(*args, **kwargs)
164 else:
--> 165 output = module._old_forward(*args, **kwargs)
166 return module._hf_hook.post_forward(module, output)
File /usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py:1199, in LlamaForCausalLM.forward(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict)
1197 logits = torch.cat(logits, dim=-1)
1198 else:
-> 1199 logits = self.lm_head(hidden_states)
1200 logits = logits.float()
1202 loss = None
File /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1518, in Module._wrapped_call_impl(self, *args, **kwargs)
1516 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1517 else:
-> 1518 return self._call_impl(*args, **kwargs)
File /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1527, in Module._call_impl(self, *args, **kwargs)
1522 # If we don't have any hooks, we want to skip the rest of the logic in
1523 # this function, and just call forward.
1524 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1525 or _global_backward_pre_hooks or _global_backward_hooks
1526 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1527 return forward_call(*args, **kwargs)
1529 try:
1530 result = None
File /usr/local/lib/python3.10/dist-packages/accelerate/hooks.py:160, in add_hook_to_module.<locals>.new_forward(module, *args, **kwargs)
159 def new_forward(module, *args, **kwargs):
--> 160 args, kwargs = module._hf_hook.pre_forward(module, *args, **kwargs)
161 if module._hf_hook.no_grad:
162 with torch.no_grad():
File /usr/local/lib/python3.10/dist-packages/accelerate/hooks.py:293, in AlignDevicesHook.pre_forward(self, module, *args, **kwargs)
291 if self.weights_map[name].dtype == torch.int8:
292 fp16_statistics = self.weights_map[name.replace("weight", "SCB")]
--> 293 set_module_tensor_to_device(
294 module, name, self.execution_device, value=self.weights_map[name], fp16_statistics=fp16_statistics
295 )
297 return send_to_device(args, self.execution_device), send_to_device(
298 kwargs, self.execution_device, skip_keys=self.skip_keys
299 )
File /usr/local/lib/python3.10/dist-packages/accelerate/utils/modeling.py:347, in set_module_tensor_to_device(module, tensor_name, device, value, dtype, fp16_statistics)
345 module._parameters[tensor_name] = param_cls(new_value, requires_grad=old_value.requires_grad)
346 elif isinstance(value, torch.Tensor):
--> 347 new_value = value.to(device)
348 else:
349 new_value = torch.tensor(value, device=device)
NotImplementedError: Cannot copy out of meta tensor; no data!
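For context: the "Cannot copy out of meta tensor; no data!" error usually means that accelerate left part of the model on the meta device (weights registered without data) because it could not place them on the GPU when the model was loaded with device_map="auto", for example because GPU memory was still occupied by the training run. A quick diagnostic sketch, assuming the pipe object from the failing cell:
# List parameters that ended up on the meta device, i.e. have no data to copy
meta_params = [name for name, p in pipe.model.named_parameters() if p.device.type == "meta"]
print(f"{len(meta_params)} parameters on the meta device")
# Models loaded with device_map="auto" expose the layer-to-device assignment
if hasattr(pipe.model, "hf_device_map"):
    print(pipe.model.hf_device_map)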
Try again after restarting the kernel; it seems your GPU is already busy.
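If restarting the kernel alone does not free enough memory, a workaround is to drop the objects held by the training cells, clear the CUDA cache, and reload the fine-tuned adapter so that every weight gets real data on the GPU instead of staying on the meta device. A minimal sketch, not the notebook's exact code; the adapter path "code-llama-7b-text-to-sql" is a placeholder for your own output_dir:
import gc
import torch
from transformers import AutoTokenizer, pipeline
from peft import AutoPeftModelForCausalLM

# Drop references held by earlier cells (adjust the names to what you created)
del pipe
gc.collect()
torch.cuda.empty_cache()

# Reload the tokenizer and LoRA adapter; device_map="auto" can now place everything on the GPU
peft_model_id = "code-llama-7b-text-to-sql"  # placeholder: use your own output_dir
model = AutoPeftModelForCausalLM.from_pretrained(
    peft_model_id,
    device_map="auto",
    torch_dtype=torch.float16,
)
tokenizer = AutoTokenizer.from_pretrained(peft_model_id)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
After reloading, the evaluation cell above should run without the meta tensor error, provided the GPU has enough free memory to hold the full model.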