LlamaForSequenceClassification forward method shows different results with input_ids / inputs_embeds
System Info
transformers 4.44.0
Who can help?
@ArthurZucker
Information
- [ ] The official example scripts
- [X] My own modified scripts
Tasks
- [ ] An officially supported task in the examples folder (such as GLUE/SQuAD, ...)
- [X] My own task or dataset (give details below)
Reproduction
import torch
import torch.nn as nn
from typing import Optional, Tuple
from transformers import AutoTokenizer, LlamaForSequenceClassification

llama_tokenizer = AutoTokenizer.from_pretrained("../Meta-Llama-3.2-1B-Instruct", padding_side="right")
llama_tokenizer.pad_token = "<|finetune_right_pad_id|>"
llama_model = LlamaForSequenceClassification.from_pretrained(
    "../Meta-Llama-3.2-1B-Instruct",
    num_labels=1,
    torch_dtype=torch.bfloat16,
)
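For the classification head to locate the last non-padding token, the pad token id presumably also has to be set on the model config; this line is added here for completeness and is not part of the original script:

    # let LlamaForSequenceClassification know which id marks padding,
    # otherwise it cannot locate the last non-pad token even when input_ids are given
    llama_model.config.pad_token_id = llama_tokenizer.pad_token_id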
class CustomEmbeddingModel_input_embeds(nn.Module):
    def __init__(self, original_model, tokenizer):
        super(CustomEmbeddingModel_input_embeds, self).__init__()
        self.original_model = original_model
        self.tokenizer = tokenizer

    def forward(
        self,
        input_ids: Optional[torch.LongTensor] = None,
        attention_mask: Optional[torch.FloatTensor] = None,
        past_key_values: Optional[Tuple[Tuple[torch.Tensor]]] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        labels: Optional[torch.LongTensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ):
        # look up the embeddings ourselves and pass only inputs_embeds downstream
        if inputs_embeds is None:
            inputs_embeds = self.original_model.model.embed_tokens(input_ids)
        return self.original_model(
            input_ids=None,
            attention_mask=attention_mask,
            past_key_values=past_key_values,
            inputs_embeds=inputs_embeds,
            labels=labels,
            use_cache=use_cache,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

llama_model_input_embeds = CustomEmbeddingModel_input_embeds(llama_model, llama_tokenizer)
class CustomEmbeddingModel_input_ids(nn.Module):
    def __init__(self, original_model, tokenizer):
        super(CustomEmbeddingModel_input_ids, self).__init__()
        self.original_model = original_model
        self.tokenizer = tokenizer

    def forward(
        self,
        input_ids: Optional[torch.LongTensor] = None,
        attention_mask: Optional[torch.FloatTensor] = None,
        past_key_values: Optional[Tuple[Tuple[torch.Tensor]]] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        labels: Optional[torch.LongTensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ):
        # same embedding lookup as above, but the embeddings are intentionally
        # discarded and the original input_ids are passed downstream instead
        if inputs_embeds is None:
            inputs_embeds = self.original_model.model.embed_tokens(input_ids)
        return self.original_model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            past_key_values=past_key_values,
            inputs_embeds=None,
            labels=labels,
            use_cache=use_cache,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

llama_model_input_ids = CustomEmbeddingModel_input_ids(llama_model, llama_tokenizer)
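To see the discrepancy, both wrappers can be run on the same right-padded batch and their logits compared (a minimal sketch; the sample sentences are placeholders, not from the original report):

    texts = ["Short sentence.", "A much longer example sentence that forces the first one to be right padded."]
    batch = llama_tokenizer(texts, padding=True, return_tensors="pt")

    with torch.no_grad():
        out_ids = llama_model_input_ids(input_ids=batch["input_ids"],
                                        attention_mask=batch["attention_mask"])
        out_embeds = llama_model_input_embeds(input_ids=batch["input_ids"],
                                              attention_mask=batch["attention_mask"])

    # the padded (shorter) sequence is where the two results are expected to diverge:
    # the inputs_embeds path pools from the last position (padding) rather than the last real token
    print(out_ids.logits)
    print(out_embeds.logits)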
Expected behavior
At https://github.com/huggingface/transformers/blob/3f06f95ebe617b192251ef756518690f5bc7ff76/src/transformers/models/llama/modeling_llama.py#L1314, sequence_lengths is derived only from input_ids (by locating the pad token). When inputs_embeds is passed instead, sequence_lengths falls back to the default of -1, so the classification logits are pooled from the last position of the sequence, which under right padding is a pad token rather than the last real token. However, the forward method of LlamaModel does not accept both input_ids and inputs_embeds at the same time, so there is no way to supply embeddings and still get the correct sequence lengths.
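For reference, a possible workaround (just a sketch, not something transformers provides) is to bypass the built-in pooling, run the base model on the embeddings, and take the last non-pad position from attention_mask before applying the score head manually:

    def pooled_logits_from_embeds(model, inputs_embeds, attention_mask):
        # model: a LlamaForSequenceClassification instance; model.model is the base
        # LlamaModel and model.score is the classification head
        hidden = model.model(inputs_embeds=inputs_embeds,
                             attention_mask=attention_mask).last_hidden_state
        last_idx = attention_mask.sum(dim=-1) - 1                   # last non-pad position per row
        batch_idx = torch.arange(hidden.shape[0], device=hidden.device)
        return model.score(hidden[batch_idx, last_idx])             # shape (batch, num_labels)

As long as attention_mask correctly marks the padding, this should pool from the same token whether the inputs arrive as ids or as embeddings.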