ValueError: Required inputs (['token_type_ids']) are missing from input feed (['input_ids', 'attention_mask']).
System Info
- optimum: 1.13.2
- platform: Ubuntu 20.04
- python: 3.11.4
Who can help?
@JingyaHuang @echarlaix
Information
- [ ] The official example scripts
- [X] My own modified scripts
Tasks
- [X] An officially supported task in the examples folder (such as GLUE/SQuAD, ...)
- [ ] My own task or dataset (give details below)
Reproduction (minimal, reproducible, runnable)
First export onnx model:
$ optimum-cli export onnx --model my_model/ --task text-classification --device cpu onnx_model/
Framework not specified. Using pt to export to ONNX.
Using the export variant default. Available variants are:
- default: The default ONNX variant.
Using framework PyTorch: 2.0.1
Overriding 1 configuration item(s)
- use_cache -> False
================ Diagnostic Run torch.onnx.export version 2.0.1 ================
verbose: False, log level: Level.ERROR
======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================
Post-processing the exported models...
Deduplicating shared (tied) weights...
Validating models in subprocesses...
Validating ONNX model onnx_model/model.onnx...
-[✓] ONNX model output names match reference model (logits)
- Validating ONNX Model output "logits":
-[✓] (2, 2) matches (2, 2)
-[✓] all values close (atol: 0.0001)
The ONNX export succeeded and the exported model was saved at: onnx_model
Then run:
import time
from pathlib import Path

import torch
from loguru import logger
from transformers import XLMRobertaTokenizer

from optimum.onnxruntime import ORTModelForSequenceClassification


def run():
    logits = []
    batch_size = 32
    start = time.time()
    for start_index in range(0, len(texts), batch_size):
        # tokenize
        encoded = tokenizer(
            texts[start_index : start_index + batch_size],  # type: ignore
            padding="longest",
            truncation=True,
            return_tensors="pt",
            max_length=512,
        )
        batch_logits = model(**encoded).logits  # shape: (batch_size, num_labels)
        logits.append(batch_logits)
    logits = torch.cat(logits, dim=0)  # shape: (texts_len, num_labels)
    pred_probs = logits.softmax(dim=-1)  # shape: (texts_len, num_labels)
    logger.info(
        f"inference speed: {len(texts) / (time.time() - start):.2f} texts/s, {len(texts)} texts total."
    )
    return pred_probs


if __name__ == "__main__":
    device = "cpu"
    texts = Path("data.txt").read_text(encoding="utf8").splitlines()
    BASE_MODEL = "microsoft/Multilingual-MiniLM-L12-H384"
    MODELPATH = "onnx_model/"
    tokenizer = XLMRobertaTokenizer.from_pretrained(BASE_MODEL, use_fast=True)
    model = ORTModelForSequenceClassification.from_pretrained(MODELPATH)
    id2label = model.config.id2label
    label_list = list(id2label.values())
    run1 = run()
    run2 = run()
    logger.info(torch.equal(run1, run2))
The model can't be accessed publicly.
Expected behavior
Complete without error.
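A workaround that might help here (just a sketch, not verified against this private model, and assuming the exported graph really does declare token_type_ids as an input) is to ask the tokenizer to emit token_type_ids explicitly, or to fall back to an all-zero tensor for single-sequence inputs:

encoded = tokenizer(
    texts[start_index : start_index + batch_size],
    padding="longest",
    truncation=True,
    max_length=512,
    return_tensors="pt",
    return_token_type_ids=True,  # force the tokenizer to include segment ids
)
# Fallback in case the tokenizer still omits them: single-segment inputs can use zeros.
if "token_type_ids" not in encoded:
    encoded["token_type_ids"] = torch.zeros_like(encoded["input_ids"])
batch_logits = model(**encoded).logits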
I got a similar issue with a fine-tuned version of the gte-small model:
ValueError: Required inputs (['segment_ids']) are missing from input feed (['input_ids', 'token_type_ids', 'attention_mask']).
when trying to do model(tokens) with a model created as an onnxruntime.InferenceSession.
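When running a raw onnxruntime.InferenceSession, one rough way to satisfy whatever inputs the graph declares (sketch only; it assumes the missing input, e.g. segment_ids, is a segment/token-type tensor that can safely be all zeros for single-sequence inputs) is to build the feed from sess.get_inputs():

import numpy as np
import onnxruntime

sess = onnxruntime.InferenceSession("model.onnx")
tokens = dict(tokenizer(["some text"], padding=True, return_tensors="np"))

# Fill any declared input the tokenizer did not produce (e.g. segment_ids) with zeros.
for inp in sess.get_inputs():
    if inp.name not in tokens:
        tokens[inp.name] = np.zeros_like(tokens["input_ids"])

outputs = sess.run(None, tokens)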
The error
ValueError: Required inputs (['token_type_ids']) are missing from input feed (['input_ids', 'attention_mask'])
means that your input feed currently contains the keys input_ids and attention_mask, but is missing token_type_ids. You can see the complete list of required inputs by inspecting the onnxruntime session:
import onnxruntime
sess = onnxruntime.InferenceSession("model.onnx")
print([inp.name for inp in sess.get_inputs()])
Within Hugging Face, the session appears to be abstracted away inside ORTModelForSequenceClassification, so I can't say exactly how to access it in this situation.
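If I'm not mistaken (an unverified assumption about optimum internals), the underlying onnxruntime.InferenceSession is stored on the model attribute of the ORT model, so inspecting its declared inputs might look like this:

from optimum.onnxruntime import ORTModelForSequenceClassification

ort_model = ORTModelForSequenceClassification.from_pretrained("onnx_model/")
# Assumption: `ort_model.model` is the wrapped onnxruntime.InferenceSession.
print([inp.name for inp in ort_model.model.get_inputs()])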
I have the same error with a multilingual-e5-small model fine-tuned for a token-classification task and exported to ONNX.
Here is my trace:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[5], line 1
----> 1 pipe('Apple iPhone 14 Pro 256GB')
File ~/gpt/lib/python3.10/site-packages/transformers/pipelines/token_classification.py:249, in TokenClassificationPipeline.__call__(self, inputs, **kwargs)
246 if offset_mapping:
247 kwargs["offset_mapping"] = offset_mapping
--> 249 return super().__call__(inputs, **kwargs)
File ~/gpt/lib/python3.10/site-packages/transformers/pipelines/base.py:1111, in Pipeline.__call__(self, inputs, num_workers, batch_size, *args, **kwargs)
1109 return self.iterate(inputs, preprocess_params, forward_params, postprocess_params)
1110 elif self.framework == "pt" and isinstance(self, ChunkPipeline):
-> 1111 return next(
1112 iter(
1113 self.get_iterator(
1114 [inputs], num_workers, batch_size, preprocess_params, forward_params, postprocess_params
1115 )
1116 )
1117 )
1118 else:
1119 return self.run_single(inputs, preprocess_params, forward_params, postprocess_params)
File ~/gpt/lib/python3.10/site-packages/transformers/pipelines/pt_utils.py:124, in PipelineIterator.__next__(self)
121 return self.loader_batch_item()
123 # We're out of items within a batch
--> 124 item = next(self.iterator)
125 processed = self.infer(item, **self.params)
126 # We now have a batch of "inferred things".
File ~/gpt/lib/python3.10/site-packages/transformers/pipelines/pt_utils.py:266, in PipelinePackIterator.__next__(self)
263 return accumulator
265 while not is_last:
--> 266 processed = self.infer(next(self.iterator), **self.params)
267 if self.loader_batch_size is not None:
268 if isinstance(processed, torch.Tensor):
File ~/gpt/lib/python3.10/site-packages/transformers/pipelines/base.py:1025, in Pipeline.forward(self, model_inputs, **forward_params)
1023 with inference_context():
1024 model_inputs = self._ensure_tensor_on_device(model_inputs, device=self.device)
-> 1025 model_outputs = self._forward(model_inputs, **forward_params)
1026 model_outputs = self._ensure_tensor_on_device(model_outputs, device=torch.device("cpu"))
1027 else:
File ~/gpt/lib/python3.10/site-packages/transformers/pipelines/token_classification.py:286, in TokenClassificationPipeline._forward(self, model_inputs)
284 logits = self.model(**model_inputs)[0]
285 else:
--> 286 output = self.model(**model_inputs)
287 logits = output["logits"] if isinstance(output, dict) else output[0]
289 return {
290 "logits": logits,
291 "special_tokens_mask": special_tokens_mask,
(...)
295 **model_inputs,
296 }
File ~/gpt/lib/python3.10/site-packages/optimum/modeling_base.py:91, in OptimizedModel.__call__(self, *args, **kwargs)
90 def __call__(self, *args, **kwargs):
---> 91 return self.forward(*args, **kwargs)
File ~/gpt/lib/python3.10/site-packages/optimum/onnxruntime/modeling_ort.py:1480, in ORTModelForTokenClassification.forward(self, input_ids, attention_mask, token_type_ids, **kwargs)
1477 onnx_inputs["token_type_ids"] = token_type_ids
1479 # run inference
-> 1480 outputs = self.model.run(None, onnx_inputs)
1481 logits = outputs[self.output_names["logits"]]
1483 if use_torch:
File ~/gpt/lib/python3.10/site-packages/onnxruntime/capi/onnxruntime_inference_collection.py:216, in Session.run(self, output_names, input_feed, run_options)
202 def run(self, output_names, input_feed, run_options=None):
203 """
204 Compute the predictions.
205
(...)
214 sess.run([output_name], {input_name: x})
215 """
--> 216 self._validate_input(list(input_feed.keys()))
217 if not output_names:
218 output_names = [output.name for output in self._outputs_meta]
File ~/gpt/lib/python3.10/site-packages/onnxruntime/capi/onnxruntime_inference_collection.py:198, in Session._validate_input(self, feed_input_names)
196 missing_input_names.append(input.name)
197 if missing_input_names:
--> 198 raise ValueError(
199 f"Required inputs ({missing_input_names}) are missing from input feed ({feed_input_names})."
200 )
ValueError: Required inputs (['token_type_ids']) are missing from input feed (['input_ids', 'attention_mask']).
@JingyaHuang @echarlaix I think it may be a problem with how token_type_ids and attention_mask are handled at inference time.
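For the pipeline case, one thing worth trying (a sketch, under the assumption that the tokenizer simply drops token_type_ids because "token_type_ids" is not in its model_input_names, and that the paths below are illustrative) is to force the tokenizer to include them before building the pipeline:

from transformers import AutoTokenizer, pipeline
from optimum.onnxruntime import ORTModelForTokenClassification

ort_model = ORTModelForTokenClassification.from_pretrained("onnx_model/")
tokenizer = AutoTokenizer.from_pretrained("onnx_model/")

# Make the tokenizer emit token_type_ids so the ONNX graph receives every input it declares.
if "token_type_ids" not in tokenizer.model_input_names:
    tokenizer.model_input_names = ["input_ids", "token_type_ids", "attention_mask"]

pipe = pipeline("token-classification", model=ort_model, tokenizer=tokenizer)
print(pipe("Apple iPhone 14 Pro 256GB"))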