tiiuae/falcon-7b-instruct : ValueError: The following `model_kwargs` are not used by the model: ['token_type_ids']

Loading tiiuae/falcon-7b-instruct through panml and then calling predict() raises the ValueError below:
In [7]: x=LLM()
tiiuae/falcon-7b-instruct
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Loading checkpoint shards: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:06<00:00, 3.31s/it]
Model processing is set on CPU.
In [8]: x.lm.predict('hi')
/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py:1201: UserWarning: You have modified the pretrained model configuration to control generation. This is a deprecated strategy to control generation and will be removed soon, in a future version. Please use a generation configuration file (see https://huggingface.co/docs/transformers/main_classes/text_generation)
warnings.warn(
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-8-421d79812622> in <module>
----> 1 x.lm.predict('hi')
/usr/local/lib/python3.10/dist-packages/panml/llm/huggingface.py in predict(self, text, max_length, skip_special_tokens, display_probability, num_return_sequences, temperature, top_p, top_k, no_repeat_ngram_size, prompt_modifier, keep_history)
285
286 # Call model for text generation
--> 287 output_context = self._predict(context, max_length=max_length, skip_special_tokens=skip_special_tokens,
288 display_probability=display_probability, num_return_sequences=num_return_sequences,
289 temperature=temperature, top_p=top_p, top_k=top_k, no_repeat_ngram_size=no_repeat_ngram_size)
/usr/local/lib/python3.10/dist-packages/panml/llm/huggingface.py in _predict(self, text, max_length, skip_special_tokens, display_probability, num_return_sequences, temperature, top_p, top_k, no_repeat_ngram_size)
190
191 input_batch = self.tokenizer(text, return_tensors='pt').to(torch.device(self.device))
--> 192 output = self.model_hf.generate(**input_batch,
193 max_length=max_length,
194 pad_token_id=self.model_hf.config.eos_token_id,
/usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py in decorate_context(*args, **kwargs)
113 def decorate_context(*args, **kwargs):
114 with ctx_factory():
--> 115 return func(*args, **kwargs)
116
117 return decorate_context
/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py in generate(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, **kwargs)
1211 model_kwargs = generation_config.update(**kwargs) # All unused kwargs must be model kwargs
1212 generation_config.validate()
-> 1213 self._validate_model_kwargs(model_kwargs.copy())
1214
1215 # 2. Set generation parameters if not already defined
/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py in _validate_model_kwargs(self, model_kwargs)
1103
1104 if unused_model_args:
-> 1105 raise ValueError(
1106 f"The following `model_kwargs` are not used by the model: {unused_model_args} (note: typos in the"
1107 " generate arguments will also show up in this list)"
ValueError: The following `model_kwargs` are not used by the model: ['token_type_ids'] (note: typos in the generate arguments will also show up in this list)
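The failure appears to come from the tokenizer output rather than from generate() itself: the Falcon tokenizer returns a token_type_ids entry in its encoding, but the model's custom forward() does not accept that argument, so _validate_model_kwargs() rejects it. Below is a minimal sketch of a possible workaround inside panml's _predict(); self.tokenizer, self.model_hf, self.device, and the generate() arguments are taken from the traceback above, while the pop() line is my assumption about the fix, not panml's actual code:

    input_batch = self.tokenizer(text, return_tensors='pt').to(torch.device(self.device))
    input_batch.pop('token_type_ids', None)  # drop the key Falcon's forward() does not accept
    output = self.model_hf.generate(**input_batch,
                                    max_length=max_length,
                                    pad_token_id=self.model_hf.config.eos_token_id)

An equivalent option might be to pass return_token_type_ids=False to the tokenizer call, which asks the tokenizer not to emit that key in the first place.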
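To check the behavior outside panml, here is a standalone sketch with plain transformers (assuming the tokenizer behaves as in the traceback; max_length=50 is an arbitrary choice, and the model download is large):

    from transformers import AutoModelForCausalLM, AutoTokenizer

    model_id = 'tiiuae/falcon-7b-instruct'
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)

    inputs = tokenizer('hi', return_tensors='pt')
    print(inputs.keys())  # includes 'token_type_ids', the key generate() rejects

    # Removing the offending key should avoid the ValueError.
    inputs.pop('token_type_ids', None)
    output = model.generate(**inputs, max_length=50,
                            pad_token_id=tokenizer.eos_token_id)
    print(tokenizer.decode(output[0], skip_special_tokens=True))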