NotImplementedError when using CTransformers AutoTokenizer
env:
transformers==4.35.2, ctransformers==0.2.27+cu121
from ctransformers import AutoModelForCausalLM, AutoTokenizer

model_name = "/home/me/project/search_engine/text-generation-webui/models/OpenHermes-2.5-Mistral-7B-GGUF/openhermes-2.5-mistral-7b.Q5_K_M.gguf"

def load_model(model_name: str):
    model = AutoModelForCausalLM.from_pretrained(model_name, hf=True)
    tokenizer = AutoTokenizer.from_pretrained(model)
    return model, tokenizer

tokenizer, model = load_model(model_name)
gives output:
---------------------------------------------------------------------------
NotImplementedError Traceback (most recent call last)
/project/project/eda.ipynb Cell 13 line 1
7 tokenizer = AutoTokenizer.from_pretrained(model)
9 return model, tokenizer
---> 11 tokenizer, model = load_model(model_name)
/project/project/eda.ipynb Cell 13 line 7
5 def load_model(model_name: str):
6 model = AutoModelForCausalLM.from_pretrained(model_name, hf=True)
----> 7 tokenizer = AutoTokenizer.from_pretrained(model)
9 return model, tokenizer
File ~/project/search_engine/text-generation-webui/installer_files/env/lib/python3.11/site-packages/ctransformers/hub.py:268, in AutoTokenizer.from_pretrained(cls, model)
261 if not isinstance(model, CTransformersModel):
262 raise TypeError(
263 f"Currently `AutoTokenizer.from_pretrained` only accepts a model object. Please use:\n\n"
264 " model = AutoModelForCausalLM.from_pretrained(..., hf=True)\n"
265 " tokenizer = AutoTokenizer.from_pretrained(model)"
266 )
--> 268 return CTransformersTokenizer(model._llm)
File ~/project/search_engine/text-generation-webui/installer_files/env/lib/python3.11/site-packages/ctransformers/transformers.py:84, in CTransformersTokenizer.__init__(self, llm, **kwargs)
83 def __init__(self, llm: LLM, **kwargs):
---> 84 super().__init__(**kwargs)
85 self._llm = llm
File ~/project/search_engine/text-generation-webui/installer_files/env/lib/python3.11/site-packages/transformers/tokenization_utils.py:367, in PreTrainedTokenizer.__init__(self, **kwargs)
363 super().__init__(**kwargs)
365 # 4. If some of the special tokens are not part of the vocab, we add them, at the end.
366 # the order of addition is the same as self.SPECIAL_TOKENS_ATTRIBUTES following `tokenizers`
--> 367 self._add_tokens(
368 [token for token in self.all_special_tokens_extended if token not in self._added_tokens_encoder],
369 special_tokens=True,
370 )
372 self._decode_use_source_tokenizer = False
File ~/project/search_engine/text-generation-webui/installer_files/env/lib/python3.11/site-packages/transformers/tokenization_utils.py:467, in PreTrainedTokenizer._add_tokens(self, new_tokens, special_tokens)
465 return added_tokens
466 # TODO this is fairly slow to improve!
--> 467 current_vocab = self.get_vocab().copy()
468 new_idx = len(current_vocab) # only call this once, len gives the last index + 1
469 for token in new_tokens:
File ~/project/search_engine/text-generation-webui/installer_files/env/lib/python3.11/site-packages/transformers/tokenization_utils_base.py:1677, in PreTrainedTokenizerBase.get_vocab(self)
1667 def get_vocab(self) -> Dict[str, int]:
1668 """
1669 Returns the vocabulary as a dictionary of token to index.
1670
(...)
1675 `Dict[str, int]`: The vocabulary.
1676 """
-> 1677 raise NotImplementedError()
NotImplementedError:
Seems to be a similar issue to #154.
Any suggestions on how to resolve this would be appreciated.
Downgrading transformers fixed this for me: pip install --upgrade transformers==4.33
Just for any future folks stumbling upon this issue (as I did):
pip install --upgrade transformers==4.33 works as expected, but it does pin you to an outdated transformers (pretty obvious).
I ended up upgrading transformers to the latest version instead, using the ctransformers package only for AutoModelForCausalLM and the plain transformers package for the AutoTokenizer.
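For reference, a minimal sketch of that combination. The repo names and model_file below are assumptions based on the model from the original post; substitute whichever GGUF file and matching base-model repo you actually use:

from ctransformers import AutoModelForCausalLM   # ctransformers loads the GGUF weights
from transformers import AutoTokenizer           # plain transformers tokenizer, not ctransformers'

# hf=True wraps the ggml model so it can be used with transformers pipelines.
model = AutoModelForCausalLM.from_pretrained(
    "TheBloke/OpenHermes-2.5-Mistral-7B-GGUF",   # assumption: a GGUF repo or local path
    model_file="openhermes-2.5-mistral-7b.Q5_K_M.gguf",
    model_type="mistral",
    hf=True,
)

# The tokenizer comes from the original (non-GGUF) model repo via plain transformers,
# so ctransformers' AutoTokenizer and its unimplemented get_vocab are never touched.
tokenizer = AutoTokenizer.from_pretrained("teknium/OpenHermes-2.5-Mistral-7B")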
Although this works, it pops up the following error when I run it:
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument index in method wrapper_CUDA_gather)
Here is my code:
from ctransformers import AutoModelForCausalLM, AutoTokenizer
from transformers import pipeline
from langchain.llms import HuggingFacePipeline
from langchain.chains import LLMChain

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    model_file="tinyllama-1.1b-chat-v1.0.Q5_K_M.gguf",
    model_type="llama",
    hf=True,
)
tokenizer = AutoTokenizer.from_pretrained(model)
terminators = [tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|assistant|>")]

text_generation_pipeline = pipeline(
    model=model,
    device="cuda",
    tokenizer=tokenizer,
    task="text-generation",
    temperature=0.2,
    do_sample=True,
    repetition_penalty=1.1,
    return_full_text=False,
    max_new_tokens=256,
    eos_token_id=terminators,
)

llm = HuggingFacePipeline(pipeline=text_generation_pipeline)
llm_chain = LLMChain(llm=llm, prompt=chat_prompt, llm_kwargs={"device": "cuda"})
# chat_prompt, user_input, and memory come from the rest of my setup (omitted here)
llm_chain.invoke(
    input={"user_input": user_input, "history": memory.chat_memory.messages},
    stop=["<|user|>", "Human:"],
)["text"]