Getting a RuntimeError even when running the examples from the website with the transformers backend.
Describe the issue as clearly as possible:
Hello, I am using the outlines library with the transformers backend for the first time. Even with the examples from the documentation I get a RuntimeError. Best.
This is the error that I get:
RuntimeError: Failed to import transformers.models.llama.tokenization_llama because of the following error (look up to see its traceback):
No module named 'sentencepiece'
Steps/code to reproduce the bug:
Run this code:
from pydantic import BaseModel
from outlines import models, generate

class User(BaseModel):
    name: str
    last_name: str
    id: int

model = models.transformers("microsoft/Phi-3-mini-4k-instruct", device="cuda")
generator = generate.json(model, User)
result = generator(
    "Create a user profile with the fields name, last_name and id"
)
print(result)
# User(name="John", last_name="Doe", id=11)
Expected result:
To run smoothly.
Error message:
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
File /mnt/bulk-vega/shahin/LLM/projects/MS/Barcelona/envarcelona/lib/python3.11/site-packages/transformers/utils/import_utils.py:1967, in _LazyModule._get_module(self, module_name)
1966 try:
-> 1967 return importlib.import_module("." + module_name, self.__name__)
1968 except Exception as e:
File /usr/lib/python3.11/importlib/__init__.py:126, in import_module(name, package)
125 level += 1
--> 126 return _bootstrap._gcd_import(name[level:], package, level)
File <frozen importlib._bootstrap>:1206, in _gcd_import(name, package, level)
File <frozen importlib._bootstrap>:1178, in _find_and_load(name, import_)
File <frozen importlib._bootstrap>:1149, in _find_and_load_unlocked(name, import_)
File <frozen importlib._bootstrap>:690, in _load_unlocked(spec)
File <frozen importlib._bootstrap_external>:940, in exec_module(self, module)
File <frozen importlib._bootstrap>:241, in _call_with_frames_removed(f, *args, **kwds)
File /mnt/bulk-vega/shahin/LLM/projects/MS/Barcelona/envarcelona/lib/python3.11/site-packages/transformers/models/llama/tokenization_llama.py:27
25 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
---> 27 import sentencepiece as spm
29 from ...convert_slow_tokenizer import import_protobuf
ModuleNotFoundError: No module named 'sentencepiece'
The above exception was the direct cause of the following exception:
RuntimeError Traceback (most recent call last)
Cell In[1], line 13
9 last_name: str
10 id: int
---> 13 model = models.transformers("microsoft/Phi-3-mini-4k-instruct", device="cuda")
14 generator = generate.json(model, User)
15 result = generator(
16 "Create a user profile with the fields name, last_name and id"
17 )
File /mnt/bulk-vega/shahin/LLM/projects/MS/Barcelona/envarcelona/lib/python3.11/site-packages/outlines/models/transformers.py:435, in transformers(model_name, device, model_kwargs, tokenizer_kwargs, model_class, tokenizer_class)
432 tokenizer_kwargs.setdefault("padding_side", "left")
433 tokenizer = tokenizer_class.from_pretrained(model_name, **tokenizer_kwargs)
--> 435 return Transformers(model, tokenizer)
File /mnt/bulk-vega/shahin/LLM/projects/MS/Barcelona/envarcelona/lib/python3.11/site-packages/outlines/models/transformers.py:138, in Transformers.__init__(self, model, tokenizer)
132 def __init__(
133 self,
134 model: "PreTrainedModel",
135 tokenizer: "PreTrainedTokenizer",
136 ):
137 self.model = model
--> 138 self.tokenizer = TransformerTokenizer(tokenizer)
File /mnt/bulk-vega/shahin/LLM/projects/MS/Barcelona/envarcelona/lib/python3.11/site-packages/outlines/models/transformers.py:80, in TransformerTokenizer.__init__(self, tokenizer, **kwargs)
77 self.special_tokens = set(self.tokenizer.all_special_tokens)
79 self.vocabulary = self.tokenizer.get_vocab()
---> 80 self.is_llama = isinstance(self.tokenizer, get_llama_tokenizer_types())
File /mnt/bulk-vega/shahin/LLM/projects/MS/Barcelona/envarcelona/lib/python3.11/site-packages/outlines/models/transformers.py:27, in get_llama_tokenizer_types()
21 """Get all the Llama tokenizer types/classes that need work-arounds.
22
23 When they can't be imported, a dummy class is created.
24
25 """
26 try:
---> 27 from transformers.models.llama import LlamaTokenizer
28 except ImportError:
30 class LlamaTokenizer: # type: ignore
File <frozen importlib._bootstrap>:1231, in _handle_fromlist(module, fromlist, import_, recursive)
File /mnt/bulk-vega/shahin/LLM/projects/MS/Barcelona/envarcelona/lib/python3.11/site-packages/transformers/utils/import_utils.py:1955, in _LazyModule.__getattr__(self, name)
1953 value = Placeholder
1954 elif name in self._class_to_module.keys():
-> 1955 module = self._get_module(self._class_to_module[name])
1956 value = getattr(module, name)
1957 elif name in self._modules:
File /mnt/bulk-vega/shahin/LLM/projects/MS/Barcelona/envarcelona/lib/python3.11/site-packages/transformers/utils/import_utils.py:1969, in _LazyModule._get_module(self, module_name)
1967 return importlib.import_module("." + module_name, self.__name__)
1968 except Exception as e:
-> 1969 raise RuntimeError(
1970 f"Failed to import {self.__name__}.{module_name} because of the following error (look up to see its"
1971 f" traceback):\n{e}"
1972 ) from e
RuntimeError: Failed to import transformers.models.llama.tokenization_llama because of the following error (look up to see its traceback):
No module named 'sentencepiece'
Outlines/Python version information:
Outlines version: 0.2.3, Python version: 3.11.2
Context for the issue:
No response
pip install sentencepiece is the quick fix. This should also be addressed by using pip install 'outlines[transformers]', but I don't currently see it in the pyproject extras.
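For context, the fallback in outlines' get_llama_tokenizer_types (visible in the traceback above) only catches ImportError, but transformers' lazy import machinery re-raises the missing-module error as a RuntimeError, so the dummy-class fallback is never reached. A minimal, hypothetical sketch of a more defensive guard (not the library's actual code) would be:

def get_llama_tokenizer_types_defensive():
    # Hypothetical variant of outlines' get_llama_tokenizer_types() that also
    # catches the RuntimeError raised by transformers' lazy module when an
    # optional dependency such as sentencepiece is missing.
    try:
        from transformers.models.llama import LlamaTokenizer
    except (ImportError, RuntimeError):
        # Fall back to a dummy class so isinstance() checks still work.
        class LlamaTokenizer:  # type: ignore
            pass
    return (LlamaTokenizer,)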
Closing the issue as I can't reproduce with v1. The new syntax for the code snippet provided is the following:
from pydantic import BaseModel
import outlines
import transformers

class User(BaseModel):
    name: str
    last_name: str
    id: int

model = outlines.from_transformers(
    transformers.AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct", device_map="cuda"),
    transformers.AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct"),
)
generator = outlines.Generator(model, User)
result = generator(
    "Create a user profile with the fields name, last_name and id",
    max_new_tokens=50
)
print(result)
# { "name": "John", "last_name": "Doe", "id": 3 }
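If a typed object is preferred over the raw JSON string, the result can be parsed back through the same Pydantic model. Continuing from the snippet above (and assuming the generator returns a JSON string, as the comment suggests):

user = User.model_validate_json(result)
print(user.name, user.last_name, user.id)
# John Doe 3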