guidance
guidance copied to clipboard
Multi-token healing off-by-one in block output?
import guidance
import transformers
class Transformer(guidance.llms.Transformers):
    """Chat-role adapter: opens each role block with a `role:` prefix and
    closes it with the model's `</s>` end-of-sequence token."""

    @staticmethod
    def role_start(role):
        # e.g. "user:" / "assistant:" — the separator the healing bug leaks.
        return f"{role}:"

    @staticmethod
    def role_end(role):
        # Every role block is terminated by the EOS token.
        return '</s>'
def test_healing_prefix():
    """Regression test: with token healing enabled, the generated assistant
    response must not begin with the ':' role separator emitted by
    ``Transformer.role_start``."""
    model_name = 'TheBloke/vicuna-7B-1.1-HF'
    # NOTE(review): loads a 7B model in 8-bit onto GPU — requires bitsandbytes
    # and a CUDA device; this is an integration test, not a unit test.
    hf_model = transformers.AutoModelForCausalLM.from_pretrained(model_name, load_in_8bit=True, device_map='auto')
    hf_tokenizer = transformers.AutoTokenizer.from_pretrained(model_name, use_fast=False)
    prompt = guidance(
        """{{#system}}You are a helpful assistant.{{/system}}
{{~#user}}What is the capital of the USA?{{/user}}
{{~#assistant}}{{~gen 'response' max_tokens=20}}{{/assistant}}""",
        llm=Transformer(hf_model, hf_tokenizer)
    )
    result = prompt()
    print(result["response"])
    # The leaked role separator is the symptom of the multi-token healing
    # off-by-one: a healthy run never starts the response with ':'.
    assert not result["response"].startswith(':')
Save the above to `tests/test_healing.py` inside the guidance repository and run `pytest -rx tests/test_healing.py` from the repository root.
E AssertionError: assert not True
E + where True = <built-in method startswith of str object at 0x7fe024f410b0>(':')
E + where <built-in method startswith of str object at 0x7fe024f410b0> = ': The capital of the USA is Washington D.C'.startswith
The test passes on any commit before 0185eb1, e.g. after
git checkout 0185eb1^
I could not reproduce this on master. But I'll try again after I finish the grammar cleanup I am working on.
Hmm I'm still seeing this on main (commit 79fd06a72c9d5c6c98a1864b43e417335c580f59)