sglang
sglang copied to clipboard
select() on first assistant token broken (in different ways in Mistral and Llama). Likely tokenization issue.
Below is a simple reproduction with its output. It shows that Llama and Mistral each pick a clearly nonsensical first token in select(),
depending on whether the assistant message starts with a leading space, and that the two models fail in opposite cases.
@sglang.function
def selected(s):
    """Constrained choice as the very first assistant token, no leading space.

    Appends a user turn ('Hi!'), opens the assistant turn, and immediately
    asks ``gen`` to pick one of three candidate strings into ``var`` before
    closing the assistant turn. '^*B^&A' is the nonsensical candidate used
    to expose a bad selection.
    """
    s += sglang.user('Hi!')
    s += sglang.assistant_begin()
    # Nothing is emitted between the assistant prefix and the choice.
    s += sglang.gen('var', choices=('Hello', 'Goodbye', '^*B^&A'))
    s += sglang.assistant_end()
# Run the no-leading-space selection on `runtime` and print the full text.
selected_state = selected.run(backend=runtime)
print(selected_state.text())
# Recorded output per model:
# Mistral: [INST] Hi! [/INST]^*B^&A </s><s>
# LLAMA: [INST] Hi! [/INST]Hello </s><s>
@sglang.function
def selected_space(s):
    """Constrained choice as the first assistant token, after a leading space.

    Same program as the no-space variant (user turn 'Hi!', then a choice
    among three candidates stored in ``var``), except that a single space is
    appended right after the assistant turn is opened and before the choice.
    """
    s += sglang.user('Hi!')
    s += sglang.assistant_begin()
    # The only difference from the no-space variant: an explicit leading space.
    s += ' '
    s += sglang.gen('var', choices=('Hello', 'Goodbye', '^*B^&A'))
    s += sglang.assistant_end()
# Run the leading-space selection on `runtime` and print the full text.
selected_space_state = selected_space.run(backend=runtime)
print(selected_space_state.text())
# Recorded output per model:
# Mistral: [INST] Hi! [/INST] Hello </s><s>
# LLAMA: [INST] Hi! [/INST] ^*B^&A </s><s>
@sglang.function
def freeform(s):
    """Unconstrained generation as the first assistant tokens, no leading space.

    Appends a user turn ('Hi!'), opens the assistant turn, generates up to
    three tokens into ``var`` with no ``choices`` restriction, then closes
    the assistant turn. Serves as the free-generation baseline for the
    constrained-choice programs.
    """
    s += sglang.user('Hi!')
    s += sglang.assistant_begin()
    # max_tokens=3 keeps the sample short; only the first tokens matter here.
    s += sglang.gen('var', max_tokens=3)
    s += sglang.assistant_end()
# Run the no-leading-space free generation on `runtime` and print the full text.
freeform_state = freeform.run(backend=runtime)
print(freeform_state.text())
# Recorded output per model:
# Mistral: [INST] Hi! [/INST] Hello! How </s><s>
# LLAMA: [INST] Hi! [/INST] Hello! </s><s>
@sglang.function
def freeformp_space(s):
    """Unconstrained generation as the first assistant tokens, after a leading space.

    Same program as the no-space free-generation variant (user turn 'Hi!',
    then up to three generated tokens stored in ``var``), except that a
    single space is appended right after the assistant turn is opened.
    """
    s += sglang.user('Hi!')
    s += sglang.assistant_begin()
    # The only difference from the no-space variant: an explicit leading space.
    s += ' '
    s += sglang.gen('var', max_tokens=3)
    s += sglang.assistant_end()
# Run the leading-space free generation on `runtime` and print the full text.
freeformp_space_state = freeformp_space.run(backend=runtime)
print(freeformp_space_state.text())
# Recorded output per model:
# Mistral: [INST] Hi! [/INST] Hello! How </s><s>
# LLAMA: [INST] Hi! [/INST] Hello! It </s><s>