lorax
lorax copied to clipboard
Stop word is included on phi-2
System Info
When using Predibase serverless, the stop sequence is included in the streamed output. I expected generation to stop at the stop sequence without emitting it.
Information
- [ ] Docker
- [ ] The CLI directly
Tasks
- [ ] An officially supported command
- [ ] My own modifications
Reproduction
import json
import sys
import time
import requests
# Left blank on purpose: `predibase_key` is sent as the Bearer token and
# `tenant` is interpolated into the deployment URL. Set both before running.
predibase_key = ""
tenant = ""
base_url = f"https://serving.app.predibase.com/{tenant}/deployments/v2/llms/phi-2"
# Alternative deployment URL, kept for switching the reproduction to another model:
# base_url = f"https://serving.app.predibase.com/{tenant}/deployments/v2/llms/llama-3-8b-instruct"
headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {predibase_key}",
}
# Chat-completions payload template. "model" is filled in per call by
# run_for_adapter; "stop" carries the sequence this report is about.
request_body = {
    "model": "",
    "messages": [
        {"role": "user", "content": "Hi how are you?"},
        {"role": "assistant", "content": "I am good. how are you?"},
        {"role": "user", "content": "I am good, are you an a diet today?"},
        {"role": "assistant", "content": "Yes, I am. What would you like to know?"},
        {"role": "user", "content": "What diet is it?"},
    ],
    "stop": ["<|im_end|>"],
    "stream": True,
}
def run_for_adapter(adapter_id):
    """Stream one chat completion for ``adapter_id`` and print it as it arrives.

    Posts a shallow copy of the module-level ``request_body`` (with ``"model"``
    set to ``adapter_id``) to ``{base_url}/v1/chat/completions``. Prints the
    HTTP status, the elapsed time until the first non-empty streamed line
    ("TFT"), and then each chunk's ``delta.content`` with no trailing newline.
    Lines that cannot be parsed as a chunk are echoed raw.

    Args:
        adapter_id: Value sent as the ``"model"`` field of the request.

    Returns:
        The ``requests.Response`` object, after its stream has been iterated.

    Raises:
        SystemExit: With exit code 1 when a streamed chunk has an ``"error"`` key.
    """
    body = {**request_body, "model": adapter_id}
    start = time.time()
    response = requests.post(
        f"{base_url}/v1/chat/completions",
        headers=headers,
        data=json.dumps(body),
        stream=True,
    )
    print(f"{response.status_code} : {adapter_id}")

    first_line_at = None
    for line in response.iter_lines():
        if not line:
            continue
        if first_line_at is None:
            first_line_at = time.time()
            print(f"TFT: {first_line_at - start}")

        try:
            text = line.decode("utf-8")
            # Strip only a leading "data:" field name. Splitting on every
            # occurrence would truncate JSON whose content contains "data:".
            if text.startswith("data:"):
                text = text[len("data:"):]
            chunk = json.loads(text.strip())
        except ValueError:
            # Covers both decode and JSON errors; echo the raw line and go on.
            print(line)
            continue

        if not isinstance(chunk, dict):
            print(line)
            continue

        if "error" in chunk:
            print(f"ERROR: {chunk['error']}")
            # Deliberately outside any try block: a bare `except:` here would
            # swallow the SystemExit and keep the loop running.
            sys.exit(1)

        try:
            content = chunk["choices"][0]["delta"].get("content", "")
        except (KeyError, IndexError, TypeError, AttributeError):
            # Chunk does not have the expected shape; show it unmodified.
            print(line)
            continue
        # A null content field would otherwise be printed as the text "None".
        print("" if content is None else content, end="")
    return response
# Script entry point: run the reproduction once with an empty adapter ID.
if __name__ == "__main__":
    run_for_adapter(adapter_id="")
Actual output (the stop sequence is emitted at the end of the stream):
It is a low-carb diet. Would you like to know more about it?<|im_end|>
Expected behavior
It is a low-carb diet. Would you like to know more about it?