lorax icon indicating copy to clipboard operation
lorax copied to clipboard

Stop word is included on phi-2

Open yunmanger1 opened this issue 7 months ago • 0 comments

System Info

When using Predibase serverless, I see the stop word included in the streamed output. I assumed generation is supposed to stop at the stop word and not include it in the response.

Information

  • [ ] Docker
  • [ ] The CLI directly

Tasks

  • [ ] An officially supported command
  • [ ] My own modifications

Reproduction

import json
import sys
import time
import requests

# Credentials and tenant are left blank on purpose; fill them in before running.
predibase_key = ""
tenant = ""

# Deployment under test. The commented-out line below points the same script
# at a different deployment.
base_url = f"https://serving.app.predibase.com/{tenant}/deployments/v2/llms/phi-2"
# base_url = f"https://serving.app.predibase.com/{tenant}/deployments/v2/llms/llama-3-8b-instruct"

headers = {
    "Content-Type": "application/json",
    "Authorization": "Bearer " + predibase_key,
}

# Template for the chat-completions request. "model" is a placeholder that
# run_for_adapter overwrites on its own copy; "stop" holds the stop sequence
# the issue is about.
request_body = {
    "model": "",
    "messages": [
        {"role": role, "content": content}
        for role, content in (
            ("user", "Hi how are you?"),
            ("assistant", "I am good. how are you?"),
            ("user", "I am good, are you an a diet today?"),
            ("assistant", "Yes, I am. What would you like to know?"),
            ("user", "What diet is it?"),
        )
    ],
    "stop": ["<|im_end|>"],
    "stream": True,
}


def run_for_adapter(adapter_id):
    """Stream one chat completion for ``adapter_id`` and echo it to stdout.

    Posts a copy of the module-level ``request_body`` (with ``"model"`` set
    to ``adapter_id``) to ``{base_url}/v1/chat/completions`` using the
    module-level ``headers``, then prints:

    * the HTTP status code and adapter id,
    * ``TFT: <seconds>`` when the first non-empty line arrives,
    * each chunk's ``choices[0].delta.content`` as it is received.

    Lines that cannot be decoded, parsed as JSON, or that lack the expected
    chunk structure are printed raw so nothing from the stream is hidden.

    Args:
        adapter_id: Value sent as the request's ``"model"`` field.

    Returns:
        The ``requests`` response object after the stream has been read.

    Raises:
        SystemExit: With status 1 when a chunk carries an ``"error"`` key.
    """
    # Shallow copy so the shared template is never mutated.
    payload = {**request_body, "model": adapter_id}

    start = time.time()
    response = requests.post(
        f"{base_url}/v1/chat/completions",
        headers=headers,
        data=json.dumps(payload),
        stream=True,
    )
    print(f"{response.status_code} : {adapter_id}")

    first_line_at = None
    for line in response.iter_lines():
        if not line:
            continue
        if first_line_at is None:
            first_line_at = time.time()
            print(f"TFT: {first_line_at - start}")

        try:
            text = line.decode("utf-8").strip()
            # Strip only a leading SSE field name; splitting on every
            # "data:" would truncate payloads whose content contains it.
            if text.startswith("data:"):
                text = text[len("data:"):].strip()
            # OpenAI-style end-of-stream sentinel; it is not JSON.
            if text == "[DONE]":
                break
            chunk = json.loads(text)
        except ValueError:
            # Covers UnicodeDecodeError and json.JSONDecodeError.
            print(line)
            continue

        # Handled outside any try block so that SystemExit is not swallowed
        # by the fallback that prints unparseable lines.
        if isinstance(chunk, dict) and "error" in chunk:
            print(f"ERROR: {chunk['error']}")
            sys.exit(1)

        try:
            content = chunk["choices"][0]["delta"].get("content")
        except (KeyError, IndexError, TypeError, AttributeError):
            print(line)
            continue

        # "content" may be present but null; print nothing in that case.
        # Flush so tokens show up as they stream rather than at exit.
        print(content or "", end="", flush=True)

    return response


# Script entry point: run the reproduction once with an empty adapter id,
# so the request is sent with "model" set to "".
if __name__ == "__main__":
    run_for_adapter("")

Actual output:

It is a low-carb diet. Would you like to know more about it?<|im_end|>

Expected behavior

It is a low-carb diet. Would you like to know more about it?

yunmanger1 avatar Jul 11 '24 23:07 yunmanger1