FastChat
FastChat copied to clipboard
Support more OpenAI-compatible APIs (embedding, completion)
This PR adds support for a subset of OpenAI API features, including completion, create embeddings, and chat completion. With these changes, users will be able to leverage the local LLM to generate completions, embeddings, and chat completions.
Before trying the examples below, please start your controller and worker together with the FastChat API server.
Try chat completion with
# Send a JSON chat-completion request (one user message, temperature 0.5)
# to the /v1/chat/completions endpoint on localhost:8000.
curl http://localhost:8000/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "vicuna-7b-v1.1",
"messages": [{"role": "user", "content": "Hello, can you tell me a joke for me?"}],
"temperature": 0.5
}'
Try create embedding with
# Send a JSON request with a single input string to the
# /v1/create_embeddings endpoint on localhost:8000.
curl http://localhost:8000/v1/create_embeddings \
-H "Content-Type: application/json" \
-d '{
"model": "vicuna-7b-v1.1",
"input": "Hello, can you tell me a joke for me?"
}'
Try text completion with
# Send a JSON text-completion request (prompt, max_tokens 20, temperature 0.5)
# to the /v1/completions endpoint on localhost:8000.
curl http://localhost:8000/v1/completions \
-H "Content-Type: application/json" \
-d '{
"model": "vicuna-7b-v1.1",
"prompt": "Once upon a time",
"max_tokens": 20,
"temperature": 0.5
}'
Try `create_embeddings` to analyze the prompts!
import itertools
import json

import numpy as np
import pandas as pd
import requests
from scipy.spatial.distance import cosine
def get_embedding_from_api(word, model='vicuna-7b-v1.1',
                           url='http://localhost:8000/v1/create_embeddings',
                           timeout=None):
    """Request the embedding of ``word`` from the embeddings endpoint.

    Args:
        word: Text to embed; sent as the ``input`` field of the request.
        model: Model name sent as the ``model`` field of the request.
        url: Endpoint to POST to. Defaults to the local server address used
            by the surrounding examples.
        timeout: Seconds to wait for the server, forwarded to
            ``requests.post``. ``None`` (the default) waits indefinitely.

    Returns:
        A NumPy array built from ``data[0]['embedding']`` of the JSON
        response, or ``None`` (after printing the status code and response
        body) when the server does not answer with HTTP 200.
    """
    headers = {'Content-Type': 'application/json'}
    payload = json.dumps({'model': model, 'input': word})
    response = requests.post(url, headers=headers, data=payload, timeout=timeout)

    # Guard clause: report the failure and let the caller decide what to do.
    if response.status_code != 200:
        print(f"Error: {response.status_code} - {response.text}")
        return None

    return np.array(response.json()['data'][0]['embedding'])
def cosine_similarity(vec1, vec2):
    """Return the cosine similarity of ``vec1`` and ``vec2``.

    SciPy's ``cosine`` yields a distance, so the similarity is one minus
    that distance.
    """
    distance = cosine(vec1, vec2)
    return 1 - distance
def print_cosine_similarity(embeddings, texts):
    """Print the cosine similarity of every unordered pair of texts.

    Args:
        embeddings: Mapping from each text to its embedding vector.
        texts: Texts to compare; pairs are reported in the order the texts
            occur (each earlier text against every later one).
    """
    # combinations() yields each (earlier, later) pair exactly once, in the
    # same order as a nested "j > i" index loop would.
    for first, second in itertools.combinations(texts, 2):
        sim = cosine_similarity(embeddings[first], embeddings[second])
        print(f"Cosine similarity between '{first}' and '{second}': {sim:.2f}")
# Sentences whose embeddings are compared pairwise below.
texts = [
    'The quick brown fox',
    'The quick brown dog',
    'The fast brown fox',
    'A completely different sentence',
]

# Fetch one embedding per sentence, keyed by the sentence itself.
embeddings = {sentence: get_embedding_from_api(sentence) for sentence in texts}

print_cosine_similarity(embeddings, texts)
`create_embeddings` can even do semantic search, just like the OpenAI APIs! See the OpenAI semantic search example.
def search_reviews(df, product_description, n=3, pprint=True):
    """Return the ``n`` reviews most similar to ``product_description``.

    Args:
        df: DataFrame with an ``embedding`` column and a ``combined`` text
            column. A ``similarity`` column is added to it in place.
        product_description: Query text; embedded with
            ``get_embedding_from_api``.
        n: Number of top matches to return.
        pprint: When true, print the first 200 characters of each match,
            each followed by a blank line.

    Returns:
        The ``combined`` texts of the ``n`` most similar rows, most similar
        first, with "Title: " removed and "; Content:" replaced by ": ".

    Raises:
        ValueError: If no embedding could be obtained for the query.
    """
    product_embedding = get_embedding_from_api(product_description)
    if product_embedding is None:
        # Fail before touching df; otherwise the missing embedding only
        # surfaces as an obscure error from inside apply().
        raise ValueError(
            f"Could not get an embedding for {product_description!r}"
        )

    df["similarity"] = df.embedding.apply(
        lambda x: cosine_similarity(x, product_embedding)
    )

    # regex=False: both patterns are literal text, so keep the replacement
    # independent of the pandas version's default for `regex`.
    results = (
        df.sort_values("similarity", ascending=False)
        .head(n)
        .combined.str.replace("Title: ", "", regex=False)
        .str.replace("; Content:", ": ", regex=False)
    )

    if pprint:
        for r in results:
            print(r[:200])
            print()
    return results
# Load the reviews and keep only the columns used below.
input_datapath = "data/fine_food_reviews_1k.csv"
df = pd.read_csv(input_datapath, index_col=0)
df = df[["Time", "ProductId", "UserId", "Score", "Summary", "Text"]]
df = df.dropna()

# One text per review: the summary as title followed by the review body.
df["combined"] = (
    "Title: " + df.Summary.str.strip() + "; Content: " + df.Text.str.strip()
)

# Keep the most recent reviews (by Time), oversampling 2x so that enough
# rows remain after the length filter.
top_n = 1000
df = df.sort_values("Time").tail(top_n * 2)
df = df.drop(columns="Time")

# NOTE: despite the column name, this is the character count of the text.
df["n_tokens"] = df.combined.apply(len)
# .copy() so the column assignment below writes to an independent frame.
df = df[df.n_tokens <= 500].tail(top_n).copy()

# Store the embeddings as arrays: search_reviews passes each one straight to
# cosine_similarity, which cannot work on the string np.array2string returns
# (and array2string abbreviates arrays above its 1000-element threshold).
df["embedding"] = df.combined.apply(get_embedding_from_api)

results = search_reviews(df, "delicious beans", n=20)
print(results)
Feel free to use `create_embeddings` to build your own classifier! You can find all the examples in the playground.
@suquark please take another look
this is a really cool implementation. waiting for this to unlock.
Very cool! Great work @andy-yang-1 can't wait until this release!
Thank you for this PR! I was busy at that time and missed the PR review. But it is a good job!