openchat
openchat copied to clipboard
Minimal Hugginface Example for OpenChat
Hi, I am interested in evaluating OpenChat (https://github.com/evalplus/evalplus/issues/60, https://github.com/evalplus/evalplus/issues/61) and want to understand what could be a minimal and self-contained HuggingFace example for me to follow.
cc: @imoneoi
here is my working code in python 3.8.10 ubuntu 20.04 Im using thebloke's quantised version (4Gb) https://huggingface.co/TheBloke/openchat-3.5-0106-GPTQ on rtx3090 but I think it needs far less resources
this version has streaming output and remembers context between turns
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import transformers
from transformers_stream_generator import init_stream_support
import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="transformers")
transformers.logging.set_verbosity_error()
init_stream_support()
model_name_or_path = "/home/sujit/Downloads/text-generation-webui-main/models/TheBloke_openchat-3.5-0106-GPTQ"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path,device_map="cuda")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
context=""
while True:
prompt=""
while prompt=="":
prompt=input("User: ")
prompt=context+" User: "+prompt+"<|end_of_turn|>\nAssistant: "
input_ids = tokenizer(prompt, return_tensors='pt').input_ids.cuda()
try:
generator = model.generate(inputs=input_ids, temperature=0.1, do_stream=True, do_sample=True, top_p=0.15, top_k=20, max_new_tokens=512, stream=True)
except:
break
output = words = ""
last_tokens = last_decoded_tokens = []
for index, x in enumerate(generator):
if prompt=="": break
tokens = x.cpu().tolist()
tokens = last_tokens + tokens
word = tokenizer.decode(tokens)
if "�" in word:
last_tokens = tokens
else:
if " " in tokenizer.decode(
last_decoded_tokens+tokens,skip_special_tokens=True):
word = " " + word
last_tokens = []
last_decoded_tokens = tokens
if word!="<|end_of_turn|>":
print(word,end="")
output = output+word
else:
context=prompt+output+word
print()
break