Baichuan-13B
When asked whether it is baichuan-13B, it answers that it is baichuan-7B. Why is that?
1 Download the model
from huggingface_hub import snapshot_download

# Fetch the full Baichuan-13B-Chat repository into the current directory
snapshot_download(repo_id="baichuan-inc/Baichuan-13B-Chat", cache_dir=".")
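As a side note, snapshot_download returns the local path of the downloaded snapshot, so the hash-named directory used as model_dir in step 2 does not have to be hard-coded:

from huggingface_hub import snapshot_download

# The return value is the local snapshot directory, which can be passed
# straight to from_pretrained in step 2 instead of copying the hash by hand
model_dir = snapshot_download(repo_id="baichuan-inc/Baichuan-13B-Chat", cache_dir=".")
print(model_dir)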
2 Model inference
from fastapi import FastAPI, Request
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers.generation.utils import GenerationConfig
import uvicorn, datetime
import torch

# Build a device_map that spreads the 40 transformer layers evenly across
# the available GPUs; the embeddings go on GPU 0, the final norm and
# lm_head on the last GPU.
def auto_configure_device_map(num_gpus: int):
    num_trans_layers = 40
    per_gpu_layers = num_trans_layers / num_gpus
    device_map = {'model.embed_tokens': 0,
                  'model.norm': num_gpus - 1, 'lm_head': num_gpus - 1}
    for i in range(num_trans_layers):
        device_map[f'model.layers.{i}'] = int(i // per_gpu_layers)
    return device_map

# Number of GPUs (0 on a CPU-only machine, so the comparisons below stay valid)
NUM_GPUS = torch.cuda.device_count() if torch.cuda.is_available() else 0
# device_map, device, and dtype derived from the GPU count
device_map = auto_configure_device_map(NUM_GPUS) if NUM_GPUS > 0 else None
device = torch.device("cuda") if NUM_GPUS > 0 else torch.device("cpu")
device_dtype = torch.half if NUM_GPUS > 0 else torch.float

# Reclaim GPU memory after each request
def torch_gc():
    if torch.cuda.is_available():
        with torch.cuda.device(device):
            torch.cuda.empty_cache()
            torch.cuda.ipc_collect()

app = FastAPI()

@app.post("/")
async def create_item(request: Request):
    global model, tokenizer
    json_post = await request.json()
    prompt = json_post.get('prompt')
    history = json_post.get('history') or []
    max_length = json_post.get('max_length')
    top_p = json_post.get('top_p')
    temperature = json_post.get('temperature')
    # Optional per-request overrides of the generation config
    # (mutating shared state is acceptable here since workers=1)
    if max_length is not None:
        model.generation_config.max_new_tokens = max_length
    if top_p is not None:
        model.generation_config.top_p = top_p
    if temperature is not None:
        model.generation_config.temperature = temperature
    # history is expected to be a list of {"role", "content"} dicts
    messages = list(history)
    messages.append({"role": "user", "content": prompt})
    response = model.chat(tokenizer, messages)
    now = datetime.datetime.now()
    time = now.strftime("%Y-%m-%d %H:%M:%S")
    answer = {
        "response": response,
        "status": 200,
        "time": time
    }
    log = "[" + time + "] " + 'prompt:"' + prompt + '", response:"' + repr(response) + '"'
    print(log)
    torch_gc()
    return answer

if __name__ == '__main__':
    model_dir = "./baichuan-inc--Baichuan-13B-Chat/snapshots/d0a98e13222c6e82d24062f60ff491519e249744"
    tokenizer = AutoTokenizer.from_pretrained(model_dir, use_fast=False, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(model_dir, trust_remote_code=True,
                                                 device_map=device_map, torch_dtype=device_dtype)
    model.generation_config = GenerationConfig.from_pretrained(model_dir)
    print(model)
    model.eval()
    uvicorn.run(app, host='0.0.0.0', port=8080, workers=1)
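As a quick sanity check of the layer placement, here is what auto_configure_device_map produces for two GPUs (run in a session where the function above is in scope; the expected values follow from 40 layers / 2 GPUs = 20 layers per GPU):

dm = auto_configure_device_map(2)
print(dm['model.embed_tokens'])         # 0
print(dm['model.layers.19'])            # 0  (last layer on GPU 0)
print(dm['model.layers.20'])            # 1  (first layer on GPU 1)
print(dm['model.norm'], dm['lm_head'])  # 1 1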
3 Query the model
curl --location 'http://localhost:8080' \
--header 'Content-Type: application/json' \
--data '{"prompt": "你是baichuan-13b吗?", "history": []}'
{"response":"不是,我是Baichuan-7B,一个人工智能程序,可以在多个任务中提供帮助,包括但不限于回答问题、提供建议、生成代码和解释算法。","status":200,"time":"2023-07-11 17:40:08"
4 Server logs
[2023-07-11 17:40:08] prompt:"Are you baichuan-13b?", response:"'No, I am Baichuan-7B, an artificial-intelligence program that can help with many tasks, including but not limited to answering questions, giving suggestions, generating code, and explaining algorithms.'"
INFO: xx.xx.xx.xx:32134 - "POST / HTTP/1.1" 200 OK
So you need to use model editing to fix that: https://github.com/hiyouga/FastEdit
I used that link and followed the editing steps, but how do I save the edited model? After model.save_pretrained(), running inference with the saved model raises RuntimeError: probability tensor contains either inf, nan or element < 0.
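Not a confirmed fix, but one common cause of that inf/nan error is a dtype problem after editing: the weight update can push values outside the float16 range. A sketch of one thing to try, keeping the save round-trip in float32 and only casting back to half at load time (the ./baichuan-13b-edited path is illustrative; model and tokenizer are assumed to be the FastEdit-patched objects still in memory):

import torch
from transformers import AutoModelForCausalLM
from transformers.generation.utils import GenerationConfig

# Cast the edited weights back to float32 before saving so nothing
# overflows during serialization
model = model.float()
model.save_pretrained("./baichuan-13b-edited")
tokenizer.save_pretrained("./baichuan-13b-edited")

# Reload for inference the same way the original checkpoint was loaded
model = AutoModelForCausalLM.from_pretrained(
    "./baichuan-13b-edited", trust_remote_code=True,
    device_map="auto", torch_dtype=torch.float16)
model.generation_config = GenerationConfig.from_pretrained("./baichuan-13b-edited")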
Asked it: Are you baichuan-7b? Answer: No, I am Baichuan-13B, an artificial-intelligence program created by the engineers at Baichuan Intelligence. My research covers natural language processing, machine learning, computer science, and other fields. I was designed to answer questions, chat, write text, perform logical reasoning and mathematical calculation, generate code, and handle other intelligent tasks.
Conclusion: that's just how language models are.
Yes, they sometimes give wrong answers.