fastllm
fastllm copied to clipboard
pyfastllm 内存泄露
测试脚本:
from typing import List
import json
import time
import os
import sys
# Locate the "fastllm" directory that sits next to this script, independent of
# the current working directory.
cur_dir = os.path.dirname(os.path.abspath(__file__))
fastllm_dir = os.path.join(cur_dir, "fastllm")
# Extend the module search path before importing pyfastllm
# (presumably the pyfastllm module lives inside fastllm_dir -- confirm).
sys.path.append(fastllm_dir)
import pyfastllm
# The model is loaded once at import time; main() reuses this single instance
# for every request.
glm2_model_path = os.path.join(fastllm_dir, "chatglm2-6b-fp16.flm")
glm2_model = pyfastllm.create_llm(glm2_model_path)
def _receive(idx: int, content_list: List[bytes]):
#print(idx)
pass
def gen_llm_config(llm_config):
    """Build a ``pyfastllm.GenerationConfig`` from a ``cfg_*`` keyed mapping.

    Args:
        llm_config: Mapping that provides the keys ``cfg_max_length``,
            ``cfg_top_k``, ``cfg_top_p``, ``cfg_temperature``,
            ``cfg_repeat_penalty`` and ``cfg_last_n``.

    Returns:
        A new ``GenerationConfig`` with ``enable_hash_id`` set to ``True`` and
        the sampling fields copied from ``llm_config``.

    Raises:
        KeyError: If ``llm_config`` is a dict and one of the keys is missing.
    """
    gen_config = pyfastllm.GenerationConfig()
    gen_config.enable_hash_id = True
    gen_config.max_length = llm_config["cfg_max_length"]
    gen_config.top_k = llm_config["cfg_top_k"]
    gen_config.top_p = llm_config["cfg_top_p"]
    gen_config.temperature = llm_config["cfg_temperature"]
    gen_config.repeat_penalty = llm_config["cfg_repeat_penalty"]
    gen_config.last_n = llm_config["cfg_last_n"]
    return gen_config
def response(model, prompt_input, config):
    """Generate a complete reply through the launch/fetch token API.

    The prompt is tokenized, two special token ids are appended, and tokens
    are then fetched one at a time until ``fetch_response`` returns ``-1``.

    Args:
        model: Object exposing ``weight.tokenizer`` (with ``encode`` and
            ``decode_byte``), ``launch_response`` and ``fetch_response``.
        prompt_input: Prompt text passed to the tokenizer.
        config: Generation config forwarded unchanged to ``launch_response``.

    Returns:
        The generated text, decoded from the concatenated token bytes with
        undecodable bytes dropped.
    """
    # NOTE(review): these special-token ids are hard-coded; confirm that they
    # match the vocabulary of the model that is actually loaded.
    gmask_token_id = 130001
    bos_token_id = 130004

    tokenizer = model.weight.tokenizer
    # The tokenizer's list may hold non-int numbers, so coerce before launch.
    input_ids = [int(v) for v in tokenizer.encode(prompt_input).to_list()]
    input_ids.extend([gmask_token_id, bos_token_id])

    handle = model.launch_response(input_ids, config)

    chunks = []
    while True:
        resp_token = model.fetch_response(handle)
        # -1 marks the end of generation; it is not a token, so it must not
        # be handed to the tokenizer.
        if resp_token == -1:
            break
        chunks.append(tokenizer.decode_byte([resp_token]))

    # Decode once at the end: a multi-byte character may be split across
    # tokens, and re-decoding the whole buffer per token is quadratic.
    return b"".join(chunks).decode(errors="ignore")
def main():
    """Run the memory-leak reproduction against the module-level model.

    Reads the prompt and generation settings from ``data.json`` (keys
    ``data`` and ``config``), calls ``glm2_model.response`` 100 times with the
    no-op ``_receive`` callback, then sleeps so the process stays alive.
    """
    # JSON files are read as UTF-8 explicitly instead of relying on the
    # locale's default encoding.
    with open("data.json", "r", encoding="utf-8") as f:
        d = json.load(f)
    # params.json is only consumed by the disabled second scenario below; it
    # is still loaded so the script keeps requiring the same input files.
    with open("params.json", "r", encoding="utf-8") as f:
        d2 = json.load(f)

    prompt = d["data"][0]
    config = d["config"]
    llm_config = gen_llm_config(config)

    # Scenario 1: callback-based API.
    for _ in range(100):
        glm2_model.response(prompt, _receive, llm_config)
        # time.sleep(1)
    print("done 1")

    # Scenario 2 (disabled): launch/fetch API through response().
    # for _ in range(100):
    #     ret = response(glm2_model, d2["query"], llm_config)
    #     print(ret)
    #     # time.sleep(1)
    # print("done 2")

    # Keep the process alive after the loop (presumably so its memory usage
    # can be inspected -- confirm).
    time.sleep(10000000)


if __name__ == "__main__":
    main()
使用 google-perftools 定位后发现,FillLLMInputs 一直在分配内存但没有释放,如下图所示,累计分配了 30 多 GB:
请问是我的使用方式不对吗?