InternLM-XComposer
InternLM-XComposer copied to clipboard
视频理解是否可以使用4bit模型?
请问视频理解可以使用4bit量化的模型吗?我看文档例子使用的是全量模型,我尝试使用4bit模型提示显存不够,请问一般情况下需要多少显存?
ps: 测试理解单张图像,使用4bit模型是没问题的。
视频理解的代码:
# Video-understanding example: sample frames from a video, render them as a
# single image input, and ask the 4-bit (AWQ) InternLM-XComposer2.5 model to
# describe the video through an lmdeploy pipeline.
from lmdeploy import GenerationConfig, TurbomindEngineConfig, pipeline
from lmdeploy.vl import load_image
from transformers.dynamic_module_utils import get_class_from_dynamic_module

# The video helpers are resolved from the `ixc_utils` module of the
# full-precision model repository.
HF_MODEL = 'internlm/internlm-xcomposer2d5-7b'
load_video = get_class_from_dynamic_module('ixc_utils.load_video', HF_MODEL)
frame2img = get_class_from_dynamic_module('ixc_utils.frame2img', HF_MODEL)
Video_transform = get_class_from_dynamic_module('ixc_utils.Video_transform', HF_MODEL)
get_font = get_class_from_dynamic_module('ixc_utils.get_font', HF_MODEL)

# Sample video:
# https://github.com/InternLM/InternLM-XComposer/raw/main/examples/liuxiang.mp4
video = load_video('/home/ma/work/InternLM-XComposer/test.mp4')
img = frame2img(video, get_font())
img = Video_transform(img)

# NOTE(review): per the lmdeploy docs, `tp` is the tensor-parallel GPU count
# and `cache_max_entry_count` is the share of GPU memory reserved for the k/v
# cache; lowering the latter is the usual first step when hitting
# out-of-memory errors -- confirm against the installed lmdeploy version.
engine_config = TurbomindEngineConfig(
    model_format='awq',
    tp=2,
    cache_max_entry_count=0.5,
)
pipe = pipeline(
    'internlm/internlm-xcomposer2d5-7b-4bit',
    backend_config=engine_config,
)
gen_config = GenerationConfig(top_k=50, top_p=0.8, temperature=1.0)

query = 'Here are some frames of a video. Describe this video in detail 输出中文'
sess = pipe.chat((query, img), gen_config=gen_config)
print(sess.response.text)

# Optional follow-up turn that reuses the same chat session (left disabled,
# as in the original snippet):
# query = 'tell me the athlete code of Liu Xiang'
# sess = pipe.chat(query, session=sess, gen_config=gen_config)
# print(sess.response.text)