5秒的视频耗时5分钟,32秒的视频27分钟 14秒的视频生成用了10分钟。请问如何提高视频生成速度?
5秒的视频耗时5分钟,32秒的视频27分钟 14秒的视频生成用了10分钟。请问如何提高视频生成速度? 用的独立服务器,GPU 24G,内存503G,CPU AMD EPYC 7742 64-Core Processor 主要 Face Enhancer比较费时, Face Renderer:: 98%|█████████▊| 161/164 [00:52<00:01, 2.97it/s] Face Renderer:: 99%|█████████▉| 162/164 [00:52<00:00, 2.99it/s] Face Renderer:: 99%|█████████▉| 163/164 [00:52<00:00, 3.03it/s] Face Renderer:: 100%|██████████| 164/164 [00:53<00:00, 2.96it/s] Face Renderer:: 100%|██████████| 164/164 [00:53<00:00, 3.08it/s] seamlessClone:: 99%|█████████▉| 811/818 [02:38<00:01, 5.16it/s] seamlessClone:: 99%|█████████▉| 812/818 [02:38<00:01, 5.11it/s] seamlessClone:: 99%|█████████▉| 813/818 [02:38<00:00, 5.01it/s] seamlessClone:: 100%|█████████▉| 814/818 [02:38<00:00, 5.12it/s] seamlessClone:: 100%|█████████▉| 815/818 [02:39<00:00, 5.11it/s] seamlessClone:: 100%|█████████▉| 816/818 [02:39<00:00, 5.16it/s] seamlessClone:: 100%|█████████▉| 817/818 [02:39<00:00, 5.12it/s] seamlessClone:: 100%|██████████| 818/818 [02:39<00:00, 5.15it/s] seamlessClone:: 100%|██████████| 818/818 [02:39<00:00, 5.12it/s] Face Enhancer:: 23%|██▎ | 187/818 [04:09<14:12, 1.35s/it] Face Enhancer:: 23%|██▎ | 188/818 [04:10<13:55, 1.33s/it]
我不知道你用的是什么显卡。我之前测试用的是 Tesla P4,Face Renderer 的速度基本在 2.xx it/s;后来换了 3090,依然是 2.xx it/s,不明白是什么道理。
Face Renderer 可以通过设置 batch_size 加快。seamlessClone 可以通过增加线程池的办法并行处理来加快,需要修改的文件是 src/utils/paste_pic.py:
tmp_path = str(uuid.uuid4())+'.mp4'
# out_tmp = cv2.VideoWriter(tmp_path, cv2.VideoWriter_fourcc(*'MP4V'), fps, (frame_w, frame_h))
# for crop_frame in tqdm(crop_frames, 'seamlessClone:'):
# p = cv2.resize(crop_frame.astype(np.uint8), (ox2-ox1, oy2 - oy1))
# mask = 255*np.ones(p.shape, p.dtype)
# location = ((ox1+ox2) // 2, (oy1+oy2) // 2)
# gen_img = cv2.seamlessClone(p, full_img, mask, location, cv2.NORMAL_CLONE)
# out_tmp.write(gen_img)
# 自定义修改开始
def process_image(crop_frame):
    """Blend one generated face crop into the full-size frame.

    This is a closure: ``ox1``, ``oy1``, ``ox2``, ``oy2`` and ``full_img``
    are read from the enclosing scope, which is not part of this snippet.

    Args:
        crop_frame: Frame object exposing ``astype``; it is cast to
            ``uint8`` and resized to ``(ox2 - ox1, oy2 - oy1)``.

    Returns:
        Whatever ``cv2.seamlessClone`` returns for the resized crop pasted
        onto ``full_img`` with ``cv2.NORMAL_CLONE``.
    """
    # Resize the crop to the size of the paste rectangle.
    # NOTE(review): presumably (width, height) as cv2.resize expects - confirm
    # against how ox*/oy* are computed by the caller.
    p = cv2.resize(crop_frame.astype(np.uint8), (ox2 - ox1, oy2 - oy1))
    # All-255 mask with the same shape/dtype as the patch, i.e. the whole
    # patch is handed to seamlessClone.
    mask = 255 * np.ones(p.shape, p.dtype)
    # Midpoint of the (ox1, oy1)-(ox2, oy2) rectangle, used as the clone centre.
    location = ((ox1 + ox2) // 2, (oy1 + oy2) // 2)
    gen_img = cv2.seamlessClone(p, full_img, mask, location, cv2.NORMAL_CLONE)
    return gen_img
# Temporary output file with a random name; frames are encoded as MP4V.
tmp_path = str(uuid.uuid4())+'.mp4'
out_tmp = cv2.VideoWriter(tmp_path, cv2.VideoWriter_fourcc(*'MP4V'), fps, (frame_w, frame_h))
# Upper bound on the number of worker threads in the pool.
max_threads = 5
with concurrent.futures.ThreadPoolExecutor(max_workers=max_threads) as executor:
    # executor.map yields results in the same order as crop_frames, so each
    # blended frame can be written as soon as it is available. This avoids
    # collecting every frame in an intermediate list before writing.
    blended_frames = executor.map(process_image, crop_frames)
    for gen_img in tqdm(blended_frames, total=len(crop_frames), desc='seamlessClone:'):
        # Writing happens only on this (the submitting) thread.
        out_tmp.write(gen_img)
# 自定义修改结束
Face Enhancer 还没找到加速办法,用线程池没有成功:用下面注释掉的代码并行处理会报错。有没有对 Python 研究比较深入的朋友,帮忙看下如何加速 enhancer 这一步?对应的文件是 src/utils/face_enhancer.py:
# ------------------------ restore ------------------------
for idx in tqdm(range(len(images)), 'Face Enhancer:'):
img = cv2.cvtColor(images[idx], cv2.COLOR_RGB2BGR)
# restore faces and background if necessary
cropped_faces, restored_faces, r_img = restorer.enhance(
img,
has_aligned=False,
only_center_face=False,
paste_back=True)
r_img = cv2.cvtColor(r_img, cv2.COLOR_BGR2RGB)
yield r_img
# max_threads = 5
# def process_image(idx):
# print('idx',idx)
# img = cv2.cvtColor(images[idx], cv2.COLOR_RGB2BGR)
# # restore faces and background if necessary
# cropped_faces, restored_faces, r_img = restorer.enhance(
# img,
# has_aligned=False,
# only_center_face=False,
# paste_back=True)
# r_img = cv2.cvtColor(r_img, cv2.COLOR_BGR2RGB)
# return r_img
# with concurrent.futures.ThreadPoolExecutor(max_workers=max_threads) as executor:
# # 提交任务并获取生成器
# process_images = tqdm(executor.map(process_image, range(len(images))), total=len(images) ,desc='Face Enhancer:')
# for gen_img in process_images:
# yield gen_img
face_enhancer 速度非常慢,已经成为限制整个项目的瓶颈。项目 owner 有关注这个问题吗?在腾讯 GFPGAN 的推理优化上,有没有谁有好的实践经验?我的环境配置是:Ubuntu 22.04 LTS,RTX 4090,64G DDR4 2666MHz 内存。
这么夸张吗,一般用5分钟合成1分钟,还不错啦
AMD的显卡能运行嘛? 现在都是cpu在跑太慢了
@newstargo 你是什么配置呢
主要是显卡问题,没有匹配的显卡,全用CPU模式跑是真的慢
face render可以通过设置batch_size加快。 seamlessClone可以通过增加线程池的办法并行处理加快, src/utils/paste_pic.py
tmp_path = str(uuid.uuid4())+'.mp4'
# out_tmp = cv2.VideoWriter(tmp_path, cv2.VideoWriter_fourcc(*'MP4V'), fps, (frame_w, frame_h))
# for crop_frame in tqdm(crop_frames, 'seamlessClone:'):
# p = cv2.resize(crop_frame.astype(np.uint8), (ox2-ox1, oy2 - oy1))
# mask = 255*np.ones(p.shape, p.dtype)
# location = ((ox1+ox2) // 2, (oy1+oy2) // 2)
# gen_img = cv2.seamlessClone(p, full_img, mask, location, cv2.NORMAL_CLONE)
# out_tmp.write(gen_img)
# 自定义修改开始
def process_image(crop_frame):
p = cv2.resize(crop_frame.astype(np.uint8), (ox2-ox1, oy2 - oy1))
mask = 255*np.ones(p.shape, p.dtype)
location = ((ox1+ox2) // 2, (oy1+oy2) // 2)
gen_img = cv2.seamlessClone(p, full_img, mask, location, cv2.NORMAL_CLONE)
return gen_img
tmp_path = str(uuid.uuid4())+'.mp4'
out_tmp = cv2.VideoWriter(tmp_path, cv2.VideoWriter_fourcc(*'MP4V'), fps, (frame_w, frame_h))
processed_frames = [] # 存储处理后的图像
# 创建线程池
# 指定线程池的最大线程数
max_threads = 5
# 创建线程池并设置max_workers参数
with concurrent.futures.ThreadPoolExecutor(max_workers=max_threads) as executor:
# 提交任务并获取处理结果
processed_frames = []
for gen_img in tqdm(executor.map(process_image, crop_frames), total=len(crop_frames) ,desc='seamlessClone:'):
processed_frames.append(gen_img)
# 一次将所有处理后的图像写入视频文件
for frame in processed_frames:
out_tmp.write(frame)
# 自定义修改结束
face enhancer没找到加速办法,用线程池没成功。用下面注释掉的代码并行会报错,有没有对python研究比较深入的看下如何加速enhancer的步骤。 src/utils/face_enhancer.py
# ------------------------ restore ------------------------
for idx in tqdm(range(len(images)), 'Face Enhancer:'):
img = cv2.cvtColor(images[idx], cv2.COLOR_RGB2BGR)
# restore faces and background if necessary
cropped_faces, restored_faces, r_img = restorer.enhance(
img,
has_aligned=False,
only_center_face=False,
paste_back=True)
r_img = cv2.cvtColor(r_img, cv2.COLOR_BGR2RGB)
yield r_img
# max_threads = 5
# def process_image(idx):
# print('idx',idx)
# img = cv2.cvtColor(images[idx], cv2.COLOR_RGB2BGR)
# # restore faces and background if necessary
# cropped_faces, restored_faces, r_img = restorer.enhance(
# img,
# has_aligned=False,
# only_center_face=False,
# paste_back=True)
# r_img = cv2.cvtColor(r_img, cv2.COLOR_BGR2RGB)
# return r_img
# with concurrent.futures.ThreadPoolExecutor(max_workers=max_threads) as executor:
# # 提交任务并获取生成器
# process_images = tqdm(executor.map(process_image, range(len(images))), total=len(images) ,desc='Face Enhancer:')
# for gen_img in process_images:
# yield gen_img
请问可以详细解释一下吗?感谢!!