5秒的视频耗时5分钟，32秒的视频耗时27分钟，14秒的视频生成用了10分钟。请问如何提高视频生成速度？用的是独立服务器：GPU 24G，内存503G，CPU AMD EPYC 7742 64-Core Processor。主要是 Face Enhancer 比较费时，日志如下：

Face Renderer:: 98%|█████████▊| 161/164 [00:52<00:01, 2.97it/s]
Face Renderer:: 99%|█████████▉| 162/164 [00:52<00:00, 2.99it/s]
Face Renderer:: 99%|█████████▉| 163/164 [00:52<00:00, 3.03it/s]
Face Renderer:: 100%|██████████| 164/164 [00:53<00:00, 2.96it/s]
Face Renderer:: 100%|██████████| 164/164 [00:53<00:00, 3.08it/s]
seamlessClone:: 99%|█████████▉| 811/818 [02:38<00:01, 5.16it/s]
seamlessClone:: 99%|█████████▉| 812/818 [02:38<00:01, 5.11it/s]
seamlessClone:: 99%|█████████▉| 813/818 [02:38<00:00, 5.01it/s]
seamlessClone:: 100%|█████████▉| 814/818 [02:38<00:00, 5.12it/s]
seamlessClone:: 100%|█████████▉| 815/818 [02:39<00:00, 5.11it/s]
seamlessClone:: 100%|█████████▉| 816/818 [02:39<00:00, 5.16it/s]
seamlessClone:: 100%|█████████▉| 817/818 [02:39<00:00, 5.12it/s]
seamlessClone:: 100%|██████████| 818/818 [02:39<00:00, 5.15it/s]
seamlessClone:: 100%|██████████| 818/818 [02:39<00:00, 5.12it/s]
Face Enhancer:: 23%|██▎ | 187/818 [04:09<14:12, 1.35s/it]
Face Enhancer:: 23%|██▎ | 188/818 [04:10<13:55, 1.33s/it]

Jul 20 '23 13:07 Laixinsz

我不知道你用的是什么显卡。我之前测试用的是 Tesla P4，Face Renderer 基本在 2.xx；后来换了 3090，依然是 2.xx，不明白是什么道理。

Jul 21 '23 17:07 myrisc

Face Renderer 可以通过设置 batch_size 来加快。seamlessClone 可以通过线程池并行处理来加快，修改 src/utils/paste_pic.py 如下：

# Unique temporary file that receives the composited frames.
tmp_path = str(uuid.uuid4())+'.mp4'
out_tmp = cv2.VideoWriter(tmp_path, cv2.VideoWriter_fourcc(*'MP4V'), fps, (frame_w, frame_h))

# Original sequential implementation, kept for comparison:
# for crop_frame in tqdm(crop_frames, 'seamlessClone:'):
#     p = cv2.resize(crop_frame.astype(np.uint8), (ox2-ox1, oy2 - oy1))
#
#     mask = 255*np.ones(p.shape, p.dtype)
#     location = ((ox1+ox2) // 2, (oy1+oy2) // 2)
#     gen_img = cv2.seamlessClone(p, full_img, mask, location, cv2.NORMAL_CLONE)
#     out_tmp.write(gen_img)

# --- custom modification: start ---
# Loop-invariant values, computed once instead of once per frame:
# the size each crop is resized to, and the centre of the paste region.
paste_size = (ox2 - ox1, oy2 - oy1)
location = ((ox1 + ox2) // 2, (oy1 + oy2) // 2)


def process_image(crop_frame):
    """Blend one generated crop into the full image.

    Args:
        crop_frame: a single generated frame; it is cast to uint8 and resized
            to ``paste_size`` before blending.

    Returns:
        The result of ``cv2.seamlessClone`` of the resized crop onto
        ``full_img`` at ``location``.
    """
    p = cv2.resize(crop_frame.astype(np.uint8), paste_size)

    # All-255 mask: the whole resized crop is cloned.
    mask = 255*np.ones(p.shape, p.dtype)
    return cv2.seamlessClone(p, full_img, mask, location, cv2.NORMAL_CLONE)


# Upper bound on the number of worker threads in the pool.
max_threads = 5

with concurrent.futures.ThreadPoolExecutor(max_workers=max_threads) as executor:
    # executor.map yields results in the order of crop_frames, so each frame
    # can be written as soon as it is ready; there is no need to collect every
    # processed frame in a list first.
    for gen_img in tqdm(executor.map(process_image, crop_frames), total=len(crop_frames), desc='seamlessClone:'):
        out_tmp.write(gen_img)
# --- custom modification: end ---

Face Enhancer 没找到加速办法，用线程池没有成功：用下面注释掉的代码做并行会报错。有没有对 Python 研究比较深入的朋友，帮忙看下如何加速 Enhancer 这一步？相关代码在 src/utils/face_enhancer.py：

# ------------------------ restore ------------------------

for idx in tqdm(range(len(images)), 'Face Enhancer:'):
    
    img = cv2.cvtColor(images[idx], cv2.COLOR_RGB2BGR)
    
    # restore faces and background if necessary
    cropped_faces, restored_faces, r_img = restorer.enhance(
        img,
        has_aligned=False,
        only_center_face=False,
        paste_back=True)
    
    r_img = cv2.cvtColor(r_img, cv2.COLOR_BGR2RGB)
    yield r_img
# Attempt at running the enhancer through a thread pool. It is left commented
# out because running it raises an error.
# NOTE(review): every worker would call the same `restorer` object; presumably
# `restorer.enhance` is not safe to call concurrently -- confirm against the
# GFPGAN implementation before retrying this approach.
# max_threads = 5
# def process_image(idx):
#     print('idx',idx)
#     img = cv2.cvtColor(images[idx], cv2.COLOR_RGB2BGR)
   
#     # restore faces and background if necessary
#     cropped_faces, restored_faces, r_img = restorer.enhance(
#         img,
#         has_aligned=False,
#         only_center_face=False,
#         paste_back=True)

#     r_img = cv2.cvtColor(r_img, cv2.COLOR_BGR2RGB)
#     return r_img
# with concurrent.futures.ThreadPoolExecutor(max_workers=max_threads) as executor:
#     # Submit the tasks and get back a lazy iterator of results
#     process_images = tqdm(executor.map(process_image, range(len(images))), total=len(images) ,desc='Face Enhancer:')
#     for gen_img in process_images:
#         yield gen_img

Jul 22 '23 05:07 Laixinsz

face_enhancer 速度非常慢，已成为限制整个项目的瓶颈，项目 owner 有关注这个问题吗？在腾讯 GFPGAN 的推理优化上，有没有谁有好的实践经验？我的环境配置是：Ubuntu 22.04 LTS，RTX 4090，64G DDR4 2666MHz 内存。

Aug 01 '23 03:08 arispeng

这么夸张吗，一般用5分钟合成1分钟，还不错啦

Aug 06 '23 08:08 newstargo

AMD的显卡能运行吗？现在都是CPU在跑，太慢了

Aug 09 '23 05:08 xyyyuuan

@newstargo 你是什么配置呢

Sep 23 '23 14:09 shanchuan0999

主要是显卡问题，没有匹配的显卡，全用CPU模式跑是真的慢

Jan 22 '25 08:01 FunkZero

Face Renderer 可以通过设置 batch_size 来加快。seamlessClone 可以通过线程池并行处理来加快，修改 src/utils/paste_pic.py 如下：

# Unique temporary file that receives the composited frames.
tmp_path = str(uuid.uuid4())+'.mp4'
out_tmp = cv2.VideoWriter(tmp_path, cv2.VideoWriter_fourcc(*'MP4V'), fps, (frame_w, frame_h))

# Original sequential implementation, kept for comparison:
# for crop_frame in tqdm(crop_frames, 'seamlessClone:'):
#     p = cv2.resize(crop_frame.astype(np.uint8), (ox2-ox1, oy2 - oy1))
#
#     mask = 255*np.ones(p.shape, p.dtype)
#     location = ((ox1+ox2) // 2, (oy1+oy2) // 2)
#     gen_img = cv2.seamlessClone(p, full_img, mask, location, cv2.NORMAL_CLONE)
#     out_tmp.write(gen_img)

# --- custom modification: start ---
# Loop-invariant values, computed once instead of once per frame:
# the size each crop is resized to, and the centre of the paste region.
paste_size = (ox2 - ox1, oy2 - oy1)
location = ((ox1 + ox2) // 2, (oy1 + oy2) // 2)


def process_image(crop_frame):
    """Blend one generated crop into the full image.

    Args:
        crop_frame: a single generated frame; it is cast to uint8 and resized
            to ``paste_size`` before blending.

    Returns:
        The result of ``cv2.seamlessClone`` of the resized crop onto
        ``full_img`` at ``location``.
    """
    p = cv2.resize(crop_frame.astype(np.uint8), paste_size)

    # All-255 mask: the whole resized crop is cloned.
    mask = 255*np.ones(p.shape, p.dtype)
    return cv2.seamlessClone(p, full_img, mask, location, cv2.NORMAL_CLONE)


# Upper bound on the number of worker threads in the pool.
max_threads = 5

with concurrent.futures.ThreadPoolExecutor(max_workers=max_threads) as executor:
    # executor.map yields results in the order of crop_frames, so each frame
    # can be written as soon as it is ready; there is no need to collect every
    # processed frame in a list first.
    for gen_img in tqdm(executor.map(process_image, crop_frames), total=len(crop_frames), desc='seamlessClone:'):
        out_tmp.write(gen_img)
# --- custom modification: end ---

Face Enhancer 没找到加速办法，用线程池没有成功：用下面注释掉的代码做并行会报错。有没有对 Python 研究比较深入的朋友，帮忙看下如何加速 Enhancer 这一步？相关代码在 src/utils/face_enhancer.py：

# ------------------------ restore ------------------------

for idx in tqdm(range(len(images)), 'Face Enhancer:'):
    
    img = cv2.cvtColor(images[idx], cv2.COLOR_RGB2BGR)
    
    # restore faces and background if necessary
    cropped_faces, restored_faces, r_img = restorer.enhance(
        img,
        has_aligned=False,
        only_center_face=False,
        paste_back=True)
    
    r_img = cv2.cvtColor(r_img, cv2.COLOR_BGR2RGB)
    yield r_img
# Attempt at running the enhancer through a thread pool. It is left commented
# out because running it raises an error.
# NOTE(review): every worker would call the same `restorer` object; presumably
# `restorer.enhance` is not safe to call concurrently -- confirm against the
# GFPGAN implementation before retrying this approach.
# max_threads = 5
# def process_image(idx):
#     print('idx',idx)
#     img = cv2.cvtColor(images[idx], cv2.COLOR_RGB2BGR)
   
#     # restore faces and background if necessary
#     cropped_faces, restored_faces, r_img = restorer.enhance(
#         img,
#         has_aligned=False,
#         only_center_face=False,
#         paste_back=True)

#     r_img = cv2.cvtColor(r_img, cv2.COLOR_BGR2RGB)
#     return r_img
# with concurrent.futures.ThreadPoolExecutor(max_workers=max_threads) as executor:
#     # Submit the tasks and get back a lazy iterator of results
#     process_images = tqdm(executor.map(process_image, range(len(images))), total=len(images) ,desc='Face Enhancer:')
#     for gen_img in process_images:
#         yield gen_img

请问可以详细解释一下吗？感谢！！

Jun 18 '25 09:06 hwhw97