使用 lightx2v/Wan2.2-Distill-Models-FP8 量化后的权重时,生成的视频噪声很大(呈马赛克状),而使用 BF16 原版权重时是正常的。
wan_moe_i2v_distill.json: { "infer_steps": 4, "target_video_length": 81, "text_len": 512, "target_height": 720, "target_width": 1280, "self_attn_1_type": "sage_attn2", "cross_attn_1_type": "sage_attn2", "cross_attn_2_type": "sage_attn2", "dit_quantized": true, "dit_quant_scheme": "fp8-vllm", "high_noise_original_ckpt": "/mnt/cache1/LightX2V/Wan2.2-Distill-Models/high_noise_model/wan2.2_i2v_A14b_high_noise_scaled_fp8_e4m3_lightx2v_4step.safetensors", "low_noise_original_ckpt": "/mnt/cache1/LightX2V/Wan2.2-Distill-Models/low_noise_model/wan2.2_i2v_A14b_low_noise_scaled_fp8_e4m3_lightx2v_4step.safetensors", "sample_guide_scale": [ 7.0, 11.0 ], "sample_shift": 7.0, "enable_cfg": false, "cpu_offload": true, "offload_granularity": "block", "t5_cpu_offload": false, "vae_cpu_offload": false, "use_image_encoder": false, "boundary_step_index": 2,
"denoising_step_list": [
1000,
750,
500,
250
]
}
sh脚本:
# Launch script: runs `python -m lightx2v.infer` for the i2v task with the
# wan2.2_moe_distill model class and the wan_moe_i2v_distill.json config.

# Paths used below: lightx2v_path locates scripts/ and configs/,
# model_path is passed to --model_path.
lightx2v_path=/mnt/cache1/LightX2V
model_path=/mnt/cache1/LightX2V/Wan2.2-Distill-Models

# Expose only GPU index 6 to CUDA.
export CUDA_VISIBLE_DEVICES=6

# SageAttention / CUDA arch settings, exported for the sourced script and the
# Python process.
# NOTE(review): the exact effect of each value is not visible from this
# script — confirm against the SageAttention and PyTorch build docs.
export SAGEATTENTION_MODE=1
export SAGEATTENTION_BACKEND=triton
export TORCH_CUDA_ARCH_LIST="9.0a"
export SAGEATTENTION_OPT_LEVEL=2

# set environment variables
# (This line must stay a comment: as a bare command, `set environment
# variables` would overwrite the positional parameters.)
source "${lightx2v_path}/scripts/base/base.sh"

# Each option line ends with a backslash so that all flags are passed to a
# single python invocation instead of being run as separate commands.
python -m lightx2v.infer \
  --model_cls wan2.2_moe_distill \
  --task i2v \
  --model_path "$model_path" \
  --config_json "${lightx2v_path}/configs/wan22/wan_moe_i2v_distill.json" \
  --prompt "参考图像主体整理衣服,然后原地做出360度旋转的动作,展示自己的服饰。" \
  --negative_prompt "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" \
  --image_path /mnt/cache1/Wan2.2-I2V-A14B-Moe-Distill-Lightx2v/inputs/image/1.jpg \
  --save_result_path /mnt/cache1/LightX2V/Wan2.2-Distill-Models/outputs/result111111111111111.mp4