
Missing control images for QwenImageEditPlusModel

Open · yalsp opened this issue 2 months ago · 4 comments

Running 1 job:

```json
{
  "type": "diffusion_trainer",
  "training_folder": "/root/ai-toolkit/output",
  "sqlite_db_path": "/root/ai-toolkit/aitk_db.db",
  "device": "cuda",
  "trigger_word": null,
  "performance_log_every": 10,
  "network": {
    "type": "lora",
    "linear": 32,
    "linear_alpha": 32,
    "conv": 16,
    "conv_alpha": 16,
    "lokr_full_rank": true,
    "lokr_factor": -1,
    "network_kwargs": { "ignore_if_contains": [] }
  },
  "save": {
    "dtype": "bf16",
    "save_every": 250,
    "max_step_saves_to_keep": 4,
    "save_format": "diffusers",
    "push_to_hub": false
  },
  "datasets": [
    {
      "folder_path": "/root/ai-toolkit/datasets/target",
      "mask_path": null,
      "mask_min_value": 0.1,
      "default_caption": "",
      "caption_ext": "txt",
      "caption_dropout_rate": 0.05,
      "cache_latents_to_disk": false,
      "is_reg": false,
      "network_weight": 1,
      "resolution": [1024],
      "controls": [],
      "shrink_video_to_frames": true,
      "num_frames": 1,
      "do_i2v": true,
      "flip_x": false,
      "flip_y": false,
      "control_path_1": "/root/ai-toolkit/datasets/control_1",
      "control_path_2": "/root/ai-toolkit/datasets/control_2"
    }
  ],
  "train": {
    "batch_size": 1,
    "bypass_guidance_embedding": false,
    "steps": 1000,
    "gradient_accumulation": 1,
    "train_unet": true,
    "train_text_encoder": false,
    "gradient_checkpointing": true,
    "noise_scheduler": "flowmatch",
    "optimizer": "adamw8bit",
    "timestep_type": "weighted",
    "content_or_style": "balanced",
    "optimizer_params": { "weight_decay": 0.0001 },
    "unload_text_encoder": false,
    "cache_text_embeddings": false,
    "lr": 0.0002,
    "ema_config": { "use_ema": false, "ema_decay": 0.99 },
    "skip_first_sample": false,
    "force_first_sample": false,
    "disable_sampling": false,
    "dtype": "bf16",
    "diff_output_preservation": false,
    "diff_output_preservation_multiplier": 1,
    "diff_output_preservation_class": "person",
    "switch_boundary_every": 1,
    "loss_type": "mse"
  },
  "model": {
    "name_or_path": "Qwen/Qwen-Image-Edit-2509",
    "quantize": true,
    "qtype": "uint3|ostris/accuracy_recovery_adapters/qwen_image_edit_2509_torchao_uint3.safetensors",
    "quantize_te": true,
    "qtype_te": "qfloat8",
    "arch": "qwen_image_edit_plus",
    "low_vram": true,
    "model_kwargs": { "match_target_res": false },
    "layer_offloading": false,
    "layer_offloading_text_encoder_percent": 1,
    "layer_offloading_transformer_percent": 1
  },
  "sample": {
    "sampler": "flowmatch",
    "sample_every": 250,
    "width": 1024,
    "height": 1024,
    "samples": [
      {
        "prompt": "Realistic style: relocate the Coca-Cola bottle from Figure 2 to the edge of the fruit bowl in Figure 1. Ensure the bottle's size is proportionate, its placement and angle are natural, and its lighting and reflections are accurately rendered. Maintain the clarity and original colour of the Coca-Cola bottle's typography as shown in Figure 2. Perfect detail, ultra-high detail, super high quality, 8K.",
        "ctrl_img_1": "/root/ai-toolkit/data/images/e095aed9-6dd1-41d9-aac5-811c469b9a79.png",
        "ctrl_img_2": "/root/ai-toolkit/data/images/118dc20c-6bc9-4b53-9e49-142839274352.png"
      },
      {
        "prompt": "Realistic style: relocate the cosmetics from Figure 2 onto the desk in Figure 1. Ensure the cosmetics are appropriately sized, positioned, and angled, with accurate rendering of light and shadow effects. Preserve the clarity and original colour scheme of the cosmetic product labels in Figure 2. Exquisite detailing, ultra-high definition, premium quality, 8K resolution.",
        "ctrl_img_1": "/root/ai-toolkit/data/images/d54f5d00-97e8-4f5c-bdd3-74906e72f8e5.png",
        "ctrl_img_2": "/root/ai-toolkit/data/images/bb86abba-7521-458a-aac8-7ab04ef29029.png"
      },
      {
        "prompt": "Realistic style: relocate the perfume from Figure 2 onto the stone in Figure 1. Ensure the perfume's size is proportionate, its placement and angle are appropriate, and its light and shadow rendering is accurate. Retain the clarity of the perfume's typography in Figure 2, maintaining both font style and colour unchanged. Perfect detail, ultra-high detail, super high quality, 8K.",
        "ctrl_img_1": "/root/ai-toolkit/data/images/721d08a6-22d1-4b63-b03a-aba967efd126.png",
        "ctrl_img_2": "/root/ai-toolkit/data/images/04301310-5465-495d-b6b9-84a52b57ddd3.png"
      },
      {
        "prompt": "Realistic style: relocate the three cosmetics from Figure 2 to either side of the shampoo in Figure 1. Ensure the cosmetics are appropriately sized, positioned, and angled, with accurate rendering of light and shadow effects. Retain the clarity and colour scheme of the cosmetic product labels from Figure 2. Perfect detailing, ultra-high definition, premium quality, 8K resolution.",
        "ctrl_img_1": "/root/ai-toolkit/data/images/0f79c79b-2ff0-4a7b-8cd5-2ca6e27c6bc4.png",
        "ctrl_img_2": "/root/ai-toolkit/data/images/5c2ed405-b5a4-4413-8256-49eb27c273e1.png"
      }
    ],
    "neg": "",
    "seed": 42,
    "walk_seed": true,
    "guidance_scale": 4,
    "sample_steps": 25,
    "num_frames": 1,
    "fps": 1
  }
}
```

```
Using SQLite database at /root/ai-toolkit/aitk_db.db
Job ID: "dcf8d1fe-a49c-4163-a949-9ad5b5094d27"
#############################################
Running job: my_first_lora_v1
#############################################
Running 1 process
Loading Qwen Image model
Loading transformer
Loading checkpoint shards: 100%|##########| 5/5 [00:00<00:00, 25.06it/s]
Quantizing Transformer
Grabbing lora from the hub: ostris/accuracy_recovery_adapters/qwen_image_edit_2509_torchao_uint3.safetensors
create LoRA network. base dim (rank): 16, alpha: 16
neuron dropout: p=None, rank dropout: p=None, module dropout: p=None
create LoRA for Text Encoder: 0 modules.
create LoRA for U-Net: 846 modules.
enable LoRA for U-Net
Missing keys: []
Attaching quantization: 100%|##########| 846/846 [00:42<00:00, 20.02it/s]
quantizing additional layers
Moving transformer to CPU
Text Encoder
Loading checkpoint shards: 100%|##########| 4/4 [00:00<00:00, 26.66it/s]
Quantizing Text Encoder
Loading VAE
Making pipe
Preparing Model
Model Loaded
create LoRA network. base dim (rank): 32, alpha: 32
neuron dropout: p=None, rank dropout: p=None, module dropout: p=None
apply LoRA to Conv2d with kernel size (3,3). dim (rank): 16, alpha: 16
create LoRA for Text Encoder: 0 modules.
create LoRA for U-Net: 840 modules.
enable LoRA for U-Net
Dataset: /root/ai-toolkit/datasets/target
Preprocessing image dimensions: 100%|##########| 4/4 [00:00<00:00, 10186.53it/s]
Found 4 images
Bucket sizes for /root/ai-toolkit/datasets/target:
1024x1024: 4 files
1 buckets made
Generating baseline samples before training
my_first_lora_v1:   0%|          | 0/1000 [00:00<?, ?it/s]
Error running job: Missing control images for QwenImageEditPlusModel
========================================
Result:
 - 0 completed jobs
 - 1 failure
========================================
Traceback (most recent call last):
  File "/root/ai-toolkit/run.py", line 120, in <module>
    main()
  File "/root/ai-toolkit/run.py", line 108, in main
    raise e
  File "/root/ai-toolkit/run.py", line 96, in main
    job.run()
  File "/root/ai-toolkit/jobs/ExtensionJob.py", line 22, in run
    process.run()
  File "/root/ai-toolkit/jobs/process/BaseSDTrainProcess.py", line 2154, in run
    loss_dict = self.hook_train_loop(batch_list)
  File "/root/ai-toolkit/extensions_built_in/sd_trainer/SDTrainer.py", line 2024, in hook_train_loop
    loss = self.train_single_accumulation(batch)
  File "/root/ai-toolkit/extensions_built_in/sd_trainer/SDTrainer.py", line 1550, in train_single_accumulation
    conditional_embeds = self.sd.encode_prompt(
  File "/root/ai-toolkit/toolkit/models/base_model.py", line 1069, in encode_prompt
    return self.get_prompt_embeds(prompt, control_images=control_images)
  File "/root/ai-toolkit/extensions_built_in/diffusion_models/qwen_image/qwen_image_edit_plus.py", line 170, in get_prompt_embeds
    raise ValueError("Missing control images for QwenImageEditPlusModel")
ValueError: Missing control images for QwenImageEditPlusModel
my_first_lora_v1:   0%|          | 0/1000 [00:01<?, ?it/s]
```
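For context, the guard that raises (per the traceback: `get_prompt_embeds` in `extensions_built_in/diffusion_models/qwen_image/qwen_image_edit_plus.py`, line 170) presumably amounts to something like the sketch below. Only the `raise` line is visible in the traceback; the condition around it is an assumption:

```python
# Minimal sketch of the failing guard, reconstructed from the traceback above.
# Only the ValueError is confirmed; the exact condition is an assumption.
def get_prompt_embeds(self, prompt, control_images=None):
    # Qwen-Image-Edit-2509 conditions prompt encoding on the reference
    # (control) images, so text embeddings cannot be built without them.
    if control_images is None or len(control_images) == 0:
        raise ValueError("Missing control images for QwenImageEditPlusModel")
    # ... encode the prompt together with the control images ...
```

In other words, the first training batch reaches `encode_prompt` with no control images, even though the dataset config sets both `control_path_1` and `control_path_2`.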

yalsp · Oct 21 '25 07:10

Same issue:

job: "extension" config: name: "my_first_lora_v1" process:

  • type: "diffusion_trainer" training_folder: "/workspace/GOKCEM/qwen-image/ai-toolkit/output" sqlite_db_path: "./aitk_db.db" device: "cuda" trigger_word: null performance_log_every: 10 network: type: "lora" linear: 32 linear_alpha: 32 conv: 16 conv_alpha: 16 lokr_full_rank: true lokr_factor: -1 network_kwargs: ignore_if_contains: [] save: dtype: "bf16" save_every: 250 max_step_saves_to_keep: 4 save_format: "diffusers" push_to_hub: false datasets:
  • folder_path: "/workspace/GOKCEM/qwen-image/ai-toolkit/datasets/target" mask_path: null mask_min_value: 0.1 default_caption: "" caption_ext: "txt" caption_dropout_rate: 0.05 cache_latents_to_disk: false is_reg: false network_weight: 1 resolution:
  • 512
  • 768
  • 1024 controls: [] shrink_video_to_frames: true num_frames: 1 do_i2v: true flip_x: false flip_y: false control_path_1: "/workspace/GOKCEM/qwen-image/ai-toolkit/datasets/control_img" control_path_2: "/workspace/GOKCEM/qwen-image/ai-toolkit/datasets/target" control_path_3: null train: batch_size: 1 bypass_guidance_embedding: false steps: 3000 gradient_accumulation: 1 train_unet: true train_text_encoder: false gradient_checkpointing: true noise_scheduler: "flowmatch" optimizer: "adamw8bit" timestep_type: "weighted" content_or_style: "balanced" optimizer_params: weight_decay: 0.0001 unload_text_encoder: false cache_text_embeddings: false lr: 0.0001 ema_config: use_ema: false ema_decay: 0.99 skip_first_sample: false force_first_sample: false disable_sampling: true dtype: "bf16" diff_output_preservation: false diff_output_preservation_multiplier: 1 diff_output_preservation_class: "person" switch_boundary_every: 1 loss_type: "mse" model: name_or_path: "Qwen/Qwen-Image-Edit-2509" quantize: false qtype: "qfloat8" quantize_te: false qtype_te: "qfloat8" arch: "qwen_image_edit_plus" low_vram: true model_kwargs: match_target_res: false layer_offloading: false layer_offloading_text_encoder_percent: 1 layer_offloading_transformer_percent: 1 sample: sampler: "flowmatch" sample_every: 250 width: 1024 height: 1024 samples: [] neg: "" seed: 42 walk_seed: true guidance_scale: 4 sample_steps: 25 num_frames: 1 fps: 1 meta: name: "[name]" version: "1.0"

Result:

- 0 completed jobs
- 1 failure

```
Traceback (most recent call last):
  File "/workspace/GOKCEM/qwen-image/ai-toolkit/run.py", line 120, in <module>
    main()
  File "/workspace/GOKCEM/qwen-image/ai-toolkit/run.py", line 108, in main
    raise e
  File "/workspace/GOKCEM/qwen-image/ai-toolkit/run.py", line 96, in main
    job.run()
  File "/workspace/GOKCEM/qwen-image/ai-toolkit/jobs/ExtensionJob.py", line 22, in run
    process.run()
  File "/workspace/GOKCEM/qwen-image/ai-toolkit/jobs/process/BaseSDTrainProcess.py", line 2154, in run
    loss_dict = self.hook_train_loop(batch_list)
  File "/workspace/GOKCEM/qwen-image/ai-toolkit/extensions_built_in/sd_trainer/SDTrainer.py", line 2024, in hook_train_loop
    loss = self.train_single_accumulation(batch)
  File "/workspace/GOKCEM/qwen-image/ai-toolkit/extensions_built_in/sd_trainer/SDTrainer.py", line 1550, in train_single_accumulation
    conditional_embeds = self.sd.encode_prompt(
  File "/workspace/GOKCEM/qwen-image/ai-toolkit/toolkit/models/base_model.py", line 1069, in encode_prompt
    return self.get_prompt_embeds(prompt, control_images=control_images)
  File "/workspace/GOKCEM/qwen-image/ai-toolkit/extensions_built_in/diffusion_models/qwen_image/qwen_image_edit_plus.py", line 170, in get_prompt_embeds
    raise ValueError("Missing control images for QwenImageEditPlusModel")
ValueError: Missing control images for QwenImageEditPlusModel
```
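One thing worth ruling out with configs like the two above: assuming ai-toolkit pairs each control image with its target image by filename (an assumption; the pairing rule is not shown anywhere in this thread), a mismatch would leave batches without control images. A quick hypothetical sanity check over the second config's folders might look like:

```python
# Hypothetical check: confirm every target image has a same-named counterpart
# in each control folder. The filename-pairing rule is an assumption, not
# something confirmed from the ai-toolkit source in this thread.
from pathlib import Path

IMAGE_EXTS = {".png", ".jpg", ".jpeg", ".webp"}

target = Path("/workspace/GOKCEM/qwen-image/ai-toolkit/datasets/target")
controls = [
    Path("/workspace/GOKCEM/qwen-image/ai-toolkit/datasets/control_img"),
    Path("/workspace/GOKCEM/qwen-image/ai-toolkit/datasets/target"),
]

for img in sorted(target.iterdir()):
    if img.suffix.lower() not in IMAGE_EXTS:
        continue
    for ctrl in controls:
        # Match on stem so e.g. target 001.png can pair with control 001.jpg.
        if not any(ctrl.glob(img.stem + ".*")):
            print(f"No control match for {img.name} in {ctrl}")
```

If this prints anything, some target images would have no control partner under the assumed pairing rule, which is one plausible way `encode_prompt` ends up with `control_images=None`.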

gokcemkamisli · Oct 21 '25 09:10

+1

NBSTpeterhill · Oct 22 '25 04:10

+1

jackshepy · Oct 22 '25 07:10

I solved this problem by following the comment in https://github.com/ostris/ai-toolkit/issues/472.

yangguoquan001 · Oct 24 '25 07:10