Missing control images for QwenImageEditPlusModel
Running 1 job { "type": "diffusion_trainer", "training_folder": "/root/ai-toolkit/output", "sqlite_db_path": "/root/ai-toolkit/aitk_db.db", "device": "cuda", "trigger_word": null, "performance_log_every": 10, "network": { "type": "lora", "linear": 32, "linear_alpha": 32, "conv": 16, "conv_alpha": 16, "lokr_full_rank": true, "lokr_factor": -1, "network_kwargs": { "ignore_if_contains": [] } }, "save": { "dtype": "bf16", "save_every": 250, "max_step_saves_to_keep": 4, "save_format": "diffusers", "push_to_hub": false }, "datasets": [ { "folder_path": "/root/ai-toolkit/datasets/target", "mask_path": null, "mask_min_value": 0.1, "default_caption": "", "caption_ext": "txt", "caption_dropout_rate": 0.05, "cache_latents_to_disk": false, "is_reg": false, "network_weight": 1, "resolution": [ 1024 ], "controls": [], "shrink_video_to_frames": true, "num_frames": 1, "do_i2v": true, "flip_x": false, "flip_y": false, "control_path_1": "/root/ai-toolkit/datasets/control_1", "control_path_2": "/root/ai-toolkit/datasets/control_2" } ], "train": { "batch_size": 1, "bypass_guidance_embedding": false, "steps": 1000, "gradient_accumulation": 1, "train_unet": true, "train_text_encoder": false, "gradient_checkpointing": true, "noise_scheduler": "flowmatch", "optimizer": "adamw8bit", "timestep_type": "weighted", "content_or_style": "balanced", "optimizer_params": { "weight_decay": 0.0001 }, "unload_text_encoder": false, "cache_text_embeddings": false, "lr": 0.0002, "ema_config": { "use_ema": false, "ema_decay": 0.99 }, "skip_first_sample": false, "force_first_sample": false, "disable_sampling": false, "dtype": "bf16", "diff_output_preservation": false, "diff_output_preservation_multiplier": 1, "diff_output_preservation_class": "person", "switch_boundary_every": 1, "loss_type": "mse" }, "model": { "name_or_path": "Qwen/Qwen-Image-Edit-2509", "quantize": true, "qtype": "uint3|ostris/accuracy_recovery_adapters/qwen_image_edit_2509_torchao_uint3.safetensors", "quantize_te": true, "qtype_te": 
"qfloat8", "arch": "qwen_image_edit_plus", "low_vram": true, "model_kwargs": { "match_target_res": false }, "layer_offloading": false, "layer_offloading_text_encoder_percent": 1, "layer_offloading_transformer_percent": 1 }, "sample": { "sampler": "flowmatch", "sample_every": 250, "width": 1024, "height": 1024, "samples": [ { "prompt": "Realistic style: relocate the Coca-Cola bottle from Figure 2 to the edge of the fruit bowl in Figure 1. Ensure the bottle's size is proportionate, its placement and angle are natural, and its lighting and reflections are accurately rendered. Maintain the clarity and original colour of the Coca-Cola bottle's typography as shown in Figure 2. Perfect detail, ultra-high detail, super high quality, 8K.", "ctrl_img_1": "/root/ai-toolkit/data/images/e095aed9-6dd1-41d9-aac5-811c469b9a79.png", "ctrl_img_2": "/root/ai-toolkit/data/images/118dc20c-6bc9-4b53-9e49-142839274352.png" }, { "prompt": "Realistic style: relocate the cosmetics from Figure 2 onto the desk in Figure 1. Ensure the cosmetics are appropriately sized, positioned, and angled, with accurate rendering of light and shadow effects. Preserve the clarity and original colour scheme of the cosmetic product labels in Figure 2. Exquisite detailing, ultra-high definition, premium quality, 8K resolution.", "ctrl_img_1": "/root/ai-toolkit/data/images/d54f5d00-97e8-4f5c-bdd3-74906e72f8e5.png", "ctrl_img_2": "/root/ai-toolkit/data/images/bb86abba-7521-458a-aac8-7ab04ef29029.png" }, { "prompt": "Realistic style: relocate the perfume from Figure 2 onto the stone in Figure 1. Ensure the perfume's size is proportionate, its placement and angle are appropriate, and its light and shadow rendering is accurate. Retain the clarity of the perfume's typography in Figure 2, maintaining both font style and colour unchanged. 
Perfect detail, ultra-high detail, super high quality, 8K.", "ctrl_img_1": "/root/ai-toolkit/data/images/721d08a6-22d1-4b63-b03a-aba967efd126.png", "ctrl_img_2": "/root/ai-toolkit/data/images/04301310-5465-495d-b6b9-84a52b57ddd3.png" }, { "prompt": "Realistic style: relocate the three cosmetics from Figure 2 to either side of the shampoo in Figure 1. Ensure the cosmetics are appropriately sized, positioned, and angled, with accurate rendering of light and shadow effects. Retain the clarity and colour scheme of the cosmetic product labels from Figure 2. Perfect detailing, ultra-high definition, premium quality, 8K resolution.", "ctrl_img_1": "/root/ai-toolkit/data/images/0f79c79b-2ff0-4a7b-8cd5-2ca6e27c6bc4.png", "ctrl_img_2": "/root/ai-toolkit/data/images/5c2ed405-b5a4-4413-8256-49eb27c273e1.png" } ], "neg": "", "seed": 42, "walk_seed": true, "guidance_scale": 4, "sample_steps": 25, "num_frames": 1, "fps": 1 } } Using SQLite database at /root/ai-toolkit/aitk_db.db Job ID: "dcf8d1fe-a49c-4163-a949-9ad5b5094d27" #############################################
Running job: my_first_lora_v1
############################################# Running 1 process Loading Qwen Image model Loading transformer Loading checkpoint shards: 100%|##########| 5/5 [00:00<00:00, 25.06it/s] Quantizing Transformer Grabbing lora from the hub: ostris/accuracy_recovery_adapters/qwen_image_edit_2509_torchao_uint3.safetensors create LoRA network. base dim (rank): 16, alpha: 16 neuron dropout: p=None, rank dropout: p=None, module dropout: p=None create LoRA for Text Encoder: 0 modules. create LoRA for U-Net: 846 modules. enable LoRA for U-Net Missing keys: [] Attaching quantization: 100%|##########| 846/846 [00:42<00:00, 20.02it/s]
- quantizing additional layers Moving transformer to CPU Text Encoder Loading checkpoint shards: 100%|##########| 4/4 [00:00<00:00, 26.66it/s] Quantizing Text Encoder Loading VAE Making pipe Preparing Model Model Loaded create LoRA network. base dim (rank): 32, alpha: 32 neuron dropout: p=None, rank dropout: p=None, module dropout: p=None apply LoRA to Conv2d with kernel size (3,3). dim (rank): 16, alpha: 16 create LoRA for Text Encoder: 0 modules. create LoRA for U-Net: 840 modules. enable LoRA for U-Net Dataset: /root/ai-toolkit/datasets/target
- Preprocessing image dimensions 100%|##########| 4/4 [00:00<00:00, 10186.53it/s]
- Found 4 images Bucket sizes for /root/ai-toolkit/datasets/target: 1024x1024: 4 files 1 buckets made Generating baseline samples before training my_first_lora_v1: 0%| | 0/1000 [00:00<?, ?it/s]Error running job: Missing control images for QwenImageEditPlusModel ======================================== Result:
- 0 completed jobs
- 1 failure
========================================
Traceback (most recent call last):
File "/root/ai-toolkit/run.py", line 120, in
main() File "/root/ai-toolkit/run.py", line 108, in main raise e File "/root/ai-toolkit/run.py", line 96, in main job.run() File "/root/ai-toolkit/jobs/ExtensionJob.py", line 22, in run process.run() File "/root/ai-toolkit/jobs/process/BaseSDTrainProcess.py", line 2154, in run loss_dict = self.hook_train_loop(batch_list) File "/root/ai-toolkit/extensions_built_in/sd_trainer/SDTrainer.py", line 2024, in hook_train_loop loss = self.train_single_accumulation(batch) File "/root/ai-toolkit/extensions_built_in/sd_trainer/SDTrainer.py", line 1550, in train_single_accumulation conditional_embeds = self.sd.encode_prompt( File "/root/ai-toolkit/toolkit/models/base_model.py", line 1069, in encode_prompt return self.get_prompt_embeds(prompt, control_images=control_images) File "/root/ai-toolkit/extensions_built_in/diffusion_models/qwen_image/qwen_image_edit_plus.py", line 170, in get_prompt_embeds raise ValueError("Missing control images for QwenImageEditPlusModel") ValueError: Missing control images for QwenImageEditPlusModel Traceback (most recent call last): File "/root/ai-toolkit/run.py", line 120, in main() File "/root/ai-toolkit/run.py", line 108, in main raise e File "/root/ai-toolkit/run.py", line 96, in main job.run() File "/root/ai-toolkit/jobs/ExtensionJob.py", line 22, in run process.run() File "/root/ai-toolkit/jobs/process/BaseSDTrainProcess.py", line 2154, in run loss_dict = self.hook_train_loop(batch_list) File "/root/ai-toolkit/extensions_built_in/sd_trainer/SDTrainer.py", line 2024, in hook_train_loop loss = self.train_single_accumulation(batch) File "/root/ai-toolkit/extensions_built_in/sd_trainer/SDTrainer.py", line 1550, in train_single_accumulation conditional_embeds = self.sd.encode_prompt( File "/root/ai-toolkit/toolkit/models/base_model.py", line 1069, in encode_prompt return self.get_prompt_embeds(prompt, control_images=control_images) File "/root/ai-toolkit/extensions_built_in/diffusion_models/qwen_image/qwen_image_edit_plus.py", line 170, in 
get_prompt_embeds raise ValueError("Missing control images for QwenImageEditPlusModel") ValueError: Missing control images for QwenImageEditPlusModel my_first_lora_v1: 0%| | 0/1000 [00:01<?, ?it/s]
Same issue:
job: "extension" config: name: "my_first_lora_v1" process:
- type: "diffusion_trainer" training_folder: "/workspace/GOKCEM/qwen-image/ai-toolkit/output" sqlite_db_path: "./aitk_db.db" device: "cuda" trigger_word: null performance_log_every: 10 network: type: "lora" linear: 32 linear_alpha: 32 conv: 16 conv_alpha: 16 lokr_full_rank: true lokr_factor: -1 network_kwargs: ignore_if_contains: [] save: dtype: "bf16" save_every: 250 max_step_saves_to_keep: 4 save_format: "diffusers" push_to_hub: false datasets:
- folder_path: "/workspace/GOKCEM/qwen-image/ai-toolkit/datasets/target" mask_path: null mask_min_value: 0.1 default_caption: "" caption_ext: "txt" caption_dropout_rate: 0.05 cache_latents_to_disk: false is_reg: false network_weight: 1 resolution:
- 512
- 768
- 1024 controls: [] shrink_video_to_frames: true num_frames: 1 do_i2v: true flip_x: false flip_y: false control_path_1: "/workspace/GOKCEM/qwen-image/ai-toolkit/datasets/control_img" control_path_2: "/workspace/GOKCEM/qwen-image/ai-toolkit/datasets/target" control_path_3: null train: batch_size: 1 bypass_guidance_embedding: false steps: 3000 gradient_accumulation: 1 train_unet: true train_text_encoder: false gradient_checkpointing: true noise_scheduler: "flowmatch" optimizer: "adamw8bit" timestep_type: "weighted" content_or_style: "balanced" optimizer_params: weight_decay: 0.0001 unload_text_encoder: false cache_text_embeddings: false lr: 0.0001 ema_config: use_ema: false ema_decay: 0.99 skip_first_sample: false force_first_sample: false disable_sampling: true dtype: "bf16" diff_output_preservation: false diff_output_preservation_multiplier: 1 diff_output_preservation_class: "person" switch_boundary_every: 1 loss_type: "mse" model: name_or_path: "Qwen/Qwen-Image-Edit-2509" quantize: false qtype: "qfloat8" quantize_te: false qtype_te: "qfloat8" arch: "qwen_image_edit_plus" low_vram: true model_kwargs: match_target_res: false layer_offloading: false layer_offloading_text_encoder_percent: 1 layer_offloading_transformer_percent: 1 sample: sampler: "flowmatch" sample_every: 250 width: 1024 height: 1024 samples: [] neg: "" seed: 42 walk_seed: true guidance_scale: 4 sample_steps: 25 num_frames: 1 fps: 1 meta: name: "[name]" version: "1.0"
Result:
0 completed jobs 1 failure
Traceback (most recent call last): File "/workspace/GOKCEM/qwen-image/ai-toolkit/run.py", line 120, in main() File "/workspace/GOKCEM/qwen-image/ai-toolkit/run.py", line 108, in main raise e File "/workspace/GOKCEM/qwen-image/ai-toolkit/run.py", line 96, in main job.run() File "/workspace/GOKCEM/qwen-image/ai-toolkit/jobs/ExtensionJob.py", line 22, in run process.run() File "/workspace/GOKCEM/qwen-image/ai-toolkit/jobs/process/BaseSDTrainProcess.py", line 2154, in run loss_dict = self.hook_train_loop(batch_list) File "/workspace/GOKCEM/qwen-image/ai-toolkit/extensions_built_in/sd_trainer/SDTrainer.py", line 2024, in hook_train_loop loss = self.train_single_accumulation(batch) File "/workspace/GOKCEM/qwen-image/ai-toolkit/extensions_built_in/sd_trainer/SDTrainer.py", line 1550, in train_single_accumulation conditional_embeds = self.sd.encode_prompt( File "/workspace/GOKCEM/qwen-image/ai-toolkit/toolkit/models/base_model.py", line 1069, in encode_prompt return self.get_prompt_embeds(prompt, control_images=control_images) File "/workspace/GOKCEM/qwen-image/ai-toolkit/extensions_built_in/diffusion_models/qwen_image/qwen_image_edit_plus.py", line 170, in get_prompt_embeds raise ValueError("Missing control images for QwenImageEditPlusModel") ValueError: Missing control images for QwenImageEditPlusModel Traceback (most recent call last): File "/workspace/GOKCEM/qwen-image/ai-toolkit/run.py", line 120, in main() File "/workspace/GOKCEM/qwen-image/ai-toolkit/run.py", line 108, in main raise e File "/workspace/GOKCEM/qwen-image/ai-toolkit/run.py", line 96, in main job.run() File "/workspace/GOKCEM/qwen-image/ai-toolkit/jobs/ExtensionJob.py", line 22, in run process.run() File "/workspace/GOKCEM/qwen-image/ai-toolkit/jobs/process/BaseSDTrainProcess.py", line 2154, in run loss_dict = self.hook_train_loop(batch_list) File "/workspace/GOKCEM/qwen-image/ai-toolkit/extensions_built_in/sd_trainer/SDTrainer.py", line 2024, in hook_train_loop loss = 
self.train_single_accumulation(batch) File "/workspace/GOKCEM/qwen-image/ai-toolkit/extensions_built_in/sd_trainer/SDTrainer.py", line 1550, in train_single_accumulation conditional_embeds = self.sd.encode_prompt( File "/workspace/GOKCEM/qwen-image/ai-toolkit/toolkit/models/base_model.py", line 1069, in encode_prompt return self.get_prompt_embeds(prompt, control_images=control_images) File "/workspace/GOKCEM/qwen-image/ai-toolkit/extensions_built_in/diffusion_models/qwen_image/qwen_image_edit_plus.py", line 170, in get_prompt_embeds raise ValueError("Missing control images for QwenImageEditPlusModel") ValueError: Missing control images for QwenImageEditPlusModel
+1
+1
I solved this problem by following the comment in this issue: https://github.com/ostris/ai-toolkit/issues/472