ai-toolkit
ai-toolkit copied to clipboard
Image generation fails - default settings (apart from offloading and LoRA size set to 16)
This is for bugs only
Did you already ask in the discord?
No
You verified that this is a bug and not a feature request or question by asking in the discord?
Yes
Describe the bug
Create a flux2 job, set transformer offloading to 30%, keep tokeniser offloading at 100%, and start training. After the buckets are made, it fails:
30 buckets made
Generating baseline samples before training
Error running job: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument mat2 in method wrapper_CUDA_mm)
========================================
Result:
- 0 completed jobs
- 1 failure
========================================
Traceback (most recent call last):
Traceback (most recent call last):
File "D:\stability\Data\Packages\AI-Toolkit\run.py", line 120, in <module>
File "D:\stability\Data\Packages\AI-Toolkit\run.py", line 120, in <module>
main()main()
File "D:\stability\Data\Packages\AI-Toolkit\run.py", line 108, in main
File "D:\stability\Data\Packages\AI-Toolkit\run.py", line 108, in main
raise eraise e
File "D:\stability\Data\Packages\AI-Toolkit\run.py", line 96, in main
File "D:\stability\Data\Packages\AI-Toolkit\run.py", line 96, in main
job.run()job.run()
File "D:\stability\Data\Packages\AI-Toolkit\jobs\ExtensionJob.py", line 22, in run
File "D:\stability\Data\Packages\AI-Toolkit\jobs\ExtensionJob.py", line 22, in run
process.run()process.run()
File "D:\stability\Data\Packages\AI-Toolkit\jobs\process\BaseSDTrainProcess.py", line 2014, in run
File "D:\stability\Data\Packages\AI-Toolkit\jobs\process\BaseSDTrainProcess.py", line 2014, in run
self.sample(self.step_num)self.sample(self.step_num)
File "D:\stability\Data\Packages\AI-Toolkit\extensions_built_in\sd_trainer\DiffusionTrainer.py", line 306, in sample
File "D:\stability\Data\Packages\AI-Toolkit\extensions_built_in\sd_trainer\DiffusionTrainer.py", line 306, in sample
super().sample(step, is_first)super().sample(step, is_first)
File "D:\stability\Data\Packages\AI-Toolkit\jobs\process\BaseSDTrainProcess.py", line 368, in sample
File "D:\stability\Data\Packages\AI-Toolkit\jobs\process\BaseSDTrainProcess.py", line 368, in sample
self.sd.generate_images(gen_img_config_list, sampler=sample_config.sampler)self.sd.generate_images(gen_img_config_list, sampler=sample_config.sampler)
File "D:\stability\Data\Packages\AI-Toolkit\venv\Lib\site-packages\torch\utils\_contextlib.py", line 116, in decorate_context
File "D:\stability\Data\Packages\AI-Toolkit\venv\Lib\site-packages\torch\utils\_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\stability\Data\Packages\AI-Toolkit\toolkit\models\base_model.py", line 645, in generate_images
File "D:\stability\Data\Packages\AI-Toolkit\toolkit\models\base_model.py", line 645, in generate_images
img = self.generate_single_image(img = self.generate_single_image(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\stability\Data\Packages\AI-Toolkit\extensions_built_in\diffusion_models\flux2\flux2_model.py", line 284, in generate_single_image
File "D:\stability\Data\Packages\AI-Toolkit\extensions_built_in\diffusion_models\flux2\flux2_model.py", line 284, in generate_single_image
img = pipeline(img = pipeline(
^^^^^^^^^^^^^^^^^^
File "D:\stability\Data\Packages\AI-Toolkit\venv\Lib\site-packages\torch\utils\_contextlib.py", line 116, in decorate_context
File "D:\stability\Data\Packages\AI-Toolkit\venv\Lib\site-packages\torch\utils\_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\stability\Data\Packages\AI-Toolkit\extensions_built_in\diffusion_models\flux2\src\pipeline.py", line 323, in __call__
File "D:\stability\Data\Packages\AI-Toolkit\extensions_built_in\diffusion_models\flux2\src\pipeline.py", line 323, in __call__
pred = self.transformer(pred = self.transformer(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\stability\Data\Packages\AI-Toolkit\venv\Lib\site-packages\torch\nn\modules\module.py", line 1751, in _wrapped_call_impl
File "D:\stability\Data\Packages\AI-Toolkit\venv\Lib\site-packages\torch\nn\modules\module.py", line 1751, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\stability\Data\Packages\AI-Toolkit\venv\Lib\site-packages\torch\nn\modules\module.py", line 1762, in _call_impl
File "D:\stability\Data\Packages\AI-Toolkit\venv\Lib\site-packages\torch\nn\modules\module.py", line 1762, in _call_impl
return forward_call(*args, **kwargs)return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\stability\Data\Packages\AI-Toolkit\extensions_built_in\diffusion_models\flux2\src\model.py", line 124, in forward
File "D:\stability\Data\Packages\AI-Toolkit\extensions_built_in\diffusion_models\flux2\src\model.py", line 124, in forward
vec = self.time_in(timestep_emb)vec = self.time_in(timestep_emb)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\stability\Data\Packages\AI-Toolkit\venv\Lib\site-packages\torch\nn\modules\module.py", line 1751, in _wrapped_call_impl
File "D:\stability\Data\Packages\AI-Toolkit\venv\Lib\site-packages\torch\nn\modules\module.py", line 1751, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\stability\Data\Packages\AI-Toolkit\venv\Lib\site-packages\torch\nn\modules\module.py", line 1762, in _call_impl
File "D:\stability\Data\Packages\AI-Toolkit\venv\Lib\site-packages\torch\nn\modules\module.py", line 1762, in _call_impl
return forward_call(*args, **kwargs)return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\stability\Data\Packages\AI-Toolkit\extensions_built_in\diffusion_models\flux2\src\model.py", line 422, in forward
File "D:\stability\Data\Packages\AI-Toolkit\extensions_built_in\diffusion_models\flux2\src\model.py", line 422, in forward
return self.out_layer(self.silu(self.in_layer(x)))return self.out_layer(self.silu(self.in_layer(x)))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\stability\Data\Packages\AI-Toolkit\venv\Lib\site-packages\torch\nn\modules\module.py", line 1751, in _wrapped_call_impl
File "D:\stability\Data\Packages\AI-Toolkit\venv\Lib\site-packages\torch\nn\modules\module.py", line 1751, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\stability\Data\Packages\AI-Toolkit\venv\Lib\site-packages\torch\nn\modules\module.py", line 1762, in _call_impl
File "D:\stability\Data\Packages\AI-Toolkit\venv\Lib\site-packages\torch\nn\modules\module.py", line 1762, in _call_impl
return forward_call(*args, **kwargs)return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\stability\Data\Packages\AI-Toolkit\toolkit\memory_management\manager_modules.py", line 583, in _mm_forward
File "D:\stability\Data\Packages\AI-Toolkit\toolkit\memory_management\manager_modules.py", line 583, in _mm_forward
return _BouncingLinearFn.apply(x, weight_cpu, bias_cpu, device)return _BouncingLinearFn.apply(x, weight_cpu, bias_cpu, device)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\stability\Data\Packages\AI-Toolkit\venv\Lib\site-packages\torch\autograd\function.py", line 575, in apply
File "D:\stability\Data\Packages\AI-Toolkit\venv\Lib\site-packages\torch\autograd\function.py", line 575, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]return super().apply(*args, **kwargs) # type: ignore[misc]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\stability\Data\Packages\AI-Toolkit\toolkit\memory_management\manager_modules.py", line 186, in forward
File "D:\stability\Data\Packages\AI-Toolkit\toolkit\memory_management\manager_modules.py", line 186, in forward
out = F.linear(x, w_bufs[idx], b_bufs[idx])out = F.linear(x, w_bufs[idx], b_bufs[idx])
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeErrorRuntimeError: : Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument mat2 in method wrapper_CUDA_mm)Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument mat2 in method wrapper_CUDA_mm)