kohya_ss icon indicating copy to clipboard operation
kohya_ss copied to clipboard

raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd)

Open AmeliaJaneMurphy opened this issue 2 years ago • 13 comments

I keep running into this error:

raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd) subprocess.CalledProcessError: Command '['C:\Users\ameli\OneDrive\Desktop\kohya\kohya_ss\venv\Scripts\python.exe', 'train_network.py', '--pretrained_model_name_or_path=runwayml/stable-diffusion-v1-5', '--train_data_dir=C:\Users\ameli\Creative Cloud Files\orphans-512x512\amliorphans lora\image', '--resolution=512,512', '--output_dir=C:\Users\ameli\Creative Cloud Files\orphans-512x512\amliorphans lora\model', '--logging_dir=C:\Users\ameli\Creative Cloud Files\orphans-512x512\amliorphans lora\log', '--network_alpha=128', '--save_model_as=safetensors', '--network_module=networks.lora', '--text_encoder_lr=5e-5', '--unet_lr=0.0001', '--network_dim=128', '--output_name=amliorphans', '--lr_scheduler_num_cycles=1', '--learning_rate=0.0001', '--lr_scheduler=constant', '--train_batch_size=2', '--max_train_steps=1500', '--save_every_n_epochs=1', '--mixed_precision=bf16', '--save_precision=bf16', '--seed=1234', '--caption_extension=.txt', '--cache_latents', '--max_data_loader_n_workers=1', '--clip_skip=2', '--bucket_reso_steps=64', '--xformers', '--use_8bit_adam', '--bucket_no_upscale']' returned non-zero exit status 1.

AmeliaJaneMurphy avatar Feb 19 '23 18:02 AmeliaJaneMurphy

same error here: File "C:\AI\Kohya\kohya_ss\venv\lib\site-packages\accelerate\commands\launch.py", line 567, in simple_launcher raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd) subprocess.CalledProcessError: Command '['C:\AI\Kohya\kohya_ss\venv\Scripts\python.exe', 'train_network.py', '--pretrained_model_name_or_path=runwayml/stable-diffusion-v1-5', '--train_data_dir=C:/Users/ogn81/Downloads/Milka/image', '--resolution=512,512', '--output_dir=C:/Users/ogn81/Downloads/Milka/model', '--logging_dir=C:/Users/ogn81/Downloads/Milka/log', '--network_alpha=128', '--save_model_as=safetensors', '--network_module=networks.lora', '--text_encoder_lr=5e-5', '--unet_lr=0.0001', '--network_dim=128', '--output_name=BananaGuy', '--lr_scheduler_num_cycles=1', '--learning_rate=0.0001', '--lr_scheduler=constant', '--train_batch_size=1', '--max_train_steps=1505', '--save_every_n_epochs=1', '--mixed_precision=fp16', '--save_precision=fp16', '--seed=1234', '--caption_extension=.txt', '--cache_latents', '--max_data_loader_n_workers=1', '--clip_skip=2', '--bucket_reso_steps=64', '--mem_eff_attn', '--gradient_checkpointing', '--xformers', '--use_8bit_adam', '--bucket_no_upscale']' returned non-zero exit status 1.

DennLie avatar Feb 19 '23 22:02 DennLie

raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd) subprocess.CalledProcessError: Command '['G:\anaconda3\envs\base310\python.exe', 'train_network.py', '--enable_bucket', '--pretrained_model_name_or_path=runwayml/stable-diffusion-v1-5', '--train_data_dir=F:/python/ai painting/sd-webui启动器/train/script/tifa', '--resolution=512,512', '--output_dir=F:/python/ai painting/sd-webui启动器/train/script/model', '--logging_dir=F:/python/ai painting/sd-webui启动器/train/script/log', '--network_alpha=1', '--save_model_as=safetensors', '--network_module=networks.lora', '--text_encoder_lr=1.5e-5', '--unet_lr=1.5e-4', '--network_dim=128', '--output_name=tifa', '--lr_scheduler_num_cycles=5', '--learning_rate=0.0001', '--lr_scheduler=constant_with_warmup', '--lr_warmup_steps=14', '--train_batch_size=3', '--max_train_steps=280', '--save_every_n_epochs=5', '--mixed_precision=fp16', '--save_precision=fp16', '--seed=31337', '--caption_extension=.txt', '--cache_latents', '--clip_skip=2', '--bucket_reso_steps=64', '--shuffle_caption', '--xformers', '--use_8bit_adam', '--bucket_no_upscale']' returned non-zero exit status 1.

Meennow avatar Feb 20 '23 20:02 Meennow

same

mengen-li avatar Feb 21 '23 03:02 mengen-li

Same problem here...

Mirxku avatar Feb 24 '23 01:02 Mirxku

Can you all give more context what came before that error? You might not all be dealing with the same problem specifically.

SaltySkegg avatar Feb 24 '23 08:02 SaltySkegg

Folder 01: 18 steps max_train_steps = 9 stop_text_encoder_training = 0 lr_warmup_steps = 0 accelerate launch --num_cpu_threads_per_process=2 "train_network.py" --pretrained_model_name_or_path="D:/ai/anything-v3-2.safetensors" --train_data_dir="D:/ai/resource" --resolution=512,512 --output_dir="D:/ai/bbduck" --logging_dir="" --network_alpha="128" --save_model_as=safetensors --network_module=networks.lora --text_encoder_lr=5e-5 --unet_lr=0.0001 --network_dim=128 --output_name="bbduck" --lr_scheduler_num_cycles="1" --learning_rate="0.0001" --lr_scheduler="constant" --train_batch_size="2" --max_train_steps="9" --save_every_n_epochs="1" --mixed_precision="fp16" --save_precision="fp16" --seed="1234" --caption_extension=".txt" --cache_latents --optimizer_type="AdamW" --max_data_loader_n_workers="1" --clip_skip=2 --bucket_reso_steps=64 --xformers --use_8bit_adam --bucket_no_upscale prepare tokenizer Use DreamBooth method. prepare train images. found directory 1_ contains 18 image files 18 train images with repeating. loading image sizes. 
100%|█████████████████████████████████████████████████████████████████████████████████| 18/18 [00:00<00:00, 644.56it/s] prepare dataset prepare accelerator Traceback (most recent call last): File "D:\kohya_ss\train_network.py", line 507, in train(args) File "D:\kohya_ss\train_network.py", line 90, in train accelerator, unwrap_model = train_util.prepare_accelerator(args) File "D:\kohya_ss\library\train_util.py", line 1817, in prepare_accelerator accelerator = Accelerator(gradient_accumulation_steps=args.gradient_accumulation_steps, mixed_precision=args.mixed_precision, File "D:\kohya_ss\venv\lib\site-packages\accelerate\accelerator.py", line 355, in init raise ValueError(err.format(mode="fp16", requirement="a GPU")) ValueError: fp16 mixed precision requires a GPU Traceback (most recent call last): File "C:\Users\asus\AppData\Local\Programs\Python\Python310\lib\runpy.py", line 196, in _run_module_as_main return _run_code(code, main_globals, None, File "C:\Users\asus\AppData\Local\Programs\Python\Python310\lib\runpy.py", line 86, in run_code exec(code, run_globals) File "D:\kohya_ss\venv\Scripts\accelerate.exe_main.py", line 7, in File "D:\kohya_ss\venv\lib\site-packages\accelerate\commands\accelerate_cli.py", line 45, in main args.func(args) File "D:\kohya_ss\venv\lib\site-packages\accelerate\commands\launch.py", line 1104, in launch_command simple_launcher(args) File "D:\kohya_ss\venv\lib\site-packages\accelerate\commands\launch.py", line 567, in simple_launcher raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd) subprocess.CalledProcessError: Command '['D:\kohya_ss\venv\Scripts\python.exe', 'train_network.py', '--pretrained_model_name_or_path=D:/ai/anything-v3-2.safetensors', '--train_data_dir=D:/ai/resource', '--resolution=512,512', '--output_dir=D:/ai/bbduck', '--logging_dir=', '--network_alpha=128', '--save_model_as=safetensors', '--network_module=networks.lora', '--text_encoder_lr=5e-5', '--unet_lr=0.0001', '--network_dim=128', 
'--output_name=bbduck', '--lr_scheduler_num_cycles=1', '--learning_rate=0.0001', '--lr_scheduler=constant', '--train_batch_size=2', '--max_train_steps=9', '--save_every_n_epochs=1', '--mixed_precision=fp16', '--save_precision=fp16', '--seed=1234', '--caption_extension=.txt', '--cache_latents', '--optimizer_type=AdamW', '--max_data_loader_n_workers=1', '--clip_skip=2', '--bucket_reso_steps=64', '--xformers', '--use_8bit_adam', '--bucket_no_upscale']' returned non-zero exit status 1.

BBDUCK-0515 avatar Feb 24 '23 10:02 BBDUCK-0515

Getting the same problem.

forestferns avatar Feb 24 '23 10:02 forestferns

@BBDUCK-0515 It looks like your run breaks at "prepare accelerator." I get that error if my training settings are set higher than my GPU can handle. You could experiment with adjusting those; try enabling memory efficient attention and gradient checkpointing.

Also, your folder structure and names aren't set up right. Check out this video; it talks about the settings at this timestamp: https://youtu.be/7m522D01mh0?list=LL&t=1098

SaltySkegg avatar Feb 24 '23 20:02 SaltySkegg

@all 相同问题,还是用回kohya_ss-20.7.4。Same problem; I went back to kohya_ss-20.7.4 and it works perfectly!

jjandnn avatar Feb 26 '23 13:02 jjandnn

@BBDUCK-0515 It looks like your run breaks at "prepare accelerator." I get that error if my training settings are set higher than my GPU can handle. You could experiment with adjusting those; try enabling memory efficient attention and gradient checkpointing.

Also, your folder structure and names aren't set up right. Check out this video; it talks about the settings at this timestamp: https://youtu.be/7m522D01mh0?list=LL&t=1098

I tried the parameters shown in the video, but it did nothing. I'm sure these parameters worked in the previous version.

StavissssssT avatar Feb 26 '23 16:02 StavissssssT

same

h030162 avatar Mar 10 '23 08:03 h030162

Have also been getting this same error, full details attached

NB GPU is 6GB GTX980TI

Run01_Errors.txt

WinstonWoof avatar Mar 21 '23 16:03 WinstonWoof

Have also been getting this same error, full details attached

NB GPU is 6GB GTX980TI

Run01_Errors.txt

Fixed it, and it is currently training.

Switched the Optimizer on the Training tab from AdamW8bit (set by default) to AdamW.

WinstonWoof avatar Mar 21 '23 16:03 WinstonWoof

I have the exact same problem. Nothing worked so far. My GPU is NVIDIA GeForce GTX 1050 and it has only 2GB of vram. Could this be the problem?

Python6979 avatar Apr 07 '23 14:04 Python6979

same

greenpipig avatar Apr 08 '23 13:04 greenpipig

mean ar error (without repeats): 0.0031540462894783274 prepare accelerator Using accelerator 0.15.0 or above. loading model for process 0/1 load StableDiffusion checkpoint Traceback (most recent call last): File "C:\Users\Administrator\AppData\Local\Programs\Python\Python39\lib\runpy.py", line 197, in _run_module_as_main return _run_code(code, main_globals, None, File "C:\Users\Administrator\AppData\Local\Programs\Python\Python39\lib\runpy.py", line 87, in run_code exec(code, run_globals) File "F:\kohya_ss\venv\Scripts\accelerate.exe_main.py", line 7, in File "F:\kohya_ss\venv\lib\site-packages\accelerate\commands\accelerate_cli.py", line 45, in main args.func(args) File "F:\kohya_ss\venv\lib\site-packages\accelerate\commands\launch.py", line 1104, in launch_command simple_launcher(args) File "F:\kohya_ss\venv\lib\site-packages\accelerate\commands\launch.py", line 567, in simple_launcher raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd) subprocess.CalledProcessError: Command '['F:\kohya_ss\venv\Scripts\python.exe', 'train_network.py', '--enable_bucket', '--pretrained_model_name_or_path=F:/stable-diffusion-webui/models/Stable-diffusion/dreamshaper_4BakedVae.safetensors', '--train_data_dir=F:\picture', '--resolution=512,512', '--output_dir=F:/model_train', '--logging_dir=F:/logs', '--network_alpha=1', '--save_model_as=safetensors', '--network_module=networks.lora', '--text_encoder_lr=5e-5', '--unet_lr=0.0001', '--network_dim=8', '--output_name=last', '--lr_scheduler_num_cycles=1', '--learning_rate=0.0001', '--lr_scheduler=cosine', '--lr_warmup_steps=640', '--train_batch_size=1', '--max_train_steps=6400', '--save_every_n_epochs=1', '--mixed_precision=fp16', '--save_precision=fp16', '--cache_latents', '--optimizer_type=AdamW', '--max_data_loader_n_workers=0', '--bucket_reso_steps=64', '--xformers', '--bucket_no_upscale']' returned non-zero exit status 3221225477.

greenpipig avatar Apr 08 '23 13:04 greenpipig

Have also been getting this same error, full details attached NB GPU is 6GB GTX980TI Run01_Errors.txt

Fixed it, and it is currently training.

Switched the Optimizer on the Training tab from AdamW8bit (set by default) to AdamW.

Thank you. This fixed the problem for me.

shlomitgueta avatar Apr 10 '23 08:04 shlomitgueta

same

KenChan008 avatar Apr 22 '23 13:04 KenChan008

Have also been getting this same error, full details attached NB GPU is 6GB GTX980TI Run01_Errors.txt

Fixed it, and it is currently training.

Switched the Optimizer on the Training tab from AdamW8bit (set by default) to AdamW.

thanks. I changed to "AdamW" and enabled "Memory efficient attention", then it worked

agalloch21 avatar Apr 28 '23 17:04 agalloch21

@ALL 相同问题,还是用回kohya_ss-20.7.4。Same problem; I went back to kohya_ss-20.7.4 and it works perfectly!

How did you do that, please?

jeanpierre8 avatar May 12 '23 18:05 jeanpierre8

Solved! I used all the methods and they didn't work.

Two mistakes I made:

  1. I put in a drive that didn't have enough space
  2. After first installing kohya_ss, I forgot to answer the configuration questions at the end and closed PowerShell right away.

So I reinstalled the entire thing, avoided those two mistakes, and it worked! Hopefully this is helpful.

Lwtt7766 avatar May 25 '23 17:05 Lwtt7766

也遇到了同样的错误,附上完整的细节 NB GPU 为 6GB GTX980TI Run01_Errors.txt

修好了,目前正在训练。

将训练选项卡上的优化器从 AdamW8bit(默认设置)切换到 AdamW

thank you. Great.

Xiuzhenpeng avatar Jun 14 '23 15:06 Xiuzhenpeng

But if I want to use the AdamW8bit optimizer, how do I do that?

Leon371 avatar Jun 29 '23 15:06 Leon371

I used all the methods and they didn't work.

CUDA SETUP: Loading binary E:\kohya_ss\kohya_ss\venv\lib\site-packages\bitsandbytes\libbitsandbytes_cuda116.dll... use 8-bit AdamW optimizer | {} running training / 学習開始 num train images * repeats / 学習画像の数×繰り返し回数: 300 num reg images / 正則化画像の数: 0 num batches per epoch / 1epochのバッチ数: 300 num epochs / epoch数: 2 batch size per device / バッチサイズ: 1 gradient accumulation steps / 勾配を合計するステップ数 = 1 total optimization steps / 学習ステップ数: 600 steps: 0%| | 0/600 [00:00<?, ?it/s] epoch 1/2 Error no kernel image is available for execution on the device at line 167 in file D:\ai\tool\bitsandbytes\csrc\ops.cu ╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮ │ C:\Users\Administrator\AppData\Local\Programs\Python\Python310\lib\runpy.py:196 in │ │ _run_module_as_main │ │ │ │ 193 │ main_globals = sys.modules["main"].dict │ │ 194 │ if alter_argv: │ │ 195 │ │ sys.argv[0] = mod_spec.origin │ │ ❱ 196 │ return _run_code(code, main_globals, None, │ │ 197 │ │ │ │ │ "main", mod_spec) │ │ 198 │ │ 199 def run_module(mod_name, init_globals=None, │ │ │ │ C:\Users\Administrator\AppData\Local\Programs\Python\Python310\lib\runpy.py:86 in _run_code │ │ │ │ 83 │ │ │ │ │ loader = loader, │ │ 84 │ │ │ │ │ package = pkg_name, │ │ 85 │ │ │ │ │ spec = mod_spec) │ │ ❱ 86 │ exec(code, run_globals) │ │ 87 │ return run_globals │ │ 88 │ │ 89 def _run_module_code(code, init_globals=None, │ │ │ │ in :7 │ │ │ │ 4 from accelerate.commands.accelerate_cli import main │ │ 5 if name == 'main': │ │ 6 │ sys.argv[0] = re.sub(r'(-script.pyw|.exe)?$', '', sys.argv[0]) │ │ ❱ 7 │ sys.exit(main()) │ │ 8 │ │ │ │ E:\kohya_ss\kohya_ss\venv\lib\site-packages\accelerate\commands\accelerate_cli.py:45 in main │ │ │ │ 42 │ │ exit(1) │ │ 43 │ │ │ 44 │ # Run │ │ ❱ 45 │ args.func(args) │ │ 46 │ │ 47 │ │ 48 if name == "main": │ │ │ │ E:\kohya_ss\kohya_ss\venv\lib\site-packages\accelerate\commands\launch.py:918 in launch_command │ │ │ │ 915 │ elif defaults is not None and 
defaults.compute_environment == ComputeEnvironment.AMA │ │ 916 │ │ sagemaker_launcher(defaults, args) │ │ 917 │ else: │ │ ❱ 918 │ │ simple_launcher(args) │ │ 919 │ │ 920 │ │ 921 def main(): │ │ │ │ E:\kohya_ss\kohya_ss\venv\lib\site-packages\accelerate\commands\launch.py:580 in simple_launcher │ │ │ │ 577 │ process.wait() │ │ 578 │ if process.returncode != 0: │ │ 579 │ │ if not args.quiet: │ │ ❱ 580 │ │ │ raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd) │ │ 581 │ │ else: │ │ 582 │ │ │ sys.exit(1) │ │ 583 │ ╰──────────────────────────────────────────────────────────────────────────────────────────────────╯ CalledProcessError: Command '['E:\kohya_ss\kohya_ss\venv\Scripts\python.exe', './train_network.py', '--enable_bucket', '--pretrained_model_name_or_path=runwayml/stable-diffusion-v1-5', '--train_data_dir=E:\LoRA_test\image', '--resolution=512,512', '--output_dir=E:\LoRA_test\model', '--logging_dir=E:\LoRA_test\log', '--network_alpha=128', '--save_model_as=safetensors', '--network_module=networks.lora', '--text_encoder_lr=5e-05', '--unet_lr=0.0001', '--network_dim=128', '--output_name=aixiaolong', '--lr_scheduler_num_cycles=2', '--no_half_vae', '--learning_rate=0.0001', '--lr_scheduler=cosine_with_restarts', '--lr_warmup_steps=6', '--train_batch_size=1', '--max_train_steps=600', '--save_every_n_epochs=1', '--mixed_precision=fp16', '--save_precision=fp16', '--seed=1234', '--caption_extension=.txt', '--cache_latents', '--optimizer_type=AdamW8bit', '--max_data_loader_n_workers=0', '--clip_skip=2', '--bucket_reso_steps=64', '--xformers', '--bucket_no_upscale']' returned non-zero exit status 1.

xiaoming9802 avatar Jul 16 '23 12:07 xiaoming9802

Looks like it can't find the GPU somehow.

On Sun, Jul 16, 2023, 8:39 a.m. xiaoming9802 @.***> wrote:

I used all the methods and they didn't work.

CUDA SETUP: Loading binary E:\kohya_ss\kohya_ss\venv\lib\site-packages\bitsandbytes\libbitsandbytes_cuda116.dll... use 8-bit AdamW optimizer | {} running training / 学習開始 num train images * repeats / 学習画像の数×繰り返し回数: 300 num reg images / 正則化画像の数: 0 num batches per epoch / 1epochのバッチ数: 300 num epochs / epoch数: 2 batch size per device / バッチサイズ: 1 gradient accumulation steps / 勾配を合計するステップ数 = 1 total optimization steps / 学習ステップ数: 600 steps: 0%| | 0/600 [00:00<?, ?it/s] epoch 1/2 Error no kernel image is available for execution on the device at line 167 in file D:\ai\tool\bitsandbytes\csrc\ops.cu ╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮ │ C:\Users\Administrator\AppData\Local\Programs\Python\Python310\lib\runpy.py:196 in │ │ _run_module_as_main │ │ │ │ 193 │ main_globals = sys.modules["main"].dict │ │ 194 │ if alter_argv: │ │ 195 │ │ sys.argv[0] = mod_spec.origin │ │ ❱ 196 │ return _run_code(code, main_globals, None, │ │ 197 │ │ │ │ │ "main", mod_spec) │ │ 198 │ │ 199 def run_module(mod_name, init_globals=None, │ │ │ │ C:\Users\Administrator\AppData\Local\Programs\Python\Python310\lib\runpy.py:86 in _run_code │ │ │ │ 83 │ │ │ │ │ loader = loader, │ │ 84 │ │ │ │ │ package = pkg_name, │ │ 85 │ │ │ │ │ spec = mod_spec) │ │ ❱ 86 │ exec(code, run_globals) │ │ 87 │ return run_globals │ │ 88 │ │ 89 def _run_module_code(code, init_globals=None, │ │ │ │ in :7 │ │ │ │ 4 from accelerate.commands.accelerate_cli import main │ │ 5 if name == 'main': │ │ 6 │ sys.argv[0] = re.sub(r'(-script.pyw|.exe)?$', '', sys.argv[0]) │ │ ❱ 7 │ sys.exit(main()) │ │ 8 │ │ │ │ E:\kohya_ss\kohya_ss\venv\lib\site-packages\accelerate\commands\accelerate_cli.py:45 in main │ │ │ │ 42 │ │ exit(1) │ │ 43 │ │ │ 44 │ # Run │ │ ❱ 45 │ args.func(args) │ │ 46 │ │ 47 │ │ 48 if name == "main": │ │ │ │ E:\kohya_ss\kohya_ss\venv\lib\site-packages\accelerate\commands\launch.py:918 in launch_command │ │ │ │ 915 │ elif defaults is not None and 
defaults.compute_environment == ComputeEnvironment.AMA │ │ 916 │ │ sagemaker_launcher(defaults, args) │ │ 917 │ else: │ │ ❱ 918 │ │ simple_launcher(args) │ │ 919 │ │ 920 │ │ 921 def main(): │ │ │ │ E:\kohya_ss\kohya_ss\venv\lib\site-packages\accelerate\commands\launch.py:580 in simple_launcher │ │ │ │ 577 │ process.wait() │ │ 578 │ if process.returncode != 0: │ │ 579 │ │ if not args.quiet: │ │ ❱ 580 │ │ │ raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd) │ │ 581 │ │ else: │ │ 582 │ │ │ sys.exit(1) │ │ 583 │

╰──────────────────────────────────────────────────────────────────────────────────────────────────╯ CalledProcessError: Command '['E:\kohya_ss\kohya_ss\venv\Scripts\python.exe', './train_network.py', '--enable_bucket', '--pretrained_model_name_or_path=runwayml/stable-diffusion-v1-5', '--train_data_dir=E:\LoRA_test\image', '--resolution=512,512', '--output_dir=E:\LoRA_test\model', '--logging_dir=E:\LoRA_test\log', '--network_alpha=128', '--save_model_as=safetensors', '--network_module=networks.lora', '--text_encoder_lr=5e-05', '--unet_lr=0.0001', '--network_dim=128', '--output_name=aixiaolong', '--lr_scheduler_num_cycles=2', '--no_half_vae', '--learning_rate=0.0001', '--lr_scheduler=cosine_with_restarts', '--lr_warmup_steps=6', '--train_batch_size=1', '--max_train_steps=600', '--save_every_n_epochs=1', '--mixed_precision=fp16', '--save_precision=fp16', '--seed=1234', '--caption_extension=.txt', '--cache_latents', '--optimizer_type=AdamW8bit', '--max_data_loader_n_workers=0', '--clip_skip=2', '--bucket_reso_steps=64', '--xformers', '--bucket_no_upscale']' returned non-zero exit status 1.

— Reply to this email directly, view it on GitHub https://github.com/bmaltais/kohya_ss/issues/208#issuecomment-1637074480, or unsubscribe https://github.com/notifications/unsubscribe-auth/ABZA34RUFJT2YU6Z3CSN3N3XQPOJZANCNFSM6AAAAAAVBDGVIE . You are receiving this because you modified the open/close state.Message ID: @.***>

bmaltais avatar Jul 16 '23 14:07 bmaltais

Changed precision from bf16 to fp16 and it works.

MuamerRamic avatar Oct 30 '23 08:10 MuamerRamic

Have also been getting this same error, full details attached NB GPU is 6GB GTX980TI Run01_Errors.txt

Fixed it, and it is currently training.

Switched the Optimizer on the Training tab from AdamW8bit (set by default) to AdamW.

Thank you for the help.

ingsilence avatar Feb 16 '24 11:02 ingsilence

It still doesn't work after changing the optimizer from AdamW8bit.

Traceback (most recent call last): File "/Users/kleinsun/open/kohya_ss/venv/bin/accelerate", line 8, in sys.exit(main()) File "/Users/kleinsun/open/kohya_ss/venv/lib/python3.10/site-packages/accelerate/commands/accelerate_cli.py", line 47, in main args.func(args) File "/Users/kleinsun/open/kohya_ss/venv/lib/python3.10/site-packages/accelerate/commands/launch.py", line 1017, in launch_command simple_launcher(args) File "/Users/kleinsun/open/kohya_ss/venv/lib/python3.10/site-packages/accelerate/commands/launch.py", line 637, in simple_launcher raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd) subprocess.CalledProcessError: Command '['/Users/kleinsun/open/kohya_ss/venv/bin/python', '/Users/kleinsun/Documents/open/kohya_ss/sd-scripts/sdxl_train.py', '--config_file', '/Users/kleinsun/Documents/open/kohya_ss/outputs/config_dreambooth-20240531-163833.toml']' returned non-zero exit status 1.

eratel avatar May 31 '24 08:05 eratel

Getting a very similar error to the others here: Traceback (most recent call last): File "C:\Users\Pavan\AppData\Local\Programs\Python\Python310\lib\runpy.py", line 196, in _run_module_as_main return _run_code(code, main_globals, None, File "C:\Users\Pavan\AppData\Local\Programs\Python\Python310\lib\runpy.py", line 86, in run_code exec(code, run_globals) File "C:\Users\Pavan\kohya_ss\venv\Scripts\accelerate.EXE_main.py", line 7, in File "C:\Users\Pavan\kohya_ss\venv\lib\site-packages\accelerate\commands\accelerate_cli.py", line 47, in main args.func(args) File "C:\Users\Pavan\kohya_ss\venv\lib\site-packages\accelerate\commands\launch.py", line 1017, in launch_command simple_launcher(args) File "C:\Users\Pavan\kohya_ss\venv\lib\site-packages\accelerate\commands\launch.py", line 637, in simple_launcher raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd) subprocess.CalledProcessError: Command '['C:\Users\Pavan\kohya_ss\venv\Scripts\python.exe', 'C:/Users/Pavan/kohya_ss/sd-scripts/sdxl_train_network.py', '--config_file', 'C:/Users/Pavan/Favorites/Training Folders/Shriya\model/config_lora-20240701-141835.toml', '--network_train_unet_only']' returned non-zero exit status 3221225477. I have tried all the solutions suggested above but cant seem to make it work. Any ideas?

kappaman00 avatar Jul 01 '24 19:07 kappaman00