The model starts loading, but when it loads the checkpoint, the following error is raised:
File "/home/ady/Desktop/toma/stable-diffusion/main.py", line 830, in
trainer.fit(model, data)
File "/home/ady/Desktop/toma/toma/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 740, in fit
self._call_and_handle_interrupt(
File "/home/ady/Desktop/toma/toma/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 685, in _call_and_handle_interrupt
return trainer_fn(*args, **kwargs)
File "/home/ady/Desktop/toma/toma/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 777, in _fit_impl
self._run(model, ckpt_path=ckpt_path)
File "/home/ady/Desktop/toma/toma/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 1199, in _run
self._dispatch()
File "/home/ady/Desktop/toma/toma/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 1279, in _dispatch
self.training_type_plugin.start_training(self)
File "/home/ady/Desktop/toma/toma/lib/python3.10/site-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py", line 202, in start_training
self._results = trainer.run_stage()
File "/home/ady/Desktop/toma/toma/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 1289, in run_stage
return self._run_train()
File "/home/ady/Desktop/toma/toma/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 1311, in _run_train
self._run_sanity_check(self.lightning_module)
File "/home/ady/Desktop/toma/toma/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 1375, in _run_sanity_check
self._evaluation_loop.run()
File "/home/ady/Desktop/toma/toma/lib/python3.10/site-packages/pytorch_lightning/loops/base.py", line 145, in run
self.advance(*args, **kwargs)
File "/home/ady/Desktop/toma/toma/lib/python3.10/site-packages/pytorch_lightning/loops/dataloader/evaluation_loop.py", line 110, in advance
dl_outputs = self.epoch_loop.run(dataloader, dataloader_idx, dl_max_batches, self.num_dataloaders)
File "/home/ady/Desktop/toma/toma/lib/python3.10/site-packages/pytorch_lightning/loops/base.py", line 145, in run
self.advance(*args, **kwargs)
File "/home/ady/Desktop/toma/toma/lib/python3.10/site-packages/pytorch_lightning/loops/epoch/evaluation_epoch_loop.py", line 122, in advance
output = self._evaluation_step(batch, batch_idx, dataloader_idx)
File "/home/ady/Desktop/toma/toma/lib/python3.10/site-packages/pytorch_lightning/loops/epoch/evaluation_epoch_loop.py", line 217, in _evaluation_step
output = self.trainer.accelerator.validation_step(step_kwargs)
File "/home/ady/Desktop/toma/toma/lib/python3.10/site-packages/pytorch_lightning/accelerators/accelerator.py", line 236, in validation_step
return self.training_type_plugin.validation_step(*step_kwargs.values())
File "/home/ady/Desktop/toma/toma/lib/python3.10/site-packages/pytorch_lightning/plugins/training_type/ddp.py", line 444, in validation_step
return self.model(*args, **kwargs)
File "/home/ady/Desktop/toma/toma/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ady/Desktop/toma/toma/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1156, in forward
output = self._run_ddp_forward(*inputs, **kwargs)
File "/home/ady/Desktop/toma/toma/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1110, in _run_ddp_forward
return module_to_run(*inputs[0], **kwargs[0]) # type: ignore[index]
File "/home/ady/Desktop/toma/toma/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ady/Desktop/toma/toma/lib/python3.10/site-packages/pytorch_lightning/overrides/base.py", line 92, in forward
output = self.module.validation_step(*inputs, **kwargs)
File "/home/ady/Desktop/toma/toma/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/home/ady/Desktop/toma/stable-diffusion/ldm/models/diffusion/ddpm.py", line 368, in validation_step
_, loss_dict_no_ema = self.shared_step(batch)
File "/home/ady/Desktop/toma/stable-diffusion/ldm/models/diffusion/ddpm.py", line 908, in shared_step
loss = self(x, c)
File "/home/ady/Desktop/toma/toma/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ady/Desktop/toma/stable-diffusion/ldm/models/diffusion/ddpm.py", line 942, in forward
return self.p_losses(x, c, t, *args, **kwargs)
File "/home/ady/Desktop/toma/stable-diffusion/ldm/models/diffusion/ddpm.py", line 1093, in p_losses
logvar_t = self.logvar[t].to(self.device)
RuntimeError: indices should be either on cpu or on the same device as the indexed tensor (cpu)
What should I do to fix this?
I modified the code and it runs now (I am not sure this is the proper fix; you can try it):
File "/home/ady/Desktop/toma/stable-diffusion/ldm/models/diffusion/ddpm.py", line 1093, in p_losses
logvar_t = self.logvar[t].to(self.device)
->
logvar_t = self.logvar[t.cpu()].to(self.device)
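For context (my reading of the error, not something stated in the repo): as far as I can tell, self.logvar is created as a plain CPU tensor and is not registered as a buffer, so it never moves to the GPU with the rest of the model, while the timestep tensor t lives on the GPU. Newer PyTorch versions refuse to index a CPU tensor with CUDA indices rather than syncing silently, which is exactly the RuntimeError above. A minimal sketch of the failure and the workaround, with illustrative names (not taken from ddpm.py):

    import torch

    device = "cuda" if torch.cuda.is_available() else "cpu"
    logvar = torch.zeros(1000)                       # CPU tensor, like self.logvar
    t = torch.randint(0, 1000, (4,), device=device)  # timestep indices on the GPU

    # logvar[t] raises the RuntimeError above when t is a CUDA tensor.
    logvar_t = logvar[t.cpu()].to(device)            # index on CPU, then move the result

An alternative I have not tried would be to move the tensor itself once, e.g. self.logvar = self.logvar.to(self.device) before the lookup, so the indices and the indexed tensor already share a device; the t.cpu() version above just forces the lookup onto the CPU instead.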