Loading a custom pipeline causes CUDA out of memory
Describe the bug
import torch
from diffusers import DiffusionPipeline

inpaintingPipe = DiffusionPipeline.from_pretrained(
    'runwayml/stable-diffusion-inpainting',
    torch_dtype=torch.float16,
    custom_pipeline='hd_painter',
)
OutOfMemoryError: CUDA out of memory. Tried to allocate 3.96 GiB. GPU 0 has a total capacity of 44.32 GiB of which 2.01 GiB is free. Including non-PyTorch memory, this process has 42.29 GiB memory in use. Of the allocated memory 36.85 GiB is allocated by PyTorch, and 4.80 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
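The error message itself suggests trying PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True. A minimal sketch of how I would set it, assuming the variable only needs to be in place before the first CUDA allocation:

import os

# Must be set before the CUDA caching allocator is initialized,
# i.e. before the first tensor is placed on the GPU.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

import torch  # imported after the environment variable is set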
Reproduction
inpaintingPipe = DiffusionPipeline.from_pretrained(
    'runwayml/stable-diffusion-inpainting',
    torch_dtype=torch.float16,
    custom_pipeline='hd_painter',
)
Logs
---------------------------------------------------------------------------
OutOfMemoryError Traceback (most recent call last)
Cell In[5], line 32
29 display(input_image.size)
30 display((renderWidth, renderHeight))
---> 32 output_image0 = inpainting(
33 image = input_image.convert('RGB'),
34 mask_image = mask_image,
35 prompt = prompt,
36 negative_prompt = 'wtf, smile',
37 width = renderWidth,
38 height = renderHeight,
39 num_inference_steps = math.ceil(num_inference_steps / strength),
40 guidance_scale = guidance_scale,
41 # guidance_rescale = 0.5,
42 strength = strength,
43 # generator = _generator,
44 clip_skip = 2,
45 padding_mask_crop = 32,
46 use_rasg = True,
47 use_painta = True,
48 # cross_attention_kwargs = { 'scale': .5 },
49 # padding_mask_crop = 0,
50 # ip_adapter_image = Image.open('download-3.png')
51 ).images[0]
53 # image = np.array(input_image)
54
55 # # get canny image
(...)
96 # # ip_adapter_image = face
97 # )
99 output_image = output_image.resize(input_image.size)
File ~/workspace/ai-pipe/ai-pipe-replace-model/inpainting.py:127, in inpainting(**props)
126 def inpainting (**props):
--> 127 output = inpaintingPipe(**props)
128 return output
File ~/.local/lib/python3.10/site-packages/torch/utils/_contextlib.py:115, in context_decorator.<locals>.decorate_context(*args, **kwargs)
112 @functools.wraps(func)
113 def decorate_context(*args, **kwargs):
114 with ctx_factory():
--> 115 return func(*args, **kwargs)
File ~/.cache/huggingface/modules/diffusers_modules/git/hd_painter.py:759, in StableDiffusionHDPainterPipeline.__call__(self, prompt, image, mask_image, masked_image_latents, height, width, padding_mask_crop, strength, num_inference_steps, timesteps, guidance_scale, positive_prompt, negative_prompt, num_images_per_prompt, eta, generator, latents, prompt_embeds, negative_prompt_embeds, ip_adapter_image, output_type, return_dict, cross_attention_kwargs, clip_skip, callback_on_step_end, callback_on_step_end_tensor_inputs, use_painta, use_rasg, self_attention_layer_name, cross_attention_layer_name, painta_scale_factors, rasg_scale_factor, list_of_painta_layer_names, list_of_rasg_layer_names, **kwargs)
756 attn_processor.encoder_hidden_states = prompt_embeds
758 # predict the noise residual
--> 759 noise_pred = self.unet(
760 latent_model_input,
761 t,
762 encoder_hidden_states=prompt_embeds,
763 timestep_cond=timestep_cond,
764 cross_attention_kwargs=self.cross_attention_kwargs,
765 added_cond_kwargs=added_cond_kwargs,
766 return_dict=False,
767 )[0]
769 # perform guidance
770 if self.do_classifier_free_guidance:
File ~/.local/lib/python3.10/site-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs)
1509 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1510 else:
-> 1511 return self._call_impl(*args, **kwargs)
File ~/.local/lib/python3.10/site-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs)
1515 # If we don't have any hooks, we want to skip the rest of the logic in
1516 # this function, and just call forward.
1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1518 or _global_backward_pre_hooks or _global_backward_hooks
1519 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1520 return forward_call(*args, **kwargs)
1522 try:
1523 result = None
File ~/.local/lib/python3.10/site-packages/accelerate/hooks.py:166, in add_hook_to_module.<locals>.new_forward(module, *args, **kwargs)
164 output = module._old_forward(*args, **kwargs)
165 else:
--> 166 output = module._old_forward(*args, **kwargs)
167 return module._hf_hook.post_forward(module, output)
File ~/.local/lib/python3.10/site-packages/diffusers/models/unets/unet_2d_condition.py:1216, in UNet2DConditionModel.forward(self, sample, timestep, encoder_hidden_states, class_labels, timestep_cond, attention_mask, cross_attention_kwargs, added_cond_kwargs, down_block_additional_residuals, mid_block_additional_residual, down_intrablock_additional_residuals, encoder_attention_mask, return_dict)
1213 if is_adapter and len(down_intrablock_additional_residuals) > 0:
1214 additional_residuals["additional_residuals"] = down_intrablock_additional_residuals.pop(0)
-> 1216 sample, res_samples = downsample_block(
1217 hidden_states=sample,
1218 temb=emb,
1219 encoder_hidden_states=encoder_hidden_states,
1220 attention_mask=attention_mask,
1221 cross_attention_kwargs=cross_attention_kwargs,
1222 encoder_attention_mask=encoder_attention_mask,
1223 **additional_residuals,
1224 )
1225 else:
1226 sample, res_samples = downsample_block(hidden_states=sample, temb=emb)
File ~/.local/lib/python3.10/site-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs)
1509 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1510 else:
-> 1511 return self._call_impl(*args, **kwargs)
File ~/.local/lib/python3.10/site-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs)
1515 # If we don't have any hooks, we want to skip the rest of the logic in
1516 # this function, and just call forward.
1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1518 or _global_backward_pre_hooks or _global_backward_hooks
1519 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1520 return forward_call(*args, **kwargs)
1522 try:
1523 result = None
File ~/.local/lib/python3.10/site-packages/diffusers/models/unets/unet_2d_blocks.py:1288, in CrossAttnDownBlock2D.forward(self, hidden_states, temb, encoder_hidden_states, attention_mask, cross_attention_kwargs, encoder_attention_mask, additional_residuals)
1286 else:
1287 hidden_states = resnet(hidden_states, temb)
-> 1288 hidden_states = attn(
1289 hidden_states,
1290 encoder_hidden_states=encoder_hidden_states,
1291 cross_attention_kwargs=cross_attention_kwargs,
1292 attention_mask=attention_mask,
1293 encoder_attention_mask=encoder_attention_mask,
1294 return_dict=False,
1295 )[0]
1297 # apply additional residuals to the output of the last pair of resnet and attention blocks
1298 if i == len(blocks) - 1 and additional_residuals is not None:
File ~/.local/lib/python3.10/site-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs)
1509 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1510 else:
-> 1511 return self._call_impl(*args, **kwargs)
File ~/.local/lib/python3.10/site-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs)
1515 # If we don't have any hooks, we want to skip the rest of the logic in
1516 # this function, and just call forward.
1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1518 or _global_backward_pre_hooks or _global_backward_hooks
1519 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1520 return forward_call(*args, **kwargs)
1522 try:
1523 result = None
File ~/.local/lib/python3.10/site-packages/diffusers/models/transformers/transformer_2d.py:442, in Transformer2DModel.forward(self, hidden_states, encoder_hidden_states, timestep, added_cond_kwargs, class_labels, cross_attention_kwargs, attention_mask, encoder_attention_mask, return_dict)
430 hidden_states = torch.utils.checkpoint.checkpoint(
431 create_custom_forward(block),
432 hidden_states,
(...)
439 **ckpt_kwargs,
440 )
441 else:
--> 442 hidden_states = block(
443 hidden_states,
444 attention_mask=attention_mask,
445 encoder_hidden_states=encoder_hidden_states,
446 encoder_attention_mask=encoder_attention_mask,
447 timestep=timestep,
448 cross_attention_kwargs=cross_attention_kwargs,
449 class_labels=class_labels,
450 )
452 # 3. Output
453 if self.is_input_continuous:
File ~/.local/lib/python3.10/site-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs)
1509 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1510 else:
-> 1511 return self._call_impl(*args, **kwargs)
File ~/.local/lib/python3.10/site-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs)
1515 # If we don't have any hooks, we want to skip the rest of the logic in
1516 # this function, and just call forward.
1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1518 or _global_backward_pre_hooks or _global_backward_hooks
1519 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1520 return forward_call(*args, **kwargs)
1522 try:
1523 result = None
File ~/.local/lib/python3.10/site-packages/diffusers/models/attention.py:329, in BasicTransformerBlock.forward(self, hidden_states, attention_mask, encoder_hidden_states, encoder_attention_mask, timestep, cross_attention_kwargs, class_labels, added_cond_kwargs)
326 cross_attention_kwargs = cross_attention_kwargs.copy() if cross_attention_kwargs is not None else {}
327 gligen_kwargs = cross_attention_kwargs.pop("gligen", None)
--> 329 attn_output = self.attn1(
330 norm_hidden_states,
331 encoder_hidden_states=encoder_hidden_states if self.only_cross_attention else None,
332 attention_mask=attention_mask,
333 **cross_attention_kwargs,
334 )
335 if self.norm_type == "ada_norm_zero":
336 attn_output = gate_msa.unsqueeze(1) * attn_output
File ~/.local/lib/python3.10/site-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs)
1509 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1510 else:
-> 1511 return self._call_impl(*args, **kwargs)
File ~/.local/lib/python3.10/site-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs)
1515 # If we don't have any hooks, we want to skip the rest of the logic in
1516 # this function, and just call forward.
1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1518 or _global_backward_pre_hooks or _global_backward_hooks
1519 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1520 return forward_call(*args, **kwargs)
1522 try:
1523 result = None
File ~/.local/lib/python3.10/site-packages/diffusers/models/attention_processor.py:519, in Attention.forward(self, hidden_states, encoder_hidden_states, attention_mask, **cross_attention_kwargs)
514 logger.warning(
515 f"cross_attention_kwargs {unused_kwargs} are not expected by {self.processor.__class__.__name__} and will be ignored."
516 )
517 cross_attention_kwargs = {k: w for k, w in cross_attention_kwargs.items() if k in attn_parameters}
--> 519 return self.processor(
520 self,
521 hidden_states,
522 encoder_hidden_states=encoder_hidden_states,
523 attention_mask=attention_mask,
524 **cross_attention_kwargs,
525 )
File ~/.cache/huggingface/modules/diffusers_modules/git/hd_painter.py:130, in PAIntAAttnProcessor.__call__(self, attn, hidden_states, encoder_hidden_states, attention_mask, temb, scale)
128 break
129 if mask is None:
--> 130 return self.default_processor(attn, hidden_states, encoder_hidden_states, attention_mask, temb, scale)
132 # STARTS HERE
133 residual = hidden_states
File ~/.local/lib/python3.10/site-packages/diffusers/models/attention_processor.py:778, in AttnProcessor.__call__(self, attn, hidden_states, encoder_hidden_states, attention_mask, temb, *args, **kwargs)
775 key = attn.head_to_batch_dim(key)
776 value = attn.head_to_batch_dim(value)
--> 778 attention_probs = attn.get_attention_scores(query, key, attention_mask)
779 hidden_states = torch.bmm(attention_probs, value)
780 hidden_states = attn.batch_to_head_dim(hidden_states)
File ~/.local/lib/python3.10/site-packages/diffusers/models/attention_processor.py:599, in Attention.get_attention_scores(self, query, key, attention_mask)
596 baddbmm_input = attention_mask
597 beta = 1
--> 599 attention_scores = torch.baddbmm(
600 baddbmm_input,
601 query,
602 key.transpose(-1, -2),
603 beta=beta,
604 alpha=self.scale,
605 )
606 del baddbmm_input
608 if self.upcast_softmax:
OutOfMemoryError: CUDA out of memory. Tried to allocate 3.96 GiB. GPU 0 has a total capacity of 44.32 GiB of which 25.75 MiB is free. Including non-PyTorch memory, this process has 44.28 GiB memory in use. Of the allocated memory 40.22 GiB is allocated by PyTorch, and 3.42 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
System Info
pip install git+https://github.com/huggingface/diffusers.git
Who can help?
@AndranikSargsyan @haikmanukyan
From your logs it looks like the out-of-memory error happens when you run inference with the pipeline rather than while loading it. Can you share a full code example of exactly what you are running that causes this error?
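Something along these lines would be ideal, i.e. a script we can run end to end. The file paths, prompt, and settings below are placeholders for whatever you are actually using:

import math
import torch
from PIL import Image
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained(
    'runwayml/stable-diffusion-inpainting',
    torch_dtype=torch.float16,
    custom_pipeline='hd_painter',
).to('cuda')

# Placeholder inputs -- replace with the actual image, mask, and values you use.
input_image = Image.open('input.png').convert('RGB')
mask_image = Image.open('mask.png')
strength = 0.75
num_inference_steps = 30

output_image = pipe(
    image=input_image,
    mask_image=mask_image,
    prompt='your prompt here',
    negative_prompt='wtf, smile',
    width=input_image.width,
    height=input_image.height,
    num_inference_steps=math.ceil(num_inference_steps / strength),
    strength=strength,
    clip_skip=2,
    padding_mask_crop=32,
    use_rasg=True,
    use_painta=True,
).images[0]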
This issue has been automatically marked as stale because it has not had recent activity. If you think this still needs to be addressed please comment on this thread.
Please note that issues that do not follow the contributing guidelines are likely to be ignored.