add a `from_pipe` method to `DiffusionPipeline`
motivated by https://github.com/huggingface/diffusers/issues/6531
first, let's create a StableDiffusionPipeline with IP-Adapter
```python
import torch

from diffusers import (
    AnimateDiffPipeline,
    DDIMScheduler,
    DiffusionPipeline,
    EulerDiscreteScheduler,
    MotionAdapter,
    PIAPipeline,
    StableDiffusionPipeline,
    StableDiffusionSAGPipeline,
    UNet2DConditionModel,
)
from diffusers.utils import export_to_gif, load_image

base_repo = "SG161222/Realistic_Vision_V6.0_B1_noVAE"
num_inference_steps = 50
image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/load_neg_embed.png")

# test1: pipe_sd.from_pretrained with ip_adapter
unet = UNet2DConditionModel.from_pretrained(base_repo, subfolder="unet")
pipe_sd = DiffusionPipeline.from_pretrained(base_repo, unet=unet)
pipe_sd.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter_sd15.bin")
pipe_sd.set_ip_adapter_scale(0.6)
pipe_sd.enable_model_cpu_offload()

generator = torch.Generator(device="cpu").manual_seed(33)
out = pipe_sd(
    prompt="best quality, high quality, wearing sunglasses",
    ip_adapter_image=image,
    negative_prompt="monochrome, lowres, bad anatomy, worst quality, low quality",
    num_inference_steps=num_inference_steps,
    generator=generator,
).images[0]
out.save("yiyi_test_6_out_1_sd_ip.png")
```
StableDiffusion -> SAG
```python
# test2: pipe_sd -> pipe_sag
pipe_sd.unload_ip_adapter()

pipe_sag = StableDiffusionSAGPipeline.from_pipe(
    pipe_sd,
    torch_dtype=torch.float16,
    safety_checker=None,
    feature_extractor=None,
)
pipe_sag.enable_model_cpu_offload()

generator = torch.Generator(device="cpu").manual_seed(33)
out = pipe_sag(
    prompt="a photo of an astronaut riding a horse on mars",
    num_inference_steps=num_inference_steps,
    generator=generator,
    sag_scale=0.75,
).images[0]
out.save("yiyi_test_6_out_2_sag.png")
```
SAG -> PIA
```python
# test3: pipe_sag -> pipe_pia
adapter = MotionAdapter.from_pretrained("openmmlab/PIA-condition-adapter")
pipe_pia = PIAPipeline.from_pipe(pipe_sag, motion_adapter=adapter)
pipe_pia.enable_model_cpu_offload()
pipe_pia.scheduler = EulerDiscreteScheduler.from_config(pipe_pia.scheduler.config)

generator = torch.Generator(device="cpu").manual_seed(33)
out = pipe_pia(
    image=image,
    prompt="bear eat pizza",
    negative_prompt="wrong white balance, dark, sketches,worst quality,low quality",
    generator=generator,
    num_inference_steps=num_inference_steps,
).frames[0]
export_to_gif(out, "yiyi_test_6_out_3_pia.gif")
```
PIA -> StableDiffusion
```python
# test4: pipe_pia -> pipe_sd
# the PIA unet is not compatible with the SD unet, so this will raise an error
try:
    pipe_sd = StableDiffusionPipeline.from_pipe(pipe_pia, safety_checker=None, feature_extractor=None)
except Exception as e:
    print(e)

# pass a fresh SD unet to make sure the PIA motion adapter doesn't get carried over to the Stable Diffusion pipeline
pipe_sd = StableDiffusionPipeline.from_pipe(pipe_pia, unet=unet, safety_checker=None, feature_extractor=None, torch_dtype=torch.float16)
pipe_sd.enable_model_cpu_offload()

generator = torch.Generator(device="cpu").manual_seed(33)
out = pipe_sd(
    prompt="a photo of an astronaut riding a horse on mars",
    num_inference_steps=num_inference_steps,
    generator=generator,
).images[0]
out.save("yiyi_test_6_out_4_sd.png")
```
StableDiffusion -> AnimateDiff
```python
# test5: pipe_sd -> pipe_animate
# make sure the AnimateDiff motion adapter is the one passed, not the one from the PIA pipeline
adapter = MotionAdapter.from_pretrained("guoyww/animatediff-motion-adapter-v1-5-2", torch_dtype=torch.float16)
pipe_animate = AnimateDiffPipeline.from_pipe(pipe_sd, motion_adapter=adapter)
pipe_animate.scheduler = DDIMScheduler.from_config(pipe_animate.scheduler.config, beta_schedule="linear")
pipe_animate.enable_model_cpu_offload()

generator = torch.Generator(device="cpu").manual_seed(33)
out = pipe_animate(
    prompt=(
        "masterpiece, bestquality, highlydetailed, ultradetailed, sunset, "
        "orange sky, warm lighting, fishing boats, ocean waves seagulls, "
        "rippling water, wharf, silhouette, serene atmosphere, dusk, evening glow, "
        "golden hour, coastal landscape, seaside scenery"
    ),
    negative_prompt="bad quality, worse quality",
    generator=generator,
    num_frames=16,
    guidance_scale=7.5,
    num_inference_steps=num_inference_steps,
).frames[0]
export_to_gif(out, "yiyi_test_6_out_5_animate.gif")
```
StableDiffusion -> LPW (community pipeline)
```python
# test6 (community pipeline): pipe_sd -> pipe_lpw
pipe_lpw = DiffusionPipeline.from_pipe(
    pipe_sd,
    custom_pipeline="lpw_stable_diffusion",
    safety_checker=None,
    feature_extractor=None,
    torch_dtype=torch.float16,
).to("cuda")

prompt = "best_quality (1girl:1.3) bow bride brown_hair closed_mouth frilled_bow frilled_hair_tubes frills (full_body:1.3) fox_ear hair_bow hair_tubes happy hood japanese_clothes kimono long_sleeves red_bow smile solo tabi uchikake white_kimono wide_sleeves cherry_blossoms"
neg_prompt = "lowres, bad_anatomy, error_body, error_hair, error_arm, error_hands, bad_hands, error_fingers, bad_fingers, missing_fingers, error_legs, bad_legs, multiple_legs, missing_legs, error_lighting, error_shadow, error_reflection, text, error, extra_digit, fewer_digits, cropped, worst_quality, low_quality, normal_quality, jpeg_artifacts, signature, watermark, username, blurry"

generator = torch.Generator(device="cpu").manual_seed(33)
out = pipe_lpw.text2img(
    prompt,
    negative_prompt=neg_prompt,
    width=512,
    height=512,
    max_embeddings_multiples=3,
    num_inference_steps=num_inference_steps,
    generator=generator,
).images[0]
out.save("yiyi_test_6_out_6_lpw.png")
```
LPW -> StableDiffusion
```python
# test7: pipe_lpw -> pipe_sd
pipe_sd = StableDiffusionPipeline.from_pipe(pipe_lpw, safety_checker=None, feature_extractor=None)
pipe_sd.enable_model_cpu_offload()

generator = torch.Generator(device="cpu").manual_seed(33)
out = pipe_sd(
    prompt="a photo of an astronaut riding a horse on mars",
    generator=generator,
    num_inference_steps=num_inference_steps,
).images[0]
out.save("yiyi_test_6_out_7_sd.png")
```
cc @vladmandic this is still WIP, but let me know what you think about the API and the use cases it covers. Do you have any other specific use case in mind that I did not cover here?
thanks yiyi! from a high level, it seems to cover all the main use cases. if there is something borderline, we can think of that later. my two comments are:
- target pipeline should inherit pipeline settings, not just components. e.g. model_cpu_offload and all other enable_*() methods should be applied on the target if they were applied on the source.
- testing: make sure that the target pipeline actually works when there are additional components added to it (e.g. a different vae or pretty much anything). in my experience, this is where accelerate often breaks, as it doesn't pull model components in time and you end up with a RuntimeError about cuda vs cpu.
thanks for the feedback! @vladmandic
> to make sure that target pipeline actually works when there are additional components added to it
I have not run into any issues using enable_model_cpu_offload with additional components, and my tests are pretty extensive (we added a fast test covering all diffusers official pipelines that can use from_pipe). I don't think it is a concern here because I remove all the hooks and reset the offload device at the beginning.
> target pipeline should inherit pipeline settings
I'm not so sure about this because:
- We allow adding and subtracting components with the `from_pipe` API, so the new pipeline may have different memory requirements, and the user may want different settings. I think it would be simpler to reset instead of inheriting the settings, unless they always want to have the same settings for the new pipeline.
- not every pipeline has implemented all of these methods; e.g. in my testing, the LPW pipeline did not have the `enable_model_cpu_offload` method working correctly. This would more likely be an issue with community pipelines.
- I agree it is less convenient if you have to re-apply settings, but I don't think it makes too much difference.
with this being said, I think it won't be hard to implement and I'm open to it if you all think it's more intuitive and convenient to let the new pipelines inherit settings. cc @pcuenca here too, let me know what you think!
cc @DN6 @sayakpaul for a final review. Let me know what you think about this https://github.com/huggingface/diffusers/pull/7241#issuecomment-2013528351 too. I'm slightly in favor of resetting the pipeline settings, but I don't feel strongly either way.
thanks @yiyixuxu
re: pipeline settings inheritance - IMO it would be more convenient and expected since it's a pipeline switch using loaded model components (all or some), but it's not a deal breaker - from_pipe has massive value either way.
actually, now I think most of the enable_* methods make stateful changes to the model components, and these changes are already naturally carried over to the new pipeline (e.g. these on StableDiffusionMixin https://github.com/huggingface/diffusers/blob/e4356d64885382c2bb61a28f2ba2b6e4d5f52d23/src/diffusers/pipelines/pipeline_utils.py#L1653), so we probably should make it consistent with enable_model_cpu_offload.
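for example (a small sketch of what I mean, not part of the PR tests above):

```python
# enable_attention_slicing() makes a stateful change on the unet module itself
pipe1.enable_attention_slicing()

# from_pipe reuses the same component objects, so the already-sliced unet comes along
pipe2 = StableDiffusionSAGPipeline.from_pipe(pipe1)
assert pipe2.unet is pipe1.unet
```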
on the other hand, these methods may not work properly with the potential addition or override of new components, e.g. if we have enable_vae_slicing enabled on the pipeline and create a new pipeline with a new vae component, it won't work:
```python
pipe1.enable_vae_slicing()
vae = AutoencoderKL.from_pretrained(...)  # a different vae
pipe2 = NewPipelineClass.from_pipe(pipe1, vae=vae)
```
should we handle this on our end or let the user address this? if they just re-apply the settings, it would work as expected; if we are going to handle this on our end, it would be pretty complicated I think
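to illustrate the "let the user address it" option, the fix would just be one extra line on the new pipeline (assuming the target pipeline class also exposes enable_vae_slicing):

```python
# continuing the sketch above: re-apply the setting on the new pipeline
# so the newly passed vae is sliced as well
pipe2.enable_vae_slicing()
```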
> actually, now I think most of the enable_* methods make stateful changes to the model components, and these changes are already naturally carried over to the new pipeline so we probably should make it consistent with enable_model_cpu_offload
>
> on the other hand, these methods may not work properly with the potential addition or override of new components, e.g. if we have enable_vae_slicing enabled on the pipeline and create a new pipeline with a new vae component, it won't work
that is exactly the problem with model offload compatibility I was referring to earlier in the conversation.
for example, a very common use-case is to load feature_extractor and image_encoder once they are needed by ipadapter.
and then we end up with split-brain pipe: parts want to do offloading, parts do not.
even worse, what if I want to now load a different base model and just reuse those two previously loaded components so I don't have to load them again (they are not tiny by any means)?
if we're not going to handle those internally, then at least we need the opposite of enable_model_cpu_offload to force-disable it before loading components, and then we call enable_model_cpu_offload again once the pipeline is reconstructed.
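something along these lines would cover my use case (rough sketch, not an existing diffusers API; it assumes stripping accelerate's hooks from each module is enough to undo the offload):

```python
import torch
from accelerate.hooks import remove_hook_from_module

# rough sketch of a "disable_model_cpu_offload": remove the accelerate hooks
# that enable_model_cpu_offload() attached to the model-level components
for name, component in pipe.components.items():
    if isinstance(component, torch.nn.Module):
        remove_hook_from_module(component, recurse=True)

# ...now load / swap whatever components are needed and rebuild the pipeline,
# then re-apply offloading on the reconstructed pipeline:
# pipe_new = SomePipeline.from_pipe(pipe, vae=new_vae)
# pipe_new.enable_model_cpu_offload()
```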
Thanks @vladmandic! I didn't think it through before
I updated the model offload methods in a separate PR here. So now, both enable_model_cpu_offload and enable_sequential_cpu_offload will work properly when you re-apply them, either from the same pipeline or from a different one.
now all these enable_* methods have consistent behavior in from_pipe: we do not do anything to guarantee that these pipeline settings are inherited from the previous pipeline, so they may not work as-is, but if you re-apply them, they will work correctly on the new pipeline.
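for example, this pattern now works as expected (a minimal sketch reusing the pipelines from the tests above):

```python
# offload was enabled on pipe_sd above; the new pipeline starts without any hooks
pipe_sag = StableDiffusionSAGPipeline.from_pipe(pipe_sd)

# the offload setting is not inherited, but re-applying it now works correctly
pipe_sag.enable_model_cpu_offload()
```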
I'm open to inheriting the settings too! let me know what you all think @sayakpaul @pcuenca @DN6
thanks - if re-apply now works, that's imo sufficient.