direct-inversion
direct-inversion copied to clipboard
bad results
I tried to reproduce the examples shown in the paper, but it fails miserably: the generated images look like they are overlaid on the original image. Is there something wrong with my setup?
from diffusers import StableDiffusionPipeline, DDIMScheduler
from IPython.display import display
# Build the DDIM scheduler and the inversion pipeline, then load the input image.
# NOTE(review): `DirectInversionPipeline`, `torch`, `Image`, `model_id`,
# `device` and `remove_safety` are not defined in the visible snippet; they
# must be imported/assigned before this point -- confirm.
scheduler = DDIMScheduler(
    beta_start=0.00085,
    beta_end=0.012,
    beta_schedule="scaled_linear",
    clip_sample=False,
    set_alpha_to_one=False,
)
pipe = DirectInversionPipeline.from_pretrained(
    model_id,
    scheduler=scheduler,
    torch_dtype=torch.float16,
    revision="fp16",
    use_auth_token=True,
)
if remove_safety:
    # Swap the safety checker for a pass-through: it hands the images back
    # unchanged together with a False flag.
    pipe.safety_checker = lambda images, clip_input: (images, False)
pipe = pipe.to(device)

# Input image to invert: forced to RGB and resized to 512x512.
in_img = Image.open("/content/test2.png").convert("RGB").resize((512, 512))
# --- Text conditioning ---
# Target prompt used for generation.
prompt = "black dress shoes"
# Prompt handed to the pipeline as `inversion_prompt` (a single space here).
inversion_prompt = " "

# --- Sampling settings ---
num_images = 1
guidance_scale = 3  # in range [0, 10]
noise_merge_lambda = 0.5  # in range [0, 1]
# Both step counts are in range [0, 1000]; keeping them at 100 is recommended.
num_inference_steps = 100
num_inversion_steps = 100

# --- Noise injection switches ---
# True: start from the inverted noise; False: start from random noise.
direct_injection = True
# True: keep injecting inverted noise while sampling.
stochastic_injection = True
# True: use the scaled noises predicted during inversion for stochastic injection.
stochastic_injection_exp_method = True
# Number of steps that receive injected inverted noise (here: every inference step).
num_stochastic_injection_steps = num_inference_steps

# --- Accumulators and prompt batch ---
log, output = [], []
# One copy of the prompt per requested image.
prompts = [prompt for _ in range(num_images)]
# Run the pipeline under CUDA autocast. All settings are passed by keyword so
# each one maps explicitly onto a pipeline argument.
with autocast("cuda"):
    out = pipe(
        prompts,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
        input_image=in_img,
        direct_injection=direct_injection,
        stochastic_injection=stochastic_injection,
        inversion_steps=num_inversion_steps,
        noise_merge_lambda=noise_merge_lambda,
        num_stochastic_injection_steps=num_stochastic_injection_steps,
        inversion_prompt=inversion_prompt,
        debug_print=True,
        stochastic_injection_exp_method=stochastic_injection_exp_method,
    )

# The first element of the result holds the generated images under "sample".
images = out[0]["sample"]
# DEBUG [Not enabled for release]
# noises = out[1]
# cleans = out[2]

# Put the original input first so it can be compared with the generated images.
images.insert(0, in_img)