What am I doing wrong?

Open elismasilva opened this issue 9 months ago • 0 comments
from latent_interposer import ComfyLatentInterposer
from diffusers import StableDiffusionPipeline, EulerDiscreteScheduler
from diffusers import AutoencoderKL
from diffusers.image_processor import VaeImageProcessor
import torch

# Accelerator used for the random generator and for the standalone VAE below.
device = "cuda"

# Load the SD 1.x single-file checkpoint in bfloat16 and swap in the Euler
# discrete scheduler, reusing the checkpoint's scheduler configuration.
pipe = StableDiffusionPipeline.from_single_file(
    "F:\\models\\Stable-diffusion\\sd1\\epicrealism_pureEvolution.safetensors",
    torch_dtype=torch.bfloat16,
)
pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)

# NOTE: the pipeline is intentionally NOT moved with `pipe.to(device)` first.
# Model CPU offload handles device placement on its own (each sub-model is
# moved to the GPU only while it runs); moving everything to CUDA beforehand
# is redundant and works against the memory savings offloading is meant for.
pipe.enable_model_cpu_offload()
pipe.enable_vae_slicing()
pipe.enable_vae_tiling()

# Sampling settings shared by the generation call.
num_steps = 10
seed = 1830217805
generator = torch.Generator(device).manual_seed(seed)

# Prompt texts are kept in named variables so the sampling call stays readable.
positive_text = "(detailed face, detailed eyes, clear skin, clear eyes), lotr, fantasy, elf, female, full body, looking at viewer, portrait, photography, detailed skin, realistic, photo-realistic, 8k, highly detailed, full length frame, High detail RAW color art, piercing, diffused soft lighting, shallow depth of field, sharp focus, hyperrealism, cinematic lighting"
negative_text = "canvas frame, cartoon, 3d, ((disfigured)), ((bad art)), ((deformed)),((extra limbs)),((close up)),((b&w)), wierd colors, blurry, (((duplicate))), ((morbid)), ((mutilated)), [out of frame], extra fingers, mutated hands, ((poorly drawn hands)), ((poorly drawn face)), (((mutation))), (((deformed))), ((ugly)), blurry, ((bad anatomy)), (((bad proportions))), ((extra limbs)), cloned face, (((disfigured))), out of frame, ugly, extra limbs, (bad anatomy), gross proportions, (malformed limbs), ((missing arms)), ((missing legs)), (((extra arms))), (((extra legs))), mutated hands, (fused fingers), (too many fingers), (((long neck))), Photoshop, video game, ugly, tiling, poorly drawn hands, poorly drawn feet, poorly drawn face, out of frame, mutation, mutated, extra limbs, extra legs, extra arms, disfigured, deformed, cross-eye, body out of frame, blurry, bad art, bad anatomy, 3d render"

# All sampling arguments in one place. output_type="latent" asks the pipeline
# for latents instead of decoded pictures; the code further down decodes
# `images[0]` itself with two different VAEs.
sampling_kwargs = {
    "prompt": positive_text,
    "negative_prompt": negative_text,
    "generator": generator,
    "guidance_scale": 5,
    "height": 512,
    "width": 512,
    "num_inference_steps": num_steps,
    "output_type": "latent",
}
images = pipe(**sampling_kwargs)
                               
# Reference output: decode the SD 1.x latents with the pipeline's own VAE.
#
# Everything here is inference-only, so the whole decode -> safety check ->
# postprocess chain runs under torch.no_grad(). Wrapping only the final
# postprocess call is too late: autograd bookkeeping is recorded while
# `decode` runs, which wastes memory and can leave a tensor that still
# requires grad and therefore cannot be converted to numpy/PIL.
with torch.no_grad():
    # Undo the latent scaling applied by the pipeline before decoding.
    latents = 1 / pipe.vae.config.scaling_factor * images[0]
    image = pipe.vae.decode(latents, return_dict=False, generator=generator)[0]

    image, has_nsfw_concept = pipe.run_safety_checker(image, device, pipe.unet.dtype)

    # Denormalize every image unless the safety checker flagged it.
    if has_nsfw_concept is None:
        do_denormalize = [True] * image.shape[0]
    else:
        do_denormalize = [not has_nsfw for has_nsfw in has_nsfw_concept]

    image = pipe.image_processor.postprocess(image, output_type="pil", do_denormalize=do_denormalize)

image[0].save("test_sd1_default_vae.png")

# Second output: convert the same SD 1.x latents to SDXL latent space with the
# interposer and decode them with a standalone SDXL VAE.
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix").to(device)
# Use the public toggles rather than assigning the flags directly.
vae.enable_tiling()
vae.enable_slicing()

interposer = ComfyLatentInterposer()

# Inference only: keep conversion and decoding out of autograd so the decoded
# tensor can be turned into a PIL image without a manual detach.
with torch.no_grad():
    sdxl_latents = interposer.convert(images[0].to(torch.float32), 'v1', 'xl', torch.float32).to(dtype=torch.float32)
    # Undo the SDXL VAE's latent scaling before decoding.
    sdxl_latents = sdxl_latents / vae.config.scaling_factor
    image = vae.decode(sdxl_latents, return_dict=False, generator=generator)[0]

# `vae_scale_factor` is the VAE's spatial down-sampling factor (latent pixels
# -> image pixels), derived from the number of encoder/decoder stages. It is
# NOT `vae.config.scaling_factor`, which is the latent magnitude scaling that
# was already divided out above.
vae_scale_factor = 2 ** (len(vae.config.block_out_channels) - 1)
image_processor = VaeImageProcessor(vae_scale_factor=vae_scale_factor, do_convert_rgb=True)

image = image_processor.postprocess(image, output_type="pil")

image[0].save("test_sd1_madebyollin_vae.png")
SD 1.5
SD XL VAE
Mar 22 '25 20:03 elismasilva