PhotoMaker
PhotoMaker copied to clipboard
The generated result is just noise. Why? Please help.
trafficstars
I ran the demo from the notebook, but I got noise like this.
The inference code is:
import torch
import os
from diffusers.utils import load_image
from diffusers import EulerDiscreteScheduler, DDIMScheduler
from photomaker import PhotoMakerStableDiffusionXLPipeline
import os
# PhotoMaker inference script: load an SDXL-based pipeline, attach the
# PhotoMaker adapter, and generate one image conditioned on a folder of
# identity photos.

# Choose which GPU is visible to this process (set before the CUDA query below).
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

base_model_path = "./SG161222"
photomaker_path = "./ckpt/photomaker-v1.bin"

### Load base model
# float16 matches the "fp16" weight variant requested below. The previous
# bfloat16 setting produced pure-noise images on a GPU without bfloat16 support.
pipe = PhotoMakerStableDiffusionXLPipeline.from_pretrained(
    base_model_path,  # can change to any base model based on SDXL
    torch_dtype=torch.float16,
    use_safetensors=True,
    variant="fp16",
).to(device)

### Load PhotoMaker checkpoint
pipe.load_photomaker_adapter(
    os.path.dirname(photomaker_path),
    subfolder="",
    weight_name=os.path.basename(photomaker_path),
    trigger_word="img",  # define the trigger word
)

#pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)

### Also can cooperate with other LoRA modules
# pipe.load_lora_weights(os.path.dirname(lora_path), weight_name=lora_model_name, adapter_name="xl_more_art-full")
# pipe.set_adapters(["photomaker", "xl_more_art-full"], adapter_weights=[1.0, 0.5])
pipe.fuse_lora()

### define the input ID images
input_folder_name = './examples/newton_man'
# Sorted so the image order does not depend on the filesystem's listing order.
image_path_list = sorted(
    os.path.join(input_folder_name, basename)
    for basename in os.listdir(input_folder_name)
)
input_id_images = [load_image(image_path) for image_path in image_path_list]

# Note that the trigger word `img` must follow the class word for personalization
num_steps = 50
style_strength_ratio = 20
# style_strength_ratio is a percentage of num_steps, capped at step 30.
# With the values above this is 10, the value the call previously hard-coded.
start_merge_step = min(int(float(style_strength_ratio) / 100 * num_steps), 30)

prompt = "a half-body portrait of a man img wearing the sunglasses in Iron man suit, best quality"
negative_prompt = "(asymmetry, worst quality, low quality, illustration, 3d, 2d, painting, cartoons, sketch), open mouth, grayscale"

# Fixed seed so repeated runs are comparable.
generator = torch.Generator(device=device).manual_seed(42)
images = pipe(
    prompt=prompt,
    input_id_images=input_id_images,
    negative_prompt=negative_prompt,
    num_images_per_prompt=1,
    num_inference_steps=num_steps,
    start_merge_step=start_merge_step,
    generator=generator,
).images

print(len(images))
for i, image in enumerate(images):
    image.save(f'out_photomaker_{i}.png')
The contents of base_model_path come from SG161222--RealVisXL_V3.0-11ee564ebf4bd96d90ed5d473cb8e7f2e6450bcf.tar:
SG161222/
├── model_index.json
├── scheduler
├── text_encoder
├── text_encoder_2
├── tokenizer
├── tokenizer_2
├── unet
└── vae
Did you try other base models, e.g., SDXL 1.0?
I found that my GPU does not support bfloat16; the problem was solved by changing bfloat16 to float16.