RN icon indicating copy to clipboard operation
RN copied to clipboard

About output from the pretrained model

Open im-aksh opened this issue 1 year ago • 0 comments

Hey Hi, I am writing to you as a user of your paper's pretrained model, specifically the model described in the code you provided. First of all, I would like to express my appreciation for your work and the effort you have put into developing this model.

Recently, I have been utilizing your pretrained model for a specific task in my research project. While I acknowledge the potential and effectiveness of the model, I must inform you that I am not getting correct output.

I have attached my code and the output, please look into it and let me know if there are changes needed in the code output

`from future import print_function import argparse import numpy as np import torch import torch.nn as nn import torch.nn.functional as F from torch.autograd import Variable from PIL import Image, ImageOps from torchvision.transforms.functional import to_pil_image from models import InpaintingModel import lpips import os
from skimage.metrics import peak_signal_noise_ratio as compare_psnr from skimage.metrics import structural_similarity as compare_ssim

loss_fn_alex = lpips.LPIPS(net='alex')

Training settings

parser = argparse.ArgumentParser(description='PyTorch Video Inpainting with Background Auxiliary') parser.add_argument('--bs', type=int, default=256, help='training batch size') parser.add_argument('--lr', type=float, default=0.001, help='Learning Rate. Default=0.001') parser.add_argument('--cpu', default=False, action='store_true', help='Use CPU to test') parser.add_argument('--threads', type=int, default=1, help='number of threads for data loader to use') parser.add_argument('--seed', type=int, default=67454, help='random seed to use. Default=123') parser.add_argument('--gpus', default=0, type=int, help='number of GPUs') parser.add_argument('--threshold', type=float, default=0.8) parser.add_argument('--img_path', type=str, default="D:/FYP/input_image/input.jpg") parser.add_argument('--mask_path', type=str, default="D:/FYP/input_mask/00015.png") parser.add_argument('--model', default='C:/FYP/RN-master/pretrained_model/x_admin.cluster.localRN-0.8RN-Net_bs_14_epoch_3.pth', help='pretrained base model') parser.add_argument('--save', default=True, action='store_true', help='If save test images') parser.add_argument('--save_path', type=str, default='C:/FYP/RN-master/output') parser.add_argument('--input_size', type=int, default=512, help='input image size') parser.add_argument('--l1_weight', type=float, default=1.0) parser.add_argument('--gan_weight', type=float, default=.1)

opt = parser.parse_args()

def evaluate_single_image(image_path, mask_path, save=False, save_path=None): # Load the model device = torch.device('cpu' if opt.cpu else 'cuda') model = InpaintingModel(g_lr=opt.lr, d_lr=(0.1 * opt.lr), l1_weight=opt.l1_weight, gan_weight=opt.gan_weight, iter=0, threshold=opt.threshold) model.load_state_dict(torch.load(opt.model, map_location=device), strict=False)

pred, avg_lpips, mask, gt = eval_single_image(image_path, mask_path, model)

if save:
    image = Image.open(opt.img_path)
    mask = Image.open(opt.mask_path)
    inverted_mask = ImageOps.invert(mask)
    resized_mask = inverted_mask.resize(image.size, resample=Image.BILINEAR)
    masked_image = Image.composite(image, Image.new('RGB', image.size), resized_mask)
    masked_image.save(r'C:/FYP/RN-master/output/input.png')
    save_img(save_path, 'mask', mask)
    save_img(save_path, 'output', pred)
    save_img(save_path, 'gt', gt)

return avg_lpips

def eval_single_image(image_path, mask_path, model): model.eval() model.generator.eval() avg_lpips = 0.

with torch.no_grad():
    gt = np.array(Image.open(image_path))
    mask = np.array(Image.open(mask_path))

    gt = torch.from_numpy(gt.transpose((2, 0, 1))).float().unsqueeze(0) / 255.0
    mask = torch.from_numpy(mask).unsqueeze(0).unsqueeze(0)  # Add extra dimensions for channel and batch

    # Resize the input tensor and mask tensor to match the expected input size
    gt = F.interpolate(gt, size=(opt.input_size, opt.input_size), mode='bilinear', align_corners=False)
    mask = F.interpolate(mask, size=(opt.input_size, opt.input_size), mode='nearest')

    gt, mask = Variable(gt), Variable(mask)

    prediction = model.generator(gt, mask)
    prediction = prediction * mask + gt * (1 - mask)
    avg_lpips = loss_fn_alex(prediction, gt).mean().item()

return prediction, avg_lpips, mask, gt

def save_img(path, name, img): # img (H,W,C) or (H,W) np.uint8 or torch tensor if isinstance(img, torch.Tensor): img = to_pil_image(img.squeeze().cpu()) img.save(os.path.join(path, name + '.png'))

def main(): torch.manual_seed(opt.seed)

# Checking for GPU availability
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda and not opt.cpu else "cpu")

# Evaluate single image
avg_lpips = evaluate_single_image(opt.img_path, opt.mask_path, save=opt.save, save_path=opt.save_path)

print("Average LPIPS: {:.4f}".format(avg_lpips))

if name == 'main': main() `

im-aksh avatar Jul 04 '23 06:07 im-aksh