pytorch-grad-cam
How to Generate Attention Maps on Custom Models?
Hi, I'm working on attention visualization for face recognition models, using an IR model as the backbone. I don't know much about the implementation details of grad-cam, so what exactly should I do? Do none of the targets defined in pytorch_grad_cam.utils.model_targets apply to face recognition and verification tasks? How should I go about generating the attention maps? Is it possible to define a custom target based on cosine similarity?
grad-cam 1.5.0
torch 1.8.1+cu101
torchvision 0.9.1+cu101
Here's how I implemented it:
import warnings
import torch
from face_model import ir152
warnings.filterwarnings('ignore')
import numpy as np
import cv2
from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image, preprocess_image
from PIL import Image


class FeatureVectorSimilarityTarget:
    def __init__(self, target_feature):
        self.target_feature_vector = target_feature

    def __call__(self, model_output):
        cosine_similarity = torch.nn.functional.cosine_similarity(model_output, self.target_feature_vector, dim=0)
        return cosine_similarity


def load_img(path):
    img = np.float32(cv2.resize(np.array(Image.open(path)), (112, 112))) / 255
    tensor = preprocess_image(img, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    return img, tensor


model = ir152.IR_152((112, 112))
model.load_state_dict(torch.load('face_model_weights/ir152.pth'))
model.to('cuda')
model.eval()

input_img, input_tensor = load_img('001.jpg')
_, target_tensor = load_img('002.jpg')
target_feature = model(target_tensor.to('cuda'))

targets = [FeatureVectorSimilarityTarget(target_feature)]
target_layers = [model]

with GradCAM(model=model, target_layers=target_layers) as cam:
    grayscale_cams = cam(input_tensor=input_tensor, targets=targets)
    cam_image = show_cam_on_image(input_img, grayscale_cams[0, :], use_rgb=True)

cam = np.uint8(255 * grayscale_cams[0, :])
cam = cv2.merge([cam, cam, cam])
images = np.hstack((np.uint8(255 * input_img), cam, cam_image))
Image.fromarray(images).save('cam.jpg')
I get the following error:
grayscale_cams = cam(input_tensor=input_tensor, targets=targets)
... ...
File "torch/autograd/__init__.py", line 50, in _make_grads
RuntimeError: grad can be implicitly created only for scalar outputs
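My guess is that the error comes from the target returning a non-scalar: with embeddings of shape (1, 512), cosine_similarity(..., dim=0) yields a 512-element vector, while the backward pass seems to need a scalar per target. Would a variant like the sketch below (my own assumption, reducing along the feature dimension and summing over the batch) be the right way to do it?

# Sketch of a scalar-returning target (my assumption, not verified):
# with (1, 512) embeddings, dim=1 gives one similarity value per sample,
# and .sum() reduces over the batch so backward() gets a scalar.
class CosineSimilarityTarget:
    def __init__(self, target_feature):
        self.target_feature = target_feature

    def __call__(self, model_output):
        return torch.nn.functional.cosine_similarity(
            model_output, self.target_feature, dim=1).sum()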
Here's the model:
class Backbone(Module):
    def __init__(self, input_size, num_layers, mode='ir'):
        super(Backbone, self).__init__()
        assert input_size[0] in [112, 224], "input_size should be [112, 112] or [224, 224]"
        assert num_layers in [50, 100, 152], "num_layers should be 50, 100 or 152"
        assert mode in ['ir', 'ir_se'], "mode should be ir or ir_se"
        blocks = get_blocks(num_layers)
        if mode == 'ir':
            unit_module = bottleneck_IR
        elif mode == 'ir_se':
            unit_module = bottleneck_IR_SE
        self.input_layer = Sequential(Conv2d(3, 64, (3, 3), 1, 1, bias=False),
                                      BatchNorm2d(64),
                                      PReLU(64))
        if input_size[0] == 112:
            self.output_layer = Sequential(BatchNorm2d(512),
                                           Dropout(0.4),
                                           Flatten(),
                                           Linear(512 * 7 * 7, 512),
                                           # BatchNorm1d(512, affine=False))
                                           BatchNorm1d(512))
        else:
            self.output_layer = Sequential(BatchNorm2d(512),
                                           Dropout(0.4),
                                           Flatten(),
                                           Linear(512 * 14 * 14, 512),
                                           # BatchNorm1d(512, affine=False))
                                           BatchNorm1d(512))
        modules = [unit_module(bottleneck.in_channel, bottleneck.depth, bottleneck.stride)
                   for block in blocks for bottleneck in block]
        self.body = Sequential(*modules)
        self._initialize_weights()

    def forward(self, x):
        x = self.input_layer(x)
        x = self.body(x)
        conv_out = x.view(x.shape[0], -1)
        x = self.output_layer(x)
        # norm = torch.norm(x, p=2, dim=1)
        # x = torch.div(x, norm)
        # return x, conv_out
        return x


def IR_152(input_size):
    model = Backbone(input_size, 152, 'ir')
    return model
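Also, I'm not sure that target_layers = [model] is correct for this backbone. My guess (unverified) is that Grad-CAM should be pointed at the last stage that still produces a spatial feature map, something like:

# My assumption: use the last residual block of the backbone, i.e. the
# last 4-D feature map before output_layer flattens it, instead of the
# whole model.
target_layers = [model.body[-1]]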