X3D Video on RGB Video
Hi, thanks for the repo!

I am trying to use GradCAM on the X3D model from the tutorial below:
https://pytorch.org/hub/facebookresearch_pytorchvideo_x3d/#define-input-transform

However, I am getting the error below, which I guess has to do with upsampling the CAM back to the original input size. I am not sure whether the upsampling should also have been updated after this merge: https://github.com/jacobgil/pytorch-grad-cam/pull/466

Thanks!

Here is what I am running:
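The model and `inputs` come straight from the Hub tutorial; roughly this (a sketch, assuming the `x3d_s` variant and the tutorial's transform pipeline):

```python
import torch
from pytorch_grad_cam import GradCAM

# Load X3D from PyTorch Hub as in the linked tutorial (x3d_s is an assumption;
# any X3D variant should hit the same shape problem).
model = torch.hub.load('facebookresearch/pytorchvideo', 'x3d_s', pretrained=True)
model = model.eval()

# `inputs` is a single transformed clip of shape (3, 13, 182, 182),
# i.e. (channels, frames, height, width), produced by the tutorial's transform.
```

Then the CAM call itself: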
```python
target_layers = [model.blocks[4]]
# print(f'target_layers: {target_layers}\n \n \n')
cam = GradCAM(model=model, target_layers=target_layers)
# print(f'model: {model}')
input_tensor = inputs[None, ...]  # input_tensor shape: torch.Size([1, 3, 13, 182, 182])
print(f'input_tensor shape: {input_tensor.shape}\n \n \n')
grayscale_cam = cam(input_tensor=input_tensor)  # , targets=targets)
```
```
input_tensor shape: torch.Size([1, 3, 13, 182, 182])

---------------------------------------------------------------------------
error                                     Traceback (most recent call last)
Input In [25], in <cell line: 9>()
      7 input_tensor = inputs[None, ...]  # input_tensor shape: torch.Size([1, 3, 13, 182, 182])
      8 print(f'input_tensor shape: {input_tensor.shape}\n \n \n')
----> 9 grayscale_cam = cam(input_tensor=input_tensor)

File ~/abubakr5_Scratch/abubakr5_Scratch/.virtualenvs/BKRROugePython3_9/lib/python3.9/site-packages/pytorch_grad_cam/base_cam.py:186, in BaseCAM.__call__(self, input_tensor, targets, aug_smooth, eigen_smooth)
    183 if aug_smooth is True:
    184     return self.forward_augmentation_smoothing(input_tensor, targets, eigen_smooth)
--> 186 return self.forward(input_tensor, targets, eigen_smooth)

File ~/abubakr5_Scratch/abubakr5_Scratch/.virtualenvs/BKRROugePython3_9/lib/python3.9/site-packages/pytorch_grad_cam/base_cam.py:110, in BaseCAM.forward(self, input_tensor, targets, eigen_smooth)
     99 loss.backward(retain_graph=True)
    101 # In most of the saliency attribution papers, the saliency is
    102 # computed with a single target layer.
    103 # Commonly it is the last convolutional layer.
    (...)
    108 # use all conv layers for example, all Batchnorm layers,
    109 # or something else.
--> 110 cam_per_layer = self.compute_cam_per_layer(input_tensor, targets, eigen_smooth)
    111 return self.aggregate_multi_layers(cam_per_layer)

File ~/abubakr5_Scratch/abubakr5_Scratch/.virtualenvs/BKRROugePython3_9/lib/python3.9/site-packages/pytorch_grad_cam/base_cam.py:143, in BaseCAM.compute_cam_per_layer(self, input_tensor, targets, eigen_smooth)
    141 cam = self.get_cam_image(input_tensor, target_layer, targets, layer_activations, layer_grads, eigen_smooth)
    142 cam = np.maximum(cam, 0)
--> 143 scaled = scale_cam_image(cam, target_size)
    144 cam_per_target_layer.append(scaled[:, None, :])
    146 return cam_per_target_layer

File ~/abubakr5_Scratch/abubakr5_Scratch/.virtualenvs/BKRROugePython3_9/lib/python3.9/site-packages/pytorch_grad_cam/utils/image.py:172, in scale_cam_image(cam, target_size)
    169 img = zoom(np.float32(img), [
    170     (t_s / i_s) for i_s, t_s in zip(img.shape, target_size[::-1])])
    171 else:
--> 172 img = cv2.resize(np.float32(img), target_size)
    174 result.append(img)
    175 result = np.float32(result)

error: OpenCV(4.10.0) :-1: error: (-5:Bad argument) in function 'resize'
> Overload resolution failed:
>  - Can't parse 'dsize'. Expected sequence length 2, got 3
>  - Can't parse 'dsize'. Expected sequence length 2, got 3
```
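From the traceback, `scale_cam_image` falls back to `cv2.resize`, whose `dsize` must have length 2, while the video CAM's `target_size` here has 3 dimensions (frames, height, width). As a temporary workaround I can rescale the 3D CAM myself with `scipy.ndimage.zoom`, mirroring the 3D branch visible at `image.py:169-170`. A minimal sketch (assuming the raw per-layer CAM has shape `(T, h, w)`; `scale_video_cam` is my own helper, not part of the library):

```python
import numpy as np
from scipy.ndimage import zoom

def scale_video_cam(cam, target_size):
    """Upsample a (T, h, w) CAM to target_size == (T, H, W), normalized to [0, 1]."""
    cam = np.float32(cam)
    # Per-axis zoom factors: target extent / source extent for time, height, width.
    factors = [t / s for s, t in zip(cam.shape, target_size)]
    scaled = zoom(cam, factors)              # spline interpolation over time and space
    scaled = scaled - scaled.min()
    return scaled / (1e-7 + scaled.max())    # same normalization as scale_cam_image
```

Does this look like the right direction, or should `scale_cam_image` itself handle a 3D `target_size` after #466?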