fasterrcnn-pytorch-training-pipeline
Add Inception Backbone
```python
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torchvision.models.detection.rpn import AnchorGenerator
import torch.nn as nn


def create_model(num_classes, pretrained=True, coco_model=False):
    # Load a pre-trained InceptionV3 backbone.
    backbone = torchvision.models.inception_v3(pretrained=pretrained)
    # Remove the classification head of the InceptionV3.
    backbone = nn.Sequential(*list(backbone.children())[:-2])
    backbone.out_channels = 960

    # Define the RPN (Region Proposal Network).
    rpn_anchor_generator = AnchorGenerator(
        sizes=((32, 64, 128, 256, 512),),
        aspect_ratios=((0.5, 1.0, 2.0),) * 5
    )
    rpn_head = torchvision.models.detection.rpn.RPNHead(
        2048, rpn_anchor_generator.num_anchors_per_location()[0]
    )

    # Create the Faster R-CNN model.
    model = FasterRCNN(
        backbone,
        num_classes=num_classes,
        rpn_anchor_generator=rpn_anchor_generator,
        rpn_head=rpn_head
    )
    if coco_model:
        # If you want to use a COCO pre-trained model, you can load it here.
        # This is just a placeholder and depends on the availability of such a model.
        return model, coco_model
    return model


if __name__ == '__main__':
    import torch
    from model_summary import summary

    num_classes = 2  # Change this to the number of classes in your dataset.
    model = create_model(num_classes=num_classes, pretrained=True, coco_model=False)
    # Input tensor with appropriate dimensions.
    summary(model)
```
### I get this error when trying to implement this backbone
```
return self._conv_forward(input, self.weight, self.bias)
  File "C:\Users\user\AppData\Roaming\Python\Python39\site-packages\torch\nn\modules\conv.py", line 459, in _conv_forward
    return F.conv2d(input, weight, bias, self.stride,
RuntimeError: Expected 3D (unbatched) or 4D (batched) input to conv2d, but got input of size: [2, 1000]
wandb: Waiting for W&B process to finish... (failed 1). Press Ctrl-C to abort syncing.
```
You are inputting a 2D tensor to the model, whereas it should be a 4D tensor, that is, `[batch_size, channels, height, width]`.
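For reference, here is a minimal sketch of how this backbone and a correctly shaped input could be wired up. Two assumptions worth flagging: `inception_v3` is built with `aux_logits=False` so that the auxiliary classifier does not land inside the `nn.Sequential` trunk (with all children kept, `AuxLogits` emits a 2D `[N, 1000]` tensor mid-forward, which would match the `[2, 1000]` shape in the traceback), and `out_channels` is set to 2048 to match `Mixed_7c` rather than 960. This is not this repo's official backbone recipe, just one way to get the pieces consistent:

```python
import torch
import torch.nn as nn
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

# Build InceptionV3 without the auxiliary classifier so it cannot end up
# inside the sequential trunk (torchvision strips AuxLogits after loading
# the pretrained weights when aux_logits=False).
inception = torchvision.models.inception_v3(pretrained=True, aux_logits=False)

# Keep only the convolutional trunk: drop avgpool, dropout, and fc.
backbone = nn.Sequential(*list(inception.children())[:-3])
backbone.out_channels = 2048  # Mixed_7c outputs 2048 feature channels.

# A single feature map, so one tuple of sizes and one of aspect ratios.
anchor_generator = AnchorGenerator(
    sizes=((32, 64, 128, 256, 512),),
    aspect_ratios=((0.5, 1.0, 2.0),),
)

model = FasterRCNN(
    backbone,
    num_classes=2,
    rpn_anchor_generator=anchor_generator,
)
model.eval()

# Faster R-CNN expects a list of 3D (C, H, W) image tensors; its internal
# transform resizes and batches them into a single 4D tensor.
images = [torch.randn(3, 299, 299), torch.randn(3, 299, 299)]
with torch.no_grad():
    outputs = model(images)
print(outputs)
```

With the auxiliary head kept out of the trunk and the images passed as a list of 3D tensors, the forward pass should no longer hit the `[2, 1000]` conv2d error.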