How to Run Inference on a Single Custom Video?
Hi, thank you for your great work!
I’m currently working on a project where your method would be highly beneficial. However, as someone new to the video tracking field, I’m unsure how to perform inference on a single custom video using your code. The instructions in test.md mainly cover evaluation on entire datasets.
Could you kindly provide some guidance or example code for running inference on a single video clip? Any help would be greatly appreciated!
Thanks again for your amazing contribution!
I have now put together a working version. I hope it helps others:
import os
import torch
import numpy as np
from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, launch
from detectron2.config import get_cfg
from detectron2.projects.glee import add_glee_config, build_detection_train_loader, build_detection_test_loader
from detectron2.modeling import build_model
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.data import detection_utils as utils
from detectron2.data import transforms as T
from PIL import Image
# Category table used to derive the text prompt in main(). Each entry carries a
# display color, an "isthing" flag, a 1-based id, and the category name.
OVIS_CATEGORIES = [
    {"color": list(rgb), "isthing": 1, "id": cat_id, "name": label}
    for cat_id, (label, rgb) in enumerate(
        (
            ("Truck", (220, 20, 60)),
            ("Car", (0, 82, 0)),
            ("Bus", (119, 11, 32)),
        ),
        start=1,
    )
]
def setup(args):
    """
    Assemble the run configuration and perform basic setups.

    Starts from the default config, registers the GLEE options, overlays the
    file named by ``args.config_file`` and then the ``args.opts`` overrides,
    freezes the result, and hands it to ``default_setup`` together with ``args``.

    Returns the frozen config object.
    """
    config = get_cfg()
    add_glee_config(config)

    # The file is merged first so that command-line overrides win.
    config.merge_from_file(args.config_file)
    config.merge_from_list(args.opts)

    config.freeze()
    default_setup(config, args)
    return config
def main(args):
    """
    Run inference on one directory of frames and save masked frames to disk.

    Builds the model from the config produced by ``setup(args)``, loads the
    'GLEE_Plus_joint.pth' checkpoint, reads every file in './CAM_FRONT_LEFT'
    in sorted name order, resizes each frame, and passes the whole list to the
    model as a single request. The raw outputs are printed. For every
    predicted instance with score > 0.3 and label 24, each frame in which that
    instance has a non-empty mask is written to '<idx>_<frame>.png' in the
    current directory with all pixels outside the mask zeroed.

    Args:
        args: Parsed command-line arguments; forwarded to ``setup``.
    """
    cfg = setup(args)
    model = build_model(cfg)
    DetectionCheckpointer(model).load('GLEE_Plus_joint.pth')

    img_dir = './CAM_FRONT_LEFT'
    # NOTE(review): this prompt list is built but not forwarded; the request
    # below passes 'prompt': None. An alternative tried here was ['Vehical'].
    prompt = [cat['name'] for cat in OVIS_CATEGORIES]

    img_list = []
    file_names = []

    min_size = cfg.INPUT.MIN_SIZE_TEST
    max_size = cfg.INPUT.MAX_SIZE_TEST
    sample_style = "choice"
    aug_list = [T.ResizeShortestEdge(min_size, max_size, sample_style)]
    augumentations = T.AugmentationList(aug_list)

    # Size of the most recently read frame; all frames are presumably the
    # same resolution -- TODO confirm for mixed-size inputs.
    ori_height = 0
    ori_width = 0
    for frame in sorted(os.listdir(img_dir)):
        img_path = os.path.join(img_dir, frame)
        file_names.append(img_path)
        image = utils.read_image(img_path, format='RGB')
        ori_height, ori_width = image.shape[:2]
        aug_input = T.AugInput(image)
        # The resized frame is read back from aug_input afterwards.
        augumentations(aug_input)
        image = aug_input.image
        # HWC -> CHW before converting to a tensor.
        img_list.append(torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))))

    inputs = [{
        'height': ori_height,
        'width': ori_width,
        'image': img_list,
        'task': 'ovis',  # TODO: for debug, current use task as 'ovis'
        'file_names': file_names,
        'prompt': None
    }]

    model.eval()
    with torch.no_grad():
        outputs = model(inputs)
    print(outputs)

    # Union of all selected instance masks per frame. It is filled in below
    # but not consumed anywhere else in this script.
    np_mask = np.zeros((len(file_names), ori_height, ori_width))
    for idx, score in enumerate(outputs['pred_scores']):
        if not (score > 0.3 and outputs['pred_labels'][idx] == 24):
            continue
        for frame, img_path in enumerate(file_names):
            mask = outputs['pred_masks'][idx][frame]
            if mask.sum() > 0:
                np_mask[frame][mask] = 1
                img = np.array(Image.open(img_path))
                img[~mask] = 0
                # The instance index is part of the filename. Naming the file
                # by frame alone let every later instance overwrite the image
                # an earlier instance had saved for the same frame, so the
                # saved sequence could show different objects per frame.
                Image.fromarray(img).save(f'{idx}_{frame}.png')
if __name__ == "__main__":
    # Script entry point: parse the standard command line, echo it, and run.
    parser = default_argument_parser()
    args = parser.parse_args()
    print("Command Line Args:", args)
    main(args)
Are there any bugs? The prediction for a single idx matches two different cars over the course of the sequence. Here are the results: