
TypeError: forward_train() got an unexpected keyword argument 'ref_img_metas'

Open 376498485 opened this issue 3 years ago • 7 comments

Hi, when I use VFNet (VarifocalNet) from mmdetection as the detector for ByteTrack, I get the error in the title: TypeError: forward_train() got an unexpected keyword argument 'ref_img_metas'. My config is as follows:

model = dict(
    detector=dict(
        type='VFNet',
        backbone=dict(
            type='ResNet',
            depth=101,
            num_stages=4,
            out_indices=(0, 1, 2, 3),
            frozen_stages=1,
            norm_cfg=dict(type='BN', requires_grad=True),
            norm_eval=True,
            style='pytorch',
            init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101'),
            dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),
            stage_with_dcn=(False, True, True, True)),
        neck=dict(
            type='FPN',
            in_channels=[256, 512, 1024, 2048],
            out_channels=256,
            start_level=1,
            add_extra_convs='on_output',
            num_outs=5,
            relu_before_extra_convs=True),
        bbox_head=dict(
            type='VFNetHead',
            num_classes=1,
            in_channels=256,
            stacked_convs=3,
            feat_channels=256,
            strides=[8, 16, 32, 64, 128],
            center_sampling=False,
            dcn_on_last_conv=True,
            use_atss=True,
            use_vfl=True,
            loss_cls=dict(
                type='VarifocalLoss',
                use_sigmoid=True,
                alpha=0.75,
                gamma=2.0,
                iou_weighted=True,
                loss_weight=1.0),
            loss_bbox=dict(type='GIoULoss', loss_weight=1.5),
            loss_bbox_refine=dict(type='GIoULoss', loss_weight=2.0)),
        init_cfg=dict(type='Pretrained', checkpoint='https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_r101_fpn_mdconv_c3-c5_mstrain_2x_coco/vfnet_r101_fpn_mdconv_c3-c5_mstrain_2x_coco_20201027pth-7729adb5.pth'),
        train_cfg=dict(
            assigner=dict(type='ATSSAssigner', topk=9),
            allowed_border=-1,
            pos_weight=-1,
            debug=False),
        test_cfg=dict(
            nms_pre=1000,
            min_bbox_size=0,
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.6),
            max_per_img=100)
        ),

    type='ByteTrack',
    motion=dict(type='KalmanFilter'),
    tracker=dict(
        type='ByteTracker',
        obj_score_thrs=dict(high=0.6, low=0.1),
        init_track_thr=0.7,
        weight_iou_with_det_scores=True,
        match_iou_thrs=dict(high=0.1, low=0.5, tentative=0.3),
        num_frames_retain=30))
dataset_type = 'MOTChallengeDataset'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadMultiImagesFromFile', to_float32=True),
    dict(type='SeqLoadAnnotations', with_bbox=True, with_track=True),
    dict(
        type='SeqResize',
        img_scale=(1088, 1088),
        multiscale_mode='range',
        share_params=True,
        ratio_range=(0.8, 1.2),
        keep_ratio=True,
        bbox_clip_border=False),
    dict(type='SeqPhotoMetricDistortion', share_params=True),
    dict(
        type='SeqRandomCrop',
        share_params=False,
        crop_size=(1088, 1088),
        bbox_clip_border=False),
    dict(type='SeqRandomFlip', share_params=True, flip_ratio=0.5),
    dict(
        type='SeqNormalize',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_rgb=True),
    dict(type='SeqPad', size_divisor=32),
    dict(type='MatchInstances', skip_nomatch=True),
    dict(
        type='VideoCollect',
        keys=[
            'img', 'gt_bboxes', 'gt_labels',
        ]),
    dict(type='SeqDefaultFormatBundle', ref_prefix='ref')
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1088, 1088),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(
                type='Normalize',
                mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375],
                to_rgb=True),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='VideoCollect', keys=['img'])
        ])
]
data_root = 'data/MOT17/'
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type='MOTChallengeDataset',
        visibility_thr=-1,
        ann_file='data/MOT17/annotations/half-train_cocoformat.json',
        img_prefix='data/MOT17/train',
        ref_img_sampler=dict(
            num_ref_imgs=1,
            frame_range=10,
            filter_key_img=True,
            method='uniform'),
        pipeline=train_pipeline),
    val=dict(
        type='MOTChallengeDataset',
        ann_file='data/MOT17/annotations/half-val_cocoformat.json',
        img_prefix='data/MOT17/train',
        ref_img_sampler=None,
        pipeline=test_pipeline),
    test=dict(
        type='MOTChallengeDataset',
        ann_file='data/MOT17/annotations/half-val_cocoformat.json',
        img_prefix='data/MOT17/train',
        ref_img_sampler=None,
        pipeline=test_pipeline))
evaluation = dict(metric=['bbox', 'track'], interval=1)
optimizer = dict(
    type='SGD',
    lr=0.01,
    momentum=0.9,
    weight_decay=0.0001,
    paramwise_cfg=dict(bias_lr_mult=2.0, bias_decay_mult=0.0))
optimizer_config = dict(grad_clip=None)
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.1,
    step=[16, 22])
total_epochs = 50
checkpoint_config = dict(interval=1)
log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
custom_hooks = [dict(type='NumClassCheckHook')]
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
opencv_num_threads = 0
mp_start_method = 'fork'
search_metrics = ['MOTA', 'IDF1', 'FN', 'FP', 'IDs', 'MT', 'ML']
work_dir = './work_dirs/bytetrack_vfnet'
gpu_ids = [0]

I would really appreciate any help.

376498485 avatar Apr 27 '22 07:04 376498485

Hi, do you use the latest version of mmdet?

dyhBUPT avatar Apr 28 '22 11:04 dyhBUPT

> Hi, do you use the latest version of mmdet?

My mmdet version is 2.24. Maybe not all detectors in mmdet can be used with BYTE in mmtrack.

376498485 avatar Apr 28 '22 14:04 376498485

> Hi, do you use the latest version of mmdet?

> My mmdet version is 2.24. Maybe not all detectors in mmdet can be used with BYTE in mmtrack.

Have you tried other detectors? And do they work?

dyhBUPT avatar Apr 30 '22 03:04 dyhBUPT

> Hi, do you use the latest version of mmdet?

> My mmdet version is 2.24. Maybe not all detectors in mmdet can be used with BYTE in mmtrack.

> Have you tried other detectors? And do they work?

I have tried Sparse R-CNN, and it works, but Deformable DETR does not. I only modified the detector in the config files. I think R-CNN-like models can be used with ByteTrack in mmtrack.

376498485 avatar Apr 30 '22 04:04 376498485

Maybe you should set ref_img_sampler=None in the "data" item of your config.
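That is, a sketch of the change against the config you posted (your val and test entries already use ref_img_sampler=None, so only the train entry changes):

data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type='MOTChallengeDataset',
        visibility_thr=-1,
        ann_file='data/MOT17/annotations/half-train_cocoformat.json',
        img_prefix='data/MOT17/train',
        # was ref_img_sampler=dict(num_ref_imgs=1, frame_range=10, ...);
        # None stops the dataset from producing ref_img / ref_img_metas
        ref_img_sampler=None,
        pipeline=train_pipeline),
    # val and test stay exactly as in your config
)

If the Seq* training pipeline then complains about single images, see the CocoDataset-based configuration from pixeli99 below.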

dyhBUPT avatar Apr 30 '22 07:04 dyhBUPT

Hi lijinrun, I think there is something wrong with your data configuration. Please use the following configuration, which is equivalent to mmtracking/configs/_base_/datasets/mot_challenge_det.py. When we train ByteTrack we don't need the ref_img, but your dataset still samples it, which is why the error you pointed out is raised. With the configuration below, training is compatible with any detector.

dataset_type = 'CocoDataset'
file_client_args = dict(backend='disk')  # as in mot_challenge_det.py
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile', to_float32=True, file_client_args=file_client_args),
    dict(type='LoadAnnotations', with_bbox=True, file_client_args=file_client_args),
    dict(
        type='Resize',
        img_scale=(1088, 1088),
        ratio_range=(0.8, 1.2),
        keep_ratio=True,
        bbox_clip_border=False),
    dict(type='PhotoMetricDistortion'),
    dict(type='RandomCrop', crop_size=(1088, 1088), bbox_clip_border=False),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]
test_pipeline = [
    dict(type='LoadImageFromFile', file_client_args=file_client_args),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1088, 1088),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
data_root = 'data/MOT17/'
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        file_client_args=file_client_args,
        ann_file=data_root + 'annotations/half-train_cocoformat.json',
        img_prefix=data_root + 'train',
        classes=('pedestrian', ),
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        file_client_args=file_client_args,
        ann_file=data_root + 'annotations/half-val_cocoformat.json',
        img_prefix=data_root + 'train',
        classes=('pedestrian', ),
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        file_client_args=file_client_args,
        ann_file=data_root + 'annotations/half-val_cocoformat.json',
        img_prefix=data_root + 'train',
        classes=('pedestrian', ),
        pipeline=test_pipeline))
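With this configuration the dataset is a plain CocoDataset and there is no ref_img_sampler at all, so each batch only contains the img / img_metas / gt_* keys and any detector's forward_train can consume it unchanged.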

pixeli99 avatar Apr 30 '22 07:04 pixeli99

> Hi, do you use the latest version of mmdet?

> My mmdet version is 2.24. Maybe not all detectors in mmdet can be used with BYTE in mmtrack.

> Have you tried other detectors? And do they work?

> I have tried Sparse R-CNN, and it works, but Deformable DETR does not. I only modified the detector in the config files. I think R-CNN-like models can be used with ByteTrack in mmtrack.

You said it can run with Sparse R-CNN; that is because mmdet's implementation there accepts arbitrary extra parameters (its forward_train takes **kwargs, so the unexpected ref_* arguments are simply absorbed).
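Roughly, the difference looks like this (an illustrative sketch, not the actual mmdet source; the function names are made up):

# Illustrative sketch -- not real mmdet code. It only shows why the extra
# ref_* keyword arguments crash one signature but not the other.

def two_stage_forward_train(img, img_metas, gt_bboxes, gt_labels, **kwargs):
    # Sparse R-CNN style: **kwargs quietly absorbs ref_img_metas etc.
    return 'ok'

def single_stage_forward_train(img, img_metas, gt_bboxes, gt_labels,
                               gt_bboxes_ignore=None):
    # VFNet style: fixed signature, no **kwargs to catch extras.
    return 'ok'

batch = dict(img=None, img_metas=[], gt_bboxes=[], gt_labels=[],
             ref_img_metas=[])  # the extra key added by ref_img_sampler

print(two_stage_forward_train(**batch))  # -> ok
try:
    single_stage_forward_train(**batch)
except TypeError as err:
    print(err)  # ... got an unexpected keyword argument 'ref_img_metas'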

pixeli99 avatar Apr 30 '22 07:04 pixeli99