
TypeError: forward_train() got an unexpected keyword argument 'ref_img_metas'

Open 376498485 opened this issue 3 years ago • 7 comments

Hi, when I use VFNet (VarifocalNet) from mmdetection as the detector for ByteTrack, I get the error in the title: TypeError: forward_train() got an unexpected keyword argument 'ref_img_metas'. My config is as follows:

model = dict(
    detector=dict(
        type='VFNet',
        backbone=dict(
            type='ResNet',
            depth=101,
            num_stages=4,
            out_indices=(0, 1, 2, 3),
            frozen_stages=1,
            norm_cfg=dict(type='BN', requires_grad=True),
            norm_eval=True,
            style='pytorch',
            init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101'),
            dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),
            stage_with_dcn=(False, True, True, True)),
        neck=dict(
            type='FPN',
            in_channels=[256, 512, 1024, 2048],
            out_channels=256,
            start_level=1,
            add_extra_convs='on_output',
            num_outs=5,
            relu_before_extra_convs=True),
        bbox_head=dict(
            type='VFNetHead',
            num_classes=1,
            in_channels=256,
            stacked_convs=3,
            feat_channels=256,
            strides=[8, 16, 32, 64, 128],
            center_sampling=False,
            dcn_on_last_conv=True,
            use_atss=True,
            use_vfl=True,
            loss_cls=dict(
                type='VarifocalLoss',
                use_sigmoid=True,
                alpha=0.75,
                gamma=2.0,
                iou_weighted=True,
                loss_weight=1.0),
            loss_bbox=dict(type='GIoULoss', loss_weight=1.5),
            loss_bbox_refine=dict(type='GIoULoss', loss_weight=2.0)),
        init_cfg=dict(type='Pretrained', checkpoint='https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_r101_fpn_mdconv_c3-c5_mstrain_2x_coco/vfnet_r101_fpn_mdconv_c3-c5_mstrain_2x_coco_20201027pth-7729adb5.pth'),
        train_cfg=dict(
            assigner=dict(type='ATSSAssigner', topk=9),
            allowed_border=-1,
            pos_weight=-1,
            debug=False),
        test_cfg=dict(
            nms_pre=1000,
            min_bbox_size=0,
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.6),
            max_per_img=100)
        ),

    type='ByteTrack',
    motion=dict(type='KalmanFilter'),
    tracker=dict(
        type='ByteTracker',
        obj_score_thrs=dict(high=0.6, low=0.1),
        init_track_thr=0.7,
        weight_iou_with_det_scores=True,
        match_iou_thrs=dict(high=0.1, low=0.5, tentative=0.3),
        num_frames_retain=30))
dataset_type = 'MOTChallengeDataset'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadMultiImagesFromFile', to_float32=True),
    dict(type='SeqLoadAnnotations', with_bbox=True, with_track=True),
    dict(
        type='SeqResize',
        img_scale=(1088, 1088),
        multiscale_mode='range',
        share_params=True,
        ratio_range=(0.8, 1.2),
        keep_ratio=True,
        bbox_clip_border=False),
    dict(type='SeqPhotoMetricDistortion', share_params=True),
    dict(
        type='SeqRandomCrop',
        share_params=False,
        crop_size=(1088, 1088),
        bbox_clip_border=False),
    dict(type='SeqRandomFlip', share_params=True, flip_ratio=0.5),
    dict(
        type='SeqNormalize',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_rgb=True),
    dict(type='SeqPad', size_divisor=32),
    dict(type='MatchInstances', skip_nomatch=True),
    dict(
        type='VideoCollect',
        keys=[
            'img', 'gt_bboxes', 'gt_labels',
        ]),
    dict(type='SeqDefaultFormatBundle', ref_prefix='ref')
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1088, 1088),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(
                type='Normalize',
                mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375],
                to_rgb=True),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='VideoCollect', keys=['img'])
        ])
]
data_root = 'data/MOT17/'
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type='MOTChallengeDataset',
        visibility_thr=-1,
        ann_file='data/MOT17/annotations/half-train_cocoformat.json',
        img_prefix='data/MOT17/train',
        ref_img_sampler=dict(
            num_ref_imgs=1,
            frame_range=10,
            filter_key_img=True,
            method='uniform'),
        pipeline=train_pipeline),
    val=dict(
        type='MOTChallengeDataset',
        ann_file='data/MOT17/annotations/half-val_cocoformat.json',
        img_prefix='data/MOT17/train',
        ref_img_sampler=None,
        pipeline=test_pipeline),
    test=dict(
        type='MOTChallengeDataset',
        ann_file='data/MOT17/annotations/half-val_cocoformat.json',
        img_prefix='data/MOT17/train',
        ref_img_sampler=None,
        pipeline=test_pipeline))
evaluation = dict(metric=['bbox', 'track'], interval=1)
optimizer = dict(
    type='SGD',
    lr=0.01,
    momentum=0.9,
    weight_decay=0.0001,
    paramwise_cfg=dict(bias_lr_mult=2.0, bias_decay_mult=0.0))
optimizer_config = dict(grad_clip=None)
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.1,
    step=[16, 22])
total_epochs = 50
checkpoint_config = dict(interval=1)
log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
custom_hooks = [dict(type='NumClassCheckHook')]
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
opencv_num_threads = 0
mp_start_method = 'fork'
search_metrics = ['MOTA', 'IDF1', 'FN', 'FP', 'IDs', 'MT', 'ML']
work_dir = './work_dirs/bytetrack_vfnet'
gpu_ids = [0]

I would really appreciate any help.

376498485 avatar Apr 27 '22 07:04 376498485

Hi, do you use the latest version of mmdet?

dyhBUPT avatar Apr 28 '22 11:04 dyhBUPT

> Hi, do you use the latest version of mmdet?

My mmdet version is 2.24. Maybe not all detectors in mmdet can be used with BYTE in mmtrack.

376498485 avatar Apr 28 '22 14:04 376498485

> Hi, do you use the latest version of mmdet?

> My mmdet version is 2.24. Maybe not all detectors in mmdet can be used with BYTE in mmtrack.

Have you tried other detectors? And do they work?

dyhBUPT avatar Apr 30 '22 03:04 dyhBUPT

> Hi, do you use the latest version of mmdet?

> My mmdet version is 2.24. Maybe not all detectors in mmdet can be used with BYTE in mmtrack.

> Have you tried other detectors? And do they work?

I have tried Sparse R-CNN, and it works, but Deformable DETR does not. I only modified the detector in the config files. I think R-CNN-like models can be used with ByteTrack in mmtrack.

376498485 avatar Apr 30 '22 04:04 376498485

Maybe you should set ref_img_sampler=None in the "data" item of your config.
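That is, a sketch of the change against the config you posted (your val and test entries already use ref_img_sampler=None, so only the train entry changes):

data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type='MOTChallengeDataset',
        visibility_thr=-1,
        ann_file='data/MOT17/annotations/half-train_cocoformat.json',
        img_prefix='data/MOT17/train',
        # was ref_img_sampler=dict(num_ref_imgs=1, frame_range=10, ...);
        # None stops the dataset from producing ref_img / ref_img_metas
        ref_img_sampler=None,
        pipeline=train_pipeline),
    # val and test stay exactly as in your config
)

If the Seq* training pipeline then complains about single images, see the CocoDataset-based configuration from pixeli99 below.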

dyhBUPT avatar Apr 30 '22 07:04 dyhBUPT

Hi lijinrun, I think there is something wrong with your data configuration. Please use the following configuration, which is equivalent to mmtracking/configs/_base_/datasets/mot_challenge_det.py. When we train ByteTrack we don't need the ref_img, but your dataset still samples it, which is why the error you pointed out is raised. With the configuration below, training is compatible with any detector.

dataset_type = 'CocoDataset'
file_client_args = dict(backend='disk')  # as in mot_challenge_det.py
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile', to_float32=True, file_client_args=file_client_args),
    dict(type='LoadAnnotations', with_bbox=True, file_client_args=file_client_args),
    dict(
        type='Resize',
        img_scale=(1088, 1088),
        ratio_range=(0.8, 1.2),
        keep_ratio=True,
        bbox_clip_border=False),
    dict(type='PhotoMetricDistortion'),
    dict(type='RandomCrop', crop_size=(1088, 1088), bbox_clip_border=False),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]
test_pipeline = [
    dict(type='LoadImageFromFile', file_client_args=file_client_args),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1088, 1088),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
data_root = 'data/MOT17/'
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        file_client_args=file_client_args,
        ann_file=data_root + 'annotations/half-train_cocoformat.json',
        img_prefix=data_root + 'train',
        classes=('pedestrian', ),
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        file_client_args=file_client_args,
        ann_file=data_root + 'annotations/half-val_cocoformat.json',
        img_prefix=data_root + 'train',
        classes=('pedestrian', ),
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        file_client_args=file_client_args,
        ann_file=data_root + 'annotations/half-val_cocoformat.json',
        img_prefix=data_root + 'train',
        classes=('pedestrian', ),
        pipeline=test_pipeline))
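With this configuration the dataset is a plain CocoDataset and there is no ref_img_sampler at all, so each batch only contains the img / img_metas / gt_* keys and any detector's forward_train can consume it unchanged.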

pixeli99 avatar Apr 30 '22 07:04 pixeli99

> Hi, do you use the latest version of mmdet?

> My mmdet version is 2.24. Maybe not all detectors in mmdet can be used with BYTE in mmtrack.

> Have you tried other detectors? And do they work?

> I have tried Sparse R-CNN, and it works, but Deformable DETR does not. I only modified the detector in the config files. I think R-CNN-like models can be used with ByteTrack in mmtrack.

You said it can run with Sparse R-CNN; that is because mmdet's implementation there accepts arbitrary extra parameters (its forward_train takes **kwargs, so the unexpected ref_* arguments are simply absorbed).
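Roughly, the difference looks like this (an illustrative sketch, not the actual mmdet source; the function names are made up):

# Illustrative sketch -- not real mmdet code. It only shows why the extra
# ref_* keyword arguments crash one signature but not the other.

def two_stage_forward_train(img, img_metas, gt_bboxes, gt_labels, **kwargs):
    # Sparse R-CNN style: **kwargs quietly absorbs ref_img_metas etc.
    return 'ok'

def single_stage_forward_train(img, img_metas, gt_bboxes, gt_labels,
                               gt_bboxes_ignore=None):
    # VFNet style: fixed signature, no **kwargs to catch extras.
    return 'ok'

batch = dict(img=None, img_metas=[], gt_bboxes=[], gt_labels=[],
             ref_img_metas=[])  # the extra key added by ref_img_sampler

print(two_stage_forward_train(**batch))  # -> ok
try:
    single_stage_forward_train(**batch)
except TypeError as err:
    print(err)  # ... got an unexpected keyword argument 'ref_img_metas'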

pixeli99 avatar Apr 30 '22 07:04 pixeli99