
Please, where can I find the configuration file for test.py?

escapethetrap opened this issue · 2 comments

I've trained the model, and I got this error during training:

07/03 15:13:27 - mmengine - ERROR - D:\anaconda3\envs\yoloworld-env\lib\site-packages\mmdet\evaluation\metrics\coco_metric.py - compute_metrics - 461 - The testing results of the whole dataset is empty.

When I run test.py, it keeps reporting a path error. I double-checked that I only set the paths of my own dataset and never specified the path shown in the error. For test.py I used the same config file that I used with train.py; I don't know whether that is wrong.

FileNotFoundError: [Errno 2] No such file or directory: 'data/coco/annotations/instances_val2017.json'
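For what it's worth, data/coco/annotations/instances_val2017.json is the default COCO annotation path from the inherited mmyolo base config, not anything set in the config below, which hints that the test-time evaluator was never overridden. A minimal sketch to check what the merged config actually resolves to (assuming mmengine is installed, run from the YOLO-World repo root so the relative _base_ paths and custom_imports resolve; the config path is the one from the command below):

# Sketch: dump the merged config's test-time settings to see which annotation
# file tools/test.py will actually use. Run from the YOLO-World repo root.
from mmengine.config import Config

cfg = Config.fromfile('configs/pretrain/yolo_world_v2_s_1.py')
# If test_evaluator was never overridden, this prints the COCO default
# inherited from the mmyolo base config, e.g.
# ann_file='data/coco/annotations/instances_val2017.json'.
print(cfg.test_evaluator)
print(cfg.test_dataloader.dataset)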

This is the command I ran for test.py:

python tools/test.py D:\Code_project\yolo_world\YOLO-World-master\YOLO-World-master\configs\pretrain\yolo_world_v2_s_1.py 'D:\Code_project\yolo_world\YOLO-World-master\YOLO-World-master\tools\work_dirs\yolo_world_v2_s_1\epoch_100.pth'

Please help me figure out how to fix it. Here's my config file:

_base_ = (
    '../../third_party/mmyolo/configs/yolov8/'
    'yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco.py')
custom_imports = dict(
    imports=['yolo_world'],
    allow_failed_imports=False)

# hyper-parameters
num_classes = 5
num_training_classes = 5
max_epochs = 100  # Maximum training epochs
close_mosaic_epochs = 10
save_epoch_intervals = 5
text_channels = 512
neck_embed_channels = [128, 256, _base_.last_stage_out_channels // 2]
neck_num_heads = [4, 8, _base_.last_stage_out_channels // 2 // 32]
base_lr = 2e-4
weight_decay = 0.05
train_batch_size_per_gpu = 16
load_from = 'D:\Code_project\yolo_world\YOLO-World-master\YOLO-World-master\weights\yolo_world_v2_s_vlpan_bn_2e-4_80e_8gpus_mask-refine_finetune_coco_ep80-492dc329.pth'
# text_model_name = '../pretrained_models/clip-vit-base-patch32-projection'
# text_model_name = 'openai/clip-vit-base-patch32'
text_model_name = 'D:\Code_project\yolo_world\YOLO-World-master\YOLO-World-master\openai\clip-vit-base-patch32'
persistent_workers = False
mixup_prob = 0.15
copypaste_prob = 0.3

# model settings
model = dict(
    type='YOLOWorldDetector',
    mm_neck=True,
    num_train_classes=num_training_classes,
    num_test_classes=num_classes,
    data_preprocessor=dict(type='YOLOWDetDataPreprocessor'),
    backbone=dict(
        _delete_=True,
        type='MultiModalYOLOBackbone',
        image_model={{_base_.model.backbone}},
        text_model=dict(
            type='HuggingCLIPLanguageBackbone',
            model_name=text_model_name,
            frozen_modules=['all'])),
    neck=dict(type='YOLOWorldPAFPN',
              guide_channels=text_channels,
              embed_channels=neck_embed_channels,
              num_heads=neck_num_heads,
              block_cfg=dict(type='MaxSigmoidCSPLayerWithTwoConv')),
    bbox_head=dict(type='YOLOWorldHead',
                   head_module=dict(type='YOLOWorldHeadModule',
                                    use_bn_head=True,
                                    embed_dims=text_channels,
                                    num_classes=num_training_classes)),
    train_cfg=dict(assigner=dict(num_classes=num_training_classes)))

# dataset settings
text_transform = [
    dict(type='RandomLoadText',
         num_neg_samples=(num_classes, num_classes),
         max_num_samples=num_training_classes,
         padding_to_max=True,
         padding_value=''),
    dict(type='mmdet.PackDetInputs',
         meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip',
                    'flip_direction', 'texts'))
]
mosaic_affine_transform = [
    dict(
        type='MultiModalMosaic',
        img_scale=_base_.img_scale,
        pad_val=114.0,
        pre_transform=_base_.pre_transform),
    dict(type='YOLOv5CopyPaste', prob=copypaste_prob),
    dict(
        type='YOLOv5RandomAffine',
        max_rotate_degree=0.0,
        max_shear_degree=0.0,
        max_aspect_ratio=100.,
        scaling_ratio_range=(1 - _base_.affine_scale,
                             1 + _base_.affine_scale),
        # img_scale is (width, height)
        border=(-_base_.img_scale[0] // 2, -_base_.img_scale[1] // 2),
        border_val=(114, 114, 114),
        min_area_ratio=_base_.min_area_ratio,
        use_mask_refine=_base_.use_mask2refine)
]
train_pipeline = [
    *_base_.pre_transform,
    *mosaic_affine_transform,
    dict(
        type='YOLOv5MultiModalMixUp',
        prob=mixup_prob,
        pre_transform=[*_base_.pre_transform,
                       *mosaic_affine_transform]),
    *_base_.last_transform[:-1],
    *text_transform
]
train_pipeline_stage2 = [
    *_base_.train_pipeline_stage2[:-1],
    *text_transform
]
coco_train_dataset = dict(
    _delete_=True,
    type='MultiModalDataset',
    dataset=dict(
        type='YOLOv5CocoDataset',
        data_root='tomato.v1i.coco',
        ann_file=r'D:\Code_project\yolo_world\YOLO-World-master\YOLO-World-master\tomato.v1i.coco\train_annotations.coco.json',
        data_prefix=dict(img='D:/Code_project/yolo_world/YOLO-World-master/YOLO-World-master/tomato.v1i.coco/train/'),
        filter_cfg=dict(filter_empty_gt=False, min_size=32)),
    class_text_path=r'D:\Code_project\yolo_world\YOLO-World-master\YOLO-World-master\tomato.v1i.coco\tomato_class_texts.json',
    pipeline=train_pipeline)

train_dataloader = dict(
    persistent_workers=persistent_workers,
    batch_size=train_batch_size_per_gpu,
    collate_fn=dict(type='yolow_collate'),
    dataset=coco_train_dataset)
test_pipeline = [
    *_base_.test_pipeline[:-1],
    dict(type='LoadText'),
    dict(
        type='mmdet.PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor', 'pad_param', 'texts'))
]
coco_val_dataset = dict(
    _delete_=True,
    type='MultiModalDataset',
    dataset=dict(
        type='YOLOv5CocoDataset',
        data_root='tomato.v1i.coco',
        ann_file=r'D:\Code_project\yolo_world\YOLO-World-master\YOLO-World-master\tomato.v1i.coco\valid_annotations.coco.json',
        data_prefix=dict(img='D:/Code_project/yolo_world/YOLO-World-master/YOLO-World-master/tomato.v1i.coco/valid/'),
        filter_cfg=dict(filter_empty_gt=False, min_size=32)),
    class_text_path=r'D:\Code_project\yolo_world\YOLO-World-master\YOLO-World-master\tomato.v1i.coco\tomato_class_texts.json',
    pipeline=test_pipeline)
val_dataloader = dict(dataset=coco_val_dataset)
test_dataloader = val_dataloader

# training settings
default_hooks = dict(
    param_scheduler=dict(
        scheduler_type='linear',
        lr_factor=0.01,
        max_epochs=max_epochs),
    checkpoint=dict(
        max_keep_ckpts=-1,
        save_best=None,
        interval=save_epoch_intervals))
custom_hooks = [
    dict(
        type='EMAHook',
        ema_type='ExpMomentumEMA',
        momentum=0.0001,
        update_buffers=True,
        strict_load=False,
        priority=49),
    dict(
        type='mmdet.PipelineSwitchHook',
        switch_epoch=max_epochs - close_mosaic_epochs,
        switch_pipeline=train_pipeline_stage2)
]
train_cfg = dict(
    max_epochs=max_epochs,
    val_interval=5,
    dynamic_intervals=[((max_epochs - close_mosaic_epochs),
                        _base_.val_interval_stage2)])
optim_wrapper = dict(
    optimizer=dict(
        _delete_=True,
        type='AdamW',
        lr=base_lr,
        weight_decay=weight_decay,
        batch_size_per_gpu=train_batch_size_per_gpu),
    paramwise_cfg=dict(
        custom_keys={'backbone.text_model': dict(lr_mult=0.01),
                     'logit_scale': dict(weight_decay=0.0)}),
    constructor='YOLOWv5OptimizerConstructor')

# evaluation settings
val_evaluator = dict(
    _delete_=True,
    type='mmdet.CocoMetric',
    proposal_nums=(100, 1, 10),
    ann_file=r'D:\Code_project\yolo_world\YOLO-World-master\YOLO-World-master\tomato.v1i.coco\valid_annotations.coco.json',
    metric='bbox')
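(For context: the config above does set test_dataloader = val_dataloader, but it only defines val_evaluator; nothing overrides the test_evaluator inherited from the mmyolo base config, and that inherited evaluator is what still points at data/coco/annotations/instances_val2017.json.)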

escapethetrap · Jul 03 '24

No test_evaluator is specified; for example, set test_evaluator = val_evaluator.
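In config terms, that means appending one line at the end of the config posted above (a minimal sketch; it simply reuses the val_evaluator already defined there):

# Reuse the validation evaluator at test time so tools/test.py stops
# falling back to the base config's COCO annotation path.
test_evaluator = val_evaluator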

auroraljg · Jul 05 '24

> No test_evaluator is specified; for example, set test_evaluator = val_evaluator.

Thanks for your comment. It's solved.

zpyuan6 · Oct 01 '24

> No test_evaluator is specified; for example, set test_evaluator = val_evaluator.

> Thanks for your comment. It's solved.

Can you share your solution? Thanks!

hengRUC · Dec 09 '24

_base_ = (
    '../../../third_party/mmyolo/configs/yolov8/'
    'yolov8_l_syncbn_fast_8xb16-500e_coco.py')
custom_imports = dict(
    imports=['yolo_world'],
    allow_failed_imports=False)

# hyper-parameters
num_classes = 80
num_training_classes = 80
max_epochs = 80  # Maximum training epochs
close_mosaic_epochs = 10
save_epoch_intervals = 5
text_channels = 512
neck_embed_channels = [128, 256, _base_.last_stage_out_channels // 2]
neck_num_heads = [4, 8, _base_.last_stage_out_channels // 2 // 32]
base_lr = 2e-4
weight_decay = 0.05
train_batch_size_per_gpu = 8
load_from = 'pretrained_weights/yolo_world_l_clip_base_dual_vlpan_2e-3adamw_32xb16_100e_o365_goldg_train_pretrained-0e566235.pth'
persistent_workers = False

# model settings
model = dict(
    type='YOLOWorldDetector',
    mm_neck=True,
    num_train_classes=num_training_classes,
    num_test_classes=num_classes,
    data_preprocessor=dict(type='YOLOWDetDataPreprocessor'),
    backbone=dict(
        _delete_=True,
        type='MultiModalYOLOBackbone',
        image_model={{_base_.model.backbone}},
        text_model=dict(
            type='HuggingCLIPLanguageBackbone',
            model_name='openai/clip-vit-base-patch32',
            frozen_modules=['all'])),
    neck=dict(type='YOLOWorldDualPAFPN',
              guide_channels=text_channels,
              embed_channels=neck_embed_channels,
              num_heads=neck_num_heads,
              block_cfg=dict(type='MaxSigmoidCSPLayerWithTwoConv'),
              text_enhancder=dict(type='ImagePoolingAttentionModule',
                                  embed_channels=256,
                                  num_heads=8)),
    bbox_head=dict(type='YOLOWorldHead',
                   head_module=dict(type='YOLOWorldHeadModule',
                                    embed_dims=text_channels,
                                    num_classes=num_training_classes)),
    train_cfg=dict(assigner=dict(num_classes=num_training_classes)))

# dataset settings
text_transform = [
    dict(type='RandomLoadText',
         num_neg_samples=(num_classes, num_classes),
         max_num_samples=num_training_classes,
         padding_to_max=True,
         padding_value=''),
    dict(type='mmdet.PackDetInputs',
         meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip',
                    'flip_direction', 'texts'))
]
mosaic_affine_transform = [
    dict(
        type='MultiModalMosaic',
        img_scale=_base_.img_scale,
        pad_val=114.0,
        pre_transform=_base_.pre_transform),
    dict(
        type='YOLOv5RandomAffine',
        max_rotate_degree=0.0,
        max_shear_degree=0.0,
        max_aspect_ratio=100.,
        scaling_ratio_range=(1 - _base_.affine_scale,
                             1 + _base_.affine_scale),
        # img_scale is (width, height)
        border=(-_base_.img_scale[0] // 2, -_base_.img_scale[1] // 2),
        border_val=(114, 114, 114))
]
train_pipeline = [
    *_base_.pre_transform,
    *mosaic_affine_transform,
    dict(
        type='YOLOv5MultiModalMixUp',
        prob=_base_.mixup_prob,
        pre_transform=[*_base_.pre_transform,
                       *mosaic_affine_transform]),
    *_base_.last_transform[:-1],
    *text_transform
]
train_pipeline_stage2 = [
    *_base_.train_pipeline_stage2[:-1],
    *text_transform
]
coco_train_dataset = dict(
    _delete_=True,
    type='MultiModalDataset',
    dataset=dict(
        type='YOLOv5CocoDataset',
        data_root='/home/zhipeng/Desktop/Dataset/yolo_world_data/COCO',
        ann_file='annotations/instances_train2017.json',
        data_prefix=dict(img='train2017/'),
        filter_cfg=dict(filter_empty_gt=False, min_size=32)),
    class_text_path='data/texts/coco_class_texts.json',
    pipeline=train_pipeline)

train_dataloader = dict(
    persistent_workers=persistent_workers,
    batch_size=train_batch_size_per_gpu,
    collate_fn=dict(type='yolow_collate'),
    dataset=coco_train_dataset)
test_pipeline = [
    *_base_.test_pipeline[:-1],
    dict(type='LoadText'),
    dict(
        type='mmdet.PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor', 'pad_param', 'texts'))
]
coco_val_dataset = dict(
    _delete_=True,
    type='MultiModalDataset',
    dataset=dict(
        type='YOLOv5CocoDataset',
        data_root='/home/zhipeng/Desktop/Dataset/yolo_world_data/COCO',
        ann_file='annotations/instances_val2017.json',
        data_prefix=dict(img='val2017/'),
        filter_cfg=dict(filter_empty_gt=False, min_size=32)),
    class_text_path='data/texts/coco_class_texts.json',
    pipeline=test_pipeline)
val_dataloader = dict(dataset=coco_val_dataset)
test_dataloader = val_dataloader
# training settings
default_hooks = dict(
    param_scheduler=dict(
        scheduler_type='linear',
        lr_factor=0.01,
        max_epochs=max_epochs),
    checkpoint=dict(
        max_keep_ckpts=-1,
        save_best=None,
        interval=save_epoch_intervals))
custom_hooks = [
    dict(
        type='EMAHook',
        ema_type='ExpMomentumEMA',
        momentum=0.0001,
        update_buffers=True,
        strict_load=False,
        priority=49),
    dict(
        type='mmdet.PipelineSwitchHook',
        switch_epoch=max_epochs - close_mosaic_epochs,
        switch_pipeline=train_pipeline_stage2)
]
train_cfg = dict(
    max_epochs=max_epochs,
    val_interval=5,
    dynamic_intervals=[((max_epochs - close_mosaic_epochs),
                        _base_.val_interval_stage2)])
optim_wrapper = dict(
    optimizer=dict(
        _delete_=True,
        type='AdamW',
        lr=base_lr,
        weight_decay=weight_decay,
        batch_size_per_gpu=train_batch_size_per_gpu),
    paramwise_cfg=dict(
        custom_keys={'backbone.text_model': dict(lr_mult=0.01),
                     'logit_scale': dict(weight_decay=0.0)}),
    constructor='YOLOWv5OptimizerConstructor')

# evaluation settings
val_evaluator = dict(
    _delete_=True,
    type='mmdet.CocoMetric',
    proposal_nums=(100, 1, 10),
    ann_file='/home/zhipeng/Desktop/Dataset/yolo_world_data/COCO/annotations/instances_val2017.json',
    metric='bbox')
test_evaluator = val_evaluator

I use the config above. The key line is the last one, test_evaluator = val_evaluator, which is exactly what the earlier comment suggested.

zpyuan6 · Dec 17 '24