mmselfsup icon indicating copy to clipboard operation
mmselfsup copied to clipboard

XXX is not in the datasource registry

Open kenchanLOL opened this issue 2 years ago • 1 comments

Checklist: I was trying to use the tutorial notebook code to train an MAE model with a customized dataset. Even after I edited `__init__.py` in `data_sources`, I still encountered an error message like this:

~......lib/python3.7/site-packages/mmcv/utils/registry.py in build_from_cfg(cfg, registry, default_args)
     68     try:
---> 69         return obj_cls(**args)
     70     except Exception as e:

~...../mmselfsup/mmselfsup/datasets/single_view.py in __init__(self, data_source, pipeline, prefetch)
     24         super(SingleViewDataset, self).__init__(data_source, pipeline,
---> 25                                                 prefetch)
     26         self.gt_labels = self.data_source.get_gt_labels()

~..../mmselfsup/mmselfsup/datasets/base.py in __init__(self, data_source, pipeline, prefetch)
     31                       'the same as the pipeline in `MMDet`.')
---> 32         self.data_source = build_datasource(data_source)
     33         pipeline = [build_from_cfg(p, PIPELINES) for p in pipeline]

~..../mmselfsup/mmselfsup/datasets/builder.py in build_datasource(cfg, default_args)
     31 def build_datasource(cfg, default_args=None):
---> 32     return build_from_cfg(cfg, DATASOURCES, default_args)
     33 

~...../lib/python3.7/site-packages/mmcv/utils/registry.py in build_from_cfg(cfg, registry, default_args)
     61             raise KeyError(
---> 62                 f'{obj_type} is not in the {registry.name} registry')
     63     elif inspect.isclass(obj_type) or inspect.isfunction(obj_type):

KeyError: 'XXX is not in the datasource registry'

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
...../ipykernel_24353/1952569340.py in <module>
     15 
     16 # Build the dataset
---> 17 datasets = [build_dataset(cfg.data.train)]
     18 
     19 # Start pre-train

~...../mmselfsup/mmselfsup/datasets/builder.py in build_dataset(cfg, default_args)
     41             build_dataset(cfg['dataset'], default_args), cfg['times'])
     42     else:
---> 43         dataset = build_from_cfg(cfg, DATASETS, default_args)
     44 
     45     return dataset

~...../lib/python3.7/site-packages/mmcv/utils/registry.py in build_from_cfg(cfg, registry, default_args)
     70     except Exception as e:
     71         # Normal TypeError does not print class name.
---> 72         raise type(e)(f'{obj_cls.__name__}: {e}')
     73 
     74 

KeyError: "SingleViewDataset: 'XXX is not in the datasource registry'" 

This is my config file:

# dataset settings
data_source = 'XXX'
dataset_type = 'SingleViewDataset'
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
train_pipeline = [
    dict(
        type='RandomResizedCrop', size=224, scale=(0.2, 1.0), interpolation=3),
    dict(type='RandomHorizontalFlip')
]

# prefetch
prefetch = False
if not prefetch:
    train_pipeline.extend(
        [dict(type='ToTensor'),
         dict(type='Normalize', **img_norm_cfg)])

# dataset summary
data = dict(
    samples_per_gpu=64,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        data_source=dict(
            type=data_source,
            data_prefix='data/XXX/train',
            ann_file='data/XXX/meta/train.txt',
        ),
        pipeline=train_pipeline,
        prefetch=prefetch))

The `__init__.py` in `data_sources`:

from .base import BaseDataSource
from .cifar import CIFAR10, CIFAR100
from .image_list import ImageList
from .imagenet import ImageNet
from .imagenet_21k import ImageNet21k
from .imagenet2 import XXX

__all__ = [
    'BaseDataSource', 'CIFAR10', 'CIFAR100', 'ImageList', 'ImageNet',
    'ImageNet21k' , 'imagenet2'
]

I tried making a copy of the ImageNet data source (imagenet2.py) and giving the class a new name, and I also tried renaming the original ImageNet data source, but the error message still appears. Thanks for reading.

kenchanLOL avatar Jul 02 '22 07:07 kenchanLOL

The code is supposed to be like :

from .base import BaseDataSource
from .cifar import CIFAR10, CIFAR100
from .image_list import ImageList
from .imagenet import ImageNet
from .imagenet_21k import ImageNet21k
from .imagenet2 import XXX

__all__ = [
    'BaseDataSource', 'CIFAR10', 'CIFAR100', 'ImageList', 'ImageNet',
    'ImageNet21k' , 'XXX'
]

`__all__` should contain the class name, not the file name. Please try it again.

fangyixiao18 avatar Jul 03 '22 09:07 fangyixiao18

Closing due to inactivity, please reopen if there are any further problems.

fangyixiao18 avatar Oct 17 '22 02:10 fangyixiao18

The code is supposed to be like :

from .base import BaseDataSource
from .cifar import CIFAR10, CIFAR100
from .image_list import ImageList
from .imagenet import ImageNet
from .imagenet_21k import ImageNet21k
from .imagenet2 import XXX

__all__ = [
    'BaseDataSource', 'CIFAR10', 'CIFAR100', 'ImageList', 'ImageNet',
    'ImageNet21k' , 'XXX'
]

`__all__` should contain the class name, not the file name. Please try it again.

Hi, I want to use CIFAR10 in ODC, and my config file is as follows:

_base_ = [
    '../_base_/models/odc.py',
    # '../_base_/datasets/imagenet_odc.py',
    '../_base_/schedules/sgd_steplr-200e_in1k.py',
    '../_base_/default_runtime.py',
]

##Changed Start

dataset settings

dataset_type = 'Cifar10' data_root = '/home/wangxin/cifar/' file_client_args = dict(backend='disk')

train_pipeline = [ dict(type='LoadImageFromFile', file_client_args=file_client_args), dict(type='RandomResizedCrop', size=224, backend='pillow'), dict(type='RandomFlip', prob=0.5), dict(type='RandomRotation', degrees=2), dict( type='ColorJitter', brightness=0.4, contrast=0.4, saturation=1.0, hue=0.5), dict( type='RandomGrayscale', prob=0.2, keep_channels=True, channel_weights=(0.114, 0.587, 0.2989)), dict( type='PackSelfSupInputs', algorithm_keys=['sample_idx'], meta_keys=['img_path']) ]

extract_pipeline = [ dict(type='LoadImageFromFile', file_client_args=file_client_args), dict(type='mmcls.ResizeEdge', scale=256, edge='short', backend='pillow'), dict(type='CenterCrop', crop_size=224), dict(type='PackSelfSupInputs', meta_keys=['img_path']) ]

train_dataloader = dict( batch_size=64, num_workers=4, persistent_workers=True, sampler=dict(type='DeepClusterSampler', shuffle=True, replace=True), collate_fn=dict(type='default_collate'), dataset=dict( type=dataset_type, data_root=data_root, ann_file='meta/train.txt', data_prefix=dict(img_path='train/'), pipeline=train_pipeline))

num_classes = 10000 custom_hooks = [ dict( type='DeepClusterHook', extract_dataloader=dict( batch_size=128, num_workers=8, persistent_workers=True, sampler=dict(type='DefaultSampler', shuffle=False, round_up=True), collate_fn=dict(type='default_collate'), dataset=dict( type=dataset_type, data_root=data_root, ann_file='meta/train.txt', data_prefix=dict(img_path='train/'), pipeline=extract_pipeline)), clustering=dict(type='Kmeans', k=num_classes, pca_dim=-1), # no pca unif_sampling=False, reweight=True, reweight_pow=0.5, init_memory=True, initial=True, # call initially interval=9999999999), # initial only dict( type='ODCHook', centroids_update_interval=10, # iter deal_with_small_clusters_interval=1, evaluate_interval=50, reweight=True, reweight_pow=0.5) ]

##Changed End

model settings

model = dict(

head=dict(num_classes=num_classes),
memory_bank=dict(num_classes=num_classes),

)

optimizer

optimizer = dict(type='SGD', lr=0.06, weight_decay=1e-5, momentum=0.9) optim_wrapper = dict( type='OptimWrapper', optimizer=optimizer, paramwise_cfg=dict(custom_keys={'head': dict(momentum=0.)}))

learning rate scheduler

param_scheduler = [ dict(type='MultiStepLR', by_epoch=True, milestones=[400], gamma=0.4) ]

runtime settings

train_cfg = dict(max_epochs=440)

the max_keep_ckpts controls the max number of ckpt file in your work_dirs

if it is 3, when CheckpointHook (in mmcv) saves the 4th ckpt

it will remove the oldest one to keep the number of total ckpts as 3

default_hooks = dict( checkpoint=dict(type='CheckpointHook', interval=10, max_keep_ckpts=3))

The error log is as follows:

02/21 01:58:42 - mmengine - WARNING - The "visualizer" registry in mmselfsup did not set import location. Fallback to call mmselfsup.utils.register_all_modules instead. 02/21 01:58:42 - mmengine - WARNING - The "vis_backend" registry in mmselfsup did not set import location. Fallback to call mmselfsup.utils.register_all_modules instead. 02/21 01:58:43 - mmengine - WARNING - The "model" registry in mmselfsup did not set import location. Fallback to call mmselfsup.utils.register_all_modules instead. 02/21 01:58:43 - mmengine - WARNING - The "model" registry in mmcls did not set import location. Fallback to call mmcls.utils.register_all_modules instead. 02/21 01:58:43 - mmengine - INFO - Distributed training is not used, all SyncBatchNorm (SyncBN) layers in the model will be automatically reverted to BatchNormXd layers if they are used. 02/21 01:58:43 - mmengine - WARNING - The "hook" registry in mmselfsup did not set import location. Fallback to call mmselfsup.utils.register_all_modules instead. 02/21 01:58:43 - mmengine - WARNING - The "dataset" registry in mmselfsup did not set import location. Fallback to call mmselfsup.utils.register_all_modules instead. 
Traceback (most recent call last): File "/home/wangxin/anaconda3/envs/openmmlab/lib/python3.8/site-packages/mmengine/registry/build_functions.py", line 121, in build_from_cfg obj = obj_cls(**args) # type: ignore File "/home/wangxin/mmselfsup/mmselfsup/engine/hooks/deepcluster_hook.py", line 47, in init self.extractor = Extractor( File "/home/wangxin/mmselfsup/mmselfsup/models/utils/extractor.py", line 52, in init self.data_loader = Runner.build_dataloader( File "/home/wangxin/anaconda3/envs/openmmlab/lib/python3.8/site-packages/mmengine/runner/runner.py", line 1333, in build_dataloader dataset = DATASETS.build(dataset_cfg) File "/home/wangxin/anaconda3/envs/openmmlab/lib/python3.8/site-packages/mmengine/registry/registry.py", line 521, in build return self.build_func(cfg, *args, **kwargs, registry=self) File "/home/wangxin/anaconda3/envs/openmmlab/lib/python3.8/site-packages/mmengine/registry/build_functions.py", line 100, in build_from_cfg raise KeyError( KeyError: 'Cifar10 is not in the dataset registry. Please check whether the value of Cifar10 is correct or it was registered as expected. More details can be found at https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#import-the-custom-module'

During handling of the above exception, another exception occurred:

Traceback (most recent call last): File "tools/train.py", line 99, in main() File "tools/train.py", line 92, in main runner = Runner.from_cfg(cfg) File "/home/wangxin/anaconda3/envs/openmmlab/lib/python3.8/site-packages/mmengine/runner/runner.py", line 431, in from_cfg runner = cls( File "/home/wangxin/anaconda3/envs/openmmlab/lib/python3.8/site-packages/mmengine/runner/runner.py", line 411, in init self.register_hooks(default_hooks, custom_hooks) File "/home/wangxin/anaconda3/envs/openmmlab/lib/python3.8/site-packages/mmengine/runner/runner.py", line 1904, in register_hooks self.register_custom_hooks(custom_hooks) File "/home/wangxin/anaconda3/envs/openmmlab/lib/python3.8/site-packages/mmengine/runner/runner.py", line 1885, in register_custom_hooks self.register_hook(hook) File "/home/wangxin/anaconda3/envs/openmmlab/lib/python3.8/site-packages/mmengine/runner/runner.py", line 1786, in register_hook hook_obj = HOOKS.build(hook) File "/home/wangxin/anaconda3/envs/openmmlab/lib/python3.8/site-packages/mmengine/registry/registry.py", line 521, in build return self.build_func(cfg, *args, **kwargs, registry=self) File "/home/wangxin/anaconda3/envs/openmmlab/lib/python3.8/site-packages/mmengine/registry/build_functions.py", line 135, in build_from_cfg raise type(e)( KeyError: "class DeepClusterHook in mmselfsup/engine/hooks/deepcluster_hook.py: 'Cifar10 is not in the dataset registry. Please check whether the value of Cifar10 is correct or it was registered as expected. More details can be found at https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#import-the-custom-module'"

It seems that Cifar10 needs to be registered? Does mmselfsup have its own CIFAR10 dataset?

mrFocusXin avatar Feb 21 '23 02:02 mrFocusXin