mmselfsup copied to clipboard
XXX is not in the datasource registry
Checklist
When I tried to use the tutorial notebook code to train an MAE model with a customized dataset, I still encountered an error message like the one below, even after editing the files in data_sources:
~......lib/python3.7/site-packages/mmcv/utils/ in build_from_cfg(cfg, registry, default_args)
68 try:
---> 69 return obj_cls(**args)
70 except Exception as e:
~...../mmselfsup/mmselfsup/datasets/ in __init__(self, data_source, pipeline, prefetch)
24 super(SingleViewDataset, self).__init__(data_source, pipeline,
---> 25 prefetch)
26 self.gt_labels = self.data_source.get_gt_labels()
~..../mmselfsup/mmselfsup/datasets/ in __init__(self, data_source, pipeline, prefetch)
31 'the same as the pipeline in `MMDet`.')
---> 32 self.data_source = build_datasource(data_source)
33 pipeline = [build_from_cfg(p, PIPELINES) for p in pipeline]
~..../mmselfsup/mmselfsup/datasets/ in build_datasource(cfg, default_args)
31 def build_datasource(cfg, default_args=None):
---> 32 return build_from_cfg(cfg, DATASOURCES, default_args)
~...../lib/python3.7/site-packages/mmcv/utils/ in build_from_cfg(cfg, registry, default_args)
61 raise KeyError(
---> 62 f'{obj_type} is not in the {registry.name} registry')
63 elif inspect.isclass(obj_type) or inspect.isfunction(obj_type):
KeyError: 'XXX is not in the datasource registry'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
...../ipykernel_24353/ in <module>
16 # Build the dataset
---> 17 datasets = [build_dataset(]
19 # Start pre-train
~...../mmselfsup/mmselfsup/datasets/ in build_dataset(cfg, default_args)
41 build_dataset(cfg['dataset'], default_args), cfg['times'])
42 else:
---> 43 dataset = build_from_cfg(cfg, DATASETS, default_args)
45 return dataset
~...../lib/python3.7/site-packages/mmcv/utils/ in build_from_cfg(cfg, registry, default_args)
70 except Exception as e:
71 # Normal TypeError does not print class name.
---> 72 raise type(e)(f'{obj_cls.__name__}: {e}')
KeyError: "SingleViewDataset: 'XXX is not in the datasource registry'"
This is my config file:
# dataset settings
data_source = 'XXX'
dataset_type = 'SingleViewDataset'
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
train_pipeline = [
type='RandomResizedCrop', size=224, scale=(0.2, 1.0), interpolation=3),
# prefetch
prefetch = False
if not prefetch:
dict(type='Normalize', **img_norm_cfg)])
# dataset summary
data = dict(
The package `__init__` file in data_source:
from .base import BaseDataSource
from .cifar import CIFAR10, CIFAR100
from .image_list import ImageList
from .imagenet import ImageNet
from .imagenet_21k import ImageNet21k
from .imagenet2 import XXX
__all__ = [
'BaseDataSource', 'CIFAR10', 'CIFAR100', 'ImageList', 'ImageNet',
'ImageNet21k' , 'imagenet2'
I tried making a copy of the ImageNet data source under a new name, and also tried renaming the original ImageNet data source, but the error message still appears. Thanks for reading.
The code is supposed to look like this:
from .base import BaseDataSource
from .cifar import CIFAR10, CIFAR100
from .image_list import ImageList
from .imagenet import ImageNet
from .imagenet_21k import ImageNet21k
from .imagenet2 import XXX
__all__ = [
'BaseDataSource', 'CIFAR10', 'CIFAR100', 'ImageList', 'ImageNet',
'ImageNet21k' , 'XXX'
`__all__` should contain the class name, not the file name. You could try it again.
Closing due to inactivity, please reopen if there are any further problems.
The code is supposed to look like this:
from .base import BaseDataSource from .cifar import CIFAR10, CIFAR100 from .image_list import ImageList from .imagenet import ImageNet from .imagenet_21k import ImageNet21k from .imagenet2 import XXX __all__ = [ 'BaseDataSource', 'CIFAR10', 'CIFAR100', 'ImageList', 'ImageNet', 'ImageNet21k' , 'XXX' ]
`__all__` should contain the class name, not the file name. You could try it again.
Hi, I want to use CIFAR10 in ODC, and my config file is as follows:
base = [ '../base/models/', # '../base/datasets/', '../base/schedules/', '../base/', ]
##Changed Start
dataset settings
dataset_type = 'Cifar10' data_root = '/home/wangxin/cifar/' file_client_args = dict(backend='disk')
train_pipeline = [ dict(type='LoadImageFromFile', file_client_args=file_client_args), dict(type='RandomResizedCrop', size=224, backend='pillow'), dict(type='RandomFlip', prob=0.5), dict(type='RandomRotation', degrees=2), dict( type='ColorJitter', brightness=0.4, contrast=0.4, saturation=1.0, hue=0.5), dict( type='RandomGrayscale', prob=0.2, keep_channels=True, channel_weights=(0.114, 0.587, 0.2989)), dict( type='PackSelfSupInputs', algorithm_keys=['sample_idx'], meta_keys=['img_path']) ]
extract_pipeline = [ dict(type='LoadImageFromFile', file_client_args=file_client_args), dict(type='mmcls.ResizeEdge', scale=256, edge='short', backend='pillow'), dict(type='CenterCrop', crop_size=224), dict(type='PackSelfSupInputs', meta_keys=['img_path']) ]
train_dataloader = dict( batch_size=64, num_workers=4, persistent_workers=True, sampler=dict(type='DeepClusterSampler', shuffle=True, replace=True), collate_fn=dict(type='default_collate'), dataset=dict( type=dataset_type, data_root=data_root, ann_file='meta/train.txt', data_prefix=dict(img_path='train/'), pipeline=train_pipeline))
num_classes = 10000 custom_hooks = [ dict( type='DeepClusterHook', extract_dataloader=dict( batch_size=128, num_workers=8, persistent_workers=True, sampler=dict(type='DefaultSampler', shuffle=False, round_up=True), collate_fn=dict(type='default_collate'), dataset=dict( type=dataset_type, data_root=data_root, ann_file='meta/train.txt', data_prefix=dict(img_path='train/'), pipeline=extract_pipeline)), clustering=dict(type='Kmeans', k=num_classes, pca_dim=-1), # no pca unif_sampling=False, reweight=True, reweight_pow=0.5, init_memory=True, initial=True, # call initially interval=9999999999), # initial only dict( type='ODCHook', centroids_update_interval=10, # iter deal_with_small_clusters_interval=1, evaluate_interval=50, reweight=True, reweight_pow=0.5) ]
##Changed End
model settings
model = dict(
optimizer = dict(type='SGD', lr=0.06, weight_decay=1e-5, momentum=0.9) optim_wrapper = dict( type='OptimWrapper', optimizer=optimizer, paramwise_cfg=dict(custom_keys={'head': dict(momentum=0.)}))
learning rate scheduler
param_scheduler = [ dict(type='MultiStepLR', by_epoch=True, milestones=[400], gamma=0.4) ]
runtime settings
train_cfg = dict(max_epochs=440)
the max_keep_ckpts controls the max number of ckpt file in your work_dirs
if it is 3, when CheckpointHook (in mmcv) saves the 4th ckpt
it will remove the oldest one to keep the number of total ckpts as 3
default_hooks = dict( checkpoint=dict(type='CheckpointHook', interval=10, max_keep_ckpts=3))
The error log is as follows:
02/21 01:58:42 - mmengine - WARNING - The "visualizer" registry in mmselfsup did not set import location. Fallback to call mmselfsup.utils.register_all_modules
02/21 01:58:42 - mmengine - WARNING - The "vis_backend" registry in mmselfsup did not set import location. Fallback to call mmselfsup.utils.register_all_modules
02/21 01:58:43 - mmengine - WARNING - The "model" registry in mmselfsup did not set import location. Fallback to call mmselfsup.utils.register_all_modules
02/21 01:58:43 - mmengine - WARNING - The "model" registry in mmcls did not set import location. Fallback to call mmcls.utils.register_all_modules
02/21 01:58:43 - mmengine - INFO - Distributed training is not used, all SyncBatchNorm (SyncBN) layers in the model will be automatically reverted to BatchNormXd layers if they are used.
02/21 01:58:43 - mmengine - WARNING - The "hook" registry in mmselfsup did not set import location. Fallback to call mmselfsup.utils.register_all_modules
02/21 01:58:43 - mmengine - WARNING - The "dataset" registry in mmselfsup did not set import location. Fallback to call mmselfsup.utils.register_all_modules
Traceback (most recent call last):
File "/home/wangxin/anaconda3/envs/openmmlab/lib/python3.8/site-packages/mmengine/registry/", line 121, in build_from_cfg
obj = obj_cls(**args) # type: ignore
File "/home/wangxin/mmselfsup/mmselfsup/engine/hooks/", line 47, in init
self.extractor = Extractor(
File "/home/wangxin/mmselfsup/mmselfsup/models/utils/", line 52, in init
self.data_loader = Runner.build_dataloader(
File "/home/wangxin/anaconda3/envs/openmmlab/lib/python3.8/site-packages/mmengine/runner/", line 1333, in build_dataloader
dataset =
File "/home/wangxin/anaconda3/envs/openmmlab/lib/python3.8/site-packages/mmengine/registry/", line 521, in build
return self.build_func(cfg, *args, **kwargs, registry=self)
File "/home/wangxin/anaconda3/envs/openmmlab/lib/python3.8/site-packages/mmengine/registry/", line 100, in build_from_cfg
raise KeyError(
KeyError: 'Cifar10 is not in the dataset registry. Please check whether the value of Cifar10
is correct or it was registered as expected. More details can be found at'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "tools/", line 99, in DeepClusterHook
in mmselfsup/engine/hooks/ 'Cifar10 is not in the dataset registry. Please check whether the value of Cifar10
is correct or it was registered as expected. More details can be found at'"
It seems that Cifar10 needs to be registered? Does mmselfsup have its own CIFAR10 dataset?