opencompass
opencompass copied to clipboard
[Bug] Cannot find a valid download URL for the BAAI/TACO dataset
Prerequisite
- [x] I have searched Issues and Discussions but cannot get the expected help.
- [x] The bug has not been fixed in the latest version.
Type
I'm evaluating with the officially supported tasks/models/datasets.
Environment
{'CUDA available': True,
'CUDA_HOME': '/usr/local/cuda',
'GCC': 'gcc (GCC) 11.4.0',
'GPU 0,1,2,3,4,5,6,7': 'NVIDIA H20',
'MMEngine': '0.10.7',
'MUSA available': False,
'NVCC': 'Cuda compilation tools, release 12.8, V12.8.93',
'PyTorch': '2.7.1+cu126',
'PyTorch compiling details': 'PyTorch built with:\n'
' - GCC 11.2\n'
' - C++ Version: 201703\n'
' - Intel(R) oneAPI Math Kernel Library Version '
'2024.2-Product Build 20240605 for Intel(R) 64 '
'architecture applications\n'
' - Intel(R) MKL-DNN v3.7.1 (Git Hash '
'8d263e693366ef8db40acc569cc7d8edf644556d)\n'
' - OpenMP 201511 (a.k.a. OpenMP 4.5)\n'
' - LAPACK is enabled (usually provided by '
'MKL)\n'
' - NNPACK is enabled\n'
' - CPU capability usage: AVX512\n'
' - CUDA Runtime 12.6\n'
' - NVCC architecture flags: '
'-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90\n'
' - CuDNN 90.5.1\n'
' - Magma 2.6.1\n'
' - Build settings: BLAS_INFO=mkl, '
'BUILD_TYPE=Release, '
'COMMIT_SHA=e2d141dbde55c2a4370fac5165b0561b6af4798b, '
'CUDA_VERSION=12.6, CUDNN_VERSION=9.5.1, '
'CXX_COMPILER=/opt/rh/gcc-toolset-11/root/usr/bin/c++, '
'CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 '
'-fvisibility-inlines-hidden -DUSE_PTHREADPOOL '
'-DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER '
'-DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM '
'-DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK '
'-DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC '
'-Wall -Wextra -Werror=return-type '
'-Werror=non-virtual-dtor '
'-Werror=range-loop-construct '
'-Werror=bool-operation -Wnarrowing '
'-Wno-missing-field-initializers '
'-Wno-unknown-pragmas -Wno-unused-parameter '
'-Wno-strict-overflow -Wno-strict-aliasing '
'-Wno-stringop-overflow -Wsuggest-override '
'-Wno-psabi -Wno-error=old-style-cast '
'-fdiagnostics-color=always -faligned-new '
'-Wno-maybe-uninitialized -fno-math-errno '
'-fno-trapping-math -Werror=format '
'-Wno-stringop-overflow, LAPACK_INFO=mkl, '
'PERF_WITH_AVX=1, PERF_WITH_AVX2=1, '
'TORCH_VERSION=2.7.1, USE_CUDA=ON, USE_CUDNN=ON, '
'USE_CUSPARSELT=1, USE_GFLAGS=OFF, USE_GLOG=OFF, '
'USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, '
'USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, '
'USE_OPENMP=ON, USE_ROCM=OFF, '
'USE_ROCM_KERNEL_ASSERT=OFF, \n',
'Python': '3.10.18 (main, Jun 5 2025, 13:14:17) [GCC 11.2.0]',
'TorchVision': '0.22.1+cu126',
'lmdeploy': "not installed:No module named 'lmdeploy'",
'numpy_random_seed': 2147483648,
'opencompass': '0.4.2+6eee45e',
'sys.platform': 'linux',
'transformers': '4.56.0'}
Reproduces the problem - code/configuration sample
from mmengine.config import read_base
from opencompass.models import VLLMwithChatTemplate, OpenAISDK
from opencompass.partitioners import NaivePartitioner, NumWorkerPartitioner
from opencompass.runners import LocalRunner
from opencompass.tasks import OpenICLInferTask, OpenICLEvalTask
from opencompass.utils.text_postprocessors import extract_non_reasoning_content
#######################################################################
# PART 1 Datasets List #
#######################################################################
with read_base():
from opencompass.configs.datasets.taco.taco_gen_c7893a import TACO_datasets
datasets = sum(
(v for k, v in locals().items() if k.endswith("_datasets")),
[],
)
#######################################################################
# PART 2 Model List #
#######################################################################
models = [
dict(
type=VLLMwithChatTemplate,
abbr="deepseek-r1-distill-qwen-1.5b-vllm",
path="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
max_out_len=32768,
batch_size=16,
generation_kwargs=dict(temperature=0.6, top_p=0.95, do_sample=True),
model_kwargs=dict(tensor_parallel_size=1),
run_cfg=dict(num_gpus=1),
pred_postprocessor=dict(type=extract_non_reasoning_content)
)
]
#######################################################################
# PART 3 Inference/Evaluation #
#######################################################################
# Inference configuration
infer = dict(
partitioner=dict(
type=NumWorkerPartitioner,
num_worker=8
),
runner=dict(
type=LocalRunner,
task=dict(type=OpenICLInferTask)
),
)
# Evaluation configuration
eval = dict(
partitioner=dict(
type=NaivePartitioner, n=8
),
runner=dict(
type=LocalRunner,
task=dict(
type=OpenICLEvalTask)
),
)
#######################################################################
# PART 4 Utils #
#######################################################################
work_dir = "outputs/deepseek_r1_distill_qwen_1p5b_taco"
Reproduces the problem - command or script
python run.py eval_deepseek_r1_distill_qwen_1p5b_taco.py --debug
Reproduces the problem - error message
INFO 08-31 23:20:47 [__init__.py:241] Automatically detected platform cuda.
08/31 23:20:48 - OpenCompass - INFO - Current exp folder: outputs/deepseek_r1_distill_qwen_1p5b_taco/20250831_232048
08/31 23:20:48 - OpenCompass - WARNING - SlurmRunner is not used, so the partition argument is ignored.
08/31 23:20:48 - OpenCompass - INFO - ./data/BAAI-TACO does not exist!Start Download data automatically!If you have downloaded the data before,You can specific `COMPASS_DATA_CACHE` to avoid downloading~
Traceback (most recent call last):
File "/root/opencompass/run.py", line 4, in <module>
main()
File "/root/opencompass/opencompass/cli/main.py", line 345, in main
tasks = partitioner(cfg)
File "/root/opencompass/opencompass/partitioners/base.py", line 83, in __call__
tasks = self.partition(**model_and_dataset_args,
File "/root/opencompass/opencompass/partitioners/num_worker.py", line 72, in partition
dataset_size = self.get_size(dataset)
File "/root/opencompass/opencompass/partitioners/num_worker.py", line 145, in get_size
dataset = build_dataset_from_cfg(dataset)
File "/root/opencompass/opencompass/utils/build.py", line 12, in build_dataset_from_cfg
return LOAD_DATASET.build(dataset_cfg)
File "/root/miniconda3/envs/opencompass/lib/python3.10/site-packages/mmengine/registry/registry.py", line 570, in build
return self.build_func(cfg, *args, **kwargs, registry=self)
File "/root/miniconda3/envs/opencompass/lib/python3.10/site-packages/mmengine/registry/build_functions.py", line 121, in build_from_cfg
obj = obj_cls(**args) # type: ignore
File "/root/opencompass/opencompass/datasets/base.py", line 19, in __init__
dataset = self.load(**kwargs)
File "/root/opencompass/opencompass/datasets/taco.py", line 42, in load
path = get_data_path(path, local_mode=True)
File "/root/opencompass/opencompass/utils/datasets.py", line 30, in get_data_path
return download_dataset(local_path, cache_dir)
File "/root/opencompass/opencompass/utils/datasets.py", line 90, in download_dataset
assert dataset_name, f'No valid url for {data_path}!\n' + \
AssertionError: No valid url for ./data/BAAI-TACO!
Please make sure `./data/BAAI-TACO` is correct
Other information
The TACO dataset config sets the data path to a local path (`./data/BAAI-TACO`): https://github.com/open-compass/opencompass/blob/main/opencompass/configs/datasets/taco/taco_gen_c7893a.py#L22
The dataset loader resolves that path with `get_data_path(path, local_mode=True)`: https://github.com/open-compass/opencompass/blob/main/opencompass/datasets/taco.py#L42-L43
However, I cannot find a matching dataset ID or download URL for it in: https://github.com/open-compass/opencompass/blob/main/opencompass/utils/datasets_info.py