opencompass

[Bug] run.py configs/eval_subjective_score.py reports "AttributeError: can't set attribute 'pad_token_id'"

Open · e06084 opened this issue 1 year ago • 1 comment

Prerequisite

Type

I'm evaluating with the officially supported tasks/models/datasets.

Environment

{'CUDA available': False,
 'GCC': 'gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-44)',
 'MMEngine': '0.10.1',
 'OpenCV': '4.8.1',
 'PyTorch': '2.1.2',
 'PyTorch compiling details': 'PyTorch built with:\n'
                              '  - GCC 9.3\n'
                              '  - C++ Version: 201703\n'
                              '  - Intel(R) oneAPI Math Kernel Library Version '
                              '2023.1-Product Build 20230303 for Intel(R) 64 '
                              'architecture applications\n'
                              '  - Intel(R) MKL-DNN v3.1.1 (Git Hash '
                              '64f6bcbcbab628e96f33a62c3e975f8535a7bde4)\n'
                              '  - OpenMP 201511 (a.k.a. OpenMP 4.5)\n'
                              '  - LAPACK is enabled (usually provided by '
                              'MKL)\n'
                              '  - NNPACK is enabled\n'
                              '  - CPU capability usage: AVX512\n'
                              '  - Build settings: BLAS_INFO=mkl, '
                              'BUILD_TYPE=Release, CUDA_VERSION=12.1, '
                              'CUDNN_VERSION=8.9.2, '
                              'CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, '
                              'CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=0 '
                              '-fabi-version=11 -fvisibility-inlines-hidden '
                              '-DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO '
                              '-DLIBKINETO_NOROCTRACER -DUSE_FBGEMM '
                              '-DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK '
                              '-DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE '
                              '-O2 -fPIC -Wall -Wextra -Werror=return-type '
                              '-Werror=non-virtual-dtor -Werror=bool-operation '
                              '-Wnarrowing -Wno-missing-field-initializers '
                              '-Wno-type-limits -Wno-array-bounds '
                              '-Wno-unknown-pragmas -Wno-unused-parameter '
                              '-Wno-unused-function -Wno-unused-result '
                              '-Wno-strict-overflow -Wno-strict-aliasing '
                              '-Wno-stringop-overflow -Wno-psabi '
                              '-Wno-error=pedantic -Wno-error=old-style-cast '
                              '-Wno-invalid-partial-specialization '
                              '-Wno-unused-private-field '
                              '-Wno-aligned-allocation-unavailable '
                              '-Wno-missing-braces -fdiagnostics-color=always '
                              '-faligned-new -Wno-unused-but-set-variable '
                              '-Wno-maybe-uninitialized -fno-math-errno '
                              '-fno-trapping-math -Werror=format '
                              '-Werror=cast-function-type '
                              '-Wno-stringop-overflow, LAPACK_INFO=mkl, '
                              'PERF_WITH_AVX=1, PERF_WITH_AVX2=1, '
                              'PERF_WITH_AVX512=1, '
                              'TORCH_DISABLE_GPU_ASSERTS=ON, '
                              'TORCH_VERSION=2.1.2, USE_CUDA=ON, USE_CUDNN=ON, '
                              'USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, '
                              'USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, '
                              'USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, '
                              'USE_OPENMP=ON, USE_ROCM=OFF, \n',
 'Python': '3.10.13 (main, Sep 11 2023, 13:44:35) [GCC 11.2.0]',
 'TorchVision': '0.16.2',
 'numpy_random_seed': 2147483648,
 'opencompass': '0.2.0+fbb912d',
 'sys.platform': 'linux'}

 $ pip list | grep transformer
sentence-transformers  2.2.2
transformers           4.36.2

Reproduces the problem - code/configuration sample

$ cat configs/eval_subjective_score.py
from mmengine.config import read_base
with read_base():
    from .models.qwen.hf_qwen_7b_chat import models as hf_qwen_7b_chat
    from .models.qwen.hf_qwen_14b_chat import models as hf_qwen_14b_chat
    from .models.chatglm.hf_chatglm3_6b import models as hf_chatglm3_6b
    from .models.baichuan.hf_baichuan2_7b_chat import models as hf_baichuan2_7b
    from .models.hf_internlm.hf_internlm_chat_7b import models as hf_internlm_chat_7b
    from .models.hf_internlm.hf_internlm_chat_20b import models as hf_internlm_chat_20b
    from .datasets.subjective_cmp.subjective_creation import subjective_datasets

datasets = [*subjective_datasets]

from opencompass.models import HuggingFaceCausalLM, HuggingFace, OpenAI
from opencompass.partitioners import NaivePartitioner
from opencompass.partitioners.sub_naive import SubjectiveNaivePartitioner
from opencompass.runners import LocalRunner
from opencompass.runners import SlurmSequentialRunner
from opencompass.tasks import OpenICLInferTask
from opencompass.tasks.subjective_eval import SubjectiveEvalTask
from opencompass.summarizers import Creationv01Summarizer
models = [*hf_chatglm3_6b]

# Role mapping (role -> api_role): each round is a HUMAN entry followed by a
# BOT entry (the BOT entry carries generate=True), and SYSTEM is listed as a
# reserved role.
# NOTE(review): nothing else in this config references api_meta_template;
# presumably kept for API-backed judge models such as the imported OpenAI
# class -- confirm before removing.
api_meta_template = dict(
    round=[
        dict(role='HUMAN', api_role='HUMAN'),
        dict(role='BOT', api_role='BOT', generate=True)
    ],
    reserved_roles=[
        dict(role='SYSTEM', api_role='SYSTEM'),
    ],
)

# Inference stage settings: partitioner type NaivePartitioner, and a
# LocalRunner (max_num_workers=256) whose task type is OpenICLInferTask.
infer = dict(
    partitioner=dict(type=NaivePartitioner),
    runner=dict(
        type=LocalRunner,
        max_num_workers=256,
        task=dict(type=OpenICLInferTask)),
)


# Prompt template passed to judge_model as meta_template: every HUMAN and BOT
# turn is wrapped in the begin/end marker strings below, and the BOT turn is
# flagged with generate=True.
# NOTE(review): the <|im_start|>/<|im_end|> markers look like a Qwen/ChatML
# style chat format, while the judge model in this config is chatglm3-6b --
# confirm the template matches the judge model's expected chat format.
_meta_template = dict(
    round=[
        dict(role="HUMAN", begin='\n<|im_start|>user\n', end='<|im_end|>'),
        dict(role="BOT", begin="\n<|im_start|>assistant\n", end='<|im_end|>', generate=True),
    ],
)


# Judge model configuration, handed to SubjectiveEvalTask as judge_cfg in the
# eval section of this config. It loads THUDM/chatglm3-6b (model and tokenizer)
# through HuggingFaceCausalLM with trust_remote_code=True.
judge_model = dict(
        type=HuggingFaceCausalLM,
        abbr='chatglm3-6b-hf',
        path='THUDM/chatglm3-6b',
        tokenizer_path='THUDM/chatglm3-6b',
        model_kwargs=dict(
            device_map='auto',
            trust_remote_code=True
        ),
        tokenizer_kwargs=dict(
            padding_side='left',
            truncation_side='left',
            trust_remote_code=True,
            use_fast=False,),
        # This explicit pad_token_id is the value involved in the reported
        # failure: the run log warns "pad_token_id is not consistent with the
        # tokenizer. Using 151643 as pad_token_id", and the subsequent
        # `self.tokenizer.pad_token_id = self.pad_token_id` assignment in
        # huggingface.py raises "AttributeError: can't set attribute
        # 'pad_token_id'".
        # NOTE(review): 151643 was presumably carried over from a different
        # model's config (e.g. Qwen) -- verify; dropping this key for chatglm3
        # would likely avoid the assignment, TODO confirm.
        pad_token_id=151643,
        max_out_len=2048,
        max_seq_len=2048,
        batch_size=8,
        meta_template=_meta_template,
        run_cfg=dict(num_gpus=1, num_procs=1),
    )


# Evaluation stage settings: SubjectiveNaivePartitioner in 'singlescore' mode
# over the chatglm3-6b model list, run by a LocalRunner (max_num_workers=256)
# whose SubjectiveEvalTask receives judge_model as its judge_cfg.
# Note: the name `eval` shadows the Python builtin inside this config module;
# presumably it is the key the framework reads, so it is left as-is.
eval = dict(
    partitioner=dict(
        type=SubjectiveNaivePartitioner,
        mode='singlescore',
        models = [*hf_chatglm3_6b]
    ),
    runner=dict(
        type=LocalRunner,
        # partition='llmeval',
        # quotatype='auto',
        max_num_workers=256,
        task=dict(
            type=SubjectiveEvalTask,
            judge_cfg=judge_model
        )),
)
# Working directory for this run (relative path).
work_dir = './creation/'

# Summarizer settings: Creationv01Summarizer with match_method='smart'.
summarizer = dict(
    type=Creationv01Summarizer,
    match_method='smart',
)

Reproduces the problem - command or script

python run.py configs/eval_subjective_score.py --debug

Reproduces the problem - error message

/home/chupei/miniconda3/envs/opencompass/lib/python3.10/site-packages/torch/cuda/__init__.py:138: UserWarning: CUDA initialization: The NVIDIA driver on your system is too old (found version 11070). Please update your GPU driver by downloading and installing a new version from the URL: http://www.nvidia.com/Download/index.aspx Alternatively, go to: https://pytorch.org to install a PyTorch version that has been compiled with your version of the CUDA driver. (Triggered internally at /opt/conda/conda-bld/pytorch_1702400410390/work/c10/cuda/CUDAFunctions.cpp:108.)
  return torch._C._cuda_getDeviceCount() > 0
/home/chupei/miniconda3/envs/opencompass/lib/python3.10/site-packages/transformers/utils/hub.py:123: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.
  warnings.warn(
A new version of the following files was downloaded from https://huggingface.co/THUDM/chatglm3-6b:
- tokenization_chatglm.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
12/21 20:27:45 - OpenCompass - WARNING - pad_token_id is not consistent with the tokenizer. Using 151643 as pad_token_id
Traceback (most recent call last):
  File "/home/chupei/code/opencompass/opencompass/tasks/subjective_eval.py", line 258, in <module>
    inferencer.run()
  File "/home/chupei/code/opencompass/opencompass/tasks/subjective_eval.py", line 81, in run
    self._score(model_cfg, dataset_cfg, eval_cfg, output_column)
  File "/home/chupei/code/opencompass/opencompass/tasks/subjective_eval.py", line 173, in _score
    icl_evaluator = ICL_EVALUATORS.build(eval_cfg['evaluator'])
  File "/home/chupei/miniconda3/envs/opencompass/lib/python3.10/site-packages/mmengine/registry/registry.py", line 570, in build
    return self.build_func(cfg, *args, **kwargs, registry=self)
  File "/home/chupei/miniconda3/envs/opencompass/lib/python3.10/site-packages/mmengine/registry/build_functions.py", line 121, in build_from_cfg
    obj = obj_cls(**args)  # type: ignore
  File "/home/chupei/code/opencompass/opencompass/openicl/icl_evaluator/lm_evaluator.py", line 91, in __init__
    model = build_model_from_cfg(model_cfg=judge_cfg)
  File "/home/chupei/code/opencompass/opencompass/utils/build.py", line 23, in build_model_from_cfg
    return MODELS.build(model_cfg)
  File "/home/chupei/miniconda3/envs/opencompass/lib/python3.10/site-packages/mmengine/registry/registry.py", line 570, in build
    return self.build_func(cfg, *args, **kwargs, registry=self)
  File "/home/chupei/miniconda3/envs/opencompass/lib/python3.10/site-packages/mmengine/registry/build_functions.py", line 121, in build_from_cfg
    obj = obj_cls(**args)  # type: ignore
  File "/home/chupei/code/opencompass/opencompass/models/huggingface.py", line 120, in __init__
    self._load_tokenizer(path=path,
  File "/home/chupei/code/opencompass/opencompass/models/huggingface.py", line 149, in _load_tokenizer
    self.tokenizer.pad_token_id = self.pad_token_id
AttributeError: can't set attribute 'pad_token_id'
[2023-12-21 20:27:51,054] torch.distributed.elastic.multiprocessing.api: [ERROR] failed (exitcode: 1) local_rank: 0 (pid: 60075) of binary: /home/chupei/miniconda3/envs/opencompass/bin/python
Traceback (most recent call last):
  File "/home/chupei/miniconda3/envs/opencompass/bin/torchrun", line 33, in <module>
    sys.exit(load_entry_point('torch==2.1.2', 'console_scripts', 'torchrun')())
  File "/home/chupei/miniconda3/envs/opencompass/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", line 346, in wrapper
    return f(*args, **kwargs)
  File "/home/chupei/miniconda3/envs/opencompass/lib/python3.10/site-packages/torch/distributed/run.py", line 806, in main
    run(args)
  File "/home/chupei/miniconda3/envs/opencompass/lib/python3.10/site-packages/torch/distributed/run.py", line 797, in run
    elastic_launch(
  File "/home/chupei/miniconda3/envs/opencompass/lib/python3.10/site-packages/torch/distributed/launcher/api.py", line 134, in __call__
    return launch_agent(self._config, self._entrypoint, list(args))
  File "/home/chupei/miniconda3/envs/opencompass/lib/python3.10/site-packages/torch/distributed/launcher/api.py", line 264, in launch_agent
    raise ChildFailedError(
torch.distributed.elastic.multiprocessing.errors.ChildFailedError: 
============================================================
/home/chupei/code/opencompass/opencompass/tasks/subjective_eval.py FAILED
------------------------------------------------------------
Failures:
  <NO_OTHER_FAILURES>
------------------------------------------------------------
Root Cause (first observed failure):
[0]:
  time      : 2023-12-21_20:27:51
  host      : SH-IDCA1404-10-140-54-48
  rank      : 0 (local_rank: 0)
  exitcode  : 1 (pid: 60075)
  error_file: <N/A>
  traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html
============================================================

Other information

No response

e06084 avatar Dec 21 '23 12:12 e06084

I believe the culprit of this issue is related to the implementation of ChatGLMTokenizer: https://huggingface.co/THUDM/chatglm3-6b/blob/103caa40027ebfd8450289ca2f278eac4ff26405/tokenization_chatglm.py#L147

pad_token_id is a property of ChatGLMTokenizer without a setter method, so it cannot be set directly during initialization: https://github.com/open-compass/opencompass/blob/cce5b6fbb62156c962376a3580c720a948fb054c/opencompass/models/huggingface.py#L147

acylam avatar Apr 28 '24 09:04 acylam