ChatGLM-Tuning
ValueError: 130004 is not in list
When I train the model, I run:

```bash
!sed -i "s/THUDM\/chatglm-6b/\/openbayes\/home\/chatglm-6b/" finetune.py
!cat finetune.py
!python finetune.py \
    --dataset_path /output/train \
    --lora_rank 8 \
    --per_device_train_batch_size 6 \
    --gradient_accumulation_steps 1 \
    --max_steps 3000 \
    --save_steps 1000 \
    --save_total_limit 2 \
    --learning_rate 1e-4 \
    --fp16 \
    --remove_unused_columns false \
    --logging_steps 50 \
    --output_dir /output/lora
```

Here is the finetune.py being run:
```python
from transformers.integrations import TensorBoardCallback
from torch.utils.tensorboard import SummaryWriter
from transformers import TrainingArguments
from transformers import Trainer, HfArgumentParser
from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn as nn
from peft import get_peft_model, LoraConfig, TaskType
from dataclasses import dataclass, field
import datasets
import os

tokenizer = AutoTokenizer.from_pretrained("/openbayes/home/chatglm-6b", trust_remote_code=True)


@dataclass
class FinetuneArguments:
    dataset_path: str = field(default="data/alpaca")
    model_path: str = field(default="output")
    lora_rank: int = field(default=8)


class CastOutputToFloat(nn.Sequential):
    def forward(self, x):
        return super().forward(x).to(torch.float32)


def data_collator(features: list) -> dict:
    len_ids = [len(feature["input_ids"]) for feature in features]
    longest = max(len_ids)
    input_ids = []
    labels_list = []
    for ids_l, feature in sorted(zip(len_ids, features), key=lambda x: -x[0]):
        ids = feature["input_ids"]
        seq_len = feature["seq_len"]
        labels = (
            [-100] * (seq_len - 1) + ids[(seq_len - 1):] + [-100] * (longest - ids_l)
        )
        ids = ids + [tokenizer.pad_token_id] * (longest - ids_l)
        _ids = torch.LongTensor(ids)
        labels_list.append(torch.LongTensor(labels))
        input_ids.append(_ids)
    input_ids = torch.stack(input_ids)
    labels = torch.stack(labels_list)
    return {
        "input_ids": input_ids,
        "labels": labels,
    }


class ModifiedTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        return model(
            input_ids=inputs["input_ids"],
            labels=inputs["labels"],
        ).loss

    def save_model(self, output_dir=None, _internal_call=False):
        from transformers.trainer import TRAINING_ARGS_NAME

        os.makedirs(output_dir, exist_ok=True)
        torch.save(self.args, os.path.join(output_dir, TRAINING_ARGS_NAME))
        saved_params = {
            k: v.to("cpu") for k, v in self.model.named_parameters() if v.requires_grad
        }
        torch.save(saved_params, os.path.join(output_dir, "adapter_model.bin"))


def main():
    writer = SummaryWriter()
    finetune_args, training_args = HfArgumentParser(
        (FinetuneArguments, TrainingArguments)
    ).parse_args_into_dataclasses()

    # init model
    model = AutoModel.from_pretrained(
        "/openbayes/home/chatglm-6b", load_in_8bit=True, trust_remote_code=True, device_map="auto"
    )
    model.gradient_checkpointing_enable()
    model.enable_input_require_grads()
    model.is_parallelizable = True
    model.model_parallel = True
    model.lm_head = CastOutputToFloat(model.lm_head)
    model.config.use_cache = (
        False  # silence the warnings. Please re-enable for inference!
    )

    # setup peft
    peft_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        inference_mode=False,
        r=finetune_args.lora_rank,
        lora_alpha=32,
        lora_dropout=0.1,
    )
    model = get_peft_model(model, peft_config)

    # load dataset
    dataset = datasets.load_from_disk(finetune_args.dataset_path)
    print(f"\n{len(dataset)=}\n")

    # start train
    trainer = ModifiedTrainer(
        model=model,
        train_dataset=dataset,
        args=training_args,
        callbacks=[TensorBoardCallback(writer)],
        data_collator=data_collator,
    )
    trainer.train()
    writer.close()
    # save model
    model.save_pretrained(training_args.output_dir)


if __name__ == "__main__":
    main()
```
Running it produces the error below:

```
===================================BUG REPORT===================================
Welcome to bitsandbytes. For bug reports, please submit your error trace to:
https://github.com/TimDettmers/bitsandbytes/issues
/usr/local/lib/python3.8/site-packages/bitsandbytes/cuda_setup/main.py:136: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('/usr/local/nvidia/lib64'), PosixPath('/usr/local/nvidia/lib')}
  warn(msg)
/usr/local/lib/python3.8/site-packages/bitsandbytes/cuda_setup/main.py:136: UserWarning: /usr/local/nvidia/lib:/usr/local/nvidia/lib64 did not contain libcudart.so as expected! Searching further paths...
  warn(msg)
/usr/local/lib/python3.8/site-packages/bitsandbytes/cuda_setup/main.py:136: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('home/be7a8072-3e98-4214-9de2-30dc39aa3baf')}
  warn(msg)
/usr/local/lib/python3.8/site-packages/bitsandbytes/cuda_setup/main.py:136: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('tcp'), PosixPath('//10.111.0.1'), PosixPath('443')}
  warn(msg)
/usr/local/lib/python3.8/site-packages/bitsandbytes/cuda_setup/main.py:136: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('7890'), PosixPath('http'), PosixPath('//alchemist-experience')}
  warn(msg)
/usr/local/lib/python3.8/site-packages/bitsandbytes/cuda_setup/main.py:136: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('localhost,127.0.0.1,openbayes-server-svc,openbayes-storage-server-svc,10.0.0.0/8')}
  warn(msg)
/usr/local/lib/python3.8/site-packages/bitsandbytes/cuda_setup/main.py:136: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('itxingqing/jobs/n27oyqsbrszc')}
  warn(msg)
/usr/local/lib/python3.8/site-packages/bitsandbytes/cuda_setup/main.py:136: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('module'), PosixPath('//matplotlib_inline.backend_inline')}
  warn(msg)
/usr/local/lib/python3.8/site-packages/bitsandbytes/cuda_setup/main.py:136: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('/output/.torch')}
  warn(msg)
/usr/local/lib/python3.8/site-packages/bitsandbytes/cuda_setup/main.py:136: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('//openbayes-server-svc/api/users/itxingqing/jobs/n27oyqsbrszc'), PosixPath('http')}
  warn(msg)
CUDA_SETUP: WARNING! libcudart.so not found in any environmental path. Searching /usr/local/cuda/lib64...
CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 8.6
CUDA SETUP: Detected CUDA version 117
CUDA SETUP: Loading binary /usr/local/lib/python3.8/site-packages/bitsandbytes/libbitsandbytes_cuda117.so...
Explicitly passing a revision is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a revision is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a revision is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Overriding torch_dtype=None with torch_dtype=torch.float16 due to requirements of bitsandbytes to enable model loading in mixed int8. Either pass torch_dtype=torch.float16 or don't pass this argument at all to remove this warning.
Loading checkpoint shards: 100%|██████████████████| 8/8 [00:06<00:00, 1.15it/s]

len(dataset)=3438

You are adding a <class 'transformers.integrations.TensorBoardCallback'> to the callbacks of this Trainer, but there is already one. The current list of callbacks is:
DefaultFlowCallback
TensorBoardCallback
/usr/local/lib/python3.8/site-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set no_deprecation_warning=True to disable this warning
  warnings.warn(
  0%|          | 0/3000 [00:00<?, ?it/s]../aten/src/ATen/native/cuda/Indexing.cu:1141: indexSelectLargeIndex: block: [150,0,0], thread: [96,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
../aten/src/ATen/native/cuda/Indexing.cu:1141: indexSelectLargeIndex: block: [150,0,0], thread: [97,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
../aten/src/ATen/native/cuda/Indexing.cu:1141: indexSelectLargeIndex: block: [150,0,0], thread: [98,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
[... the same indexSelectLargeIndex assertion repeats for dozens more threads in blocks 150 and 152 ...]
../aten/src/ATen/native/cuda/Indexing.cu:1141: indexSelectLargeIndex: block: [152,0,0], thread: [95,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
Traceback (most recent call last):
  File "finetune.py", line 118, in
```
You can update to the latest code; the official repo changed the special tokens.
https://github.com/mymusise/ChatGLM-Tuning/issues/138#issuecomment-1499869956
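If you want to confirm which special-token IDs your local snapshot actually uses (and whether they match what the training code expects), a minimal check like the sketch below should help. The path is the one from the post above, and the token/attribute names are assumptions about the current ChatGLM-6B tokenizer, so adjust as needed.

```python
from transformers import AutoTokenizer

# Local snapshot path taken from the post above; point this at your own copy.
tokenizer = AutoTokenizer.from_pretrained("/openbayes/home/chatglm-6b", trust_remote_code=True)

# Newer snapshots report 130001 ([gMASK]) / 130004 (BOS), which matches the 130004 in
# the error above; older snapshots used 150001 / 150004.
print("bos_token_id:", tokenizer.bos_token_id)
print("eos_token_id:", tokenizer.eos_token_id)
print("pad_token_id:", tokenizer.pad_token_id)
print("[gMASK] id:  ", tokenizer.convert_tokens_to_ids("[gMASK]"))
```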

Related: "finetune数据样本不需要增加[MASK]标记吗?" (Don't the finetune data samples need the [MASK] token added?) · Issue #79 · mymusise/ChatGLM-Tuning https://github.com/mymusise/ChatGLM-Tuning/issues/79
Aloha0424 commented 2 weeks ago: This problem shows up when the input is too long. The cause is that during encode the special tokens are appended first and truncation is applied afterwards, so after truncation the appended special tokens 150001 and 150004 are lost. A quick fix is to patch prompt_ids in the preprocess function of tokenize_dataset_rows.py, forcing the last two IDs back to 150001 and 150004 (prompt_ids[-2] = 150001; prompt_ids[-1] = 150004).
Then regenerate the data and rerun: python tokenize_dataset_rows.py --jsonl_path data/alpaca_data.jsonl --save_path data/alpaca --max_seq_length 200 --skip_overlength false
```python
def preprocess(tokenizer, config, example, max_seq_length):
    prompt = example["context"]
    target = example["target"]
    prompt_ids = tokenizer.encode(prompt, max_length=max_seq_length, truncation=True)
    # added the two lines below, then regenerate the data .......
    prompt_ids[-2] = 150001
    prompt_ids[-1] = 150004
    # end of the added lines .......
    target_ids = tokenizer.encode(
        target,
        max_length=max_seq_length,
        truncation=True,
        add_special_tokens=False)
    input_ids = prompt_ids + target_ids + [config.eos_token_id]
    return {"input_ids": input_ids, "seq_len": len(prompt_ids)}
```
The ChatGLM-6B Hugging Face repo was updated; you need to re-download the model and then run again (some of the official special-token IDs changed again).

> The ChatGLM-6B Hugging Face repo was updated; you need to re-download the model and then run again.

You mean https://huggingface.co/THUDM/chatglm-6b ?

> You mean https://huggingface.co/THUDM/chatglm-6b ?

Yes, and quite a few of the model and tokenizer files in it have changed.

Got it. Can I add you on WeChat?

Did you manage to solve this problem?
I ran into this problem too. The root cause is that this repo's code is not aligned with the open-source ChatGLM release. The fix is simple: follow the official P-Tuning data preprocessing and build the inputs with one line, input_ids = tokenizer.build_inputs_with_special_tokens(a_ids, b_ids).
Reference: https://github.com/THUDM/ChatGLM-6B/blob/main/ptuning/main.py#L217

@DBtxy In which script exactly should the line input_ids = tokenizer.build_inputs_with_special_tokens(a_ids, b_ids) be added? tokenization_chatglm.py?

In tokenize_dataset_rows.py; just change the preprocess function there. I'll paste my code so you can use it as a reference:
```python
def preprocess(tokenizer, config, example, max_source_length, max_target_length):
    prompt = example["context"]
    target = example["target"]
    # print("prompt: ", prompt)
    # print(">>>>>>>>>>>>>>>> ")
    # print("target: ", target)
    prompt_ids = tokenizer.encode(text=prompt, max_length=max_source_length, add_special_tokens=False, truncation=True)
    # the previously added hard-coded special-token IDs are no longer needed:
    # prompt_ids[-2] = 150001
    # prompt_ids[-1] = 150004
    target_ids = tokenizer.encode(
        target,
        max_length=max_target_length,
        truncation=True,
        add_special_tokens=False)
    # input_ids = prompt_ids + target_ids + [config.eos_token_id]
    input_ids = tokenizer.build_inputs_with_special_tokens(prompt_ids, target_ids)
    return {"input_ids": input_ids, "seq_len": len(prompt_ids)}
```
> In tokenize_dataset_rows.py; just change the preprocess function there. (code quoted above)

@DBtxy Thanks for the quick reply, brother. I saw it right away. Since I'm not running the P-Tuning code I can't apply the change directly, but I understand your idea!! Thanks again!!!
tokenize_dataset_rows.py: bro, where is this file? I can't find it.

> Where is this tokenize_dataset_rows.py file? I can't find it.

The file I'm talking about is right in this repo. In any case, whichever copy of the code you are using, just find the place where input_ids is built.

I'm already on the latest code, which does have the line input_ids = tokenizer.build_inputs_with_special_tokens(a_ids, b_ids), and it still fails. What's going on here?
The error says 130004 is not in list, so why set prompt_ids[-2] = 150001 and prompt_ids[-1] = 150004?
> I'm already on the latest code, which does have the line input_ids = tokenizer.build_inputs_with_special_tokens(a_ids, b_ids), and it still fails.

Hi, did you manage to solve it? Thanks.
> In tokenize_dataset_rows.py; just change the preprocess function there. (code quoted above)

Which path is tokenize_dataset_rows.py under? I couldn't find it in the official code.
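For anyone who has applied the fix and still hits the error, one more thing worth checking is whether the tokenized dataset already saved on disk was produced with an older tokenizer. The sketch below is a minimal check under that assumption; the paths are the ones from the original command, and regenerating the data with tokenize_dataset_rows.py is the remedy if examples are missing the BOS id.

```python
import datasets
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("/openbayes/home/chatglm-6b", trust_remote_code=True)
ds = datasets.load_from_disk("/output/train")   # dataset path from the original command

# The model code looks up the BOS id inside each sequence; if an example was tokenized
# against an old snapshot, that id is missing and .index() raises
# "ValueError: 130004 is not in list".
missing = sum(1 for ex in ds if tokenizer.bos_token_id not in ex["input_ids"])
print(f"{missing} / {len(ds)} examples are missing bos_token_id={tokenizer.bos_token_id}")
```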
