Can't find 'word_embeddings' when using prompt-tuning with DeepSpeed
-
I'm trying to use prompt-tuning with PEFT and DeepSpeed as in the examples, but the 'word_embeddings' module can't be found when initializing PeftModelForCausalLM. The error messages are as follows.

-
I think the bug is caused by ZeRO-3: the parameters are partitioned, so their locally visible shape is 0 and never matches vocab_size, which is why word_embeddings can't be found. How can I fix this bug and use prompt-tuning and ZeRO at the same time?
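For illustration, here is a minimal sketch of the symptom (a hypothetical snippet, not part of my script; it assumes the model has already been wrapped by the ZeRO-3 engine, and that model.transformer.wte is CodeGen's input embedding):

    import deepspeed

    # Under ZeRO-3 the embedding weight is partitioned away, so locally it looks empty.
    print(model.transformer.wte.weight.shape)      # torch.Size([0])

    # Temporarily gather the full parameter on this rank to see its real shape.
    with deepspeed.zero.GatheredParameters(model.transformer.wte.weight):
        print(model.transformer.wte.weight.shape)  # (vocab_size, hidden_size)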
-
By the way, LoRA and prefix-tuning work fine with ZeRO-3.
-
My code is:

from transformers.deepspeed import HfDeepSpeedConfig
from transformers import AutoModelForCausalLM, AutoConfig, AutoTokenizer
from transformers.models.codegen.modeling_codegen import CodeGenMLP
import argparse
import torch
import time, datetime
import deepspeed
from deepspeed.accelerator import get_accelerator
from torch.utils.data import Dataset
from transformers.activations import ClippedGELUActivation, LinearActivation
from lion_pytorch import Lion
from datasets import load_dataset
import os, sys
from transformers import Trainer, TrainingArguments, HfArgumentParser, TrainerCallback
from transformers.integrations import WandbCallback
from peft import LoraConfig, get_peft_model, TaskType, PromptTuningInit
from peft import PrefixTuningConfig, PromptTuningConfig, PeftModelForCausalLM


class MyDataset(Dataset):
    def __init__(self, data, tknz):
        super().__init__()
        self.data = data
        self.tknz = tknz

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        tknz_text = self.tknz(
            self.data[idx]['text'],
            max_length=args.seq_len,
            padding='max_length',
            truncation=True,
        )
        return {
            'input_ids': tknz_text['input_ids'],
            'attention_mask': tknz_text['attention_mask'],
            'labels': tknz_text['input_ids']
        }


def collate_fn(batch, tknz):
    tknz_batch = tknz.pad(
        batch,
        padding=True,
        max_length=args.seq_len,
        pad_to_multiple_of=8,
        return_tensors='pt'
    )
    print("input_ids ", tknz_batch['input_ids'].shape)
    print("attn_mask ", tknz_batch['attention_mask'].shape)
    return {
        'input_ids': tknz_batch['input_ids'],
        'attention_mask': tknz_batch['attention_mask'],
        'labels': tknz_batch['input_ids']
    }


class PrinterCallback(TrainerCallback):
    # def on_substep_end(self, args, state, control, **kwargs):
    #     get_accelerator().empty_cache()

    def on_log(self, args, state, control, logs=None, **kwargs):
        _ = logs.pop("total_flos", None)
        if state.is_local_process_zero:
            print(logs)


def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )


def train():
    if args.local_rank == 0:
        print(training_args)
        print(f"[{datetime.datetime.today()}] Loading model.")
    model = AutoModelForCausalLM.from_pretrained(args.model_name_or_path, torch_dtype=torch.float16, local_files_only=True)

    if args.peft_type == "lora":
        config = LoraConfig(
            r=16,
            lora_alpha=32,
            target_modules=["q_proj", "v_proj"],
            lora_dropout=0.05,
            bias="none",
            task_type="CAUSAL_LM"
        )
    elif args.peft_type == "prefix-tuning":
        config = PrefixTuningConfig(
            task_type=TaskType.CAUSAL_LM,
            num_virtual_tokens=4,
        )
    elif args.peft_type == "prompt-tuning":  # TODO: can't find word_embeddings
        config = PromptTuningConfig(
            task_type=TaskType.CAUSAL_LM,
            prompt_tuning_init=PromptTuningInit.TEXT,
            num_virtual_tokens=4,
            tokenizer_name_or_path=args.model_name_or_path,
        )
    elif args.peft_type == "p-tuning":  # TODO: modify the config
        config = PromptTuningConfig(
            task_type=TaskType.CAUSAL_LM,
            prompt_tuning_init=PromptTuningInit.TEXT,
            num_virtual_tokens=4,
            tokenizer_name_or_path=args.model_name_or_path,
        )
    else:
        raise ValueError(f"Unknown PEFT type: {args.peft_type}")
    model = get_peft_model(model, config)
    print_trainable_parameters(model)

    tknz = AutoTokenizer.from_pretrained(args.model_name_or_path)
    tknz.pad_token = tknz.eos_token

    if args.local_rank == 0:
        print(f"[{datetime.datetime.today()}] Loading dataset.")
    dataset = load_dataset("NeelNanda/pile-10k")['train'].select(range(args.data_size))
    dataset = MyDataset(dataset, tknz)

    if args.local_rank == 0:
        print(f"[{datetime.datetime.today()}] Initializing DeepSpeed Engine.")
    trainer = Trainer(
        model=model,
        args=training_args[0],
        data_collator=lambda batch: collate_fn(batch, tknz),
        train_dataset=dataset,
        tokenizer=tknz,
        callbacks=[PrinterCallback()],
    )

    if args.local_rank == 0:
        print(f"[{datetime.datetime.today()}] Entering training loop.")
    trainer.train()


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--local_rank", type=int, default=-1)
    parser.add_argument('--project', type=str, default="my_project")
    parser.add_argument('--name', type=str, default="my_exps")
    parser.add_argument('--data_size', type=int, default=100)
    parser.add_argument('--seq_len', type=int, default=300)
    parser.add_argument('--model_name_or_path', type=str, default="Salesforce/codegen-350M-mono")
    parser.add_argument("--training_args_file", type=str, default="config/training_args.yml")
    parser.add_argument("--peft_type", type=str, default="lora")
    args = parser.parse_args()
    training_args = HfArgumentParser(TrainingArguments).parse_yaml_file(args.training_args_file)
    train()
Did you find a solution for this issue for prompt-tuning?
Unfortunately no...
Did you get an error like

    return self.flatten(align_dense_tensors(tensor_list, alignment))
    RuntimeError: torch.cat(): expected a non-empty list of Tensors

when running prompt-tuning with ZeRO-2? I got the same error when running with ZeRO-3.
Do you have any updates on this issue? @KaiLv69
Nope. I switched to LoRA :)
Same 'word_embeddings' can't-be-found issue with T5ForConditionalGeneration and DeepSpeed ZeRO-3.
@sasaadi @nanyyyyyy @fateme-hshm96 @hepengfe I found that the error comes from the lookup not accounting for ZeRO-3. Here is the solution: https://github.com/OpenLMLab/collie/pull/54/commits/7fcf9317d70f48429bee3936c080b88e2de4f99a In short, when searching for word_embeddings under ZeRO-3, check the shape of p.ds_tensor instead of p.data. :)
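Roughly, the idea looks like this (a sketch of the approach, not the exact commit; it relies on the fact that DeepSpeed ZeRO-3 attaches metadata such as ds_shape, the full un-partitioned shape, to each partitioned parameter):

    def find_word_embeddings(transformer_backbone, vocab_size):
        # PEFT normally matches a parameter's first dimension against vocab_size.
        # Under ZeRO-3, param.shape is torch.Size([0]), so fall back to the full
        # shape that DeepSpeed records on the parameter.
        for name, param in transformer_backbone.named_parameters():
            full_shape = getattr(param, "ds_shape", None) or tuple(param.shape)
            if full_shape[0] == vocab_size:
                return transformer_backbone.get_submodule(name.replace(".weight", ""))
        return None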