peft: DeBERTa-v2 models with LoRA do not save weights correctly

DeBERTa-v2 models with LoRA do not save weights correctly

Open albarji opened this issue 1 year ago • 0 comments

Bug description

After training a DeBERTa-v2 model with LoRA, saving it to disk, and loading it back, the loaded model does not produce the same output as the original model for the same input.

Environment

datasets                  2.10.1
peft                      0.2.0
tokenizers                0.11.4
transformers              4.24.0
Python 3.10.9

Script to reproduce

from datasets import load_dataset
from peft import LoraConfig, TaskType, get_peft_model, PeftModel, PeftConfig
import torch
from torch.optim import AdamW
from torch.utils.data import DataLoader
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Checkpoint name shared by the tokenizer and the classification model.
model_name = "microsoft/deberta-v2-xlarge"


def _tokenize_batch(examples):
    """Tokenize a batch of examples, padding/truncating each text to 60 tokens."""
    return tokenizer(
        examples["text"],
        max_length=60,
        padding='max_length',
        truncation=True,
    )


# Prepare data
dataset = load_dataset("tweet_eval", "emotion")
train_labels = dataset["train"]["label"]
# The number of classes is taken from the distinct labels seen in the train split.
num_labels = len(set(train_labels))
tokenizer = AutoTokenizer.from_pretrained(model_name)

tokenized_dataset = dataset.map(_tokenize_batch, batched=True)
tokenized_dataset.set_format(
    type='torch',
    columns=['input_ids', 'attention_mask', 'label'],
)
# Drop the raw text and expose the target under the name "labels".
tokenized_dataset = tokenized_dataset.remove_columns(["text"])
tokenized_dataset = tokenized_dataset.rename_column("label", "labels")

train_dataloader = DataLoader(
    tokenized_dataset["train"],
    shuffle=True,
    batch_size=16,
)

# Prepare model
# NOTE(review): this script is a reproduction of a save/load mismatch, so the
# configuration is kept as reported. Presumably a non-LoRA module of the
# DeBERTa-v2 classification head (e.g. its pooler) is not written to the
# adapter checkpoint -- confirm whether `modules_to_save` must list it.
peft_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=8,
    lora_alpha=8,
    lora_dropout=0.1,
)
base_model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=num_labels,
)
# Wrap the base model with the LoRA adapter, then move it to the GPU.
model = get_peft_model(base_model, peft_config)
model = model.to("cuda")

# Prepare optimizer
optimizer = AdamW(model.parameters(), lr=1e-5)

# Single epoch training loop
# from_pretrained() hands the model back in evaluation mode, so training mode
# must be enabled explicitly; otherwise dropout (including lora_dropout) stays
# disabled for the whole epoch.
model.train()
for batch in train_dataloader:
    # Move every tensor of the batch to the GPU the model lives on.
    batch = {key: tensor.to("cuda") for key, tensor in batch.items()}
    outputs = model(**batch)
    loss = outputs.loss
    loss.backward()
    optimizer.step()
    # Clear gradients so they do not accumulate into the next step.
    optimizer.zero_grad()
    
# Make predictions with trained model
model.eval()
text = "Trying the model here"
inputs = tokenizer(text, return_tensors="pt")
# Only the token ids are fed to the model; they are moved to the GPU first.
input_ids = inputs["input_ids"].to("cuda")
with torch.no_grad():
    preds = model(input_ids=input_ids)
print(f"Predicted tensor: {preds}")

# Save model
adapter_dir = "tmp/lora_model"
model.save_pretrained(adapter_dir)

# Load model back
# The saved PEFT config records which base checkpoint the adapter belongs to.
config = PeftConfig.from_pretrained(adapter_dir)
base_checkpoint = config.base_model_name_or_path
tokenizer = AutoTokenizer.from_pretrained(base_checkpoint)
loadedmodel = AutoModelForSequenceClassification.from_pretrained(
    base_checkpoint,
    num_labels=num_labels,
)
# Attach the saved adapter to the freshly loaded base model, then move to GPU.
loadedmodel = PeftModel.from_pretrained(loadedmodel, adapter_dir)
loadedmodel = loadedmodel.to("cuda")

# Predict with loaded model
loadedmodel.eval()
# Re-use the exact token ids that were fed to the trained model.
loaded_input_ids = inputs["input_ids"].to("cuda")
with torch.no_grad():
    loadedpreds = loadedmodel(input_ids=loaded_input_ids)
print(f"Predicted tensor: {loadedpreds}")

# Assert results are the same
assert torch.allclose(preds.logits, loadedpreds.logits)

The same code works as expected when the model is replaced with roberta-base.

Apr 17 '23 10:04 albarji

peft peft copied to clipboard

DeBERTa-v2 models with LoRA do not save weights correctly

Bug description

Environment

Script to reproduce

peft
peft copied to clipboard