peft icon indicating copy to clipboard operation
peft copied to clipboard

DeBERTa-v2 with LoRA models does not save weights correctly

Open albarji opened this issue 1 year ago • 0 comments

Bug description

After training a DeBERTa-v2 model with LoRA, saving the model to disk and loading it back, the loaded model doesn't produce the same output as the original model.

Environment

datasets                  2.10.1
peft                      0.2.0
tokenizers                0.11.4
transformers              4.24.0
Python 3.10.9

Script to reproduce

from datasets import load_dataset
from peft import LoraConfig, TaskType, get_peft_model, PeftModel, PeftConfig
import torch
from torch.optim import AdamW
from torch.utils.data import DataLoader
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Reproduction script: fine-tune a sequence classifier with LoRA for one
# epoch, save the adapter, load it back onto a fresh base model, and check
# that both models produce the same logits for the same input.
model_name = "microsoft/deberta-v2-xlarge"

# Use the GPU when one is available; otherwise fall back to the CPU so the
# script does not crash on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Prepare data
dataset = load_dataset("tweet_eval", "emotion")
num_labels = len(set(dataset["train"]["label"]))
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenized_dataset = dataset.map(
    lambda example: tokenizer(example["text"], max_length=60, padding='max_length', truncation=True),
    batched=True
)
tokenized_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])
tokenized_dataset = tokenized_dataset.remove_columns(["text"]).rename_column("label", "labels")
train_dataloader = DataLoader(tokenized_dataset["train"], shuffle=True, batch_size=16)

# Prepare model
# NOTE(review): the DeBERTa-v2 classification model presumably has a `pooler`
# head that is not part of the pretrained checkpoint and is re-initialised on
# every `from_pretrained` call. If it is not stored with the adapter, that
# would explain the mismatch below -- confirm, and consider
# `modules_to_save=["classifier", "pooler"]` on a PEFT version that honours it.
peft_config = LoraConfig(task_type=TaskType.SEQ_CLS, r=8, lora_alpha=8, lora_dropout=0.1)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)
model = get_peft_model(model, peft_config).to(device)

# Prepare optimizer
optimizer = AdamW(params=model.parameters(), lr=1e-5)

# Single epoch training loop
# `from_pretrained` hands the model back in evaluation mode, so switch to
# training mode explicitly; otherwise dropout is not applied consistently.
model.train()
for batch in train_dataloader:
    batch = {key: value.to(device) for key, value in batch.items()}
    outputs = model(**batch)
    loss = outputs.loss
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

# Make predictions with trained model
model.eval()
text = "Trying the model here"
inputs = tokenizer(text, return_tensors="pt")
# Move the input ids once and reuse the same tensor for both models.
input_ids = inputs["input_ids"].to(device)
with torch.no_grad():
    preds = model(input_ids=input_ids)
    print(f"Predicted tensor: {preds}")

# Save model
model.save_pretrained("tmp/lora_model")

# Load model back
config = PeftConfig.from_pretrained("tmp/lora_model")
loadedmodel = AutoModelForSequenceClassification.from_pretrained(config.base_model_name_or_path, num_labels=num_labels)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
loadedmodel = PeftModel.from_pretrained(loadedmodel, "tmp/lora_model").to(device)

# Predict with loaded model
loadedmodel.eval()
with torch.no_grad():
    loadedpreds = loadedmodel(input_ids=input_ids)
    print(f"Predicted tensor: {loadedpreds}")

# Assert results are the same (this is the check that fails for DeBERTa-v2)
assert torch.allclose(preds.logits, loadedpreds.logits)

The same script passes the final assertion when the model is replaced with roberta-base.

albarji avatar Apr 17 '23 10:04 albarji