DeBERTa-v2 with LoRA models does not save weights correctly
Bug description
After fine-tuning a DeBERTa-v2 model with LoRA, saving it to disk with save_pretrained, and loading it back with PeftModel.from_pretrained, the loaded model does not produce the same logits as the original in-memory model for the same input, which suggests the trained weights are not saved or restored correctly.
Environment
datasets 2.10.1
peft 0.2.0
tokenizers 0.11.4
transformers 4.24.0
Python 3.10.9
Script to reproduce
from datasets import load_dataset
from peft import LoraConfig, TaskType, get_peft_model, PeftModel, PeftConfig
import torch
from torch.optim import AdamW
from torch.utils.data import DataLoader
from transformers import AutoModelForSequenceClassification, AutoTokenizer

model_name = "microsoft/deberta-v2-xlarge"

# Prepare data
dataset = load_dataset("tweet_eval", "emotion")
num_labels = len(set(dataset["train"]["label"]))
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenized_dataset = dataset.map(
    lambda example: tokenizer(example["text"], max_length=60, padding='max_length', truncation=True),
    batched=True,
)
tokenized_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])
tokenized_dataset = tokenized_dataset.remove_columns(["text"]).rename_column("label", "labels")
train_dataloader = DataLoader(tokenized_dataset["train"], shuffle=True, batch_size=16)

# Prepare model
peft_config = LoraConfig(task_type=TaskType.SEQ_CLS, r=8, lora_alpha=8, lora_dropout=0.1)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)
model = get_peft_model(model, peft_config).to("cuda")

# Prepare optimizer
optimizer = AdamW(params=model.parameters(), lr=1e-5)

# Single-epoch training loop
for batch in train_dataloader:
    for key in batch:
        batch[key] = batch[key].to("cuda")
    outputs = model(**batch)
    loss = outputs.loss
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

# Make predictions with the trained model
model.eval()
text = "Trying the model here"
inputs = tokenizer(text, return_tensors="pt")
with torch.no_grad():
    preds = model(input_ids=inputs["input_ids"].to("cuda"))
print(f"Predicted tensor: {preds}")

# Save model
model.save_pretrained("tmp/lora_model")

# Load model back
config = PeftConfig.from_pretrained("tmp/lora_model")
loadedmodel = AutoModelForSequenceClassification.from_pretrained(config.base_model_name_or_path, num_labels=num_labels)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
loadedmodel = PeftModel.from_pretrained(loadedmodel, "tmp/lora_model").to("cuda")

# Predict with the loaded model
loadedmodel.eval()
with torch.no_grad():
    loadedpreds = loadedmodel(input_ids=inputs["input_ids"].to("cuda"))
print(f"Predicted tensor: {loadedpreds}")

# Assert the results are the same
assert torch.isclose(preds.logits, loadedpreds.logits).all()
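A quick way to narrow this down, reusing the variables from the script above, is to compare what was written to disk with what was actually trained in memory. This is only a suggested check, not part of the original failure: it assumes the adapter weights end up in tmp/lora_model/adapter_model.bin (the default file name this peft version writes, worth confirming by listing the directory), and the suffix matching is a rough heuristic.

import torch

# Diagnostic: compare the tensors written to disk with the parameters that were
# actually trained in memory (reuses `model` from the script above).
saved_state = torch.load("tmp/lora_model/adapter_model.bin", map_location="cpu")
trainable = {n: p.detach().cpu() for n, p in model.named_parameters() if p.requires_grad}

print(f"{len(saved_state)} tensors on disk, {len(trainable)} trainable tensors in memory")

# Trained parameters whose names do not appear (even as a suffix) among the saved
# keys are candidates for weights that were silently dropped while saving.
saved_keys = list(saved_state.keys())
for name in trainable:
    if not any(name.endswith(key) or key.endswith(name) for key in saved_keys):
        print("possibly not saved:", name)

If the LoRA weights line up but the classification head (or DeBERTa-v2's pooler) does not, that would be consistent with the reloaded logits differing.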
The same code works if the base model is substituted with roberta-base.
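Since the only difference is the base model, it may also help to compare which sub-modules peft keeps trainable for the two architectures. The sketch below is just a suggestion (the helper name is mine, and loading deberta-v2-xlarge twice is heavy); diffing the two printed lists should show whether DeBERTa-v2's head is treated differently from RoBERTa's.

from peft import LoraConfig, TaskType, get_peft_model
from transformers import AutoModelForSequenceClassification

def trainable_parameter_names(base_model_name, num_labels=4):
    # Wrap the base model exactly as in the repro script and list what stays trainable.
    config = LoraConfig(task_type=TaskType.SEQ_CLS, r=8, lora_alpha=8, lora_dropout=0.1)
    base = AutoModelForSequenceClassification.from_pretrained(base_model_name, num_labels=num_labels)
    return sorted(n for n, p in get_peft_model(base, config).named_parameters() if p.requires_grad)

# tweet_eval/emotion has 4 labels; adjust num_labels for a different dataset.
for base_model_name in ("microsoft/deberta-v2-xlarge", "roberta-base"):
    print(base_model_name)
    for param_name in trainable_parameter_names(base_model_name):
        print("  ", param_name)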