
Unable to train StableLM using PEFT

Open · aashay96 opened this issue Apr 23, 2023 · 0 comments

I have tried to train StableLM (stabilityai/stablelm-tuned-alpha-3b) with PEFT. I have tried multiple configurations: streaming data, a single file, small batch sizes. It always runs into a GPU out-of-memory error. I am currently using a 48GB RTX A6000.
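The streaming variant of the dataset loading looked roughly like this (a sketch; `streaming=True` only avoids materializing the whole corpus and does not reduce GPU memory during training):

```python
from datasets import load_dataset
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("stabilityai/stablelm-tuned-alpha-3b")

# Stream the corpus instead of downloading/loading it all at once.
streamed = load_dataset(
    "aashay96/indic_language_corpus",
    data_files=["indic_dataset_extracted/data/as/as.txt"],
    streaming=True,
)
streamed = streamed.map(
    lambda samples: tokenizer(samples["text"], truncation=True, max_length=4096),
    batched=True,
)
```

The full non-streaming script that reproduces the error is below.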

```python
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import torch
import torch.nn as nn
import bitsandbytes as bnb
import transformers
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM
from datasets import load_dataset

tokenizer = AutoTokenizer.from_pretrained("stabilityai/stablelm-tuned-alpha-3b")
tokenizer.pad_token = tokenizer.eos_token

data = load_dataset(
    "aashay96/indic_language_corpus",
    data_files=['indic_dataset_extracted/data/as/as.txt'],
)
data = data.map(
    lambda samples: tokenizer(samples['text'], truncation=True, max_length=4096),
    batched=True,
)

model = AutoModelForCausalLM.from_pretrained(
    "stabilityai/stablelm-tuned-alpha-3b",
    load_in_8bit=True,
    device_map='auto',
)

for param in model.parameters():
    param.requires_grad = False  # freeze the model - train adapters later
    if param.ndim == 1:
        # cast the small parameters (e.g. layernorm) to fp32 for stability
        param.data = param.data.to(torch.float32)

model.gradient_checkpointing_enable()  # reduce number of stored activations
model.enable_input_require_grads()


class CastOutputToFloat(nn.Sequential):
    def forward(self, x):
        return super().forward(x).to(torch.float32)


model.embed_out = CastOutputToFloat(model.embed_out)


def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )


from peft import LoraConfig, get_peft_model

config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

model = get_peft_model(model, config)
print_trainable_parameters(model)

#data = load_dataset("Abirate/english_quotes")
#data = data.map(lambda samples: tokenizer(samples['quote']), batched=True)

trainer = transformers.Trainer(
    model=model,
    train_dataset=data['train'],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=4,
        gradient_accumulation_steps=4,
        warmup_steps=100,
        save_steps=1000,
        num_train_epochs=3,
        #max_steps=20000,
        learning_rate=2e-4,
        fp16=True,
        logging_steps=1,
        output_dir='outputs',
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
model.config.use_cache = False  # silence the warnings. Please re-enable for inference!
trainer.train()
```

Error - OutOfMemoryError: CUDA out of memory. Tried to allocate 8.00 GiB (GPU 0; 47.54 GiB total capacity; 29.90 GiB already allocated; 5.18 GiB free; 41.20 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
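Regarding the last suggestion in the error message, the allocator option can be set via an environment variable before CUDA is initialized. A minimal sketch, assuming a 128 MiB split size (just an example value; this only mitigates fragmentation, not the overall memory footprint):

```python
import os

# Must be set before the first CUDA allocation so the allocator picks it up.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"

import torch  # imported after setting the allocator config
```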
