Custom datasets
Summary
Bug fix for running custom datasets with accelerate. Adds support for using a preprocessing function from the module src/sparseml/transformers/utils/preprocessing_functions.py, selected by a string argument in the parser.
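The preprocessing function is registered under a name so it can be selected from the command line. A minimal sketch of such a registration, assuming the RegistryMixin decorator pattern from sparsezoo (the prompt template here is illustrative, not necessarily the exact evolcodealpaca format):

# src/sparseml/transformers/utils/preprocessing_functions.py (sketch)
from typing import Dict

from sparsezoo.utils.registry import RegistryMixin


class PreprocessingFunctionRegistry(RegistryMixin):
    pass


@PreprocessingFunctionRegistry.register()
def evolved_codealpaca_dataset(data: Dict) -> Dict:
    # Collapse an instruction/output pair into the single "text" column
    # that --text_column points the trainer at.
    prompt = "[Instruction]:\n{instruction}\n\n[Response]:".format(
        instruction=data["instruction"]
    )
    data["text"] = prompt + data["output"]
    return data

The launch script below selects this function with --preprocessing_func and the resulting column with --text_column.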
#!/bin/bash
# Dense finetuning of Llama-2-7b on a custom JSON dataset with accelerate + FSDP.
export CUDA_VISIBLE_DEVICES="0,1,2,3"
# Process count derived from the visible devices (overridden to 1 below).
NPROC=$(($(echo $CUDA_VISIBLE_DEVICES | grep -o "," | wc -l)+1))
NPROC=1
ROOT=/home/george/sparseml/
# Custom dataset: local JSON files loaded through the Hugging Face "json" loader.
DATASET=json
DATASET_PATH=/network/abhinav/datasets/evolcodealpaca
DATASET_CONFIG_NAME="default"
WORKERS=16
MAX_LEN=1024
SRC_MODEL=/network/abhinav/llama/models/Llama-2-7b-hf
# Short model name (used in run/output names) derived from the model path.
SRC_MODEL_NAME=$(basename $SRC_MODEL)
RECIPE_DIR=/network/george/recipes #$ROOT/recipes/llama2_code
TEXT_COLUMN="text"
FSDP_CONFIG=$RECIPE_DIR/fsdp.yaml
LR=5e-5
WARM=0.1
EPOCHS=2
ID=$RANDOM
BATCH=16
GRAD_ACC=1
DST_MODEL_DIR=$HOME/models/llama2_coding/dense_finetuned
DST_MODEL_NAME=$SRC_MODEL_NAME@$DATASET@LR$LR@WARM$WARM@BS$BATCH@GrA$GRAD_ACC@EP$EPOCHS@ID$ID
DST_MODEL=$DST_MODEL_DIR/$DST_MODEL_NAME
# Name of the preprocessing function registered in preprocessing_functions.py.
PREPROCESSING_FUNC="evolved_codealpaca_dataset"
# Launch distributed training through accelerate using the FSDP config above.
accelerate launch \
--config_file $FSDP_CONFIG \
--no_python sparseml.transformers.text_generation.train \
--fsdp full_shard \
--fsdp_config $RECIPE_DIR/extra_fsdp.json \
--model $SRC_MODEL \
--dataset $DATASET \
--dataset_path $DATASET_PATH \
--dataset_config_name $DATASET_CONFIG_NAME \
--preprocessing_num_workers $WORKERS \
--max_seq_length $MAX_LEN \
--learning_rate $LR \
--warmup_ratio $WARM \
--per_device_train_batch_size $BATCH \
--gradient_accumulation_steps $GRAD_ACC \
--output_dir $DST_MODEL \
--num_train_epochs $EPOCHS \
--report_to wandb \
--run_name $DST_MODEL_NAME \
--precision bfloat16 \
--bf16 True \
--text_column $TEXT_COLUMN \
--preprocessing_func $PREPROCESSING_FUNC
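At train time, the string passed to --preprocessing_func is resolved against the registry and mapped over the raw dataset before tokenization. A rough sketch of that lookup, assuming RegistryMixin's name-based accessor (the helper below is hypothetical, not the exact sparseml call site):

# Hypothetical resolution of --preprocessing_func; confirm the accessor
# name against the RegistryMixin in your installed sparsezoo version.
from typing import Callable, Dict, Optional

from sparseml.transformers.utils.preprocessing_functions import (
    PreprocessingFunctionRegistry,
)


def resolve_preprocessing_func(name: Optional[str]) -> Optional[Callable]:
    """Look up a registered preprocessing function by its string name."""
    if name is None:
        return None
    return PreprocessingFunctionRegistry.get_value_from_registry(name=name)


# e.g. applied per-sample with datasets.Dataset.map:
#   func = resolve_preprocessing_func("evolved_codealpaca_dataset")
#   raw_dataset = raw_dataset.map(func)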