Can't use LoRA
🐛 Describe the bug
ValueError: Target modules {'v_proj', 'up_proj', 'o_proj', 'down_proj', 'k_proj', 'q_proj', 'gate_proj'} not found in the base model. Please check the target modules and try again.
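For reference, a quick way to list the module names a checkpoint actually exposes, so they can be compared against target_modules (a minimal diagnostic sketch, assuming ai2-olmo/hf_olmo is installed as in the script below):

from hf_olmo import *  # registers the old-style OLMo Auto* classes
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("allenai/OLMo-7B-SFT", trust_remote_code=True)
# PEFT matches target_modules against the suffixes of these dotted module names
print(sorted({name.split(".")[-1] for name, _ in model.named_modules()}))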
Here is the training script:
from hf_olmo import * # registers the Auto* classes
import os
from dataclasses import dataclass, field
from typing import Optional
from datasets.arrow_dataset import Dataset
import torch
from datasets import load_dataset
from peft import LoraConfig
from peft import AutoPeftModelForCausalLM
import wandb
import pandas as pd
from peft import get_peft_model
wandb.login()  # API key redacted; wandb reads WANDB_API_KEY from the environment
attn_implementation = "eager"
# torch_dtype = torch.float16
from transformers import (
    AutoTokenizer,
    HfArgumentParser,
    TrainingArguments,
    BitsAndBytesConfig,
)
from datasets import DatasetDict, concatenate_datasets
from trl import SFTTrainer
torch.manual_seed(42)
@dataclass
class ScriptArguments:
    local_rank: Optional[int] = field(default=-1, metadata={"help": "Used for multi-gpu"})
    per_device_train_batch_size: Optional[int] = field(default=1)
    per_device_eval_batch_size: Optional[int] = field(default=1)
    gradient_accumulation_steps: Optional[int] = field(default=32)
    learning_rate: Optional[float] = field(default=5e-6)
    max_grad_norm: Optional[float] = field(default=0.3)
    weight_decay: Optional[float] = field(default=0.01)
    lora_alpha: Optional[int] = field(default=32)
    lora_dropout: Optional[float] = field(default=0.1)
    lora_r: Optional[int] = field(default=16)
    max_seq_length: Optional[int] = field(default=2048)
    model_name: Optional[str] = field(default="allenai/OLMo-7B-SFT", metadata={"help": "The model to train."})
    dataset_name: Optional[str] = field(default="allenai/ultrafeedback_binarized_cleaned", metadata={"help": "The dataset to use."})
    use_4bit: Optional[bool] = field(default=True, metadata={"help": "Activate 4bit precision base model loading"})
    use_nested_quant: Optional[bool] = field(default=False, metadata={"help": "Activate nested quantization for 4bit base models"})
    bnb_4bit_compute_dtype: Optional[str] = field(default="float16", metadata={"help": "Compute dtype for 4bit base models"})
    bnb_4bit_quant_type: Optional[str] = field(default="nf4", metadata={"help": "Quantization type fp4 or nf4"})
    num_train_epochs: Optional[int] = field(default=1, metadata={"help": "The number of training epochs."})
    fp16: Optional[bool] = field(default=False, metadata={"help": "Enables fp16 training."})
    bf16: Optional[bool] = field(default=True, metadata={"help": "Enables bf16 training."})
    packing: Optional[bool] = field(default=True, metadata={"help": "Use packing when creating the dataset."})
    gradient_checkpointing: Optional[bool] = field(default=False, metadata={"help": "Enables gradient checkpointing."})
    optim: Optional[str] = field(default="paged_adamw_8bit", metadata={"help": "The optimizer to use."})
    lr_scheduler_type: str = field(default="linear", metadata={"help": "Learning rate schedule."})
    max_steps: int = field(default=10000000000, metadata={"help": "How many optimizer update steps to take"})
    warmup_steps: int = field(default=10, metadata={"help": "# of steps to do a warmup for"})
    group_by_length: bool = field(default=True, metadata={"help": "Group sequences into batches with same length."})
    save_steps: int = field(default=1000, metadata={"help": "Save checkpoint every X update steps."})
    logging_steps: int = field(default=5, metadata={"help": "Log every X update steps."})
    merge_and_push: Optional[bool] = field(default=False, metadata={"help": "Merge and push weights after training"})
    output_dir: str = field(default="./easy_align_results", metadata={"help": "The output directory."})
parser = HfArgumentParser(ScriptArguments)
script_args = parser.parse_args_into_dataclasses()[0]
tokenizer = AutoTokenizer.from_pretrained(script_args.model_name, trust_remote_code=True)
# Load the datasets
train_dataset = load_dataset(script_args.dataset_name, split='train_prefs')
test_dataset = load_dataset(script_args.dataset_name, split='test_prefs')
# Define the filtering function
def filter_examples(example):
    combined_text = example['prompt']
    for message in example['chosen']:
        combined_text += message['content']
    for message in example['rejected']:
        combined_text += message['content']
    tokens = tokenizer.encode(combined_text)
    return len(tokens) < script_args.max_seq_length
# Filter and shuffle the train dataset
filtered_train_dataset = train_dataset.filter(filter_examples, num_proc=16)
shuffled_train_dataset = filtered_train_dataset.shuffle(seed=42)
# Filter and shuffle the test dataset
filtered_test_dataset = test_dataset.filter(filter_examples, num_proc=16)
shuffled_test_dataset = filtered_test_dataset.shuffle(seed=42)
# Select a subset for training and testing
train_dataset = filtered_train_dataset
# test_dataset = shuffled_test_dataset
test_dataset = shuffled_test_dataset.select(range(100))
# Format chat template
def format_chat_template(example):
    formatted_examples = []
    for key in ['chosen', 'rejected']:
        formatted_example = ""
        for message in example[key]:
            role = message['role']
            content = message['content']
            if role == 'user':
                if key == 'chosen':
                    formatted_example += f"<|{role}|>\n<dddc>{content}\n"
                else:
                    formatted_example += f"<|{role}|>\n<dddr>{content}\n"
            else:
                formatted_example += f"<|{role}|>\n{content}\n"
        formatted_examples.append(formatted_example)
    return {'text': formatted_examples}
# Format and prepare train and test datasets
formatted_train_data = []
formatted_test_data = []
for example in train_dataset:
    formatted_examples = format_chat_template(example)
    formatted_train_data.extend(formatted_examples['text'])
for example in test_dataset:
    formatted_examples = format_chat_template(example)
    formatted_test_data.extend(formatted_examples['text'])
formatted_train_dataset = Dataset.from_dict({'text': formatted_train_data})
formatted_test_dataset = Dataset.from_dict({'text': formatted_test_data})
print(f"Number of examples in the train set: {len(formatted_train_dataset)}")
print(f"Number of examples in the test set: {len(formatted_test_dataset)}")
from transformers import AutoModelForCausalLM
def create_and_prepare_model(args):
    compute_dtype = getattr(torch, args.bnb_4bit_compute_dtype)
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type=args.bnb_4bit_quant_type,
        bnb_4bit_compute_dtype=compute_dtype,
        bnb_4bit_use_double_quant=True,
    )
    if compute_dtype == torch.float16 and args.use_4bit:
        major, _ = torch.cuda.get_device_capability()
        if major >= 8:
            print("=" * 80)
            print("Your GPU supports bfloat16, you can accelerate training with the argument --bf16")
            print("=" * 80)
    peft_config = LoraConfig(
        r=args.lora_r,
        lora_alpha=args.lora_alpha,
        lora_dropout=args.lora_dropout,
        bias="none",
        task_type="CAUSAL_LM",
        # target_modules=['up_proj', 'down_proj', 'gate_proj', 'k_proj', 'q_proj', 'v_proj', 'o_proj']
        # target_modules=["att_proj", "attn_out", "ff_proj", "ff_out"]
        target_modules=["q_proj", "o_proj", "v_proj", "k_proj", "gate_proj", "up_proj", "down_proj"]
    )
    # Note: bnb_config is built above but not passed here, so the model loads unquantized
    model = AutoModelForCausalLM.from_pretrained(args.model_name).to('cuda:0')
    lora_model = get_peft_model(model, peft_config)
    # model = AutoPeftModelForCausalLM.from_pretrained(
    #     args.model_name,
    #     quantization_config=bnb_config,
    #     config=peft_config,
    #     # use_auth_token=True,
    #     # attn_implementation=attn_implementation
    # )
    tokenizer = AutoTokenizer.from_pretrained(args.model_name, trust_remote_code=True)
    tokenizer.pad_token = tokenizer.eos_token
    return lora_model, peft_config, tokenizer
training_arguments = TrainingArguments(
    num_train_epochs=script_args.num_train_epochs,
    output_dir=script_args.output_dir,
    per_device_train_batch_size=script_args.per_device_train_batch_size,
    gradient_accumulation_steps=script_args.gradient_accumulation_steps,
    optim=script_args.optim,
    save_total_limit=1,  # Keep only the best model checkpoint
    logging_steps=script_args.logging_steps,
    learning_rate=script_args.learning_rate,
    fp16=script_args.fp16,
    bf16=script_args.bf16,
    evaluation_strategy="steps",
    eval_steps=1000,
    warmup_steps=script_args.warmup_steps,
    group_by_length=script_args.group_by_length,
    lr_scheduler_type=script_args.lr_scheduler_type,
    report_to='wandb',
    # gradient_checkpointing=script_args.gradient_checkpointing,
    # deepspeed='./config.json',
    save_strategy="steps",
    # load_best_model_at_end=True,
    metric_for_best_model="eval_loss",  # Specify the evaluation metric here
    greater_is_better=False,  # Set to True if higher metric values are better
)
model, peft_config, tokenizer = create_and_prepare_model(script_args)
tokenizer.padding_side = "right"
trainer = SFTTrainer(
    model=model,
    train_dataset=formatted_train_dataset,
    eval_dataset=formatted_test_dataset,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=script_args.max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=script_args.packing,
)
trainer.train()
# https://huggingface.co/docs/peft/en/tutorial/peft_model_config
model.save_pretrained("ez/olmo-sft-ez")
# if script_args.merge_and_push:
#     output_dir = os.path.join(script_args.output_dir, "final_checkpoints")
#     trainer.model.save_pretrained(output_dir)
#     del model
#     torch.cuda.empty_cache()
#     model = AutoPeftModelForCausalLM.from_pretrained(output_dir, device_map="auto", torch_dtype=torch.bfloat16)
#     model = model.merge_and_unload()
#     output_merged_dir = os.path.join(script_args.output_dir, "final_merged_checkpoint")
#     model.save_pretrained(output_merged_dir, safe_serialization=True)
Versions
Python 3.8.10
absl-py==1.4.0
accelerate==0.30.1
ai2-olmo==0.3.0
aiohttp==3.9.5
aiosignal==1.3.1
annotated-types==0.6.0
antlr4-python3-runtime==4.9.3
array-record==0.4.0
astunparse==1.6.3
async-timeout==4.0.3
attrs==23.2.0
Babel==2.15.0
bitsandbytes==0.42.0
boto3==1.34.109
botocore==1.34.109
cached-path==1.6.2
cachetools==5.3.3
certifi==2019.11.28
chardet==3.0.4
click==8.1.7
colorama==0.4.6
datasets==2.19.1
dbus-python==1.2.16
deepspeed==0.14.2
dill==0.3.8
dm-tree==0.1.8
docker-pycreds==0.4.0
docstring-parser==0.16
einops==0.8.0
et-xmlfile==1.1.0
etils==1.3.0
eval-type-backport==0.2.0
filelock==3.13.4
flash-attn==2.5.9.post1
flatbuffers==24.3.25
frozenlist==1.4.1
fsspec==2024.3.1
future==1.0.0
gast==0.4.0
gin-config==0.5.0
gitdb==4.0.11
GitPython==3.1.43
google-api-core==2.19.0
google-auth==2.29.0
google-auth-oauthlib==1.0.0
google-cloud-core==2.4.1
google-cloud-storage==2.16.0
google-crc32c==1.5.0
google-pasta==0.2.0
google-resumable-media==2.7.0
googleapis-common-protos==1.63.0
grpcio==1.64.0
h5py==3.11.0
hjson==3.1.0
huggingface==0.0.1
huggingface-cli==0.1
huggingface-hub==0.23.0
idna==2.8
importlib-metadata==7.1.0
importlib-resources==6.4.0
jinja2==3.1.4
jmespath==1.0.1
joblib==1.4.2
keras==2.13.1
libclang==18.1.1
lxml==5.2.2
Markdown==3.6
markdown-it-py==3.0.0
MarkupSafe==2.1.5
mdurl==0.1.2
mesh-tensorflow==0.1.21
mpmath==1.3.0
multidict==6.0.5
multiprocess==0.70.16
networkx==3.1
ninja==1.11.1.1
nltk==3.8.1
numpy==1.24.3
nvidia-cublas-cu12==12.1.3.1
nvidia-cuda-cupti-cu12==12.1.105
nvidia-cuda-nvrtc-cu12==12.1.105
nvidia-cuda-runtime-cu12==12.1.105
nvidia-cudnn-cu12==8.9.2.26
nvidia-cufft-cu12==11.0.2.54
nvidia-curand-cu12==10.3.2.106
nvidia-cusolver-cu12==11.4.5.107
nvidia-cusparse-cu12==12.1.0.106
nvidia-nccl-cu12==2.20.5
nvidia-nvjitlink-cu12==12.4.127
nvidia-nvtx-cu12==12.1.105
oauthlib==3.2.2
omegaconf==2.3.0
openai==0.10.2
openpyxl==3.1.2
opt-einsum==3.3.0
packaging==24.0
pandas==1.3.2
pandas-stubs==2.0.3.230814
peft==0.11.1
pillow==10.3.0
platformdirs==4.2.2
portalocker==2.8.2
promise==2.3
proto-plus==1.23.0
protobuf==3.20.3
psutil==5.9.8
py-cpuinfo==9.0.0
pyarrow==16.1.0
pyarrow-hotfix==0.6
pyasn1==0.6.0
pyasn1-modules==0.4.0
pydantic==2.7.1
pydantic-core==2.18.2
pygments==2.18.0
PyGObject==3.36.0
pynvml==11.5.0
python-apt==2.0.1+ubuntu0.20.4.1
python-dateutil==2.9.0.post0
pytz==2024.1
PyYAML==6.0.1
regex==2024.5.15
requests==2.22.0
requests-oauthlib==2.0.0
requests-unixsocket==0.2.0
rich==13.7.1
rouge-score==0.1.2
rsa==4.9
s3transfer==0.10.1
sacrebleu==2.4.2
safetensors==0.4.3
scikit-learn==1.3.2
scipy==1.10.1
sentencepiece==0.2.0
sentry-sdk==2.2.0
setproctitle==1.3.3
shtab==1.7.1
six==1.14.0
smmap==5.0.1
sympy==1.12
t5==0.7.1
tabulate==0.9.0
tensorboard==2.13.0
tensorboard-data-server==0.7.2
tensorflow==2.13.1
tensorflow-datasets==4.9.2
tensorflow-estimator==2.13.0
tensorflow-hub==0.16.1
tensorflow-io-gcs-filesystem==0.34.0
tensorflow-metadata==1.14.0
tensorflow-text==2.13.0
termcolor==2.4.0
tf-keras==2.15.1
tfds-nightly==4.9.2.dev202308090034
threadpoolctl==3.5.0
tokenizers==0.15.2
toml==0.10.2
torch==2.3.1
torchvision==0.18.1
tqdm==4.66.4
transformers==4.38.0
triton==2.3.1
trl==0.8.6
types-pytz==2024.1.0.20240417
typing-extensions==4.11.0
tyro==0.8.4
tzdata==2024.1
urllib3==2.2.1
wandb==0.17.0
werkzeug==3.0.3
wrapt==1.16.0
xxhash==3.4.1
yarl==1.9.4
zipp==3.18.2
Consider trying allenai/OLMo-7B-Instruct-hf instead. If that doesn't help, could you please provide a minimal repro so that it's easier to figure out what went wrong?
I'm looking to run an experiment with my own alignment method and compare it against the instruct model, so I need to start with the SFT model. The script I provided above should be enough to reproduce. Thanks.
In that case, allenai/OLMo-7B-SFT-hf may be what you want
Ah interesting. What's the difference?
The -hf versions work with the version of OLMo that we integrated into the transformers library. After we integrated OLMo into the transformers library, the old-style checkpoints no longer work with AutoModelForCausalLM.
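If it helps, here's a quick way to confirm the difference (a sketch; requires transformers >= 4.40, which includes the native OLMo implementation):

from transformers import AutoModelForCausalLM

# The -hf checkpoint loads with the transformers-native OLMo implementation
model = AutoModelForCausalLM.from_pretrained("allenai/OLMo-7B-SFT-hf")
names = {n.split(".")[-1] for n, _ in model.named_modules()}
# The q_proj/k_proj/v_proj/o_proj and gate_proj/up_proj/down_proj names your
# LoraConfig targets should all be present; this should print True
print({"q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"} <= names)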
More details about the distinction between -hf and non -hf are in https://github.com/allenai/OLMo/blob/main/docs/Checkpoints.md
We missed the SFT model when we updated the HF repo docs with new instructions for transformers >=v4.40.0. I've updated that page now.
OK, thanks. I think this would be useful to others if it were mentioned on the non-hf model pages. (Maybe it already is and I missed it.)
Hi, thanks again for the inquiry! We’re currently working on closing out old tickets, so we’re closing this out for now, but if you require a follow-up response, please re-open and we will get back to you!