alpaca-lora
LoRA for text classification
I am trying to perform sequence classification on text with the LLaMA 7B model using LoRA training. I have 2 classes. My finetune.py, which is built on PeftModelForSequenceClassification rather than PeftModelForCausalLM, trains without problems. But in my test script (adapted from generate.py), I cannot get the embedding of the tokenized inputs.
The error is:

Exception: Please convert all Tensors to FakeTensors first or instantiate FakeTensorMode with 'allow_non_fake_inputs'. Found in aten.embedding.default(*(Parameter containing: tensor([[ 9.8884e-05, -2.3329e-04, 5.8460e-04, ..., -3.4237e-04, 5.9724e-05, -1.1957e-04], [ 1.5289e-02, -1.2154e-02, 1.2512e-02, ..., 1.3092e-02, 7.2174e-03, -6.8045e-04], [ 1.7433e-03, 1.7633e-03, -1.4465e-02, ..., -1.1444e-02, -1.2665e-02, 3.7289e-04], ..., [-9.0179e-03, 3.0807e-02, -1.6708e-02, ..., -1.2680e-02, 1.0437e-02, 4.2343e-03], [-1.1368e-02, -1.4801e-02, -3.5667e-03, ..., 6.5308e-03, -2.2263e-02, -6.1455e-03], [-1.3992e-02, 1.6985e-03, -2.1469e-02, ..., 1.3527e-02, 2.8290e-02, -8.9111e-03]], device='cuda:0', dtype=torch.float16), FakeTensor(FakeTensor(..., device='meta', size=(1, 111), dtype=torch.int64), cuda:0), 31999), **{})
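For context, my finetune.py sets the model up roughly like this (a simplified sketch, not the exact script; the LoRA settings are read off the model print in the log below, and lora_alpha is a guess on my part):

import torch
from transformers import LlamaForSequenceClassification
from peft import LoraConfig, TaskType, get_peft_model

# Simplified sketch of the training-side setup (not the exact finetune.py).
model = LlamaForSequenceClassification.from_pretrained(
    "/home/fyli/pretrain/llama-7b-hf",
    num_labels=2,                  # binary classification
    load_in_8bit=True,
    torch_dtype=torch.float16,
    device_map="auto",
)
lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,    # yields PeftModelForSequenceClassification
    r=8,
    lora_alpha=16,                 # assumption; alpha is not visible in the model print
    lora_dropout=0.05,
    target_modules=["q_proj", "v_proj"],
)
# The 'score' classification head is kept trainable via modules_to_save,
# which matches the ModulesToSaveWrapper in the model print below.
model = get_peft_model(model, lora_config)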
Any help would be greatly appreciated. Thanks!!!
My test code (test2.py) is as follows:
import os
import sys

import fire
import torch
from datasets import load_dataset
from peft import PeftModel
from transformers import LlamaForSequenceClassification, LlamaTokenizer

device = "cuda" if torch.cuda.is_available() else "cpu"


def main(
    load_8bit: bool = True,
    base_model: str = "/home/fyli/pretrain/llama-7b-hf",
    lora_weights: str = "/home/fyli/alpaca-lora/trained_weight",
    prompt_template: str = "",  # the prompt template to use; defaults to alpaca
    save_file: str = "./test_results/rel-heter.csv",
    data_path: str = "/home/fyli/datasets/machamp/rel-heter/test.json",
    output_info: bool = False,
):
    base_model = base_model or os.environ.get("BASE_MODEL", "")
    assert (
        base_model
    ), "Please specify a --base_model, e.g. --base_model='huggyllama/llama-7b'"

    tokenizer = LlamaTokenizer.from_pretrained(base_model)

    if device == "cuda":
        # Load the 8-bit base model with a classification head, then attach the LoRA weights.
        model = LlamaForSequenceClassification.from_pretrained(
            base_model,
            load_in_8bit=load_8bit,
            torch_dtype=torch.float16,
            device_map="auto",
        )
        model = PeftModel.from_pretrained(
            model,
            lora_weights,
            torch_dtype=torch.float16,
        )
    else:
        assert False, "CUDA is not available."

    print("[INFO]model: " + str(model) + "[INFO]\n")

    # unwind broken decapoda-research config
    model.config.pad_token_id = tokenizer.pad_token_id = 0  # unk
    model.config.bos_token_id = 1
    model.config.eos_token_id = 2

    if not load_8bit:
        model.half()  # seems to fix bugs for some users
    model.eval()
    if torch.__version__ >= "2" and sys.platform != "win32":
        model = torch.compile(model)

    def evaluate(
        instruction,
        input=None,
        **kwargs,
    ):
        full_prompt = input + instruction
        # inputs = tokenizer(prompt, return_tensors="pt")
        tokenizer = LlamaTokenizer.from_pretrained(base_model)
        tokenizer_full_prompt = tokenizer(
            full_prompt,
            truncation=True,
            max_length=256,
            padding=False,
            return_tensors="pt",
        )
        input_ids = tokenizer_full_prompt["input_ids"]
        print("\n[INFO]: " + str(input_ids) + str(input_ids.dtype))
        with torch.no_grad():
            # This forward call is where the FakeTensor exception is raised.
            output = model(**tokenizer_full_prompt.to(device))
            # output = model.forward(
            #     # input_ids=input_ids,
            #     inputs_embeds=inputs_embeds,
            #     attention_mask=tokenizer_full_prompt["attention_mask"],
            #     return_dict=True,
            # )
        print("\n[INFO]output: " + str(output))

    Test = load_dataset("json", data_files=data_path)
    y_pred = []
    y_true = []
    for test in Test["train"]:
        gen = evaluate(test["instruction"], test["input"])
        print("\n[INFO] gen: " + str(gen))


if __name__ == "__main__":
    fire.Fire(main)
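For what it's worth, once the forward pass succeeds I plan to turn the classifier output into a label roughly like this (a sketch only; it assumes evaluate() is changed to return the model output, which the code above does not do yet, and the gold-label field name is hypothetical):

import torch

# Sketch: map the 2-class logits to a predicted label. `output` is the
# SequenceClassifierOutputWithPast returned by LlamaForSequenceClassification.
def to_label(output) -> int:
    return int(torch.argmax(output.logits, dim=-1).item())

# inside the loop over Test["train"]:
# y_pred.append(to_label(evaluate(test["instruction"], test["input"])))
# y_true.append(int(test["output"]))  # hypothetical field name for the gold label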
The full log output is:

[INFO]Embedding: 32000 4096 -1
[INFO] inited_weight: Parameter containing: tensor(..., device='meta', size=(32000, 4096), dtype=torch.float32, requires_grad=True)torch.float32
[INFO] embedding.dtype: torch.float32
[INFO] Llama.init.weight torch.float32
[INFO] SC.init.weight torch.float32
Loading checkpoint shards: 100%|██████████| 33/33 [00:08<00:00, 3.88it/s]
Some weights of the model checkpoint at /home/fyli/pretrain/llama-7b-hf were not used when initializing LlamaForSequenceClassification: ['lm_head.weight']
- This IS expected if you are initializing LlamaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LlamaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at /home/fyli/pretrain/llama-7b-hf and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[INFO]model: PeftModelForSequenceClassification(
  (base_model): LoraModel(
    (model): LlamaForSequenceClassification(
      (model): LlamaModel(
        (embed_tokens): Embedding(32000, 4096, padding_idx=31999)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): Linear8bitLt(
                in_features=4096, out_features=4096, bias=False
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (k_proj): Linear8bitLt(in_features=4096, out_features=4096, bias=False)
              (v_proj): Linear8bitLt(
                in_features=4096, out_features=4096, bias=False
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (o_proj): Linear8bitLt(in_features=4096, out_features=4096, bias=False)
              (rotary_emb): LlamaRotaryEmbedding()
            )
            (mlp): LlamaMLP(
              (gate_proj): Linear8bitLt(in_features=4096, out_features=11008, bias=False)
              (down_proj): Linear8bitLt(in_features=11008, out_features=4096, bias=False)
              (up_proj): Linear8bitLt(in_features=4096, out_features=11008, bias=False)
              (act_fn): SiLUActivation()
            )
            (input_layernorm): LlamaRMSNorm()
            (post_attention_layernorm): LlamaRMSNorm()
          )
        )
        (norm): LlamaRMSNorm()
      )
      (score): ModulesToSaveWrapper(
        (original_module): Linear(in_features=4096, out_features=2, bias=False)
        (modules_to_save): ModuleDict(
          (default): Linear(in_features=4096, out_features=2, bias=False)
        )
      )
    )
  )
)[INFO]
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. The tokenizer class you load from this checkpoint is 'LLaMATokenizer'. The class this function is called from is 'LlamaTokenizer'.
[INFO]: tensor([[ 0, 10969, 319, 338, 3611, 29901, 260, 5059, 1709, 29889, 3211, 29901, 289, 1236, 5860, 929, 364, 29881, 29889, 472, 29880, 6949, 29889, 9008, 29901, 29871, 29946, 29900, 29946, 29914, 29941, 29945, 29896, 448, 29929, 29945, 29941, 29941, 29889, 7663, 29901, 285, 4615, 29871, 29929, 29906, 29889, 869, 10969, 350, 338, 28915, 29901, 29871, 29941, 29896, 29955, 29906, 1236, 5860, 929, 364, 29881, 29889, 452, 29889, 4272, 29901, 472, 29880, 6949, 29889, 9008, 29901, 29871, 29946, 29900, 29946, 29899, 29906, 29941, 29955, 29899, 29955, 29953, 29900, 29896, 29889, 1134, 29901, 4538, 414, 29889, 770, 29901, 29871, 29955, 29896, 29929, 29889, 869, 13, 17506, 10969, 319, 322, 10969, 350, 278, 1021, 29973]])torch.int64
[INFO]PeftSC.forward() is called.
[INFO]PeftSC return base_model.
[INFO]SC.forward() is called.
[INFO] weight torch.float16
[INFO]Llama.forward() is called.
[INFO] weight torch.float16
[INFO] weight torch.float16
[INFO]batch_size is set (1, 111), torch.Size([1, 111])
[INFO] embedding is called.Parameter containing:
tensor([[ 9.8884e-05, -2.3329e-04, 5.8460e-04, ..., -3.4237e-04,
5.9724e-05, -1.1957e-04],
[ 1.5289e-02, -1.2154e-02, 1.2512e-02, ..., 1.3092e-02,
7.2174e-03, -6.8045e-04],
[ 1.7433e-03, 1.7633e-03, -1.4465e-02, ..., -1.1444e-02,
-1.2665e-02, 3.7289e-04],
...,
[-9.0179e-03, 3.0807e-02, -1.6708e-02, ..., -1.2680e-02,
1.0437e-02, 4.2343e-03],
[-1.1368e-02, -1.4801e-02, -3.5667e-03, ..., 6.5308e-03,
-2.2263e-02, -6.1455e-03],
[-1.3992e-02, 1.6985e-03, -2.1469e-02, ..., 1.3527e-02,
2.8290e-02, -8.9111e-03]], device='cuda:0', dtype=torch.float16)torch.float16, FakeTensor(FakeTensor(..., device='meta', size=(1, 111), dtype=torch.int64), cuda:0)torch.int64, 31999, False, False
Traceback (most recent call last):
File "/home/fyli/anaconda3/envs/lora3/lib/python3.9/site-packages/torch/_dynamo/utils.py", line 1199, in run_node
return nnmodule(*args, **kwargs)
File "/home/fyli/anaconda3/envs/lora3/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/fyli/anaconda3/envs/lora3/lib/python3.9/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/home/fyli/anaconda3/envs/lora3/lib/python3.9/site-packages/torch/nn/modules/sparse.py", line 168, in forward
return F.embedding(
File "/home/fyli/anaconda3/envs/lora3/lib/python3.9/site-packages/torch/nn/functional.py", line 2211, in embedding
return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
File "/home/fyli/anaconda3/envs/lora3/lib/python3.9/site-packages/torch/utils/_stats.py", line 20, in wrapper
return fn(*args, **kwargs)
File "/home/fyli/anaconda3/envs/lora3/lib/python3.9/site-packages/torch/_subclasses/fake_tensor.py", line 987, in torch_dispatch
return self.dispatch(func, types, args, kwargs)
File "/home/fyli/anaconda3/envs/lora3/lib/python3.9/site-packages/torch/_subclasses/fake_tensor.py", line 1066, in dispatch
args, kwargs = self.validate_and_convert_non_fake_tensors(
File "/home/fyli/anaconda3/envs/lora3/lib/python3.9/site-packages/torch/_subclasses/fake_tensor.py", line 1220, in validate_and_convert_non_fake_tensors
return tree_map_only(
File "/home/fyli/anaconda3/envs/lora3/lib/python3.9/site-packages/torch/utils/_pytree.py", line 266, in tree_map_only
return tree_map(map_only(ty)(fn), pytree)
File "/home/fyli/anaconda3/envs/lora3/lib/python3.9/site-packages/torch/utils/_pytree.py", line 196, in tree_map
return tree_unflatten([fn(i) for i in flat_args], spec)
File "/home/fyli/anaconda3/envs/lora3/lib/python3.9/site-packages/torch/utils/_pytree.py", line 196, in
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/fyli/anaconda3/envs/lora3/lib/python3.9/site-packages/torch/_dynamo/utils.py", line 1152, in get_fake_value
return wrap_fake_exception(
File "/home/fyli/anaconda3/envs/lora3/lib/python3.9/site-packages/torch/_dynamo/utils.py", line 808, in wrap_fake_exception
return fn()
File "/home/fyli/anaconda3/envs/lora3/lib/python3.9/site-packages/torch/_dynamo/utils.py", line 1153, in
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/fyli/alpaca-lora/test2.py", line 202, in
from user code:
File "/home/fyli/anaconda3/envs/lora3/lib/python3.9/site-packages/transformers/models/llama/modeling_llama.py", line 541, in
Set torch._dynamo.config.verbose=True for more information
You can suppress this exception and fall back to eager by setting: torch._dynamo.config.suppress_errors = True
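For completeness, the last lines of the traceback point at a fallback. A minimal sketch of the two things I understand them to suggest (this only sidesteps torch.compile for the 8-bit PEFT model; it does not explain the FakeTensor error itself):

# Option 1: simply skip torch.compile for this model, i.e. comment out
#     model = torch.compile(model)
# in the script above and run the model eagerly.

# Option 2: keep torch.compile but let TorchDynamo fall back to eager on errors,
# as the last line of the traceback suggests.
import torch._dynamo

torch._dynamo.config.suppress_errors = True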