LLaMA-Factory
LLaMA-Factory copied to clipboard
AttributeError: 'Qwen2Attention' object has no attribute 'max_position_embeddings'
Reminder
- [X] I have read the README and searched the existing issues.
System Info
python3.10
-e git+https://github.com/hiyouga/LLaMA-Factory.git@bdde35fd2e4a919c1d63ebfc9a0ea8ba0c97e14c#egg=llamafactory
==((====))== Unsloth 2024.8: Fast Qwen2 patching. Transformers = 4.45.0.dev0.
\\ /| GPU: NVIDIA L4. Max memory: 22.161 GB. Platform = Linux.
O^O/ \_/ \ Pytorch: 2.3.0+cu121. CUDA = 8.9. CUDA Toolkit = 12.1.
\ / Bfloat16 = TRUE. FA [Xformers = 0.0.26.post1. FA2 = True]
"-____-" Free Apache license: http://github.com/unslothai/unsloth
Reproduction
I used the following config:
{
"model_name_or_path": "Qwen/Qwen2-1.5B-Instruct",
"use_unsloth": True,
"stage": "sft",
"do_train": True,
"finetuning_type": "full",
"lora_target": "all",
"use_galore": True,
# "enable_liger_kernel": True,
"galore_layerwise": True,
"galore_target": "mlp,self_attn",
"galore_rank": 128,
"galore_scale": 2.0,
"dataset": "identity,alpaca_en_demo",
"template": "llama3",
"cutoff_len": 1024,
"max_samples": 1000,
"overwrite_cache": True,
"preprocessing_num_workers": 4,
"output_dir": "saves/llama3-8b/lora/sft",
"logging_steps": 10,
"save_steps": 500,
"plot_loss": True,
"overwrite_output_dir": True,
"per_device_train_batch_size": 1,
# "gradient_accumulation_steps": 2,
"learning_rate": 0.0001,
"num_train_epochs": 3,
"lr_scheduler_type": "cosine",
"warmup_ratio": 0.1,
"bf16": True,
"ddp_timeout": 180000000,
"val_size": 0.1,
"per_device_eval_batch_size": 1,
"eval_strategy": "steps",
"eval_steps": 500
}
But I got the following error:
Traceback (most recent call last):
File "/usr/local/bin/llamafactory-cli", line 8, in <module>
sys.exit(main())
File "/workspace/LLaMA-Factory/src/llamafactory/cli.py", line 111, in main
run_exp()
File "/workspace/LLaMA-Factory/src/llamafactory/train/tuner.py", line 50, in run_exp
run_sft(model_args, data_args, training_args, finetuning_args, generating_args, callbacks)
File "/workspace/LLaMA-Factory/src/llamafactory/train/sft/workflow.py", line 48, in run_sft
model = load_model(tokenizer, model_args, finetuning_args, training_args.do_train)
File "/workspace/LLaMA-Factory/src/llamafactory/model/loader.py", line 145, in load_model
model = load_unsloth_pretrained_model(config, model_args)
File "/workspace/LLaMA-Factory/src/llamafactory/model/model_utils/unsloth.py", line 57, in load_unsloth_pretrained_model
model, _ = FastLanguageModel.from_pretrained(**unsloth_kwargs)
File "/usr/local/lib/python3.10/dist-packages/unsloth/models/loader.py", line 301, in from_pretrained
model, tokenizer = dispatch_model.from_pretrained(
File "/usr/local/lib/python3.10/dist-packages/unsloth/models/qwen2.py", line 87, in from_pretrained
return FastLlamaModel.from_pretrained(
File "/usr/local/lib/python3.10/dist-packages/unsloth/models/llama.py", line 1580, in from_pretrained
model = AutoModelForCausalLM.from_pretrained(
File "/usr/local/lib/python3.10/dist-packages/transformers/models/auto/auto_factory.py", line 557, in from_pretrained
return model_class.from_pretrained(
File "/usr/local/lib/python3.10/dist-packages/transformers/modeling_utils.py", line 3848, in from_pretrained
model = cls(config, *model_args, **model_kwargs)
File "/usr/local/lib/python3.10/dist-packages/transformers/models/qwen2/modeling_qwen2.py", line 1080, in __init__
self.model = Qwen2Model(config)
File "/usr/local/lib/python3.10/dist-packages/transformers/models/qwen2/modeling_qwen2.py", line 868, in __init__
[Qwen2DecoderLayer(config, layer_idx) for layer_idx in range(config.num_hidden_layers)]
File "/usr/local/lib/python3.10/dist-packages/transformers/models/qwen2/modeling_qwen2.py", line 868, in <listcomp>
[Qwen2DecoderLayer(config, layer_idx) for layer_idx in range(config.num_hidden_layers)]
File "/usr/local/lib/python3.10/dist-packages/transformers/models/qwen2/modeling_qwen2.py", line 655, in __init__
self.self_attn = QWEN2_ATTENTION_CLASSES[config._attn_implementation](config, layer_idx)
File "<string>", line 39, in Qwen2Attention__init__
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1709, in __getattr__
raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")
AttributeError: 'Qwen2Attention' object has no attribute 'max_position_embeddings'
Expected behavior
No response
Others
No response
https://github.com/huggingface/transformers/pull/33550