ms-swift
微调 llama2-7b 报错
请帮忙看一下,非常感谢!使用的脚本如下:
PYTHONPATH=../../.. \
CUDA_VISIBLE_DEVICES=1,2,3,4 \
torchrun \
--nproc_per_node=4 \
--master_port 29500 \
llm_sft.py \
--model_revision master \
--tuner_backend swift \
--template_type llama \
--dtype fp16 \
--output_dir output \
--ddp_backend nccl \
--custom_train_dataset_path /home/zwj/data/LongAlpaca-12k/LongAlpaca-12k-new.json \
--train_dataset_sample -1 \
--num_train_epochs 1 \
--max_length 16384 \
--lora_rank 8 \
--lora_alpha 32 \
--lora_dropout_p 0.05 \
--lora_target_modules AUTO \
--gradient_checkpointing false \
--batch_size 1 \
--weight_decay 0. \
--learning_rate 1e-4 \
--gradient_accumulation_steps $(expr 16 / 4) \
--max_grad_norm 0.5 \
--warmup_ratio 0.03 \
--eval_steps 100 \
--save_steps 100 \
--save_total_limit 2 \
--logging_steps 10 \
--push_to_hub false \
--deepspeed_config_path '/home/zwj/GitHub/swift-main/swift/llm/ds_config/zero2.json' \
--only_save_model true \
--model_cache_dir /home/zwj/models/Llama-2-7b-chat-hf \
--sft_type longlora \
--model_type llama2-7b-chat
报错信息:
Traceback (most recent call last):
File "/home/zwj/GitHub/swift-main/examples/pytorch/llm/llm_sft.py", line 7, in <module>
output = sft_main()
^^^^^^^^^^
File "/home/zwj/GitHub/swift-main/swift/utils/run_utils.py", line 31, in x_main
result = llm_x(args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/GitHub/swift-main/swift/llm/sft.py", line 297, in llm_sft
trainer.train(training_args.resume_from_checkpoint)
File "/home/zwj/GitHub/swift-main/swift/trainers/trainers.py", line 50, in train
super().train(*args, **kwargs)
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/transformers/trainer.py", line 1539, in train
return inner_training_loop(
^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/transformers/trainer.py", line 1869, in _inner_training_loop
tr_loss_step = self.training_step(model, inputs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/transformers/trainer.py", line 2772, in training_step
loss = self.compute_loss(model, inputs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/GitHub/swift-main/swift/trainers/trainers.py", line 215, in compute_loss
outputs = model(**inputs)
^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/deepspeed/utils/nvtx.py", line 15, in wrapped_fn
ret_val = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/deepspeed/runtime/engine.py", line 1852, in forward
loss = self.module(*inputs, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/GitHub/swift-main/swift/tuners/base.py", line 77, in forward
return self.base_model(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py", line 1183, in forward
outputs = self.model(
^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py", line 1070, in forward
layer_outputs = decoder_layer(
^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py", line 798, in forward
hidden_states, self_attn_weights, present_key_value = self.self_attn(
^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/GitHub/swift-main/swift/tuners/longlora/llama.py", line 204, in forward_noflashattn
kv_seq_len += past_key_value[0].shape[-2]
~~~~~~~~~~~~~~^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/transformers/cache_utils.py", line 78, in __getitem__
raise KeyError(f"Cache only has {len(self)} layers, attempted to access layer with index {layer_idx}")
KeyError: 'Cache only has 0 layers, attempted to access layer with index 0'
Traceback (most recent call last):
File "/home/zwj/GitHub/swift-main/examples/pytorch/llm/llm_sft.py", line 7, in <module>
output = sft_main()
^^^^^^^^^^
File "/home/zwj/GitHub/swift-main/swift/utils/run_utils.py", line 31, in x_main
result = llm_x(args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/GitHub/swift-main/swift/llm/sft.py", line 297, in llm_sft
trainer.train(training_args.resume_from_checkpoint)
File "/home/zwj/GitHub/swift-main/swift/trainers/trainers.py", line 50, in train
super().train(*args, **kwargs)
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/transformers/trainer.py", line 1539, in train
return inner_training_loop(
^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/transformers/trainer.py", line 1869, in _inner_training_loop
tr_loss_step = self.training_step(model, inputs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/transformers/trainer.py", line 2772, in training_step
loss = self.compute_loss(model, inputs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/GitHub/swift-main/swift/trainers/trainers.py", line 215, in compute_loss
outputs = model(**inputs)
^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/deepspeed/utils/nvtx.py", line 15, in wrapped_fn
ret_val = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/deepspeed/runtime/engine.py", line 1852, in forward
loss = self.module(*inputs, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/GitHub/swift-main/swift/tuners/base.py", line 77, in forward
return self.base_model(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py", line 1183, in forward
outputs = self.model(
^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py", line 1070, in forward
layer_outputs = decoder_layer(
^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py", line 798, in forward
hidden_states, self_attn_weights, present_key_value = self.self_attn(
^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/GitHub/swift-main/swift/tuners/longlora/llama.py", line 204, in forward_noflashattn
kv_seq_len += past_key_value[0].shape[-2]
~~~~~~~~~~~~~~^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/transformers/cache_utils.py", line 78, in __getitem__
raise KeyError(f"Cache only has {len(self)} layers, attempted to access layer with index {layer_idx}")
KeyError: 'Cache only has 0 layers, attempted to access layer with index 0'
Traceback (most recent call last):
File "/home/zwj/GitHub/swift-main/examples/pytorch/llm/llm_sft.py", line 7, in <module>
output = sft_main()
^^^^^^^^^^
File "/home/zwj/GitHub/swift-main/swift/utils/run_utils.py", line 31, in x_main
result = llm_x(args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/GitHub/swift-main/swift/llm/sft.py", line 297, in llm_sft
trainer.train(training_args.resume_from_checkpoint)
File "/home/zwj/GitHub/swift-main/swift/trainers/trainers.py", line 50, in train
super().train(*args, **kwargs)
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/transformers/trainer.py", line 1539, in train
return inner_training_loop(
^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/transformers/trainer.py", line 1869, in _inner_training_loop
tr_loss_step = self.training_step(model, inputs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/transformers/trainer.py", line 2772, in training_step
loss = self.compute_loss(model, inputs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/GitHub/swift-main/swift/trainers/trainers.py", line 215, in compute_loss
outputs = model(**inputs)
^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/deepspeed/utils/nvtx.py", line 15, in wrapped_fn
ret_val = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/deepspeed/runtime/engine.py", line 1852, in forward
loss = self.module(*inputs, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/GitHub/swift-main/swift/tuners/base.py", line 77, in forward
return self.base_model(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py", line 1183, in forward
outputs = self.model(
^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py", line 1070, in forward
layer_outputs = decoder_layer(
^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py", line 798, in forward
hidden_states, self_attn_weights, present_key_value = self.self_attn(
^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/GitHub/swift-main/swift/tuners/longlora/llama.py", line 204, in forward_noflashattn
kv_seq_len += past_key_value[0].shape[-2]
~~~~~~~~~~~~~~^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/transformers/cache_utils.py", line 78, in __getitem__
raise KeyError(f"Cache only has {len(self)} layers, attempted to access layer with index {layer_idx}")
KeyError: 'Cache only has 0 layers, attempted to access layer with index 0'
Train: 0%| | 0/561 [00:00<?, ?it/s]Traceback (most recent call last):
File "/home/zwj/GitHub/swift-main/examples/pytorch/llm/llm_sft.py", line 7, in <module>
output = sft_main()
^^^^^^^^^^
File "/home/zwj/GitHub/swift-main/swift/utils/run_utils.py", line 31, in x_main
result = llm_x(args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/GitHub/swift-main/swift/llm/sft.py", line 297, in llm_sft
trainer.train(training_args.resume_from_checkpoint)
File "/home/zwj/GitHub/swift-main/swift/trainers/trainers.py", line 50, in train
super().train(*args, **kwargs)
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/transformers/trainer.py", line 1539, in train
return inner_training_loop(
^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/transformers/trainer.py", line 1869, in _inner_training_loop
tr_loss_step = self.training_step(model, inputs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/transformers/trainer.py", line 2772, in training_step
loss = self.compute_loss(model, inputs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/GitHub/swift-main/swift/trainers/trainers.py", line 215, in compute_loss
outputs = model(**inputs)
^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/deepspeed/utils/nvtx.py", line 15, in wrapped_fn
ret_val = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/deepspeed/runtime/engine.py", line 1852, in forward
loss = self.module(*inputs, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/GitHub/swift-main/swift/tuners/base.py", line 77, in forward
return self.base_model(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py", line 1183, in forward
outputs = self.model(
^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py", line 1070, in forward
layer_outputs = decoder_layer(
^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py", line 798, in forward
hidden_states, self_attn_weights, present_key_value = self.self_attn(
^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/zwj/GitHub/swift-main/swift/tuners/longlora/llama.py", line 204, in forward_noflashattn
kv_seq_len += past_key_value[0].shape[-2]
~~~~~~~~~~~~~~^^^
File "/home/zwj/miniconda3/envs/swift/lib/python3.11/site-packages/transformers/cache_utils.py", line 78, in __getitem__
raise KeyError(f"Cache only has {len(self)} layers, attempted to access layer with index {layer_idx}")
KeyError: 'Cache only has 0 layers, attempted to access layer with index 0'
环境信息:
显卡:V100 * 4
操作系统:Ubuntu 18.04
驱动版本:Driver Version: 515.65.01,CUDA Version: 11.7
PyTorch 版本:2.2.0+cu118
微调模型:meta-llama/Llama-2-7b-hf
微调数据集:Yukang/LongAlpaca-12k
微调方法:LongLoRA