CUDA_VISIBLE_DEVICES=0,1,2,3 \
swift sft \
    --neftune_noise_alpha "5" \
    --model_id_or_path "AI-ModelScope/llava-v1.6-mistral-7b" \
    --template_type "llava-mistral-instruct" \
    --custom_train_dataset_path xxx.json \
    --custom_val_dataset_path xxx.json \
    --dataset_test_ratio "0.2" \
    --save_steps "50" \
    --lora_target_modules q_proj k_proj v_proj \
    --batch_size "32" \
    --learning_rate "1e-4" \
    --num_train_epochs "15" \
    --gradient_accumulation_steps "16" \
    --eval_batch_size "32" \
    --use_flash_attn "True" \
    --add_output_dir_suffix False \
    --output_dir finetune_output \
    --logging_dir finetune_output \
    --max_length -1 \
    --train_dataset_sample -1
Traceback (most recent call last):
File "/home/jianc/miniconda3/envs/benchmark-llm/lib/python3.10/site-packages/swift/cli/sft.py", line 5, in
sft_main()
File "/home/jianc/miniconda3/envs/benchmark-llm/lib/python3.10/site-packages/swift/utils/run_utils.py", line 31, in x_main
result = llm_x(args, **kwargs)
File "/home/jianc/miniconda3/envs/benchmark-llm/lib/python3.10/site-packages/swift/llm/sft.py", line 261, in llm_sft
trainer.train(training_args.resume_from_checkpoint)
File "/home/jianc/miniconda3/envs/benchmark-llm/lib/python3.10/site-packages/swift/trainers/trainers.py", line 54, in train
res = super().train(*args, **kwargs)
File "/home/jianc/miniconda3/envs/benchmark-llm/lib/python3.10/site-packages/transformers/trainer.py", line 1780, in train
return inner_training_loop(
File "/home/jianc/miniconda3/envs/benchmark-llm/lib/python3.10/site-packages/transformers/trainer.py", line 2118, in _inner_training_loop
tr_loss_step = self.training_step(model, inputs)
File "/home/jianc/miniconda3/envs/benchmark-llm/lib/python3.10/site-packages/transformers/trainer.py", line 3036, in training_step
loss = self.compute_loss(model, inputs)
File "/home/jianc/miniconda3/envs/benchmark-llm/lib/python3.10/site-packages/swift/trainers/trainers.py", line 220, in compute_loss
outputs = model(**inputs)
File "/home/jianc/miniconda3/envs/benchmark-llm/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/jianc/miniconda3/envs/benchmark-llm/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/home/jianc/miniconda3/envs/benchmark-llm/lib/python3.10/site-packages/accelerate/utils/operations.py", line 825, in forward
return model_forward(*args, **kwargs)
File "/home/jianc/miniconda3/envs/benchmark-llm/lib/python3.10/site-packages/accelerate/utils/operations.py", line 813, in call
return convert_to_fp32(self.model_forward(*args, **kwargs))
File "/home/jianc/miniconda3/envs/benchmark-llm/lib/python3.10/site-packages/torch/amp/autocast_mode.py", line 16, in decorate_autocast
return func(*args, **kwargs)
File "/home/jianc/miniconda3/envs/benchmark-llm/lib/python3.10/site-packages/peft/peft_model.py", line 1129, in forward
return self.base_model(
File "/home/jianc/miniconda3/envs/benchmark-llm/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/jianc/miniconda3/envs/benchmark-llm/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/home/jianc/miniconda3/envs/benchmark-llm/lib/python3.10/site-packages/peft/tuners/tuners_utils.py", line 161, in forward
return self.model.forward(*args, **kwargs)
File "/home/jianc/miniconda3/envs/benchmark-llm/lib/python3.10/site-packages/accelerate/hooks.py", line 166, in new_forward
output = module._old_forward(*args, **kwargs)
File "/home/jianc/.cache/modelscope/hub/_github/LLaVA.git/llava/model/language_model/llava_mistral.py", line 81, in forward
) = self.prepare_inputs_labels_for_multimodal(
File "/home/jianc/.cache/modelscope/hub/_github/LLaVA.git/llava/model/llava_arch.py", line 251, in prepare_inputs_labels_for_multimodal
cur_input_embeds = self.get_model().embed_tokens(torch.cat(cur_input_ids_noim))
File "/home/jianc/miniconda3/envs/benchmark-llm/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/jianc/miniconda3/envs/benchmark-llm/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1574, in _call_impl
hook_result = hook(self, args, result)
File "/home/jianc/miniconda3/envs/benchmark-llm/lib/python3.10/site-packages/transformers/trainer_utils.py", line 128, in neftune_post_forward_hook
dims = torch.tensor(output.size(1) * output.size(2))
IndexError: Dimension out of range (expected to be in range of [-2, 1], but got 2)
I have set max_length to -1
The transformers implementation of neftune_noise_alpha does not support multimodal models; use --neftune_backend swift instead.
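For context on why it crashes: transformers' neftune_post_forward_hook assumes the embedding output is (batch, seq_len, hidden) and reads output.size(2), but LLaVA's prepare_inputs_labels_for_multimodal embeds a flat 1-D id tensor (torch.cat(cur_input_ids_noim)), so the hook sees a 2-D output. A minimal standalone sketch of the mismatch (plain PyTorch, not swift/LLaVA code):

import torch
import torch.nn as nn

embed = nn.Embedding(100, 16)

# Batched text-only input: (batch, seq_len) -> (batch, seq_len, hidden)
out = embed(torch.randint(0, 100, (2, 8)))
print(out.size(1) * out.size(2))  # 128 -- the hook's "dims" computation works

# Multimodal path: a flat 1-D id tensor -> (seq_len, hidden), only 2 dims
out = embed(torch.randint(0, 100, (8,)))
out.size(2)  # IndexError: Dimension out of range (expected to be in range of [-2, 1], but got 2)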
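Concretely, either switch the NEFTune backend or drop the flag; a sketch of the two options, with the remaining flags kept exactly as in the script above:

# Option 1: keep NEFTune, routed through swift's own implementation
swift sft \
    --neftune_noise_alpha "5" \
    --neftune_backend swift \
    ...

# Option 2: remove --neftune_noise_alpha entirely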
It worked after I deleted --neftune_noise_alpha from the .sh script.