I followed the instructions to fine-tune Llama-2-7B by running the command `python qlora.py --model_name_or_path ~/models/llama-2-7b/`, but I got the following error:
Traceback (most recent call last):
File "qlora.py", line 841, in <module>
train()
File "qlora.py", line 803, in train
train_result = trainer.train()
File "/home/lchen/miniconda3/envs/qlora/lib/python3.8/site-packages/transformers/trainer.py", line 1539, in train
return inner_training_loop(
File "/home/lchen/miniconda3/envs/qlora/lib/python3.8/site-packages/transformers/trainer.py", line 1809, in _inner_training_loop
tr_loss_step = self.training_step(model, inputs)
File "/home/lchen/miniconda3/envs/qlora/lib/python3.8/site-packages/transformers/trainer.py", line 2654, in training_step
loss = self.compute_loss(model, inputs)
File "/home/lchen/miniconda3/envs/qlora/lib/python3.8/site-packages/transformers/trainer.py", line 2679, in compute_loss
outputs = model(**inputs)
File "/home/lchen/miniconda3/envs/qlora/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/lchen/miniconda3/envs/qlora/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/home/lchen/miniconda3/envs/qlora/lib/python3.8/site-packages/peft/peft_model.py", line 922, in forward
return self.base_model(
File "/home/lchen/miniconda3/envs/qlora/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/lchen/miniconda3/envs/qlora/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/home/lchen/miniconda3/envs/qlora/lib/python3.8/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/home/lchen/miniconda3/envs/qlora/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py", line 806, in forward
outputs = self.model(
File "/home/lchen/miniconda3/envs/qlora/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/lchen/miniconda3/envs/qlora/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/home/lchen/miniconda3/envs/qlora/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py", line 685, in forward
layer_outputs = torch.utils.checkpoint.checkpoint(
File "/home/lchen/miniconda3/envs/qlora/lib/python3.8/site-packages/torch/_compile.py", line 24, in inner
return torch._dynamo.disable(fn, recursive)(*args, **kwargs)
File "/home/lchen/miniconda3/envs/qlora/lib/python3.8/site-packages/torch/_dynamo/eval_frame.py", line 328, in _fn
return fn(*args, **kwargs)
File "/home/lchen/miniconda3/envs/qlora/lib/python3.8/site-packages/torch/_dynamo/external_utils.py", line 17, in inner
return fn(*args, **kwargs)
File "/home/lchen/miniconda3/envs/qlora/lib/python3.8/site-packages/torch/utils/checkpoint.py", line 451, in checkpoint
return CheckpointFunction.apply(function, preserve, *args)
File "/home/lchen/miniconda3/envs/qlora/lib/python3.8/site-packages/torch/autograd/function.py", line 539, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "/home/lchen/miniconda3/envs/qlora/lib/python3.8/site-packages/torch/utils/checkpoint.py", line 230, in forward
outputs = run_function(*args)
File "/home/lchen/miniconda3/envs/qlora/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py", line 681, in custom_forward
return module(*inputs, output_attentions, None)
File "/home/lchen/miniconda3/envs/qlora/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/lchen/miniconda3/envs/qlora/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/home/lchen/miniconda3/envs/qlora/lib/python3.8/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/home/lchen/miniconda3/envs/qlora/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py", line 408, in forward
hidden_states, self_attn_weights, present_key_value = self.self_attn(
File "/home/lchen/miniconda3/envs/qlora/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/lchen/miniconda3/envs/qlora/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/home/lchen/miniconda3/envs/qlora/lib/python3.8/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/home/lchen/miniconda3/envs/qlora/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py", line 305, in forward
query_states = self.q_proj(hidden_states)
File "/home/lchen/miniconda3/envs/qlora/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/lchen/miniconda3/envs/qlora/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/home/lchen/miniconda3/envs/qlora/lib/python3.8/site-packages/peft/tuners/lora.py", line 1123, in forward
result = super().forward(x)
File "/home/lchen/miniconda3/envs/qlora/lib/python3.8/site-packages/bitsandbytes/nn/modules.py", line 221, in forward
out = bnb.matmul_4bit(x, self.weight.t(), bias=bias, quant_state=self.weight.quant_state)
File "/home/lchen/miniconda3/envs/qlora/lib/python3.8/site-packages/bitsandbytes/autograd/_functions.py", line 570, in matmul_4bit
return MatMul4Bit.apply(A, B, out, bias, quant_state)
File "/home/lchen/miniconda3/envs/qlora/lib/python3.8/site-packages/torch/autograd/function.py", line 539, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "/home/lchen/miniconda3/envs/qlora/lib/python3.8/site-packages/bitsandbytes/autograd/_functions.py", line 515, in forward
output = torch.nn.functional.linear(A, F.dequantize_4bit(B, state).to(A.dtype).t(), bias)
RuntimeError: CUDA error: CUBLAS_STATUS_NOT_INITIALIZED when calling `cublasCreate(handle)`