My system:
NVIDIA A6000
CUDA 11.8, torch 2.2.0
I encountered the following error:
Traceback (most recent call last):
File "/media/yuganlab/easystore1/XueshenCode/unsloth/fintune.py", line 109, in <module>
trainer_stats = trainer.train()
File "/home/yuganlab/anaconda3/envs/unsloth/lib/python3.10/site-packages/trl/trainer/sft_trainer.py", line 361, in train
output = super().train(*args, **kwargs)
File "/home/yuganlab/anaconda3/envs/unsloth/lib/python3.10/site-packages/transformers/trainer.py", line 1859, in train
return inner_training_loop(
File "<string>", line 361, in _fast_inner_training_loop
File "/home/yuganlab/anaconda3/envs/unsloth/lib/python3.10/site-packages/transformers/trainer.py", line 3138, in training_step
loss = self.compute_loss(model, inputs)
File "/home/yuganlab/anaconda3/envs/unsloth/lib/python3.10/site-packages/transformers/trainer.py", line 3161, in compute_loss
outputs = model(**inputs)
File "/home/yuganlab/anaconda3/envs/unsloth/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/yuganlab/anaconda3/envs/unsloth/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/home/yuganlab/anaconda3/envs/unsloth/lib/python3.10/site-packages/accelerate/utils/operations.py", line 822, in forward
return model_forward(*args, **kwargs)
File "/home/yuganlab/anaconda3/envs/unsloth/lib/python3.10/site-packages/accelerate/utils/operations.py", line 810, in __call__
return convert_to_fp32(self.model_forward(*args, **kwargs))
File "/home/yuganlab/anaconda3/envs/unsloth/lib/python3.10/site-packages/torch/amp/autocast_mode.py", line 16, in decorate_autocast
return func(*args, **kwargs)
File "/home/yuganlab/anaconda3/envs/unsloth/lib/python3.10/site-packages/unsloth/models/llama.py", line 882, in PeftModelForCausalLM_fast_forward
return self.base_model(
File "/home/yuganlab/anaconda3/envs/unsloth/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/yuganlab/anaconda3/envs/unsloth/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/home/yuganlab/anaconda3/envs/unsloth/lib/python3.10/site-packages/peft/tuners/tuners_utils.py", line 161, in forward
return self.model.forward(*args, **kwargs)
File "/home/yuganlab/anaconda3/envs/unsloth/lib/python3.10/site-packages/accelerate/hooks.py", line 166, in new_forward
output = module._old_forward(*args, **kwargs)
File "/home/yuganlab/anaconda3/envs/unsloth/lib/python3.10/site-packages/unsloth/models/llama.py", line 813, in _CausalLM_fast_forward
outputs = self.model(
File "/home/yuganlab/anaconda3/envs/unsloth/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/yuganlab/anaconda3/envs/unsloth/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/home/yuganlab/anaconda3/envs/unsloth/lib/python3.10/site-packages/accelerate/hooks.py", line 166, in new_forward
output = module._old_forward(*args, **kwargs)
File "/home/yuganlab/anaconda3/envs/unsloth/lib/python3.10/site-packages/unsloth/models/llama.py", line 650, in LlamaModel_fast_forward
hidden_states = Unsloth_Offloaded_Gradient_Checkpointer.apply(
File "/home/yuganlab/anaconda3/envs/unsloth/lib/python3.10/site-packages/torch/autograd/function.py", line 553, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "/home/yuganlab/anaconda3/envs/unsloth/lib/python3.10/site-packages/torch/cuda/amp/autocast_mode.py", line 115, in decorate_fwd
return fwd(*args, **kwargs)
File "/home/yuganlab/anaconda3/envs/unsloth/lib/python3.10/site-packages/unsloth/models/_utils.py", line 333, in forward
(output,) = forward_function(hidden_states, *args)
File "/home/yuganlab/anaconda3/envs/unsloth/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/yuganlab/anaconda3/envs/unsloth/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/home/yuganlab/anaconda3/envs/unsloth/lib/python3.10/site-packages/accelerate/hooks.py", line 166, in new_forward
output = module._old_forward(*args, **kwargs)
File "/home/yuganlab/anaconda3/envs/unsloth/lib/python3.10/site-packages/unsloth/models/llama.py", line 432, in LlamaDecoderLayer_fast_forward
hidden_states = fast_rms_layernorm(self.input_layernorm, hidden_states)
File "/home/yuganlab/anaconda3/envs/unsloth/lib/python3.10/site-packages/unsloth/kernels/rms_layernorm.py", line 190, in fast_rms_layernorm
out = Fast_RMS_Layernorm.apply(X, W, eps, gemma)
File "/home/yuganlab/anaconda3/envs/unsloth/lib/python3.10/site-packages/torch/autograd/function.py", line 553, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "/home/yuganlab/anaconda3/envs/unsloth/lib/python3.10/site-packages/unsloth/kernels/rms_layernorm.py", line 144, in forward
fx[(n_rows,)](
File "/home/yuganlab/anaconda3/envs/unsloth/lib/python3.10/site-packages/triton/runtime/jit.py", line 167, in <lambda>
return lambda *args, **kwargs: self.run(grid=grid, warmup=False, *args, **kwargs)
File "/home/yuganlab/anaconda3/envs/unsloth/lib/python3.10/site-packages/triton/runtime/jit.py", line 425, in run
kernel.run(grid_0, grid_1, grid_2, kernel.num_warps, kernel.num_ctas, # number of warps/ctas per instance
File "/home/yuganlab/anaconda3/envs/unsloth/lib/python3.10/site-packages/triton/compiler/compiler.py", line 255, in __getattribute__
self._init_handles()
File "/home/yuganlab/anaconda3/envs/unsloth/lib/python3.10/site-packages/triton/compiler/compiler.py", line 250, in _init_handles
self.module, self.function, self.n_regs, self.n_spills = driver.utils.load_binary(
RuntimeError: Triton Error [CUDA]: device kernel image is invalid
@danielhanchen downgrading Triton to 2.1.0 does not fix this issue in new installs of llamafac; it then returns to the error:
File "<string>", line 65, in _rms_layernorm_forward
ValueError: Pointer argument (at 2) cannot be accessed from Triton (cpu tensor?)
edit:
in case anyone else finds this too, https://github.com/unslothai/unsloth/pull/415 is related, just apply the patch on latest torch
edit2:
Sadly, it still reverts back to the above issue once you install triton==2.1.0.
edit3:
seems to only affect multi-gpu
Is there a solution for this? I'm still having this issue even after downgrading to triton==2.1.0.