trafficstars
Did anyone face this issue?
warnings.warn(
Traceback (most recent call last):
File "test_train.small.gemma.infini.py", line 150, in <module>
trainer.train()
File "/transformers/src/transformers/trainer.py", line 1885, in train
return inner_training_loop(
File "/transformers/src/transformers/trainer.py", line 2216, in _inner_training_loop
tr_loss_step = self.training_step(model, inputs)
File "/transformers/src/transformers/trainer.py", line 3238, in training_step
loss = self.compute_loss(model, inputs)
File "/transformers/src/transformers/trainer.py", line 3264, in compute_loss
outputs = model(**inputs)
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/accelerate/utils/operations.py", line 822, in forward
return model_forward(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/accelerate/utils/operations.py", line 810, in __call__
return convert_to_fp32(self.model_forward(*args, **kwargs))
File "/usr/local/lib/python3.8/dist-packages/torch/amp/autocast_mode.py", line 16, in decorate_autocast
return func(*args, **kwargs)
File "/jupyter_workspace/Raghav/InfiniTransformer/infini_gemma/modeling_infini_gemma.py", line 1613, in forward
outputs = self.model(
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/jupyter_workspace/Raghav/InfiniTransformer/infini_gemma/modeling_infini_gemma.py", line 1371, in forward
layer_outputs = self._gradient_checkpointing_func(
File "/usr/local/lib/python3.8/dist-packages/torch/_compile.py", line 24, in inner
return torch._dynamo.disable(fn, recursive)(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/torch/_dynamo/eval_frame.py", line 451, in _fn
return fn(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/torch/_dynamo/external_utils.py", line 36, in inner
return fn(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/torch/utils/checkpoint.py", line 487, in checkpoint
return CheckpointFunction.apply(function, preserve, *args)
File "/usr/local/lib/python3.8/dist-packages/torch/autograd/function.py", line 598, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "/usr/local/lib/python3.8/dist-packages/torch/utils/checkpoint.py", line 262, in forward
outputs = run_function(*args)
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/jupyter_workspace/Raghav/InfiniTransformer/infini_gemma/modeling_infini_gemma.py", line 1056, in forward
_attended = self.self_attn(
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/jupyter_workspace/Raghav/InfiniTransformer/infini_gemma/modeling_infini_gemma.py", line 868, in forward
query_states, key_states = apply_rotary_pos_emb(
File "/jupyter_workspace/Raghav/InfiniTransformer/infini_gemma/modeling_infini_gemma.py", line 272, in apply_rotary_pos_emb
q_embed = (q * cos) + (rotate_half(q) * sin)
RuntimeError: The size of tensor a (65536) must match the size of tensor b (8192) at non-singleton dimension 2
Hey @Beomi, any clarity on this? It would be deeply appreciated.
I have already run the custom (Type I method) training script to pre-train on a dataset, but I also wanted to fine-tune using the HF framework, and I am getting this error.
嘿,对此有任何澄清吗?将不胜感激。我目前运行自定义(I 型方法)训练脚本以在数据集上进行预训练,但我也想使用 HF 框架进行微调,但出现此错误
+1