Titans
[BUG]
🐛 Describe the bug
I wrote the following code, and I think the tensor dimensions are correct. What should I do?

```python
class LlamaMLP(nn.Module):
    def __init__(
        self,
        hidden_size: int,
        intermediate_size: int,
        hidden_act: str,
    ):
        super().__init__()
        # self.gate_proj = nn.Linear(hidden_size, intermediate_size, bias=False)
        self.gate_proj = col_nn.Linear(hidden_size, intermediate_size, dtype=torch.float, bias=False)
        # self.down_proj = nn.Linear(intermediate_size, hidden_size, bias=False)
        self.down_proj = col_nn.Linear(intermediate_size, hidden_size, dtype=torch.float, bias=False)
        # self.up_proj = nn.Linear(hidden_size, intermediate_size, bias=False)
        self.up_proj = col_nn.Linear(hidden_size, intermediate_size, dtype=torch.float, bias=False)
        self.act_fn = ACT2FN[hidden_act]

    def forward(self, x):
        up = self.act_fn(self.gate_proj(x)) * self.up_proj(x)
        down = self.down_proj(up)
        return down
```
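For reference, in Megatron-style 1D tensor parallelism the two halves of an MLP are normally paired explicitly: a column-parallel projection whose output stays split across ranks, feeding a row-parallel projection that consumes the split input and all-reduces the result. Below is a minimal sketch of that pairing, assuming the `Linear1D_Col`/`Linear1D_Row` classes from the `colossalai/nn/layer/parallel_1d/layers.py` file that appears in the traceback; constructor arguments and import paths may differ across Colossal-AI versions, and this is not the reporter's code.

```python
import torch
from torch import nn
from colossalai.nn.layer.parallel_1d import Linear1D_Col, Linear1D_Row


class PairedMLP(nn.Module):
    """Illustrative sketch of an explicitly paired 1D-parallel MLP."""

    def __init__(self, hidden_size: int, intermediate_size: int, act_fn):
        super().__init__()
        # Column-parallel: weight split along the output dim; each rank emits
        # an intermediate_size // world_size slice of the activation.
        self.gate_proj = Linear1D_Col(hidden_size, intermediate_size, bias=False, dtype=torch.float)
        self.up_proj = Linear1D_Col(hidden_size, intermediate_size, bias=False, dtype=torch.float)
        # Row-parallel: weight split along the input dim; consumes the
        # still-split activation and all-reduces the partial sums.
        self.down_proj = Linear1D_Row(intermediate_size, hidden_size, bias=False, dtype=torch.float,
                                      parallel_input=True)
        self.act_fn = act_fn

    def forward(self, x):
        return self.down_proj(self.act_fn(self.gate_proj(x)) * self.up_proj(x))
```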
The output I get is:
```
Traceback (most recent call last):
  File "train.py", line 158, in
    layer_outputs = decoder_layer(
  File "/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1185, in _call_impl
    return forward_call(*input, **kwargs)
  File "/nas-alinlp/butyuhao/GLM/colossal-ai/llama/modeling_llama.py", line 348, in forward
    hidden_states, self_attn_weights, present_key_value = self.self_attn(
  File "/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1185, in _call_impl
    return forward_call(*input, **kwargs)
  File "/nas-alinlp/butyuhao/GLM/colossal-ai/llama/modeling_llama.py", line 218, in forward
    qkv = self.query_key_value(hidden_states)
  File "/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1185, in _call_impl
    return forward_call(*input, **kwargs)
  File "/opt/conda/lib/python3.8/site-packages/colossalai/nn/layer/colossalai_layer/_utils.py", line 41, in forward
    return self.module(*args)
  File "/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1185, in _call_impl
    return forward_call(*input, **kwargs)
  File "/opt/conda/lib/python3.8/site-packages/colossalai/nn/layer/colossalai_layer/_utils.py", line 41, in forward
    return self.module(*args)
  File "/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1185, in _call_impl
    return forward_call(*input, **kwargs)
  File "/opt/conda/lib/python3.8/site-packages/colossalai/nn/layer/parallel_1d/layers.py", line 697, in forward
    assert input.shape[-1] == self.weight.shape[-1],
AssertionError: Invalid shapes in Linear1D_Row forward: input=torch.Size([1, 128, 4096]), weight=torch.Size([12288, 2048]). Expected last dim of input 2048.
ERROR:torch.distributed.elastic.multiprocessing.api:failed (exitcode: 1) local_rank: 0 (pid: 60198) of binary: /opt/conda/bin/python
```
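Decoding the shapes in the assert (my annotation, not from the original report): `Linear1D_Row` stores a weight shard of shape `[out_features, in_features // TENSOR_PARALLEL_SIZE]`, so a fused QKV projection of 4096 → 3 × 4096 = 12288 becomes `[12288, 2048]` on each of the two ranks and expects an input whose last dimension is already split to 2048, while the full 4096-wide hidden states arrive instead. A quick check of that arithmetic:

```python
hidden_size = 4096
world_size = 2            # TENSOR_PARALLEL_SIZE
qkv_factor = 3            # fused query/key/value projection

out_features = qkv_factor * hidden_size       # 12288
in_per_partition = hidden_size // world_size  # 2048

# Matches the weight shard in the error: torch.Size([12288, 2048]).
assert (out_features, in_per_partition) == (12288, 2048)

# The row-parallel layer requires input.shape[-1] == 2048, but the unsplit
# hidden states come in as torch.Size([1, 128, 4096]) -> AssertionError.
```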
Environment
```python
TENSOR_PARALLEL_SIZE = 2
TENSOR_PARALLEL_MODE = '1d'

parallel = dict(
    pipeline=1,
    tensor=dict(mode=TENSOR_PARALLEL_MODE, size=TENSOR_PARALLEL_SIZE),
)

batch_size = 1
```
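For completeness, a hypothetical launch snippet showing how a config like this is usually wired up; the file name and paths are placeholders, and `launch_from_torch` is the launcher from the same Colossal-AI generation as the traceback, so check your version's API:

```python
# config.py holds the dict above; the job is started with e.g.:
#   torchrun --nproc_per_node=2 train.py
import colossalai

colossalai.launch_from_torch(config='./config.py')
```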