🐛 Describe the bug

Loading a saved checkpoint into `OPTForCausalLM` during inference fails in `load_state_dict` with missing and unexpected keys. The missing keys cover decoder layers 12–23 plus `project_in`/`project_out`, which suggests the checkpoint was saved from a smaller OPT variant (e.g. 12 decoder layers, no projection layers) than the model being instantiated at inference time — i.e. a model-size mismatch between training and inference configs (to be confirmed). Full traceback below:
Traceback (most recent call last):
File "inference.py", line 59, in &lt;module&gt;
eval(args)
File "inference.py", line 23, in eval
actor.model.load_state_dict(state_dict)
File "/home/rst/ColossalAI/env/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1671, in load_state_dict
raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format(
RuntimeError: Error(s) in loading state_dict for OPTForCausalLM:
Missing key(s) in state_dict: "model.decoder.project_out.weight", "model.decoder.project_in.weight", "model.decoder.layers.12.self_attn.k_proj.weight", "model.decoder.layers.12.self_attn.k_proj.bias", "model.decoder.layers.12.self_attn.v_proj.weight", "model.decoder.layers.12.self_attn.v_proj.bias", "model.decoder.layers.12.self_attn.q_proj.weight", "model.decoder.layers.12.self_attn.q_proj.bias", "model.decoder.layers.12.self_attn.out_proj.weight", "model.decoder.layers.12.self_attn.out_proj.bias", "model.decoder.layers.12.self_attn_layer_norm.weight", "model.decoder.layers.12.self_attn_layer_norm.bias", "model.decoder.layers.12.fc1.weight", "model.decoder.layers.12.fc1.bias", "model.decoder.layers.12.fc2.weight", "model.decoder.layers.12.fc2.bias", "model.decoder.layers.12.final_layer_norm.weight", "model.decoder.layers.12.final_layer_norm.bias", "model.decoder.layers.13.self_attn.k_proj.weight", "model.decoder.layers.13.self_attn.k_proj.bias", "model.decoder.layers.13.self_attn.v_proj.weight", "model.decoder.layers.13.self_attn.v_proj.bias", "model.decoder.layers.13.self_attn.q_proj.weight", "model.decoder.layers.13.self_attn.q_proj.bias", "model.decoder.layers.13.self_attn.out_proj.weight", "model.decoder.layers.13.self_attn.out_proj.bias", "model.decoder.layers.13.self_attn_layer_norm.weight", "model.decoder.layers.13.self_attn_layer_norm.bias", "model.decoder.layers.13.fc1.weight", "model.decoder.layers.13.fc1.bias", "model.decoder.layers.13.fc2.weight", "model.decoder.layers.13.fc2.bias", "model.decoder.layers.13.final_layer_norm.weight", "model.decoder.layers.13.final_layer_norm.bias", "model.decoder.layers.14.self_attn.k_proj.weight", "model.decoder.layers.14.self_attn.k_proj.bias", "model.decoder.layers.14.self_attn.v_proj.weight", "model.decoder.layers.14.self_attn.v_proj.bias", "model.decoder.layers.14.self_attn.q_proj.weight", "model.decoder.layers.14.self_attn.q_proj.bias", "model.decoder.layers.14.self_attn.out_proj.weight", 
"model.decoder.layers.14.self_attn.out_proj.bias", "model.decoder.layers.14.self_attn_layer_norm.weight", "model.decoder.layers.14.self_attn_layer_norm.bias", "model.decoder.layers.14.fc1.weight", "model.decoder.layers.14.fc1.bias", "model.decoder.layers.14.fc2.weight", "model.decoder.layers.14.fc2.bias", "model.decoder.layers.14.final_layer_norm.weight", "model.decoder.layers.14.final_layer_norm.bias", "model.decoder.layers.15.self_attn.k_proj.weight", "model.decoder.layers.15.self_attn.k_proj.bias", "model.decoder.layers.15.self_attn.v_proj.weight", "model.decoder.layers.15.self_attn.v_proj.bias", "model.decoder.layers.15.self_attn.q_proj.weight", "model.decoder.layers.15.self_attn.q_proj.bias", "model.decoder.layers.15.self_attn.out_proj.weight", "model.decoder.layers.15.self_attn.out_proj.bias", "model.decoder.layers.15.self_attn_layer_norm.weight", "model.decoder.layers.15.self_attn_layer_norm.bias", "model.decoder.layers.15.fc1.weight", "model.decoder.layers.15.fc1.bias", "model.decoder.layers.15.fc2.weight", "model.decoder.layers.15.fc2.bias", "model.decoder.layers.15.final_layer_norm.weight", "model.decoder.layers.15.final_layer_norm.bias", "model.decoder.layers.16.self_attn.k_proj.weight", "model.decoder.layers.16.self_attn.k_proj.bias", "model.decoder.layers.16.self_attn.v_proj.weight", "model.decoder.layers.16.self_attn.v_proj.bias", "model.decoder.layers.16.self_attn.q_proj.weight", "model.decoder.layers.16.self_attn.q_proj.bias", "model.decoder.layers.16.self_attn.out_proj.weight", "model.decoder.layers.16.self_attn.out_proj.bias", "model.decoder.layers.16.self_attn_layer_norm.weight", "model.decoder.layers.16.self_attn_layer_norm.bias", "model.decoder.layers.16.fc1.weight", "model.decoder.layers.16.fc1.bias", "model.decoder.layers.16.fc2.weight", "model.decoder.layers.16.fc2.bias", "model.decoder.layers.16.final_layer_norm.weight", "model.decoder.layers.16.final_layer_norm.bias", "model.decoder.layers.17.self_attn.k_proj.weight", 
"model.decoder.layers.17.self_attn.k_proj.bias", "model.decoder.layers.17.self_attn.v_proj.weight", "model.decoder.layers.17.self_attn.v_proj.bias", "model.decoder.layers.17.self_attn.q_proj.weight", "model.decoder.layers.17.self_attn.q_proj.bias", "model.decoder.layers.17.self_attn.out_proj.weight", "model.decoder.layers.17.self_attn.out_proj.bias", "model.decoder.layers.17.self_attn_layer_norm.weight", "model.decoder.layers.17.self_attn_layer_norm.bias", "model.decoder.layers.17.fc1.weight", "model.decoder.layers.17.fc1.bias", "model.decoder.layers.17.fc2.weight", "model.decoder.layers.17.fc2.bias", "model.decoder.layers.17.final_layer_norm.weight", "model.decoder.layers.17.final_layer_norm.bias", "model.decoder.layers.18.self_attn.k_proj.weight", "model.decoder.layers.18.self_attn.k_proj.bias", "model.decoder.layers.18.self_attn.v_proj.weight", "model.decoder.layers.18.self_attn.v_proj.bias", "model.decoder.layers.18.self_attn.q_proj.weight", "model.decoder.layers.18.self_attn.q_proj.bias", "model.decoder.layers.18.self_attn.out_proj.weight", "model.decoder.layers.18.self_attn.out_proj.bias", "model.decoder.layers.18.self_attn_layer_norm.weight", "model.decoder.layers.18.self_attn_layer_norm.bias", "model.decoder.layers.18.fc1.weight", "model.decoder.layers.18.fc1.bias", "model.decoder.layers.18.fc2.weight", "model.decoder.layers.18.fc2.bias", "model.decoder.layers.18.final_layer_norm.weight", "model.decoder.layers.18.final_layer_norm.bias", "model.decoder.layers.19.self_attn.k_proj.weight", "model.decoder.layers.19.self_attn.k_proj.bias", "model.decoder.layers.19.self_attn.v_proj.weight", "model.decoder.layers.19.self_attn.v_proj.bias", "model.decoder.layers.19.self_attn.q_proj.weight", "model.decoder.layers.19.self_attn.q_proj.bias", "model.decoder.layers.19.self_attn.out_proj.weight", "model.decoder.layers.19.self_attn.out_proj.bias", "model.decoder.layers.19.self_attn_layer_norm.weight", "model.decoder.layers.19.self_attn_layer_norm.bias", 
"model.decoder.layers.19.fc1.weight", "model.decoder.layers.19.fc1.bias", "model.decoder.layers.19.fc2.weight", "model.decoder.layers.19.fc2.bias", "model.decoder.layers.19.final_layer_norm.weight", "model.decoder.layers.19.final_layer_norm.bias", "model.decoder.layers.20.self_attn.k_proj.weight", "model.decoder.layers.20.self_attn.k_proj.bias", "model.decoder.layers.20.self_attn.v_proj.weight", "model.decoder.layers.20.self_attn.v_proj.bias", "model.decoder.layers.20.self_attn.q_proj.weight", "model.decoder.layers.20.self_attn.q_proj.bias", "model.decoder.layers.20.self_attn.out_proj.weight", "model.decoder.layers.20.self_attn.out_proj.bias", "model.decoder.layers.20.self_attn_layer_norm.weight", "model.decoder.layers.20.self_attn_layer_norm.bias", "model.decoder.layers.20.fc1.weight", "model.decoder.layers.20.fc1.bias", "model.decoder.layers.20.fc2.weight", "model.decoder.layers.20.fc2.bias", "model.decoder.layers.20.final_layer_norm.weight", "model.decoder.layers.20.final_layer_norm.bias", "model.decoder.layers.21.self_attn.k_proj.weight", "model.decoder.layers.21.self_attn.k_proj.bias", "model.decoder.layers.21.self_attn.v_proj.weight", "model.decoder.layers.21.self_attn.v_proj.bias", "model.decoder.layers.21.self_attn.q_proj.weight", "model.decoder.layers.21.self_attn.q_proj.bias", "model.decoder.layers.21.self_attn.out_proj.weight", "model.decoder.layers.21.self_attn.out_proj.bias", "model.decoder.layers.21.self_attn_layer_norm.weight", "model.decoder.layers.21.self_attn_layer_norm.bias", "model.decoder.layers.21.fc1.weight", "model.decoder.layers.21.fc1.bias", "model.decoder.layers.21.fc2.weight", "model.decoder.layers.21.fc2.bias", "model.decoder.layers.21.final_layer_norm.weight", "model.decoder.layers.21.final_layer_norm.bias", "model.decoder.layers.22.self_attn.k_proj.weight", "model.decoder.layers.22.self_attn.k_proj.bias", "model.decoder.layers.22.self_attn.v_proj.weight", "model.decoder.layers.22.self_attn.v_proj.bias", 
"model.decoder.layers.22.self_attn.q_proj.weight", "model.decoder.layers.22.self_attn.q_proj.bias", "model.decoder.layers.22.self_attn.out_proj.weight", "model.decoder.layers.22.self_attn.out_proj.bias", "model.decoder.layers.22.self_attn_layer_norm.weight", "model.decoder.layers.22.self_attn_layer_norm.bias", "model.decoder.layers.22.fc1.weight", "model.decoder.layers.22.fc1.bias", "model.decoder.layers.22.fc2.weight", "model.decoder.layers.22.fc2.bias", "model.decoder.layers.22.final_layer_norm.weight", "model.decoder.layers.22.final_layer_norm.bias", "model.decoder.layers.23.self_attn.k_proj.weight", "model.decoder.layers.23.self_attn.k_proj.bias", "model.decoder.layers.23.self_attn.v_proj.weight", "model.decoder.layers.23.self_attn.v_proj.bias", "model.decoder.layers.23.self_attn.q_proj.weight", "model.decoder.layers.23.self_attn.q_proj.bias", "model.decoder.layers.23.self_attn.out_proj.weight", "model.decoder.layers.23.self_attn.out_proj.bias", "model.decoder.layers.23.self_attn_layer_norm.weight", "model.decoder.layers.23.self_attn_layer_norm.bias", "model.decoder.layers.23.fc1.weight", "model.decoder.layers.23.fc1.bias", "model.decoder.layers.23.fc2.weight", "model.decoder.layers.23.fc2.bias", "model.decoder.layers.23.final_layer_norm.weight", "model.decoder.layers.23.final_layer_norm.bias".
Unexpected key(s) in state_dict: "model.decoder.final_layer_norm.weight", "model.decoder.final_layer_norm.bias".
Environment: not provided (issue-template field left as "No response").