python3.7 ./src/train.py -task abs -mode test_text -model_path ./models/bertext_cnndm_transformer.pt -text_src ./data/src -text_tgt ./data/tgt -test_from ./models/bertext_cnndm_transformer.pt
[2020-04-12 10:08:56,525 INFO] Loading checkpoint from ./models/bertext_cnndm_transformer.pt
Namespace(accum_count=1, alpha=0.6, batch_size=140, beam_size=5, bert_data_path='../bert_data_new/cnndm', beta1=0.9, beta2=0.999, block_trigram=True, dec_dropout=0.2, dec_ff_size=2048, dec_heads=8, dec_hidden_size=768, dec_layers=6, enc_dropout=0.2, enc_ff_size=512, enc_hidden_size=512, enc_layers=6, encoder='bert', ext_dropout=0.2, ext_ff_size=2048, ext_heads=8, ext_hidden_size=768, ext_layers=2, finetune_bert=True, generator_shard_size=32, gpu_ranks=[0], label_smoothing=0.1, large=False, load_from_extractive='', log_file='../logs/cnndm.log', lr=1, lr_bert=0.002, lr_dec=0.002, max_grad_norm=0, max_length=150, max_ndocs_in_batch=6, max_pos=512, max_tgt_len=140, min_length=15, mode='test_text', model_path='./models/bertext_cnndm_transformer.pt', optim='adam', param_init=0, param_init_glorot=True, recall_eval=False, report_every=1, report_rouge=True, result_path='../results/cnndm', save_checkpoint_steps=5, seed=666, sep_optim=False, share_emb=False, task='abs', temp_dir='../temp', test_all=False, test_batch_size=200, test_from='./models/bertext_cnndm_transformer.pt', test_start_from=-1, text_src='./data/src', text_tgt='./data/tgt', train_from='', train_steps=1000, use_bert_emb=False, use_interval=True, visible_gpus='-1', warmup_steps=8000, warmup_steps_bert=8000, warmup_steps_dec=8000, world_size=1)
[2020-04-12 10:08:57,546 INFO] loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-config.json from cache at ../temp/4dad0251492946e18ac39290fcfe91b89d370fee250efe9521476438fe8ca185.8f56353af4a709bf5ff0fbc915d8f5b42bfff892cbb6ac98c3c45f481a03c685
[2020-04-12 10:08:57,546 INFO] Model config {
"architectures": [
"BertForMaskedLM"
],
"attention_probs_dropout_prob": 0.1,
"finetuning_task": null,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.1,
"hidden_size": 768,
"initializer_range": 0.02,
"intermediate_size": 3072,
"layer_norm_eps": 1e-12,
"max_position_embeddings": 512,
"num_attention_heads": 12,
"num_hidden_layers": 12,
"num_labels": 2,
"output_attentions": false,
"output_hidden_states": false,
"pruned_heads": {},
"torchscript": false,
"type_vocab_size": 2,
"vocab_size": 30522
}
[2020-04-12 10:08:57,694 INFO] loading weights file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-pytorch_model.bin from cache at ../temp/aa1ef1aede4482d0dbcd4d52baad8ae300e60902e88fcb0bebdec09afd232066.36ca03ab34a1a5d5fa7bc3d03d55c4fa650fed07220e2eeebc06ce58d0e9a157
Traceback (most recent call last):
  File "./src/train.py", line 140, in <module>
    test_text_abs(args)
  File "/home/yash_watwani/PreSumm-dev/src/train_abstractive.py", line 324, in test_text_abs
    model = AbsSummarizer(args, device, checkpoint)
  File "/home/yash_watwani/PreSumm-dev/src/models/model_builder.py", line 217, in __init__
    self.load_state_dict(checkpoint['model'], strict=True)
  File "/home/yash_watwani/.local/lib/python3.7/site-packages/torch/nn/modules/module.py", line 777, in load_state_dict
    self.__class__.__name__, "\n\t".join(error_msgs)))
RuntimeError: Error(s) in loading state_dict for AbsSummarizer:
Missing key(s) in state_dict: "decoder.embeddings.weight", "decoder.pos_emb.pe",
"decoder.transformer_layers.0.mask", "decoder.transformer_layers.0.self_attn.linear_keys.weight", "decoder.transformer_layers.0.self_attn.linear_keys.bias", "decoder.transformer_layers.0.self_attn.linear_values.weight", "decoder.transformer_layers.0.self_attn.linear_values.bias", "decoder.transformer_layers.0.self_attn.linear_query.weight", "decoder.transformer_layers.0.self_attn.linear_query.bias", "decoder.transformer_layers.0.self_attn.final_linear.weight", "decoder.transformer_layers.0.self_attn.final_linear.bias",
"decoder.transformer_layers.0.context_attn.linear_keys.weight", "decoder.transformer_layers.0.context_attn.linear_keys.bias", "decoder.transformer_layers.0.context_attn.linear_values.weight", "decoder.transformer_layers.0.context_attn.linear_values.bias", "decoder.transformer_layers.0.context_attn.linear_query.weight", "decoder.transformer_layers.0.context_attn.linear_query.bias", "decoder.transformer_layers.0.context_attn.final_linear.weight", "decoder.transformer_layers.0.context_attn.final_linear.bias",
"decoder.transformer_layers.0.feed_forward.w_1.weight", "decoder.transformer_layers.0.feed_forward.w_1.bias", "decoder.transformer_layers.0.feed_forward.w_2.weight", "decoder.transformer_layers.0.feed_forward.w_2.bias", "decoder.transformer_layers.0.feed_forward.layer_norm.weight", "decoder.transformer_layers.0.feed_forward.layer_norm.bias",
"decoder.transformer_layers.0.layer_norm_1.weight", "decoder.transformer_layers.0.layer_norm_1.bias", "decoder.transformer_layers.0.layer_norm_2.weight", "decoder.transformer_layers.0.layer_norm_2.bias",
"decoder.transformer_layers.1.mask", "decoder.transformer_layers.1.self_attn.linear_keys.weight", "decoder.transformer_layers.1.self_attn.linear_keys.bias", "decoder.transformer_layers.1.self_attn.linear_values.weight", "decoder.transformer_layers.1.self_attn.linear_values.bias", "decoder.transformer_layers.1.self_attn.linear_query.weight", "decoder.transformer_layers.1.self_attn.linear_query.bias", "decoder.transformer_layers.1.self_attn.final_linear.weight", "decoder.transformer_layers.1.self_attn.final_linear.bias",
"decoder.transformer_layers.1.context_attn.linear_keys.weight", "decoder.transformer_layers.1.context_attn.linear_keys.bias", "decoder.transformer_layers.1.context_attn.linear_values.weight", "decoder.transformer_layers.1.context_attn.linear_values.bias", "decoder.transformer_layers.1.context_attn.linear_query.weight", "decoder.transformer_layers.1.context_attn.linear_query.bias", "decoder.transformer_layers.1.context_attn.final_linear.weight", "decoder.transformer_layers.1.context_attn.final_linear.bias",
"decoder.transformer_layers.1.feed_forward.w_1.weight", "decoder.transformer_layers.1.feed_forward.w_1.bias", "decoder.transformer_layers.1.feed_forward.w_2.weight", "decoder.transformer_layers.1.feed_forward.w_2.bias", "decoder.transformer_layers.1.feed_forward.layer_norm.weight", "decoder.transformer_layers.1.feed_forward.layer_norm.bias",
"decoder.transformer_layers.1.layer_norm_1.weight", "decoder.transformer_layers.1.layer_norm_1.bias", "decoder.transformer_layers.1.layer_norm_2.weight", "decoder.transformer_layers.1.layer_norm_2.bias",
"decoder.transformer_layers.2.mask", "decoder.transformer_layers.2.self_attn.linear_keys.weight", "decoder.transformer_layers.2.self_attn.linear_keys.bias", "decoder.transformer_layers.2.self_attn.linear_values.weight", "decoder.transformer_layers.2.self_attn.linear_values.bias", "decoder.transformer_layers.2.self_attn.linear_query.weight", "decoder.transformer_layers.2.self_attn.linear_query.bias", "decoder.transformer_layers.2.self_attn.final_linear.weight", "decoder.transformer_layers.2.self_attn.final_linear.bias",
"decoder.transformer_layers.2.context_attn.linear_keys.weight", "decoder.transformer_layers.2.context_attn.linear_keys.bias", "decoder.transformer_layers.2.context_attn.linear_values.weight", "decoder.transformer_layers.2.context_attn.linear_values.bias", "decoder.transformer_layers.2.context_attn.linear_query.weight", "decoder.transformer_layers.2.context_attn.linear_query.bias", "decoder.transformer_layers.2.context_attn.final_linear.weight", "decoder.transformer_layers.2.context_attn.final_linear.bias",
"decoder.transformer_layers.2.feed_forward.w_1.weight", "decoder.transformer_layers.2.feed_forward.w_1.bias", "decoder.transformer_layers.2.feed_forward.w_2.weight", "decoder.transformer_layers.2.feed_forward.w_2.bias", "decoder.transformer_layers.2.feed_forward.layer_norm.weight", "decoder.transformer_layers.2.feed_forward.layer_norm.bias",
"decoder.transformer_layers.2.layer_norm_1.weight", "decoder.transformer_layers.2.layer_norm_1.bias", "decoder.transformer_layers.2.layer_norm_2.weight", "decoder.transformer_layers.2.layer_norm_2.bias",
"decoder.transformer_layers.3.mask", "decoder.transformer_layers.3.self_attn.linear_keys.weight", "decoder.transformer_layers.3.self_attn.linear_keys.bias", "decoder.transformer_layers.3.self_attn.linear_values.weight", "decoder.transformer_layers.3.self_attn.linear_values.bias", "decoder.transformer_layers.3.self_attn.linear_query.weight", "decoder.transformer_layers.3.self_attn.linear_query.bias", "decoder.transformer_layers.3.self_attn.final_linear.weight", "decoder.transformer_layers.3.self_attn.final_linear.bias",
"decoder.transformer_layers.3.context_attn.linear_keys.weight", "decoder.transformer_layers.3.context_attn.linear_keys.bias", "decoder.transformer_layers.3.context_attn.linear_values.weight", "decoder.transformer_layers.3.context_attn.linear_values.bias", "decoder.transformer_layers.3.context_attn.linear_query.weight", "decoder.transformer_layers.3.context_attn.linear_query.bias", "decoder.transformer_layers.3.context_attn.final_linear.weight", "decoder.transformer_layers.3.context_attn.final_linear.bias",
"decoder.transformer_layers.3.feed_forward.w_1.weight", "decoder.transformer_layers.3.feed_forward.w_1.bias", "decoder.transformer_layers.3.feed_forward.w_2.weight", "decoder.transformer_layers.3.feed_forward.w_2.bias", "decoder.transformer_layers.3.feed_forward.layer_norm.weight", "decoder.transformer_layers.3.feed_forward.layer_norm.bias",
"decoder.transformer_layers.3.layer_norm_1.weight", "decoder.transformer_layers.3.layer_norm_1.bias", "decoder.transformer_layers.3.layer_norm_2.weight", "decoder.transformer_layers.3.layer_norm_2.bias",
"decoder.transformer_layers.4.mask", "decoder.transformer_layers.4.self_attn.linear_keys.weight", "decoder.transformer_layers.4.self_attn.linear_keys.bias", "decoder.transformer_layers.4.self_attn.linear_values.weight", "
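
If I am reading the traceback right, the error itself explains the mismatch: bertext_cnndm_transformer.pt is the extractive (BertExt) checkpoint, so its state_dict contains no decoder.* entries, while -task abs builds an AbsSummarizer and loads the checkpoint with strict=True, which then fails on every missing decoder key. A quick way to confirm is to list the top-level sub-modules the checkpoint actually contains. A minimal sketch, assuming only what the traceback shows (the weights live under the checkpoint's 'model' key; the exact prefixes an extractive checkpoint exposes are my assumption):

import torch

# Load the checkpoint on CPU and collect the top-level module prefixes
# present in its state_dict.
ckpt = torch.load('./models/bertext_cnndm_transformer.pt', map_location='cpu')
prefixes = sorted({k.split('.')[0] for k in ckpt['model'].keys()})
print(prefixes)
# An extractive checkpoint should list only encoder-side modules
# (e.g. 'bert' plus an extractive scoring layer; names assumed here)
# and no 'decoder' prefix, which is why the strict load above fails.

Given that, the fix is presumably either to test this checkpoint with the matching task, e.g. something like

python3.7 ./src/train.py -task ext -mode test_text -test_from ./models/bertext_cnndm_transformer.pt -text_src ./data/src -text_tgt ./data/tgt

or to keep -task abs and point -test_from at one of the abstractive (BertSumExtAbs) checkpoints linked in the PreSumm README.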