TurboTransformers
Does it only support loading TensorFlow models trained with huggingface?
The front page says "we provide a way to load huggingface/transformers pretrained PyTorch and TensorFlow models", but https://github.com/Tencent/TurboTransformers/blob/master/example/python/README_cn.md says "first we need to prepare a BERT model trained with huggingface", and I don't see any example of the TensorFlow path.
- convert your huggingface/tensorflow model to *.npz:

  ```
  python tools/convert_huggingface_bert_tf_to_npz.py bert-base-uncased bert_tf.npz
  ```

- update the corresponding line in cpu_example.py (see the usage sketch after this list):

  ```python
  tt_model = turbo_transformers.BertModelWithPooler.from_npz(
      '/workspace/bert_tf.npz', cfg)
  ```
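For reference, here is a minimal sketch of how the updated cpu_example.py might look once it loads the converted archive. The `BertConfig`, the torch LongTensor input, and the way the model is called mirror the repo's cpu_example.py; the exact call convention and returned outputs are assumptions you should verify against your local copy of the example.

```python
import torch
import transformers
import turbo_transformers

# Config matching the converted checkpoint (defaults correspond to bert-base-uncased).
cfg = transformers.BertConfig()

# Load the npz produced by tools/convert_huggingface_bert_tf_to_npz.py.
tt_model = turbo_transformers.BertModelWithPooler.from_npz(
    '/workspace/bert_tf.npz', cfg)

# turbo_transformers consumes torch LongTensor token ids (assumption: same call
# convention as cpu_example.py; check the returned tuple against your version).
input_ids = torch.tensor([[12166, 10699, 16752, 4454]], dtype=torch.long)
res = tt_model(input_ids)
print(res)
```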
Oh, I see, thanks. So this is really a downstream tool for huggingface/transformers, and even the TF path only supports huggingface's TF models.
That's a fair way to put it. However, we have also converted official TensorFlow models to the npz format; it only requires small changes to the conversion script.
Regarding a script that converts an official TensorFlow model (a keras.Model) to the npz format: since huggingface's TF BERT model is essentially also a keras.Model, the two work on the same principle, so we only ship an example that converts a huggingface TF model to npz. Below is an example that converts an official BERT model downloaded from tensorflow_hub to npz, which you can use as a reference:
```python
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow.keras.models import Model
import numpy as np

# Map TF variable names (from the hub BERT layer) to turbo_transformers / huggingface-style names.
dic = {'bert_model/word_embeddings/embeddings:0': 'embeddings.word_embeddings.weight',
       'bert_model/embedding_postprocessor/type_embeddings:0': 'embeddings.token_type_embeddings.weight',
       'bert_model/embedding_postprocessor/position_embeddings:0': 'embeddings.position_embeddings.weight',
       'bert_model/embedding_postprocessor/layer_norm/gamma:0': 'embeddings.LayerNorm.weight',
       'bert_model/embedding_postprocessor/layer_norm/beta:0': 'embeddings.LayerNorm.bias',
       'bert_model/pooler_transform/kernel:0': 'pooler.dense.weight',
       'bert_model/pooler_transform/bias:0': 'pooler.dense.bias'
       }

num_layers = 12
for i in range(num_layers):
    dic[f'bert_model/encoder/layer_{i}/self_attention/query/kernel:0'] = f'encoder.layer.{i}.attention.self.query.weight'
    dic[f'bert_model/encoder/layer_{i}/self_attention/query/bias:0'] = f'encoder.layer.{i}.attention.self.query.bias'
    dic[f'bert_model/encoder/layer_{i}/self_attention/key/kernel:0'] = f'encoder.layer.{i}.attention.self.key.weight'
    dic[f'bert_model/encoder/layer_{i}/self_attention/key/bias:0'] = f'encoder.layer.{i}.attention.self.key.bias'
    dic[f'bert_model/encoder/layer_{i}/self_attention/value/kernel:0'] = f'encoder.layer.{i}.attention.self.value.weight'
    dic[f'bert_model/encoder/layer_{i}/self_attention/value/bias:0'] = f'encoder.layer.{i}.attention.self.value.bias'
    dic[f'bert_model/encoder/layer_{i}/self_attention_output/kernel:0'] = f'encoder.layer.{i}.attention.output.dense.weight'
    dic[f'bert_model/encoder/layer_{i}/self_attention_output/bias:0'] = f'encoder.layer.{i}.attention.output.dense.bias'
    dic[f'bert_model/encoder/layer_{i}/self_attention_layer_norm/gamma:0'] = f'encoder.layer.{i}.attention.output.LayerNorm.weight'
    dic[f'bert_model/encoder/layer_{i}/self_attention_layer_norm/beta:0'] = f'encoder.layer.{i}.attention.output.LayerNorm.bias'
    dic[f'bert_model/encoder/layer_{i}/intermediate/kernel:0'] = f'encoder.layer.{i}.intermediate.dense.weight'
    dic[f'bert_model/encoder/layer_{i}/intermediate/bias:0'] = f'encoder.layer.{i}.intermediate.dense.bias'
    dic[f'bert_model/encoder/layer_{i}/output/kernel:0'] = f'encoder.layer.{i}.output.dense.weight'
    dic[f'bert_model/encoder/layer_{i}/output/bias:0'] = f'encoder.layer.{i}.output.dense.bias'
    dic[f'bert_model/encoder/layer_{i}/output_layer_norm/gamma:0'] = f'encoder.layer.{i}.output.LayerNorm.weight'
    dic[f'bert_model/encoder/layer_{i}/output_layer_norm/beta:0'] = f'encoder.layer.{i}.output.LayerNorm.bias'

def trans_layer_name_tf2turbo(name):
    return dic[name]

# Build a keras Model around the TF Hub BERT layer so its trainable variables can be enumerated.
max_seq_length = 128  # Your choice here.
input_word_ids = tf.keras.layers.Input(shape=(max_seq_length,), dtype=tf.int32,
                                       name="input_word_ids")
input_mask = tf.keras.layers.Input(shape=(max_seq_length,), dtype=tf.int32,
                                   name="input_mask")
segment_ids = tf.keras.layers.Input(shape=(max_seq_length,), dtype=tf.int32,
                                    name="segment_ids")
bert_layer = hub.KerasLayer("https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/1",
                            trainable=True)
pooled_output, sequence_output = bert_layer([input_word_ids, input_mask, segment_ids])
model = Model(inputs=[input_word_ids, input_mask, segment_ids], outputs=[pooled_output, sequence_output])

# Collect every trainable weight under its turbo_transformers name.
names = [v.name for v in model.trainable_variables]
print(names)
weights = model.trainable_weights
arrays = {}
for i in range(len(names)):
    arrays[trans_layer_name_tf2turbo(names[i])] = weights[i].numpy()

# turbo_transformers stores the attention Q/K/V projections as one fused qkv tensor,
# so concatenate the three kernels (and biases) of each layer.
q_weight_key = 'self.query.weight'
k_weight_key = 'self.key.weight'
v_weight_key = 'self.value.weight'
q_bias_key = 'self.query.bias'
k_bias_key = 'self.key.bias'
v_bias_key = 'self.value.bias'

numpy_dict = {}
for k in arrays.keys():
    if k.endswith(q_weight_key):
        ret = []
        ret.append(arrays[k])
        ret.append(arrays[k[:-len(q_weight_key)] + k_weight_key])
        ret.append(arrays[k[:-len(q_weight_key)] + v_weight_key])
        v = np.concatenate(ret, axis=1)
        numpy_dict[k[:-len(q_weight_key)] + "qkv.weight"] = np.ascontiguousarray(v)
    elif k.endswith(q_bias_key):
        ret = []
        ret.append(arrays[k])
        ret.append(arrays[k[:-len(q_bias_key)] + k_bias_key])
        ret.append(arrays[k[:-len(q_bias_key)] + v_bias_key])
        v = np.ascontiguousarray(np.concatenate(ret, axis=0))
        numpy_dict[k[:-len(q_bias_key)] + 'qkv.bias'] = v
    elif any((k.endswith(suffix) for suffix in (k_weight_key, v_weight_key,
                                                k_bias_key, v_bias_key))):
        # Key/value entries were already folded into the fused qkv tensors above.
        continue
    else:
        numpy_dict[k] = np.ascontiguousarray(arrays[k])

np.savez_compressed('model.npz', **numpy_dict)
```
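Not part of the reply above, but a quick sanity check you could run afterwards: reopen the generated archive with NumPy and confirm that the per-layer query/key/value entries have been replaced by fused qkv tensors before handing model.npz to `from_npz`. The key names below follow directly from the renaming and concatenation logic in the script.

```python
import numpy as np

# Reopen the archive written by np.savez_compressed above.
m = np.load('model.npz')
print(len(m.files), 'tensors saved')
print(m['embeddings.word_embeddings.weight'].shape)
print(m['encoder.layer.0.attention.qkv.weight'].shape)  # Q/K/V kernels concatenated along axis 1
print(m['encoder.layer.0.attention.qkv.bias'].shape)    # Q/K/V biases concatenated along axis 0
```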