PaddleSpeech
PaddleSpeech copied to clipboard
[TTS] Windows 10 CPU:fastspeech2_mix_onnx_0.2.0 执行到 am_sess.run(None, input_feed=am_input_feed) 时不报错,程序直接结束运行
环境:windows10 CPU Core i7
conda create -n audio python=3.9 libuv
conda activate audio
pip install paddlepaddle -i https://mirror.baidu.com/pypi/simple
pip install pytest-runner paddlespeech==1.4.1 或者 paddlespeech==1.4.0
pip install langid==1.1.6 zhconv==1.4.3 transformers==4.31.0 SpeechRecognition==3.10.0
pip install onnx==1.14.1
pip install accelerate sentencepiece protobuf py-cpuinfo
pip install scipy pyaudio wave soundfile
pip install numpy==1.23
模型下载:https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_onnx_0.2.0.zip ;参考代码来自:https://github.com/PaddlePaddle/PaddleSpeech/blob/1dc67f96e0d083adb291589cecb28c9181914a07/paddlespeech/t2s/exps/ort_predict.py#L28
在 Windows CPU 上运行的脚本 test_tts_win.py 如下:执行到 am_sess.run(None, input_feed=am_input_feed) 时没有任何报错信息,脚本 test_tts_win.py 直接结束运行。
def load_tts_model2(model_path, device):
    """Load the fastspeech2_mix ONNX acoustic model and HiFiGAN vocoder and warm them up.

    Builds the mixed-language text frontend and two ONNX Runtime sessions
    (acoustic model and vocoder) from the ``fastspeech2_mix_onnx_0.2.0``
    directory, then runs a few dummy inferences through each so that later
    calls do not pay the first-run cost. Progress and elapsed time are
    printed to stdout.

    Args:
        model_path: Directory prefix that is concatenated directly with
            ``"fastspeech2_mix_onnx_0.2.0/..."``, so it must end with a
            path separator (e.g. ``"./models/"``).
        device: Currently unused; the sessions are always created with
            ``CPUExecutionProvider``.

    Returns:
        None. The frontend and sessions are local to this function and are
        not handed back to the caller.
    """
    print("loading tts fastspeech2_mix---------")
    t4 = time.time()
    cpu_threads = 4
    spk_id = 174
    phones_dict = model_path + "fastspeech2_mix_onnx_0.2.0/phone_id_map.txt"
    am_model_path = model_path + "fastspeech2_mix_onnx_0.2.0/fastspeech2_mix.onnx"
    voc_model_path = model_path + "fastspeech2_mix_onnx_0.2.0/hifigan_csmsc.onnx"
    show_memory_info("before loading tts 1 ")

    tts_frontend = MixFrontend(phone_vocab_path=phones_dict)

    # Both sessions share one CPU-only configuration.
    providers = ['CPUExecutionProvider']
    sess_options = ort.SessionOptions()
    sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
    sess_options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
    sess_options.intra_op_num_threads = cpu_threads
    am_sess = ort.InferenceSession(
        am_model_path, providers=providers, sess_options=sess_options)
    voc_sess = ort.InferenceSession(
        voc_model_path, providers=providers, sess_options=sess_options)
    print("tts fastspeech2_mix load model done! Warmup start----")

    # Frontend warmup: the first call is slow because it loads the frontend's
    # own resources.
    merge_sentences = True
    tts_frontend.get_input_ids(
        "hello, thank you, thank you very much",
        merge_sentences=merge_sentences)
    print("tts fastspeech2_mix load model done! Warmup start am warmup ----")

    # Acoustic-model warmup with random phone ids of a few typical lengths.
    spk_id = [spk_id]
    for T in [27, 38, 54]:
        # Pin the dtype: without it np.random.randint returns the platform C
        # long, which is int32 on Windows with NumPy < 2.0 but int64 on Linux,
        # so the same script feeds a different tensor type per OS.
        # NOTE(review): the exported model presumably declares 'text' as
        # int64 -- confirm with am_sess.get_inputs().
        phone_ids = np.random.randint(1, 266, size=(T,), dtype=np.int64)
        am_input_feed = {'text': phone_ids, 'spk_id': spk_id}
        print(" am warmup 1----")
        # Reported failure site: on Windows the process exited here silently.
        am_sess.run(None, input_feed=am_input_feed)
        print(" am warmup 2----")
    print("tts fastspeech2_mix load model done! Warmup start voc warmup ----")

    # Vocoder warmup with random float32 inputs of shape (T, 80).
    for T in [227, 308, 544]:
        data = np.random.rand(T, 80).astype("float32")
        voc_sess.run(None, input_feed={"logmel": data})
    print("tts warm up done!")

    t5 = time.time()
    print("loading TTS fastspeech2_mix---------Done, cost time(s): ", t5-t4)
# Script tail: these statements run at module level, so the models are loaded
# and warmed up as soon as the file is executed.
# NOTE(review): the banner mentions tacotron2-DDC while the call below loads
# fastspeech2_mix -- presumably left over from another loader; confirm.
print("loading TTS tacotron2-DDC---------")
# "./models/" is used as a raw prefix for the fastspeech2_mix_onnx_0.2.0/ paths,
# hence the trailing slash; "cpu" is passed as the `device` argument.
load_tts_model2("./models/", "cpu")