MockingBird
MockingBird copied to clipboard
models/synthesizer/preprocess_audio.py bug
def preprocess_general(speaker_dir, out_dir: Path, skip_existing: bool, hparams, dict_info, no_alignments: bool, encoder_model_fpath: Path):
    """Process every audio file found in ``speaker_dir`` and collect metadata.

    For each file matched by the glob patterns below, the transcript is looked
    up in ``dict_info`` (first by the file name up to its first ".", then by
    the full file name). Files without a transcript are reported and skipped.
    The remaining files are passed through ``_split_on_silences`` and
    ``_process_utterance``.

    ``no_alignments`` is accepted but not used in this function body.

    Returns a list of tuples
    ``(wav_fpath_name, mel_fpath_name, embed_fpath_name, len(wav), mel_frames, text)``,
    one per utterance for which ``_process_utterance`` returned a result.
    """
    metadata = []
    # NOTE(review): ".wav" and ".flac" contain no wildcard, so glob() would only
    # match files literally named ".wav" / ".flac"; probably meant "*.wav" and
    # "*.flac" -- confirm against the real source file.
    extensions = (".wav", ".flac", "*.mp3")
    for extension in extensions:
        wav_fpath_list = speaker_dir.glob(extension)
        # Iterate over each wav
        for wav_fpath in wav_fpath_list:
            words = dict_info.get(wav_fpath.name.split(".")[0])
            if not words:
                words = dict_info.get(wav_fpath.name)  # try with extension
                if not words:
                    print(f"No word found in dict_info for {wav_fpath.name}, skip it")
                    continue
            # The base name keeps the source extension, e.g. "001242.wav_00".
            sub_basename = "%s_%02d" % (wav_fpath.name, 0)
            mel_fpath = out_dir.joinpath("mels", f"mel-{sub_basename}.npy")
            # NOTE(review): this rebinds the loop variable wav_fpath to the
            # output .npy path, so _split_on_silences below receives the output
            # path instead of the source audio file found by glob().
            wav_fpath = out_dir.joinpath("audio", f"audio-{sub_basename}.npy")
            # Skip work only when both output files are already present.
            if skip_existing and mel_fpath.exists() and wav_fpath.exists():
                continue
            wav, text = _split_on_silences(wav_fpath, words, hparams)
            # NOTE(review): the meaning of the literal False depends on
            # _process_utterance's signature, which is not shown here.
            result = _process_utterance(wav, text, out_dir, sub_basename,
                                        False, hparams, encoder_model_fpath)  # accelerate
            # A None result means no metadata entry is recorded for this file.
            if result is None:
                continue
            wav_fpath_name, mel_fpath_name, embed_fpath_name, wav, mel_frames, text = result
            metadata.append ((wav_fpath_name, mel_fpath_name, embed_fpath_name, len(wav), mel_frames, text))
    return metadata
发现该函数中的这段代码有些不对,运行报错 FileNotFoundError: [Errno 2] No such file or directory: '/MockingBird/SV2TTS/synthesizer/audio/audio-001242.wav_00.npy'
mel_fpath = out_dir.joinpath("mels", f"mel-{sub_basename}.npy")
wav_fpath = out_dir.joinpath("audio", f"audio-{sub_basename}.npy")
wav, text = _split_on_silences(wav_fpath, words, hparams)
应该是 `wav_fpath` 指向的 .npy 文件还没有保存。我猜这里应该先加载 `for wav_fpath in wav_fpath_list:` 循环中的 `wav_fpath`(原始音频文件),读出 wav 数据,再转换成 npy 吧。
@babysor
解决了吗 我也遇到同样问题
解决了吗 我也遇到同样问题
加一行保存npy文件代码就行了
解决了吗 我也遇到同样问题
加一行保存npy文件代码就行了
请问能给个代码并描述下加在哪里吗?谢谢!
其实就是 wav_fpath = out_dir.joinpath("audio", f"audio-{sub_basename}.npy")
时将 wav_fpath 变量修改了
`_split_on_silences` 里应该读取原 wav 文件,再由 `_process_utterance` 负责保存 npy 文件。
改动:
# Fragment from inside the per-file loop: the output paths get their own names
# so the loop variable wav_fpath keeps pointing at the source audio file.
mel_fpath_out = out_dir.joinpath("mels", f"mel-{sub_basename}.npy")
wav_fpath_out = out_dir.joinpath("audio", f"audio-{sub_basename}.npy")
# Skip work only when both output files are already present.
if skip_existing and mel_fpath_out.exists() and wav_fpath_out.exists():
    continue
wav, text = _split_on_silences(wav_fpath, words, hparams)
# NOTE(review): this call passes the two output paths in place of the single
# positional argument used before; it assumes _process_utterance was changed
# to accept them -- its signature is not shown here, confirm.
result = _process_utterance(wav, text, out_dir, sub_basename, mel_fpath_out, wav_fpath_out, hparams, encoder_model_fpath)