FireRedASR
FireRedASR copied to clipboard
fix sample rate error in asr_feat.py
在 asr_feat.py 中,
frame_length、frame_shift 被传入了 KaldifeatFbank,但并未被使用;wav 中的 sample_rate 也没有传入 knf,导致所有音频都按照 16k 采样率提取特征。
class KaldifeatFbank:
    """Filterbank (fbank) feature extractor backed by ``knf.OnlineFbank``.

    The framing and mel options are configured once at construction time.
    The dither value and the sampling frequency are refreshed on every call
    so that each waveform is processed at its own sample rate.
    """

    def __init__(self, num_mel_bins=80, frame_length=25, frame_shift=10,
                 dither=1.0):
        """Build the fbank options.

        Args:
            num_mel_bins: Number of mel bins, i.e. the feature dimension.
            frame_length: Analysis window length, forwarded to
                ``frame_opts.frame_length_ms``.
            frame_shift: Hop between consecutive frames, forwarded to
                ``frame_opts.frame_shift_ms``.
            dither: Dither amount applied when ``is_train`` is true.
        """
        self.dither = dither
        opts = knf.FbankOptions()
        opts.frame_opts.dither = dither
        # These two used to be accepted but silently ignored; forward them so
        # that non-default framing actually takes effect.
        opts.frame_opts.frame_length_ms = float(frame_length)
        opts.frame_opts.frame_shift_ms = float(frame_shift)
        opts.frame_opts.snip_edges = True
        opts.mel_opts.num_bins = num_mel_bins
        opts.mel_opts.debug_mel = False
        self.opts = opts

    def __call__(self, wav, is_train=False):
        """Extract fbank features from one waveform.

        Args:
            wav: Either a path string handed to ``kaldiio.load_mat``, or a
                2-element tuple/list ``(sample_rate, samples)`` where
                ``samples`` is a 1-D array.
            is_train: When true, apply the configured dither; otherwise
                dither is disabled (0.0).

        Returns:
            A 2-D array with one row per frame and ``num_mel_bins`` columns.
            If no frame could be produced, an empty ``(0, num_mel_bins)``
            array is returned and a warning is printed.

        Raises:
            TypeError: If ``wav`` is neither a path string nor a 2-element
                tuple/list.
        """
        if isinstance(wav, str):
            # presumably load_mat yields (rate, samples) for wav input --
            # TODO confirm against the kaldiio version in use
            sample_rate, wav_np = kaldiio.load_mat(wav)
        elif isinstance(wav, (tuple, list)) and len(wav) == 2:
            sample_rate, wav_np = wav
        else:
            raise TypeError(
                "wav must be a path string or a (sample_rate, samples) pair, "
                f"got {type(wav).__name__}"
            )
        assert len(wav_np.shape) == 1

        frame_opts = self.opts.frame_opts
        frame_opts.dither = self.dither if is_train else 0.0
        # Without this the extractor keeps the library's default sampling
        # frequency no matter what rate the waveform really has.
        frame_opts.samp_freq = float(sample_rate)

        fbank = knf.OnlineFbank(self.opts)
        fbank.accept_waveform(sample_rate, wav_np.tolist())
        feat = [fbank.get_frame(i) for i in range(fbank.num_frames_ready)]
        if not feat:
            print("Check data, len(feat) == 0", wav, flush=True)
            return np.zeros((0, self.opts.mel_opts.num_bins))
        return np.vstack(feat)
Thanks for your pull request! We'll review the code.