VAD-python
VAD-python copied to clipboard
Data indices as result of vad instead of time
How can I get the data indices (sample positions) where speech starts and ends, instead of the times at which that happens?
import sounddevice as sd
import numpy as np
import soundfile as sf
from vad import VoiceActivityDetector
# Path of the audio file to analyse; replace it with the path to your own file.
filename = '/home/sadam/saddam.wav'
# Load the file: `y` receives the first returned value and `sr` the second
# (per the soundfile docs these are the audio data and the sample rate).
y, sr = sf.read(filename)
def get_chunk(y, sr, from_ms, to_ms):
    """Return the slice of ``y`` lying between two time positions.

    Both positions are multiplied by ``sr`` and rounded to the nearest
    integer to obtain the slice bounds.

    NOTE(review): despite the ``_ms`` suffix, the values are scaled by
    ``sr`` with no division by 1000, so they are presumably expected in
    seconds when ``sr`` is in samples per second; confirm with the caller.

    Args:
        y: Sliceable sequence of samples.
        sr: Factor converting a time position into a sample index.
        from_ms: Start position of the chunk (inclusive after rounding).
        to_ms: End position of the chunk (exclusive after rounding).

    Returns:
        ``y[start:stop]`` for the computed integer bounds.
    """
    # Scale and round both bounds in one vectorised step.
    bounds = np.round(np.array([from_ms, to_ms]) * sr).astype(int)
    first, last = bounds
    return y[first:last]
# Run voice-activity detection on the file and turn the detected windows
# into a list of labels carrying 'speech_begin' / 'speech_end' entries.
voice = VoiceActivityDetector(filename)
d_sp = voice.detect_speech()
# Fixed: the detector instance is named `voice`; the original code called the
# method on an undefined name `v`, which raised NameError.
rd_lb = voice.convert_windows_to_readible_labels(d_sp)

# Cut every labelled speech interval out of the loaded signal.
speech_chunks = [
    get_chunk(y, sr, label['speech_begin'], label['speech_end'])
    for label in rd_lb
]

# this will store all speech parts
if speech_chunks:
    # Joining whole chunks avoids building a Python list one sample at a time.
    conct_data = np.concatenate(speech_chunks)
else:
    # No speech detected: keep an empty slice of the original signal.
    conct_data = y[:0]

# Play the concatenated speech and block until playback has finished;
# skip playback entirely when there is nothing to play.
if len(conct_data):
    sd.play(conct_data, sr)
    sd.wait()