auditok icon indicating copy to clipboard operation
auditok copied to clipboard

Real-Time Silence detection from bytes

Open DoodleBears opened this issue 1 year ago • 1 comment

Following up on #23, I am trying to split the speaker's audio using a PyAudio stream:

The callback part (how can I take in_data and split it the same way the microphone input is read and split in #23?)

def callback(self, in_data, frame_count, time_info, status):
    """Buffer incoming audio and save the first region ``split`` detects.

    Each call appends ``in_data`` to ``self.frames``, joins everything
    buffered so far and runs ``split`` over it. If at least one region is
    produced, the first one is saved under
    ``constants.TEMP_SPEAKER_OUTPUT_AUDIO_DIR`` and the buffer is cleared;
    otherwise the buffer keeps growing until a later call finds a region.

    Args:
        in_data: Raw audio bytes handed to the callback for this buffer.
        frame_count: Not used by this callback.
        time_info: Not used by this callback.
        status: Not used by this callback.

    Returns:
        The tuple ``(in_data, pyaudio.paContinue)``.
    """
    self.frames.append(in_data)
    # Named `buffered` rather than `input` so the builtin is not shadowed.
    buffered = b''.join(self.frames)

    # NOTE(review): the whole accumulated buffer is re-joined and re-scanned
    # from the start on every callback, so the cost per call grows with the
    # amount of audio buffered since the last saved region.
    reader = AudioReader(
        input=buffered,
        sr=self.__SAMPLE_RATE,
        sw=self.__SAMPLE_WIDTH,
        ch=self.__CHANNEL,
    )
    # `eth` is not passed (self.__ENERGY_THRESHOLD is unused here), so
    # `split` falls back to its own default energy threshold.
    regions = split(
        input=reader,
        max_silence=self.__MAX_SILENCE,
        max_dur=self.__MAX_DURATION,
        min_dur=self.__MIN_DURATION,
    )

    # Only the first detected region is of interest; later ones are ignored.
    first_region = next(iter(regions), None)
    if first_region is not None:
        print(f"{constants.CONSOLE_COLOR_RED}split{constants.CONSOLE_COLOR_WHITE}")
        path = f'{constants.TEMP_SPEAKER_OUTPUT_AUDIO_DIR}/{str(time.time()) + constants.TEMP_SPEAKER_OUTPUT_AUDIO_FORMAT}'
        first_region.save(path)
        # Start a fresh buffer once a region has been written out.
        self.frames = []

    return (in_data, pyaudio.paContinue)

The pyaudio part

# Open a PA stream via a context manager; leaving the `with` block closes
# the stream again.
# NOTE(review): pyaudio.get_sample_size() returns the sample size in bytes,
# so frames_per_buffer is set to that small number of frames — confirm this
# is the intended buffer size.
stream_options = {
    "format": pyaudio.paInt16,
    "channels": default_speakers["maxInputChannels"],
    "rate": int(default_speakers["defaultSampleRate"]),
    "frames_per_buffer": pyaudio.get_sample_size(pyaudio.paInt16),
    "input": True,
    "input_device_index": default_speakers["index"],
    "stream_callback": self.callback,
}
with p.open(**stream_options) as stream:
    # Audio is handed to self.callback; this loop only keeps the stream
    # open, re-checking the listening flag once per second.
    while self.ai_listen_handler.is_listening_speaker:
        time.sleep(1)

Use auditok.split to split microphone input in real-time

for region in auditok.split(
    input=None,
    sr=self.__SAMPLE_RATE,
    sw=self.__SAMPLE_WIDTH,
    ch=self.__CHANNEL,
    eth=self.__ENERGY_THRESHOLD,
    max_silence=self.__MAX_SILENCE,
    max_dur=self.__MAX_DURATION,
    min_dur=self.__MIN_DURATION
    ):
    if not self.ai_listen_handler.is_listening_mic:
        return

    path = f'{constants.TEMP_MIC_INPUT_AUDIO_DIR}/{str(time.time()) + constants.TEMP_MIC_INPUT_AUDIO_FORMAT}'
    region.save(path)

DoodleBears avatar Apr 03 '23 18:04 DoodleBears