ffmpeg-python icon indicating copy to clipboard operation
ffmpeg-python copied to clipboard

bidirectional audio stream [numpy pipe -> ffmpeg -> pipe numpy]?

Open Jackiexiao opened this issue 3 years ago • 0 comments

I want to change an audio stream's volume / pitch / speed / sample rate in real time (frame by frame). How could I implement this with ffmpeg-python? I am not familiar with ffmpeg, so the code below may not make sense, but it demonstrates what I want to do. If someone could figure it out, I would be thankful.

import queue
import threading

import ffmpeg
import numpy as np
import soundfile as sf


def split_chunk(x: np.ndarray, chunk_size: int, axis: int = 0):
    """Split *x* along *axis* into consecutive pieces of *chunk_size* items.

    Every piece has exactly ``chunk_size`` items along ``axis`` except the
    last one, which holds whatever remains. An input shorter than
    ``chunk_size`` (including an empty one) is returned as a single piece.

    Args:
        x: Array to split.
        chunk_size: Number of items per piece along ``axis``; must be positive.
        axis: Axis along which to split.

    Returns:
        A list of sub-arrays, in order, as produced by ``np.array_split``.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        # A zero step makes np.arange fail with an unrelated error, and a
        # negative one silently yields a single chunk; reject both up front.
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")
    # Split points sit at every multiple of chunk_size strictly inside the axis.
    boundaries = np.arange(chunk_size, x.shape[axis], chunk_size)
    return np.array_split(x, boundaries, axis)


class StreamAudioProcess:
    """Stream raw float32 PCM through a long-running ffmpeg subprocess.

    Audio is written to ffmpeg's stdin chunk by chunk and the processed audio
    is collected from its stdout by a background thread. Because the filters
    change the number of samples (resampling, tempo) and ffmpeg buffers
    internally, the output of ``transform`` is NOT aligned with its input: a
    call returns whatever processed audio is available at that moment, which
    may be empty. Call ``close`` once at the end to obtain the remainder.

    Args:
        ori_sr: Sample rate of the audio passed to ``transform``.
        tgt_sr: Sample rate of the audio returned.
        volume: Linear gain, passed to ffmpeg's ``volume`` filter.
        speed: Playback speed factor (1.0 leaves the duration unchanged).
        pitch: Pitch factor (1.0 leaves the pitch unchanged). Implemented by
            reinterpreting the sample rate (``asetrate``) and compensating
            the resulting duration change with ``atempo``.
        n_channels: Number of interleaved channels in the input and output.

    Raises:
        ValueError: If ``speed`` or ``pitch`` is not positive, or
            ``n_channels`` is smaller than 1.
    """

    _DTYPE = np.dtype(np.float32)
    _READ_SIZE = 65536

    def __init__(
        self,
        ori_sr: int,
        tgt_sr: int,
        volume: float,
        speed: float,
        pitch: float,
        n_channels: int = 1,
    ) -> None:
        if speed <= 0 or pitch <= 0:
            raise ValueError("speed and pitch must be positive")
        if n_channels < 1:
            raise ValueError("n_channels must be at least 1")

        self.n_channels = n_channels
        self._pending = b''  # trailing bytes that do not yet form a full frame
        self._chunks = queue.Queue()  # raw stdout data handed over by the reader

        # Raw PCM carries no header, so ffmpeg must be told the rate and
        # channel count of the input explicitly.
        input_kwargs = {'ar': ori_sr, 'ac': n_channels}
        output_kwargs = {'ar': tgt_sr, 'ac': n_channels}

        stream = ffmpeg.input('pipe:', format='f32le', **input_kwargs).audio
        if pitch != 1:
            # Relabelling the rate shifts pitch AND tempo by `pitch`; the
            # tempo part is undone below via atempo.
            stream = stream.filter('asetrate', int(round(ori_sr * pitch)))
        stream = stream.filter('aresample', tgt_sr)
        tempo = speed / pitch
        if tempo != 1:
            stream = stream.filter('atempo', tempo)
        if volume != 1:
            stream = stream.filter('volume', volume)

        # stderr is deliberately not piped: nothing reads it, and an undrained
        # pipe would eventually fill up and stall ffmpeg.
        self.process = stream.output(
            'pipe:', format='f32le', **output_kwargs
        ).run_async(pipe_stdin=True, pipe_stdout=True)

        # Draining stdout concurrently keeps ffmpeg from blocking on a full
        # output pipe while we are blocked writing to its stdin.
        self._reader = threading.Thread(target=self._pump_stdout, daemon=True)
        self._reader.start()

    def _pump_stdout(self) -> None:
        """Move data from ffmpeg's stdout to the queue until end of stream."""
        stdout = self.process.stdout
        # read1 returns as soon as some data is available instead of waiting
        # for a full buffer, which keeps latency low.
        for data in iter(lambda: stdout.read1(self._READ_SIZE), b''):
            self._chunks.put(data)

    def _drain(self) -> np.ndarray:
        """Return all complete frames received so far as (frames, channels)."""
        parts = [self._pending]
        while True:
            try:
                parts.append(self._chunks.get_nowait())
            except queue.Empty:
                break
        raw = b''.join(parts)
        frame_bytes = self._DTYPE.itemsize * self.n_channels
        usable = len(raw) - len(raw) % frame_bytes
        # A pipe read may end in the middle of a frame; keep the tail for the
        # next call.
        self._pending = raw[usable:]
        return np.frombuffer(raw[:usable], dtype=self._DTYPE).reshape(
            -1, self.n_channels
        )

    def transform(
        self,
        audio_chunk: np.ndarray,
    ) -> np.ndarray:
        """Feed one chunk to ffmpeg and return the audio processed so far.

        Args:
            audio_chunk: Samples to process; converted to float32 and written
                in C order (interleaved channels for 2-D input).

        Returns:
            Float32 array of shape ``(frames, n_channels)`` with the output
            available right now. It may be empty and generally does not
            correspond one-to-one to ``audio_chunk``.
        """
        stdin = self.process.stdin
        stdin.write(audio_chunk.astype(self._DTYPE).tobytes())
        stdin.flush()  # hand the data to ffmpeg now rather than when the buffer fills
        return self._drain()

    def close(self) -> np.ndarray:
        """Signal end of input, wait for ffmpeg, and return the remaining audio.

        Returns:
            Float32 array of shape ``(frames, n_channels)`` holding everything
            ffmpeg produced that earlier ``transform`` calls did not return.
        """
        stdin = self.process.stdin
        if not stdin.closed:
            stdin.close()  # EOF makes ffmpeg flush its filters and exit
        self._reader.join()
        self.process.stdout.close()
        self.process.wait()
        return self._drain()


if __name__ == '__main__':
    wavpath = 'demo.wav'
    audio, ori_sr = sf.read(wavpath)
    tgt_sr = 16000
    # sf.read yields (frames,) for mono and (frames, channels) otherwise.
    n_channels = 1 if audio.ndim == 1 else audio.shape[1]

    chunk_ms = 50  # ms
    chunk_size = chunk_ms * ori_sr // 1000
    audio_chunks = split_chunk(audio, chunk_size)

    sap = StreamAudioProcess(
        ori_sr, tgt_sr, volume=1.2, speed=1.2, pitch=1.2, n_channels=n_channels
    )
    res = [sap.transform(audio_chunk) for audio_chunk in audio_chunks]
    # Most of the output usually arrives only after ffmpeg sees end of input.
    res.append(sap.close())
    process_audio = np.concatenate(res, axis=0)
    # The processed audio is at the target rate, not the original one.
    sf.write('stream-audio-transform.wav', process_audio, tgt_sr)

Jackiexiao avatar Aug 24 '22 08:08 Jackiexiao