ffmpeg-python
ffmpeg-python copied to clipboard
bidirectional audio stream [numpy pipe -> ffmpeg -> pipe numpy]?
I want to change audio's volume / pitch / speed / sample_rate in realtime (frame by frame), how could I implement it with ffmpeg-python? I am not familiar with ffmpeg, so code below make no sense, but it demonstrate what I want to do. If someone could figure it out, I will be thankful
import ffmpeg
import numpy as np
import soundfile as sf
def split_chunk(x: np.ndarray, chunk_size: int, axis: int = 0):
indices = np.arange(chunk_size, x.shape[axis], chunk_size)
return np.array_split(x, indices, axis)
class StreamAudioProcess:
def __init__(
self, ori_sr: int, tgt_sr: int, volume: float, speed: float, pitch: float
) -> None:
input_kwargs = {}
output_kwargs = {}
self.process = (
ffmpeg.input('pipe:', format='f32le', **input_kwargs)
.audio.filter("aresample", tgt_sr)
.audio.filter("atempo", speed)
.output('pipe:', format='f32le', **output_kwargs)
.run_async(pipe_stdin=True, pipe_stdout=True, pipe_stderr=True)
)
def transform(
self,
audio_chunk: np.ndarray,
) -> np.ndarray:
dtype = np.float32
n_channels = 1
self.process.stdin.write(audio_chunk.astype(dtype).tobytes())
buffer = self.process.stdout.read(audio_chunk.shape[0])
waveform = np.frombuffer(buffer, dtype=dtype).reshape(-1, n_channels)
return waveform
if __name__ == '__main__':
wavpath = 'demo.wav'
audio, ori_sr = sf.read(wavpath)
tgt_sr = 16000
chunk_ms = 50 # ms
chunk_size = chunk_ms * ori_sr // 1000
audio_chunks = split_chunk(audio, chunk_size)
sap = StreamAudioProcess(ori_sr, tgt_sr, volume=1.2, speed=1.2, pitch=1.2)
res = []
for i, audio_chunk in enumerate(audio_chunks):
res.append(sap.transform(audio_chunk))
process_audio = np.concatenate(res, axis=0)
sf.write('stream-audio-transform.wav', process_audio, ori_sr)