ffmpeg-python
ffmpeg-python copied to clipboard
rtsp -> memory -> file
I'm doing some ML work on a rtsp stream with both audio and video, but my model need only audio data to do inference. I want to separate the audio and video data of the RTSP stream and save it to memory, make a copy of the audio part of the data and send it to the model for inference, and maintain a queue to store the last 5 seconds of the RTSP stream. Whenever my model detects that there is a target event in the audio, I save the last 5 seconds of audio and video as an MP4 file for confirmation. Here is my test code
import ffmpeg
import time
from queue import Queue
import threading
frame_queue = Queue(maxsize=5)
flag = False
def concattuple(frames):
a = b""
for item in frames:
a = a + bytes(item)
return a
def save_video():
return (
ffmpeg.input("pipe:", format="rawvideo", pix_fmt="rgb24", s="2560x1440", r=25)
.output("pipe:")
.run_async(pipe_stdin=True, pipe_stdout=True)
)
def save_audio():
return (
ffmpeg.input(
"pipe:", format="s16le", acodec="pcm_s16le", ac=2, ar="32000"
)
.output("pipe:", acodec="copy")
.run_async(pipe_stdin=True, pipe_stdout=True)
)
def save_audio_video(audio_frames, video_frames, output_file):
audio_pipe = save_audio()
video_pipe = save_video()
audio_pipe.stdin.write(concattuple(audio_frames))
video_pipe.stdin.write(concattuple(video_frames))
ffmpeg.input(
audio_pipe, format="s16le", acodec="pcm_s16le", ac=2, ar="32000"
).input(video_pipe, format="rawvideo", pix_fmt="rgb24", s="2560x1440", r=30).output(
output_file, acodec="copy", vcodec="copy", shortest=None
).run()
def receive_rtsp_stream():
probe = ffmpeg.probe(
"rtsp://url"
)
video_stream = next(
(stream for stream in probe["streams"] if stream["codec_type"] == "video"), None
)
audio_stream = next(
(stream for stream in probe["streams"] if stream["codec_type"] == "audio"), None
)
if video_stream is None or audio_stream is None:
print("cannot find stream")
return
process1 = (
ffmpeg.input(
"rtsp://url",
rtsp_transport="tcp",
)
.video.output("pipe:", format="rawvideo", pix_fmt="rgb24")
.overwrite_output()
.run_async(pipe_stdout=True)
)
process2 = (
ffmpeg.input(
"rtsp://url",
rtsp_transport="tcp",
)
.audio.output("pipe:", format="s16le", acodec="pcm_s16le", ac=1, ar="32000")
.overwrite_output()
.run_async(pipe_stdout=True)
)
for video_frame, audio_frame in zip(
iter(lambda: process1.stdout.read(2560 * 1440 * 3), b""),
iter(lambda: process2.stdout.read(4096), b""),
):
# ML function
if frame_queue.full():
frame_queue.get()
frame_queue.put((video_frame, audio_frame))
if flag == True:
video_frames, audio_frames = zip(list(frame_queue.queue))
save_audio_video(audio_frames, video_frames, "output.mp4")
t = threading.Thread(target=receive_rtsp_stream)
t.start()
while True:
user_input = input("enter to exit")
if user_input:
flag = True
print("save to file")
break
It's just a test code, since I don't know how to get the audio and video data of the whole frame every second loop, the reading part is to write a sample. Please tell me if there exits a way to better achieve the goal.