ffmpeg-python icon indicating copy to clipboard operation
ffmpeg-python copied to clipboard

rtsp -> memory -> file

Open yypyyds opened this issue 2 years ago • 0 comments

I'm doing some ML work on an RTSP stream with both audio and video, but my model needs only audio data for inference. I want to separate the audio and video data of the RTSP stream and keep it in memory, send a copy of the audio portion to the model for inference, and maintain a queue holding the last 5 seconds of the RTSP stream. Whenever my model detects a target event in the audio, I save the last 5 seconds of audio and video as an MP4 file for confirmation. Here is my test code

import os
import tempfile
import threading
import time
from queue import Queue

import ffmpeg

# Rolling buffer of the most recent (video_frame, audio_frame) pairs,
# shared between the reader thread and the saving logic.
# NOTE(review): maxsize=5 holds 5 *frames* (~0.2 s at 25 fps), not 5
# seconds of media — confirm the intended buffer depth.
frame_queue = Queue(maxsize=5)
# Set to True from the main thread to ask the reader thread to save the
# buffered frames to disk.
flag = False


def concattuple(frames):
    """Concatenate an iterable of byte-like frames into a single bytes object.

    Fix: the original built the result with repeated ``+=`` on a bytes
    object, which is O(n^2); ``bytes.join`` does it in one linear pass.

    :param frames: iterable of bytes-like items (bytes, bytearray, ...)
    :return: one ``bytes`` object with all frames in order
    """
    return b"".join(bytes(item) for item in frames)


def save_video():
    """Spawn an ffmpeg subprocess for raw video.

    The process reads 2560x1440 RGB24 frames at 25 fps on stdin and
    writes ffmpeg's output to stdout; both pipes are exposed on the
    returned process object.
    """
    source = ffmpeg.input(
        "pipe:", format="rawvideo", pix_fmt="rgb24", s="2560x1440", r=25
    )
    sink = source.output("pipe:")
    return sink.run_async(pipe_stdin=True, pipe_stdout=True)


def save_audio():
    """Spawn an ffmpeg subprocess for raw audio.

    The process reads signed 16-bit little-endian stereo PCM at 32 kHz
    on stdin and copies the audio stream to stdout; both pipes are
    exposed on the returned process object.
    """
    source = ffmpeg.input(
        "pipe:", format="s16le", acodec="pcm_s16le", ac=2, ar="32000"
    )
    sink = source.output("pipe:", acodec="copy")
    return sink.run_async(pipe_stdin=True, pipe_stdout=True)


def save_audio_video(audio_frames, video_frames, output_file):
    """Mux buffered raw audio and video frames into *output_file*.

    Fixes vs. the original:

    - ffmpeg-python streams have no chained ``.input()`` method, and a
      ``Popen`` object is not a valid argument to ``ffmpeg.input``;
      inputs must be created with ``ffmpeg.input(path, ...)`` and passed
      together to ``ffmpeg.output``.
    - ``acodec="copy"`` / ``vcodec="copy"`` cannot place raw PCM and
      rawvideo into an MP4 container; the streams must be encoded
      (aac / libx264).
    - Two raw streams cannot both be fed through a single stdin pipe, and
      writing to a subprocess without closing stdin can deadlock; writing
      to temporary files sidesteps both problems.
    - The frame rate is 25 to match ``save_video`` (the original
      inconsistently used 30 here).

    :param audio_frames: iterable of s16le stereo 32 kHz PCM chunks
    :param video_frames: iterable of 2560x1440 RGB24 frames
    :param output_file: path of the MP4 file to write
    """
    audio_path = video_path = None
    try:
        with tempfile.NamedTemporaryFile(suffix=".pcm", delete=False) as af:
            af.write(concattuple(audio_frames))
            audio_path = af.name
        with tempfile.NamedTemporaryFile(suffix=".raw", delete=False) as vf:
            vf.write(concattuple(video_frames))
            video_path = vf.name

        audio_in = ffmpeg.input(
            audio_path, format="s16le", acodec="pcm_s16le", ac=2, ar="32000"
        )
        video_in = ffmpeg.input(
            video_path, format="rawvideo", pix_fmt="rgb24", s="2560x1440", r=25
        )
        (
            ffmpeg.output(
                video_in,
                audio_in,
                output_file,
                vcodec="libx264",
                acodec="aac",
                shortest=None,
            )
            .overwrite_output()
            .run()
        )
    finally:
        # Always remove the temp files, even if ffmpeg fails.
        for path in (audio_path, video_path):
            if path is not None and os.path.exists(path):
                os.remove(path)


def receive_rtsp_stream():
    """Read an RTSP stream, buffer recent frames, and save on request.

    Probes the stream, then spawns two ffmpeg subprocesses — one decoding
    raw RGB24 video and one decoding raw PCM audio — and pushes
    (video_frame, audio_frame) pairs into the global ``frame_queue``.
    When the global ``flag`` is set, the buffered frames are written to
    ``output.mp4`` and the function returns.

    Fixes vs. the original:

    - ``zip(list(frame_queue.queue))`` was missing the ``*`` unpack and
      would raise at runtime; ``zip(*frame_queue.queue)`` transposes the
      stored (video, audio) pairs into two tuples.
    - ``flag == True`` replaced by a plain truthiness test.
    - Once the flag was set, the original re-saved the file on *every*
      subsequent frame and never returned; it now saves once and stops.
    - The ffmpeg subprocesses are terminated on exit instead of leaked.

    NOTE(review): a 4096-byte audio read does not correspond to one video
    frame's duration, so the two iterators drift apart — confirm the
    intended pairing before relying on A/V sync.
    """
    probe = ffmpeg.probe(
        "rtsp://url"
    )
    video_stream = next(
        (stream for stream in probe["streams"] if stream["codec_type"] == "video"), None
    )
    audio_stream = next(
        (stream for stream in probe["streams"] if stream["codec_type"] == "audio"), None
    )

    if video_stream is None or audio_stream is None:
        print("cannot find stream")
        return

    process1 = (
        ffmpeg.input(
            "rtsp://url",
            rtsp_transport="tcp",
        )
        .video.output("pipe:", format="rawvideo", pix_fmt="rgb24")
        .overwrite_output()
        .run_async(pipe_stdout=True)
    )

    process2 = (
        ffmpeg.input(
            "rtsp://url",
            rtsp_transport="tcp",
        )
        .audio.output("pipe:", format="s16le", acodec="pcm_s16le", ac=1, ar="32000")
        .overwrite_output()
        .run_async(pipe_stdout=True)
    )

    try:
        for video_frame, audio_frame in zip(
            iter(lambda: process1.stdout.read(2560 * 1440 * 3), b""),
            iter(lambda: process2.stdout.read(4096), b""),
        ):
            # ML function
            if frame_queue.full():
                frame_queue.get()
            frame_queue.put((video_frame, audio_frame))
            if flag:
                # Transpose the buffered (video, audio) pairs into two tuples.
                video_frames, audio_frames = zip(*frame_queue.queue)
                save_audio_video(audio_frames, video_frames, "output.mp4")
                break
    finally:
        for process in (process1, process2):
            process.terminate()
            process.wait()


# Run the stream reader in the background; daemon=True so the process can
# exit even if the reader is still blocked on the RTSP stream.
t = threading.Thread(target=receive_rtsp_stream, daemon=True)
t.start()

# Bug fix: the original looped on `if user_input:`, which is falsy for a
# bare Enter press, so the advertised "enter to exit" never triggered.
# Any line — including an empty one — now requests the save and exits.
input("enter to exit")
flag = True
print("save to file")

This is just test code — since I don't know how to read exactly one frame's worth of audio and video per loop iteration, the reading part is only a rough sample. Please tell me if there exists a better way to achieve this goal.

yypyyds avatar Nov 06 '23 06:11 yypyyds