[BUG] mp4 videos variable framerate, unreadable by some windows software
Please only report one bug per issue!
Describe the bug: Using the attached code on a Pi 5 with an IMX500 camera, the resulting video is reported as variable frame rate by Kdenlive and is unreadable by Vegas Pro on a Windows PC.
To Reproduce
#!/usr/bin/env python3
"""Based on imx500_object_detection_demo.py."""
import argparse
import sys
from functools import lru_cache
import cv2
import numpy as np
import time
import datetime
from picamera2 import MappedArray, Picamera2, Preview
from picamera2.devices import IMX500
from picamera2.devices.imx500 import (NetworkIntrinsics,postprocess_nanodet_detection)
from picamera2.encoders import H264Encoder
from picamera2.outputs import CircularOutput2, PyavOutput
from libcamera import controls
# Labels that make the main loop start (or extend) a recording.
objects = ["cat", "bear", "clock", "person"]
# Confidence the parsed label must exceed in the main loop.
threshold = 0.5

# Video settings: main stream width/height, and a length in seconds.
v_width, v_height = 2028, 1520
v_length = 5
# 1 = draw labels and boxes onto the frames; any other value = no drawing.
show_detects = 0

# Run-time state.
last_detections = []
label = " ( "  # placeholder with the same "(" separator the main loop splits on
encoding = False
class Detection:
    """A single detected object: category, confidence and bounding box."""

    def __init__(self, coords, category, conf, metadata):
        """Record the category and confidence and convert the box coordinates.

        ``coords`` and ``metadata`` are passed, together with the module-level
        ``picam2``, to ``imx500.convert_inference_coords``; its return value is
        stored as ``self.box``.
        """
        converted = imx500.convert_inference_coords(coords, metadata, picam2)
        self.category = category
        self.conf = conf
        self.box = converted
def parse_detections(metadata: dict):
    """Parse the output tensors in *metadata* into a list of ``Detection`` objects.

    The list is also stored in the module-level ``last_detections``. If the
    metadata carries no outputs, the previously stored list is returned
    unchanged.
    """
    global last_detections

    normalise = intrinsics.bbox_normalization
    order = intrinsics.bbox_order
    min_score = args.threshold
    iou_threshold = args.iou
    detection_cap = args.max_detections

    outputs = imx500.get_outputs(metadata, add_batch=True)
    input_w, input_h = imx500.get_input_size()
    if outputs is None:
        # Nothing new for this frame: keep reporting the last known result.
        return last_detections

    if intrinsics.postprocess == "nanodet":
        results = postprocess_nanodet_detection(
            outputs=outputs[0],
            conf=min_score,
            iou_thres=iou_threshold,
            max_out_dets=detection_cap,
        )
        boxes, scores, classes = results[0]
        from picamera2.devices.imx500.postprocess import scale_boxes
        boxes = scale_boxes(boxes, 1, 1, input_h, input_w, False, False)
    else:
        boxes = outputs[0][0]
        scores = outputs[1][0]
        classes = outputs[2][0]
        if normalise:
            boxes = boxes / input_h
        if order == "xy":
            # Swap each coordinate pair so the columns end up in the other order.
            boxes = boxes[:, [1, 0, 3, 2]]
        # Split into four column arrays, then regroup them row by row.
        boxes = zip(*np.array_split(boxes, 4, axis=1))

    found = []
    for box, score, category in zip(boxes, scores, classes):
        if score > min_score:
            found.append(Detection(box, category, score, metadata))
    last_detections = found
    return last_detections
@lru_cache
def get_labels():
    """Return the label list from ``intrinsics``; the result is cached.

    When ``intrinsics.ignore_dash_labels`` is truthy, empty entries and "-"
    entries are left out.
    """
    all_labels = intrinsics.labels
    if not intrinsics.ignore_dash_labels:
        return all_labels
    return [name for name in all_labels if name and name != "-"]
def draw_detections(request, stream="main"):
    """Draw the latest detections onto the frame of *request*.

    Reads the module-level ``last_results`` and does nothing when it is None.
    For every detection the module-level ``label`` is overwritten with
    "<name> (<confidence>)"; the text and box are only drawn when
    ``show_detects`` equals 1. When ``intrinsics.preserve_aspect_ratio`` is
    set, the ROI rectangle reported by ``imx500.get_roi_scaled`` is drawn too.
    """
    global label, show_detects, mp4_anno, scale

    current = last_results
    if current is None:
        return

    names = get_labels()
    font = cv2.FONT_HERSHEY_SIMPLEX
    with MappedArray(request, stream) as m:
        for det in current:
            x, y, w, h = det.box
            label = f"{names[int(det.category)]} ({det.conf:.2f})"
            if show_detects != 1:
                # Still update ``label`` above, but leave the frame untouched.
                continue

            # Work out where the caption goes and how much room it needs.
            (text_width, text_height), baseline = cv2.getTextSize(label, font, 0.5, 1)
            text_x, text_y = x + 5, y + 15

            # Paint a filled rectangle on a copy, then blend the copy back in
            # so the caption background is semi-transparent.
            backdrop = m.array.copy()
            top_left = (text_x, text_y - text_height)
            bottom_right = (text_x + text_width, text_y + baseline)
            cv2.rectangle(backdrop, top_left, bottom_right, (255, 255, 255), cv2.FILLED)
            alpha = 0.30
            cv2.addWeighted(backdrop, alpha, m.array, 1 - alpha, 0, m.array)

            # Caption text over the blended background.
            cv2.putText(m.array, label, (text_x, text_y), font, 0.5, (0, 0, 255), 1)

            # Bounding box of the detection.
            cv2.rectangle(m.array, (x, y), (x + w, y + h), (0, 255, 0, 0), thickness=2)

        if intrinsics.preserve_aspect_ratio:
            roi_x, roi_y, roi_w, roi_h = imx500.get_roi_scaled(request)
            roi_colour = (255, 0, 0)
            cv2.putText(m.array, "ROI", (roi_x + 5, roi_y + 15), font, 0.5, roi_colour, 1)
            cv2.rectangle(m.array, (roi_x, roi_y), (roi_x + roi_w, roi_y + roi_h), (255, 0, 0, 0))
def get_args(argv=None):
    """Parse the command-line options of the detection script.

    Args:
        argv: Optional list of argument strings to parse. When None (the
            default), argparse reads the arguments from ``sys.argv[1:]``.

    Returns:
        The ``argparse.Namespace`` holding the parsed options.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str, help="Path of the model",
                        default="/usr/share/imx500-models/imx500_network_ssd_mobilenetv2_fpnlite_320x320_pp.rpk")
    parser.add_argument("--fps", type=int, help="Frames per second")
    parser.add_argument("--bbox-normalization", action=argparse.BooleanOptionalAction, help="Normalize bbox")
    parser.add_argument("--bbox-order", choices=["yx", "xy"], default="yx",
                        help="Set bbox order yx -> (y0, x0, y1, x1) xy -> (x0, y0, x1, y1)")
    parser.add_argument("--threshold", type=float, default=0.55, help="Detection threshold")
    parser.add_argument("--iou", type=float, default=0.65, help="Set iou threshold")
    parser.add_argument("--max-detections", type=int, default=10, help="Set max detections")
    parser.add_argument("--ignore-dash-labels", action=argparse.BooleanOptionalAction, help="Remove '-' labels ")
    parser.add_argument("--postprocess", choices=["", "nanodet"],
                        default=None, help="Run post process of type")
    parser.add_argument("-r", "--preserve-aspect-ratio", action=argparse.BooleanOptionalAction,
                        help="preserve the pixel aspect ratio of the input tensor")
    parser.add_argument("--labels", type=str,
                        help="Path to the labels file")
    parser.add_argument("--print-intrinsics", action="store_true",
                        help="Print JSON network_intrinsics then exit")
    # argv=None keeps the original behaviour of reading sys.argv.
    return parser.parse_args(argv)
if __name__ == "__main__":
    # Script entry point: load the network, configure the camera, keep an
    # encoded circular buffer running and write it to an .mp4 file whenever
    # one of the wanted objects is detected.
    args = get_args()

    # This must be called before instantiation of Picamera2
    imx500 = IMX500(args.model)
    intrinsics = imx500.network_intrinsics
    if not intrinsics:
        intrinsics = NetworkIntrinsics()
        intrinsics.task = "object detection"
    elif intrinsics.task != "object detection":
        print("Network is not an object detection task", file=sys.stderr)
        # Non-zero status: this is an error exit, not a normal one.
        sys.exit(1)

    # Override intrinsics from args
    for key, value in vars(args).items():
        if key == 'labels' and value is not None:
            with open(value, 'r') as f:
                intrinsics.labels = f.read().splitlines()
        elif hasattr(intrinsics, key) and value is not None:
            setattr(intrinsics, key, value)

    # Defaults
    if intrinsics.labels is None:
        with open("assets/coco_labels.txt", "r") as f:
            intrinsics.labels = f.read().splitlines()
    intrinsics.update_with_defaults()

    if args.print_intrinsics:
        print(intrinsics)
        sys.exit()

    # Configure and start Picamera2.
    # get_input_size() is unpacked as (width, height), the same order
    # parse_detections() uses, so the lores size below is not transposed.
    model_w, model_h = imx500.get_input_size()
    video_w, video_h = v_width, v_height
    main = {'size': (video_w, video_h), 'format': 'YUV420'}
    lores = {'size': (model_w, model_h), 'format': 'YUV420'}
    picam2 = Picamera2(imx500.camera_num)
    config = picam2.create_preview_configuration(
        main,
        lores=lores,
        controls={"FrameRate": intrinsics.inference_rate},
        buffer_count=12,
    )
    imx500.show_network_fw_progress_bar()
    picam2.configure(config)

    # The encoder feeds a circular buffer; a file is only attached to it
    # once a detection happens.
    encoder = H264Encoder(bitrate=2000000)
    circular = CircularOutput2(buffer_duration_ms=5000)
    picam2.start_preview(Preview.QTGL, x=0, y=0, width=480, height=480)
    picam2.start_recording(encoder, circular)

    if intrinsics.preserve_aspect_ratio:
        imx500.set_auto_aspect_ratio()

    last_results = None
    picam2.pre_callback = draw_detections

    while True:
        last_results = parse_detections(picam2.capture_metadata())

        # capture frame
        frame = picam2.capture_array('lores')
        frame = cv2.cvtColor(frame, cv2.COLOR_YUV420p2RGB)
        frame = frame[0:320, 0:320]

        # detected label: draw_detections() stores it as "<name> (<conf>)",
        # so split on "(" and drop the trailing space / ")" of each part.
        data = label.split("(")
        category = data[0][:-1]
        value = data[1][:-1]

        if category in objects and float(value) > threshold:
            # restart timer
            startrec = time.monotonic()
            # start recording
            if not encoding:
                encoding = True
                now = datetime.datetime.now()
                timestamp = now.strftime("%y%m%d_%H%M%S")
                print("New Detection", timestamp, label)
                circular.open_output(PyavOutput("/run/shm/" + timestamp + ".mp4"))

        # stop recording after v_length + 5 seconds to empty circular buffer
        # (startrec is always set before encoding becomes True)
        if encoding and (time.monotonic() - startrec > v_length + 5):
            now = datetime.datetime.now()
            timestamp2 = now.strftime("%y%m%d_%H%M%S")
            print("Stopped Record", timestamp2)
            circular.close_output()
            encoding = False

        # Reset the parsed values and the shared label for the next pass.
        category = ""
        value = 0.0
        label = " ( "
Expected behaviour Usable mp4 videos
Console Output, Screenshots If applicable, any console output or screenshots that show the problem and associated error messages.
Hardware : Pi5, imx500 camera
Additional context Add any other context about the problem here.
Based on the previous discussion in the forum, should that be variable bit rate or variable framerate in the title?
Sorry, my error: framerate.
Thinking about this for a moment, it seems a slightly trickier problem than one might have expected:
- It's actually not straightforward to know that you've got a fixed framerate video. None of the encoder/output layers know about this, so we'd have to think how they might find out, or whether the user has to flag it.
- It's harder to know what the fixed framerate is. You can ask a camera for 30fps, but because it has its own clocks, and register settings provided by a manufacturer that may involve some "compromise", some sensors may leave you a few percent off your target.
- We can never guarantee that you won't ever drop frames.
- Furthermore, getting any of this wrong will lead to loss of sync when there's an audio stream as well.
So overall, I think it's unlikely we'll be making any changes here for the time being, particularly when there's the option just to rewrite the timestamps using FFmpeg.
OK. I understand your point of view, but it's a bit disappointing if you can't write MP4s without the use of other software.
When I convert H.264 to MP4 with ffmpeg I get the warning "Timestamps are unset in a packet for stream 0. This is deprecated and will stop working in the future. Fix your code to set the timestamps properly", which I haven't resolved yet.
You can try pyav to write .mp4 directly from python, it has advantages with handling timestamps: https://github.com/raspberrypi/picamera2/blob/fb031cb49cb2af4dab7439be7a90784c9d3db1ab/picamera2/outputs/pyavoutput.py#L15-L18 Example: https://github.com/raspberrypi/picamera2/blob/main/examples/pyav_capture.py
Aren't I already using pyav in the code?
circular.open_output(PyavOutput("/run/shm/" + timestamp +".mp4"))
Sorry, I thought you were using ffmpeg explicitly at some point.
So the PyavOutput is giving you the
Timestamps are unset in a packet for stream 0. This is deprecated and will stop working in the future. Fix your code to set the timestamps properly
warning? I got this warning when using an ffmpeg output, but not with pyav.
Sorry for the confusion. In my original post I was using pyav. It was suggested that I use ffmpeg to fix it, but I pointed out that I get this warning with ffmpeg when converting H.264 to MP4. You responded to my second point.
Hi just a note when I use the example code at https://github.com/raspberrypi/picamera2/blob/main/examples/pyav_capture.py
I cannot open the resulting video files in QuickTime or iMovie on a Mac. The files do open in VLC but hang a bit on the first frame. If I convert them using ffmpeg they then do work but this introduces another step.
This appears resolved from my testing, MP4s now work in Vegas Pro 22
https://github.com/Gordon999/PI_imx500_detection
This appears resolved from my testing, MP4s now work in Vegas Pro 22
https://github.com/Gordon999/PI_imx500_detection
Great, glad it's behaving better, even if we have no idea why!