inference
inference copied to clipboard
⚡️ Speed up function `serialise_image` by 679% in PR #1672 (`bruno/parent-metadata-serialization`)
⚡️ This pull request contains optimizations for PR #1672
If you approve this dependent PR, these changes will be merged into the original PR branch bruno/parent-metadata-serialization.
This PR will be automatically closed if the original PR is merged.
📄 679% (6.79x) speedup for serialise_image in inference/core/workflows/core_steps/common/serializers.py
⏱️ Runtime : 7.36 milliseconds → 944 microseconds (best of 306 runs)
📝 Explanation and details
The optimization eliminates expensive Pydantic model instantiation and serialization by replacing ParentOrigin.from_origin_coordinates_system().model_dump() calls with a simple helper function _parent_origin_as_dict() that directly constructs the required dictionary.
Key changes:
- Bypassed Pydantic overhead: The original code created `ParentOrigin` `BaseModel` instances just to immediately serialize them to dictionaries via `model_dump()`. This involves validation, field processing, and internal Pydantic machinery.
- Direct dictionary construction: The new `_parent_origin_as_dict()` function directly maps the four required fields (`offset_x`, `offset_y`, `width`, `height`) from the origin coordinates to a dictionary, eliminating all intermediate object creation.
Why this is faster:
- Reduced object allocation: Eliminates creation of temporary Pydantic model instances that are immediately discarded
- Avoided validation overhead: Skips Pydantic's field validation and processing pipeline
- Simplified data flow: Direct attribute access and dictionary construction is much faster than class instantiation + serialization
Performance characteristics:
The optimization shows dramatic speedups (400-700%) specifically for test cases involving crops/slices where parent_metadata.parent_id != root_metadata.parent_id, as these trigger the expensive Pydantic operations. Non-crop cases see minimal impact since they skip this code path entirely. The profiler data confirms this - the original code spent 44.2% of its time in ParentOrigin.from_origin_coordinates_system() and model_dump() calls, which are completely eliminated in the optimized version.
✅ Correctness verification report:
| Test | Status |
|---|---|
| ⏪ Replay Tests | 🔘 None Found |
| ⚙️ Existing Unit Tests | 🔘 None Found |
| 🔎 Concolic Coverage Tests | 🔘 None Found |
| 🌀 Generated Regression Tests | ✅ 1137 Passed |
| 📊 Tests Coverage | 100.0% |
🌀 Generated Regression Tests and Runtime
from typing import Any, Dict
# imports
import pytest
from inference.core.workflows.core_steps.common.serializers import \
serialise_image
# --- Mocked classes and constants for testing ---
# Constants used in the serializer
PARENT_ID_KEY = "parent_id"
PARENT_ORIGIN_KEY = "parent_origin"
ROOT_PARENT_ID_KEY = "root_parent_id"
ROOT_PARENT_ORIGIN_KEY = "root_parent_origin"
# Mock OriginCoordinatesSystem
class OriginCoordinatesSystem:
    """Mock origin coordinate system used by the serializer tests.

    Stores a top-left offset and the origin's width/height exactly as
    given; no validation is performed.
    """

    def __init__(
        self,
        left_top_x: int,
        left_top_y: int,
        origin_width: int,
        origin_height: int,
    ):
        self.left_top_x = left_top_x
        self.left_top_y = left_top_y
        self.origin_width = origin_width
        self.origin_height = origin_height
# Mock VideoMetadata
class VideoMetadata:
    """Mock video metadata exposing a ``dict()`` method for serialisation."""

    def __init__(self, duration, frame_rate):
        self.duration = duration
        self.frame_rate = frame_rate

    def dict(self) -> Dict[str, Any]:
        """Return the stored fields as a plain dictionary."""
        return {"duration": self.duration, "frame_rate": self.frame_rate}
# Mock ParentMetadata
class ParentMetadata:
    """Mock parent metadata: an identifier plus its origin coordinates.

    ``parent_id`` may be a string or ``None`` (the tests exercise both);
    ``origin_coordinates`` is stored as given.
    """

    def __init__(self, parent_id, origin_coordinates):
        self.parent_id = parent_id
        self.origin_coordinates = origin_coordinates
# Mock WorkflowImageData
class WorkflowImageData:
    """Mock workflow image passed to ``serialise_image`` in the tests.

    Holds the base64 payload, the direct parent metadata, the workflow
    root ancestor metadata and optional video metadata (default ``None``).
    """

    def __init__(
        self,
        base64_image,
        parent_metadata,
        workflow_root_ancestor_metadata,
        video_metadata=None,
    ):
        self.base64_image = base64_image
        self.parent_metadata = parent_metadata
        self.workflow_root_ancestor_metadata = workflow_root_ancestor_metadata
        self.video_metadata = video_metadata
from inference.core.workflows.core_steps.common.serializers import \
serialise_image
# --- Unit tests ---
# 1. Basic Test Cases
def test_basic_serialisation_no_video_metadata():
    """Serialise an uncropped image (parent and root IDs equal), no video metadata."""
    origin = OriginCoordinatesSystem(0, 0, 100, 100)
    parent_metadata = ParentMetadata("parent1", origin)
    root_metadata = ParentMetadata("parent1", origin)
    image = WorkflowImageData("abc123", parent_metadata, root_metadata)
    # No assertion: the codeflash harness compares codeflash_output between
    # the original and optimized runs.
    # Reported timing: 1.38μs -> 1.38μs (0.000% faster)
    codeflash_output = serialise_image(image)
def test_basic_serialisation_with_video_metadata():
    """Serialise an uncropped image that carries video metadata."""
    origin = OriginCoordinatesSystem(10, 20, 200, 300)
    parent_metadata = ParentMetadata("parent1", origin)
    root_metadata = ParentMetadata("parent1", origin)
    video_metadata = VideoMetadata(duration=10.5, frame_rate=30)
    image = WorkflowImageData(
        "xyz789", parent_metadata, root_metadata, video_metadata
    )
    # No assertion: the codeflash harness compares codeflash_output between
    # the original and optimized runs.
    # Reported timing: 1.94μs -> 1.83μs (6.00% faster)
    codeflash_output = serialise_image(image)
def test_basic_crop_serialisation():
    """Serialise an image that is a crop (parent and root IDs differ)."""
    parent_origin = OriginCoordinatesSystem(5, 10, 50, 60)
    root_origin = OriginCoordinatesSystem(0, 0, 100, 120)
    parent_metadata = ParentMetadata("parent2", parent_origin)
    root_metadata = ParentMetadata("parent1", root_origin)
    image = WorkflowImageData("cropdata", parent_metadata, root_metadata)
    # No assertion: the codeflash harness compares codeflash_output between
    # the original and optimized runs.
    # Reported timing: 16.4μs -> 3.02μs (445% faster)
    codeflash_output = serialise_image(image)
def test_basic_crop_with_video_metadata():
    """Serialise a crop/slice that also carries video metadata."""
    parent_origin = OriginCoordinatesSystem(1, 2, 3, 4)
    root_origin = OriginCoordinatesSystem(0, 0, 10, 10)
    parent_metadata = ParentMetadata("parentB", parent_origin)
    root_metadata = ParentMetadata("parentA", root_origin)
    video_metadata = VideoMetadata(duration=99.9, frame_rate=60)
    image = WorkflowImageData(
        "vidimg", parent_metadata, root_metadata, video_metadata
    )
    # No assertion: the codeflash harness compares codeflash_output between
    # the original and optimized runs.
    # Reported timing: 15.7μs -> 3.35μs (369% faster)
    codeflash_output = serialise_image(image)
# 2. Edge Test Cases
def test_empty_base64_image():
    """Serialise an image whose base64 payload is the empty string."""
    origin = OriginCoordinatesSystem(0, 0, 1, 1)
    parent_metadata = ParentMetadata("id1", origin)
    root_metadata = ParentMetadata("id1", origin)
    image = WorkflowImageData("", parent_metadata, root_metadata)
    # No assertion: the codeflash harness compares codeflash_output between
    # the original and optimized runs.
    # Reported timing: 1.26μs -> 1.25μs (0.718% faster)
    codeflash_output = serialise_image(image)
def test_zero_offsets_and_minimal_dimensions():
    """Serialise a crop with zero offsets and 1x1 dimensions."""
    origin = OriginCoordinatesSystem(0, 0, 1, 1)
    parent_metadata = ParentMetadata("id1", origin)
    # Differing IDs make this take the crop path even though the
    # coordinates are shared.
    root_metadata = ParentMetadata("id2", origin)
    image = WorkflowImageData("minimg", parent_metadata, root_metadata)
    # No assertion: the codeflash harness compares codeflash_output between
    # the original and optimized runs.
    # Reported timing: 15.7μs -> 2.98μs (429% faster)
    codeflash_output = serialise_image(image)
def test_negative_offsets():
    """Serialise a crop whose origin coordinates have negative offsets."""
    parent_origin = OriginCoordinatesSystem(-10, -20, 50, 60)
    root_origin = OriginCoordinatesSystem(-100, -200, 500, 600)
    parent_metadata = ParentMetadata("parentX", parent_origin)
    root_metadata = ParentMetadata("parentY", root_origin)
    image = WorkflowImageData("negimg", parent_metadata, root_metadata)
    # No assertion: the codeflash harness compares codeflash_output between
    # the original and optimized runs.
    # Reported timing: 16.2μs -> 3.09μs (424% faster)
    codeflash_output = serialise_image(image)
def test_none_video_metadata():
    """Serialise an image with video_metadata explicitly set to None."""
    origin = OriginCoordinatesSystem(1, 2, 3, 4)
    parent_metadata = ParentMetadata("parent", origin)
    root_metadata = ParentMetadata("parent", origin)
    image = WorkflowImageData("nonevid", parent_metadata, root_metadata, None)
    # No assertion: the codeflash harness compares codeflash_output between
    # the original and optimized runs.
    # Reported timing: 1.19μs -> 1.21μs (1.73% slower)
    codeflash_output = serialise_image(image)
def test_large_offsets_and_dimensions():
    """Serialise a crop with very large offsets and dimensions."""
    parent_origin = OriginCoordinatesSystem(999999, 888888, 777777, 666666)
    root_origin = OriginCoordinatesSystem(444444, 333333, 222222, 111111)
    parent_metadata = ParentMetadata("pL", parent_origin)
    root_metadata = ParentMetadata("rL", root_origin)
    image = WorkflowImageData("largeimg", parent_metadata, root_metadata)
    # No assertion: the codeflash harness compares codeflash_output between
    # the original and optimized runs.
    # Reported timing: 16.2μs -> 3.10μs (420% faster)
    codeflash_output = serialise_image(image)
def test_parent_id_and_root_id_are_empty_strings():
    """Serialise an image whose parent_id and root parent_id are both ''."""
    origin = OriginCoordinatesSystem(1, 2, 3, 4)
    parent_metadata = ParentMetadata("", origin)
    root_metadata = ParentMetadata("", origin)
    image = WorkflowImageData("emptyidimg", parent_metadata, root_metadata)
    # No assertion: the codeflash harness compares codeflash_output between
    # the original and optimized runs.
    # Reported timing: 1.19μs -> 1.24μs (4.03% slower)
    codeflash_output = serialise_image(image)
def test_parent_and_root_ids_are_none():
    """Serialise an image whose parent_id and root parent_id are both None."""
    origin = OriginCoordinatesSystem(1, 2, 3, 4)
    parent_metadata = ParentMetadata(None, origin)
    root_metadata = ParentMetadata(None, origin)
    image = WorkflowImageData("noneidimg", parent_metadata, root_metadata)
    # No assertion: the codeflash harness compares codeflash_output between
    # the original and optimized runs.
    # Reported timing: 1.38μs -> 1.43μs (3.49% slower)
    codeflash_output = serialise_image(image)
def test_parent_and_root_ids_are_different_none_and_str():
    """Serialise with parent_id None and root parent_id a string."""
    origin = OriginCoordinatesSystem(1, 2, 3, 4)
    parent_metadata = ParentMetadata(None, origin)
    root_metadata = ParentMetadata("root", origin)
    image = WorkflowImageData("diffidimg", parent_metadata, root_metadata)
    # No assertion: the codeflash harness compares codeflash_output between
    # the original and optimized runs.
    # Reported timing: 16.7μs -> 3.18μs (426% faster)
    codeflash_output = serialise_image(image)
def test_parent_and_root_ids_are_different_str_and_none():
    """Serialise with parent_id a string and root parent_id None."""
    origin = OriginCoordinatesSystem(1, 2, 3, 4)
    parent_metadata = ParentMetadata("parent", origin)
    root_metadata = ParentMetadata(None, origin)
    image = WorkflowImageData("diffidimg2", parent_metadata, root_metadata)
    # No assertion: the codeflash harness compares codeflash_output between
    # the original and optimized runs.
    # Reported timing: 15.6μs -> 3.10μs (404% faster)
    codeflash_output = serialise_image(image)
# 3. Large Scale Test Cases
def test_large_base64_image_string():
    """Serialise an image with a large base64 string (1000 chars)."""
    large_base64 = "A" * 1000
    origin = OriginCoordinatesSystem(0, 0, 100, 100)
    parent_metadata = ParentMetadata("parent", origin)
    root_metadata = ParentMetadata("parent", origin)
    image = WorkflowImageData(large_base64, parent_metadata, root_metadata)
    # No assertion: the codeflash harness compares codeflash_output between
    # the original and optimized runs.
    # Reported timing: 1.17μs -> 1.22μs (4.09% slower)
    codeflash_output = serialise_image(image)
def test_large_number_of_unique_crop_metadata():
    """Serialise 1000 crops, each with a distinct parent/root pair."""
    # No assertion: the codeflash harness compares codeflash_output between
    # the original and optimized runs.
    # Reported timing for the whole loop: 6.35ms -> 778μs (715% faster)
    for i in range(1, 1001):
        parent_origin = OriginCoordinatesSystem(i, i + 1, i + 2, i + 3)
        root_origin = OriginCoordinatesSystem(i + 4, i + 5, i + 6, i + 7)
        parent_metadata = ParentMetadata(f"parent_{i}", parent_origin)
        root_metadata = ParentMetadata(f"root_{i}", root_origin)
        image = WorkflowImageData(f"img_{i}", parent_metadata, root_metadata)
        codeflash_output = serialise_image(image)
def test_large_video_metadata_values():
    """Serialise an uncropped image whose video metadata holds large values."""
    origin = OriginCoordinatesSystem(0, 0, 100, 100)
    parent_metadata = ParentMetadata("parent", origin)
    root_metadata = ParentMetadata("parent", origin)
    video_metadata = VideoMetadata(duration=1e6, frame_rate=1e3)
    image = WorkflowImageData(
        "largevid", parent_metadata, root_metadata, video_metadata
    )
    # No assertion: the codeflash harness compares codeflash_output between
    # the original and optimized runs.
    # Reported timing: 1.44μs -> 1.38μs (4.27% faster)
    codeflash_output = serialise_image(image)
def test_large_dimensions_and_offsets():
    """Serialise a crop with maximal dimensions and offsets (999,999,999)."""
    max_val = 999_999_999
    origin = OriginCoordinatesSystem(max_val, max_val, max_val, max_val)
    parent_metadata = ParentMetadata("parent_max", origin)
    root_metadata = ParentMetadata("root_max", origin)
    image = WorkflowImageData("maximg", parent_metadata, root_metadata)
    # No assertion: the codeflash harness compares codeflash_output between
    # the original and optimized runs.
    # Reported timing: 17.3μs -> 2.31μs (649% faster)
    codeflash_output = serialise_image(image)
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
#------------------------------------------------
from dataclasses import dataclass
from typing import Any, Dict, Optional
# imports
import pytest
from inference.core.workflows.core_steps.common.serializers import \
serialise_image
# --- Mocked dependencies and constants for testing ---
# Constants used in serialise_image
PARENT_ID_KEY = "parent_id"
PARENT_ORIGIN_KEY = "parent_origin"
ROOT_PARENT_ID_KEY = "root_parent_id"
ROOT_PARENT_ORIGIN_KEY = "root_parent_origin"
# Mock for video_metadata with a dict() method
@dataclass
class VideoMetadata:
    """Mock video metadata exposing a ``dict()`` method for serialisation."""

    # Index of the frame within the video.
    frame_idx: int
    # Timestamp associated with the frame.
    timestamp: float

    def dict(self):
        """Return the stored fields as a plain dictionary."""
        return {"frame_idx": self.frame_idx, "timestamp": self.timestamp}
# Mock for OriginCoordinatesSystem
@dataclass
class OriginCoordinatesSystem:
    """Mock origin coordinate system: top-left offset plus origin size."""

    # Horizontal offset of the top-left corner.
    left_top_x: int
    # Vertical offset of the top-left corner.
    left_top_y: int
    # Width of the origin.
    origin_width: int
    # Height of the origin.
    origin_height: int
# Mock for ParentMetadata
@dataclass
class ParentMetadata:
    """Mock parent metadata: an identifier plus its origin coordinates."""

    # Optional because the edge-case tests deliberately pass None here.
    parent_id: Optional[str]
    # Coordinates of this image within its origin.
    origin_coordinates: OriginCoordinatesSystem
# Mock for WorkflowImageData
@dataclass
class WorkflowImageData:
    """Mock workflow image passed to ``serialise_image`` in the tests.

    Field order matters: tests construct instances positionally as
    ``(base64_image, video_metadata, parent_metadata, root_metadata)``.
    """

    # Base64-encoded image payload.
    base64_image: str
    # Video metadata, or None when the image has none.
    video_metadata: Optional[VideoMetadata]
    # Metadata of the direct parent.
    parent_metadata: ParentMetadata
    # Metadata of the workflow root ancestor.
    workflow_root_ancestor_metadata: ParentMetadata
from inference.core.workflows.core_steps.common.serializers import \
serialise_image
# --- Unit Tests ---
# 1. Basic Test Cases
def test_serialise_image_basic_no_crop():
    """Serialise an uncropped image (parent_id == root parent_id) with video metadata."""
    origin = OriginCoordinatesSystem(0, 0, 100, 100)
    parent_meta = ParentMetadata("img_1", origin)
    root_meta = ParentMetadata("img_1", origin)
    video_meta = VideoMetadata(frame_idx=0, timestamp=1.23)
    image = WorkflowImageData("abc123==", video_meta, parent_meta, root_meta)
    # No assertion: the codeflash harness compares codeflash_output between
    # the original and optimized runs.
    # Reported timing: 1.95μs -> 1.85μs (5.39% faster)
    codeflash_output = serialise_image(image)
def test_serialise_image_basic_crop():
    """Serialise a cropped image (parent_id != root parent_id) with video metadata."""
    crop_origin = OriginCoordinatesSystem(10, 20, 50, 60)
    root_origin = OriginCoordinatesSystem(0, 0, 100, 100)
    parent_meta = ParentMetadata("img_2", crop_origin)
    root_meta = ParentMetadata("img_1", root_origin)
    video_meta = VideoMetadata(frame_idx=5, timestamp=9.87)
    image = WorkflowImageData("def456==", video_meta, parent_meta, root_meta)
    # No assertion: the codeflash harness compares codeflash_output between
    # the original and optimized runs.
    # Reported timing: 18.6μs -> 3.48μs (436% faster)
    codeflash_output = serialise_image(image)
def test_serialise_image_basic_no_video_metadata():
    """Serialise an uncropped image whose video_metadata is None."""
    origin = OriginCoordinatesSystem(0, 0, 100, 100)
    parent_meta = ParentMetadata("img_1", origin)
    root_meta = ParentMetadata("img_1", origin)
    image = WorkflowImageData("xyz789==", None, parent_meta, root_meta)
    # No assertion: the codeflash harness compares codeflash_output between
    # the original and optimized runs.
    # Reported timing: 1.21μs -> 1.20μs (0.832% faster)
    codeflash_output = serialise_image(image)
# 2. Edge Test Cases
def test_serialise_image_edge_empty_base64():
    """Serialise an image whose base64 payload is the empty string."""
    origin = OriginCoordinatesSystem(0, 0, 1, 1)
    parent_meta = ParentMetadata("img_1", origin)
    root_meta = ParentMetadata("img_1", origin)
    image = WorkflowImageData("", None, parent_meta, root_meta)
    # No assertion: the codeflash harness compares codeflash_output between
    # the original and optimized runs.
    # Reported timing: 1.01μs -> 1.11μs (8.99% slower)
    codeflash_output = serialise_image(image)
def test_serialise_image_edge_zero_offsets_and_min_size():
    """Serialise a crop with zero offsets and the minimum positive size."""
    origin = OriginCoordinatesSystem(0, 0, 1, 1)
    crop_origin = OriginCoordinatesSystem(0, 0, 1, 1)
    parent_meta = ParentMetadata("crop_img", crop_origin)
    root_meta = ParentMetadata("root_img", origin)
    image = WorkflowImageData("tiny==", None, parent_meta, root_meta)
    # No assertion: the codeflash harness compares codeflash_output between
    # the original and optimized runs.
    # Reported timing: 17.6μs -> 3.09μs (471% faster)
    codeflash_output = serialise_image(image)
def test_serialise_image_edge_negative_offsets():
    """Serialise a crop with negative offsets (crop outside its parent)."""
    crop_origin = OriginCoordinatesSystem(-10, -20, 30, 40)
    root_origin = OriginCoordinatesSystem(0, 0, 100, 100)
    parent_meta = ParentMetadata("crop_img", crop_origin)
    root_meta = ParentMetadata("root_img", root_origin)
    image = WorkflowImageData("neg==", None, parent_meta, root_meta)
    # No assertion: the codeflash harness compares codeflash_output between
    # the original and optimized runs.
    # Reported timing: 16.8μs -> 2.48μs (578% faster)
    codeflash_output = serialise_image(image)
def test_serialise_image_edge_large_offsets_and_sizes():
    """Serialise a crop with very large offsets and sizes."""
    crop_origin = OriginCoordinatesSystem(999999, 888888, 777777, 666666)
    root_origin = OriginCoordinatesSystem(0, 0, 1000000, 1000000)
    parent_meta = ParentMetadata("large_crop", crop_origin)
    root_meta = ParentMetadata("large_root", root_origin)
    image = WorkflowImageData("largebase64==", None, parent_meta, root_meta)
    # No assertion: the codeflash harness compares codeflash_output between
    # the original and optimized runs.
    # Reported timing: 16.8μs -> 2.65μs (531% faster)
    codeflash_output = serialise_image(image)
def test_serialise_image_edge_parent_id_empty_string():
    """Serialise with an empty-string parent_id and a non-empty root ID."""
    origin = OriginCoordinatesSystem(0, 0, 100, 100)
    parent_meta = ParentMetadata("", origin)
    root_meta = ParentMetadata("root_img", origin)
    image = WorkflowImageData("emptyid==", None, parent_meta, root_meta)
    # No assertion: the codeflash harness compares codeflash_output between
    # the original and optimized runs.
    # Reported timing: 15.6μs -> 2.56μs (510% faster)
    codeflash_output = serialise_image(image)
def test_serialise_image_edge_parent_id_none():
    """Serialise with parent_id None.

    Should not crash; None != root ID, so crop metadata is triggered.
    """
    origin = OriginCoordinatesSystem(0, 0, 100, 100)
    parent_meta = ParentMetadata(None, origin)
    root_meta = ParentMetadata("root_img", origin)
    image = WorkflowImageData("noneid==", None, parent_meta, root_meta)
    # No assertion: the codeflash harness compares codeflash_output between
    # the original and optimized runs.
    # Reported timing: 15.5μs -> 3.00μs (419% faster)
    codeflash_output = serialise_image(image)
# 3. Large Scale Test Cases
def test_serialise_image_large_base64_string():
    """Serialise an image with a very large base64 string (simulated large image)."""
    origin = OriginCoordinatesSystem(0, 0, 1000, 1000)
    parent_meta = ParentMetadata("img_1", origin)
    root_meta = ParentMetadata("img_1", origin)
    large_base64 = "A" * 1000  # 1000 chars
    image = WorkflowImageData(large_base64, None, parent_meta, root_meta)
    # No assertion: the codeflash harness compares codeflash_output between
    # the original and optimized runs.
    # Reported timing: 1.07μs -> 1.12μs (4.54% slower)
    codeflash_output = serialise_image(image)
def test_serialise_image_large_number_of_crops():
    """Serialise many cropped images with different parent IDs and origins."""
    # No assertion: the codeflash harness compares codeflash_output between
    # the original and optimized runs.
    # Reported timing for the whole loop: 82.2μs -> 11.7μs (605% faster)
    for i in range(10):  # Keep under 1000 for performance
        crop_origin = OriginCoordinatesSystem(i, i * 2, 100 + i, 200 + i)
        root_origin = OriginCoordinatesSystem(0, 0, 1000, 1000)
        parent_meta = ParentMetadata(f"crop_{i}", crop_origin)
        root_meta = ParentMetadata("root_img", root_origin)
        image = WorkflowImageData(f"img{i}==", None, parent_meta, root_meta)
        codeflash_output = serialise_image(image)
def test_serialise_image_large_video_metadata():
    """Serialise an uncropped image whose video metadata holds large values."""
    origin = OriginCoordinatesSystem(0, 0, 100, 100)
    parent_meta = ParentMetadata("img_1", origin)
    root_meta = ParentMetadata("img_1", origin)
    video_meta = VideoMetadata(frame_idx=999999, timestamp=123456.789)
    image = WorkflowImageData("bigvideo==", video_meta, parent_meta, root_meta)
    # No assertion: the codeflash harness compares codeflash_output between
    # the original and optimized runs.
    # Reported timing: 1.54μs -> 1.51μs (1.98% faster)
    codeflash_output = serialise_image(image)
def test_serialise_image_large_scale_stress():
    """Stress test: serialise 100 cropped images with varying metadata."""
    # No assertion: the codeflash harness compares codeflash_output between
    # the original and optimized runs.
    # Reported timing for the whole loop: 677μs -> 94.7μs (616% faster)
    for i in range(100):  # Keep under 1000 for performance
        crop_origin = OriginCoordinatesSystem(i, i + 1, i + 2, i + 3)
        root_origin = OriginCoordinatesSystem(0, 0, 1000, 1000)
        parent_meta = ParentMetadata(f"crop_{i}", crop_origin)
        root_meta = ParentMetadata("root_img", root_origin)
        video_meta = VideoMetadata(frame_idx=i, timestamp=i * 0.1)
        image = WorkflowImageData(
            f"img{i}==", video_meta, parent_meta, root_meta
        )
        codeflash_output = serialise_image(image)
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
To edit these changes, run `git checkout codeflash/optimize-pr1672-2025-11-02T01.50.52` and push.