langflow
langflow copied to clipboard
⚡️ Speed up function `get_artifact_type` by 32% in PR #6981 (`data-to-json`)
⚡️ This pull request contains optimizations for PR #6981
If you approve this dependent PR, these changes will be merged into the original PR branch data-to-json.
This PR will be automatically closed if the original PR is merged.
📄 32% (0.32x) speedup for get_artifact_type in src/backend/base/langflow/graph/utils.py
⏱️ Runtime : 168 microseconds → 128 microseconds (best of 155 runs)
📝 Explanation and details
To optimize the given Python program for faster execution, we can focus on reducing unnecessary operations and streamlining type checks. Here's the optimized version.
Changes Made.
- Removed the `match-case` statement: Although `match-case` (structural pattern matching) is elegant, it can introduce overhead compared to direct type checks with `isinstance`.
- Reordered checks for efficiency: The checks for `JSON`, `str`, `dict`, `list` and `Message` appear first, before analyzing the `build_result`.
- Consolidated `Generator` checks: Only one final check for `Generator` is needed if none of the other types match.
- Immediate returns: Instead of setting a variable and checking again at the end, the function returns the result immediately when the type is determined.
This reduces the overhead of multiple checks and simplifies the logic flow, making the function more efficient.
✅ Correctness verification report:
| Test | Status |
|---|---|
| ⚙️ Existing Unit Tests | 🔘 None Found |
| 🌀 Generated Regression Tests | ✅ 42 Passed |
| ⏪ Replay Tests | 🔘 None Found |
| 🔎 Concolic Coverage Tests | 🔘 None Found |
| 📊 Tests Coverage |
🌀 Generated Regression Tests Details
from __future__ import annotations
import json
from collections.abc import Generator
from enum import Enum
from typing import Any
from uuid import UUID
# imports
import pytest # used for our unit tests
from langflow.graph.utils import get_artifact_type
from loguru import logger
class ArtifactType(str, Enum):
    """String-valued enumeration of artifact type labels.

    Because ``str`` is mixed in, every member is itself a string and
    compares equal to its plain value (e.g. ``ArtifactType.TEXT == "text"``).
    """

    TEXT = "text"
    RECORD = "record"
    OBJECT = "object"
    ARRAY = "array"
    STREAM = "stream"
    UNKNOWN = "unknown"
    MESSAGE = "message"
class JSON:
    """Simple container, defined locally for these tests, holding a payload.

    Args:
        data: The payload to keep on the instance.
    """

    def __init__(self, data):
        # Kept by reference: no copy and no validation.
        self.data = data
class Message(JSON):
    """``JSON`` subclass, defined locally for these tests, with text and sender.

    Args:
        text: Message text; defaults to the empty string.
        sender: Sender of the message; defaults to ``None``.
    """

    def __init__(self, text="", sender=None):
        # The inherited ``data`` payload always starts as a fresh empty dict.
        super().__init__(data={})
        self.text = text
        self.sender = sender
from langflow.graph.utils import get_artifact_type
# unit tests
def test_basic_types():
    """Call ``get_artifact_type`` on one value of each basic kind.

    ``None`` is passed as the second argument each time. The results are only
    captured in ``codeflash_output``; nothing is asserted in this function.
    """
    # Test JSON object
    codeflash_output = get_artifact_type(JSON(data={"key": "value"}), None)
    # Test string
    codeflash_output = get_artifact_type("This is a string", None)
    # Test dictionary
    codeflash_output = get_artifact_type({"key": "value"}, None)
    # Test list
    codeflash_output = get_artifact_type([1, 2, 3], None)
    # Test Message object
    codeflash_output = get_artifact_type(Message(text="Hello", sender="user"), None)
def test_edge_cases():
    """Call ``get_artifact_type`` on empty containers, an empty string and ``None``.

    The results are only captured in ``codeflash_output``; nothing is asserted
    in this function.
    """
    # Test empty string
    codeflash_output = get_artifact_type("", None)
    # Test empty dictionary
    codeflash_output = get_artifact_type({}, None)
    # Test empty list
    codeflash_output = get_artifact_type([], None)
    # Test None value
    codeflash_output = get_artifact_type(None, None)
def test_mixed_types():
    """Call ``get_artifact_type`` on a dict and a list with heterogeneous contents.

    The results are only captured in ``codeflash_output``; nothing is asserted
    in this function.
    """
    # Test dictionary with mixed types
    codeflash_output = get_artifact_type(
        {"key1": "value1", "key2": [1, 2, 3], "key3": {"nested_key": "nested_value"}},
        None,
    )
    # Test list with mixed types
    codeflash_output = get_artifact_type(["string", 123, {"key": "value"}, [1, 2, 3]], None)
def test_complex_nested_structures():
    """Call ``get_artifact_type`` on a nested ``JSON`` payload and a list of dicts.

    The results are only captured in ``codeflash_output``; nothing is asserted
    in this function.
    """
    # Test nested JSON object
    codeflash_output = get_artifact_type(JSON(data={"key": {"nested_key": "nested_value"}}), None)
    # Test nested list of dictionaries
    codeflash_output = get_artifact_type([{"key": "value"}, {"key2": "value2"}], None)
def test_large_scale():
    """Call ``get_artifact_type`` on a 1000-entry dict and a 1000-item list.

    The results are only captured in ``codeflash_output``; nothing is asserted
    in this function.
    """
    # Test large dictionary
    large_dict = {f"key_{i}": f"value_{i}" for i in range(1000)}
    codeflash_output = get_artifact_type(large_dict, None)
    # Test large list (built directly from the range, no identity comprehension)
    large_list = list(range(1000))
    codeflash_output = get_artifact_type(large_list, None)
def test_invalid_and_unexpected_types():
    """Call ``get_artifact_type`` on an int, a float, a bool and a bare ``object``.

    The results are only captured in ``codeflash_output``; nothing is asserted
    in this function.
    """
    # Test integer
    codeflash_output = get_artifact_type(123, None)
    # Test float
    codeflash_output = get_artifact_type(123.45, None)
    # Test boolean
    codeflash_output = get_artifact_type(True, None)
    # Test custom object
    codeflash_output = get_artifact_type(object(), None)
from __future__ import annotations
import json
from collections.abc import Generator
from enum import Enum
from typing import Any
from uuid import UUID
# imports
import pytest # used for our unit tests
from langflow.graph.utils import get_artifact_type
class ArtifactType(str, Enum):
    """Enumeration of artifact type labels whose members are also strings.

    The ``str`` mixin lets a member be compared directly with its value,
    e.g. ``ArtifactType.ARRAY == "array"``.
    """

    TEXT = "text"
    RECORD = "record"
    OBJECT = "object"
    ARRAY = "array"
    STREAM = "stream"
    UNKNOWN = "unknown"
    MESSAGE = "message"
class JSON:
    """Simple container, defined locally for these tests, holding a payload.

    Args:
        data: The payload to keep on the instance. When omitted or ``None``,
            a new empty dict is used instead.
    """

    def __init__(self, data=None):
        # Only ``None`` triggers the default; other falsy payloads are kept.
        self.data = data if data is not None else {}
class Message(JSON):
    """``JSON`` subclass, defined locally for these tests, with text and sender.

    Args:
        text: Message text; defaults to ``None``.
        sender: Sender of the message; defaults to ``None``.
    """

    def __init__(self, text=None, sender=None):
        # No payload is passed, so the parent's default for ``data`` applies.
        super().__init__()
        self.text = text
        self.sender = sender
from langflow.graph.utils import get_artifact_type
# unit tests
def test_basic_json():
    """Call ``get_artifact_type`` on a default ``JSON`` instance; nothing is asserted here."""
    # Test with JSON type
    codeflash_output = get_artifact_type(JSON(), None)
def test_basic_string():
    """Call ``get_artifact_type`` on a plain string; nothing is asserted here."""
    # Test with string type
    codeflash_output = get_artifact_type("example text", None)
def test_basic_dict():
    """Call ``get_artifact_type`` on a small dict; nothing is asserted here."""
    # Test with dictionary type
    codeflash_output = get_artifact_type({"key": "value"}, None)
def test_basic_list():
    """Call ``get_artifact_type`` on a small list; nothing is asserted here."""
    # Test with list type
    codeflash_output = get_artifact_type([1, 2, 3], None)
def test_basic_message():
    """Call ``get_artifact_type`` on a default ``Message``; nothing is asserted here."""
    # Test with Message type
    codeflash_output = get_artifact_type(Message(), None)
def test_unknown_integer():
    """Call ``get_artifact_type`` on an integer; nothing is asserted here."""
    # Test with integer type
    codeflash_output = get_artifact_type(42, None)
def test_unknown_float():
    """Call ``get_artifact_type`` on a float; nothing is asserted here."""
    # Test with float type
    codeflash_output = get_artifact_type(3.14, None)
def test_unknown_custom_object():
    """Call ``get_artifact_type`` on an instance of a locally defined empty class.

    Nothing is asserted in this function.
    """
    # Test with custom object type
    class CustomObject:
        pass

    codeflash_output = get_artifact_type(CustomObject(), None)
def test_edge_empty_string():
    """Call ``get_artifact_type`` on the empty string; nothing is asserted here."""
    # Test with empty string
    codeflash_output = get_artifact_type("", None)
def test_edge_empty_dict():
    """Call ``get_artifact_type`` on an empty dict; nothing is asserted here."""
    # Test with empty dictionary
    codeflash_output = get_artifact_type({}, None)
def test_edge_empty_list():
    """Call ``get_artifact_type`` on an empty list; nothing is asserted here."""
    # Test with empty list
    codeflash_output = get_artifact_type([], None)
def test_edge_none_value():
    """Call ``get_artifact_type`` with ``None`` for both arguments; nothing is asserted here."""
    # Test with None value
    codeflash_output = get_artifact_type(None, None)
def test_edge_boolean_value():
    """Call ``get_artifact_type`` on ``True``; nothing is asserted here."""
    # Test with boolean value
    codeflash_output = get_artifact_type(True, None)
def test_complex_nested_dict():
    """Call ``get_artifact_type`` on a dict containing a dict; nothing is asserted here."""
    # Test with nested dictionary
    codeflash_output = get_artifact_type({"nested": {"key": "value"}}, None)
def test_complex_list_of_dicts():
    """Call ``get_artifact_type`` on a list of dicts; nothing is asserted here."""
    # Test with list of dictionaries
    codeflash_output = get_artifact_type([{"key1": "value1"}, {"key2": "value2"}], None)
def test_complex_message_with_nested_json():
    """Call ``get_artifact_type`` on a ``Message`` whose ``text`` is a ``JSON`` instance.

    Nothing is asserted in this function.
    """
    # Test with Message having nested JSON
    codeflash_output = get_artifact_type(Message(text=JSON(data={"key": "value"})), None)
def test_large_scale_list():
    """Call ``get_artifact_type`` on a 1000-item list; nothing is asserted here."""
    # Test with large list
    codeflash_output = get_artifact_type(list(range(1000)), None)
def test_large_scale_dict():
    """Call ``get_artifact_type`` on a 1000-entry dict; nothing is asserted here."""
    # Test with large dictionary
    codeflash_output = get_artifact_type({i: i for i in range(1000)}, None)
def test_large_scale_json():
    """Call ``get_artifact_type`` on a ``JSON`` wrapping a 1000-entry dict.

    Nothing is asserted in this function.
    """
    # Test with large JSON
    codeflash_output = get_artifact_type(JSON(data={i: i for i in range(1000)}), None)
def test_large_scale_message():
    """Call ``get_artifact_type`` on a ``Message`` with a one-million-character text.

    Nothing is asserted in this function.
    """
    # Test with large Message
    codeflash_output = get_artifact_type(Message(text="a" * 1000000), None)
def test_mixed_list():
    """Call ``get_artifact_type`` on a list of mixed element types; nothing is asserted here."""
    # Test with list of mixed types
    codeflash_output = get_artifact_type([1, "text", {"key": "value"}], None)
def test_special_characters_string():
    """Call ``get_artifact_type`` on a punctuation-only string; nothing is asserted here."""
    # Test with string containing special characters
    codeflash_output = get_artifact_type("!@#$%^&*()", None)
def test_special_characters_dict():
    """Call ``get_artifact_type`` on a dict with punctuation-only key and value.

    Nothing is asserted in this function.
    """
    # Test with dictionary containing special characters in keys
    codeflash_output = get_artifact_type({"!@#": "$%^"}, None)
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
To edit these changes, run `git checkout codeflash/optimize-pr6981-2025-04-24T19.42.55` and push.