langflow icon indicating copy to clipboard operation
langflow copied to clipboard

⚡️ Speed up function `get_artifact_type` by 32% in PR #6981 (`data-to-json`)

Open codeflash-ai[bot] opened this issue 7 months ago • 0 comments

⚡️ This pull request contains optimizations for PR #6981

If you approve this dependent PR, these changes will be merged into the original PR branch data-to-json.

This PR will be automatically closed if the original PR is merged.


📄 32% (0.32x) speedup for get_artifact_type in src/backend/base/langflow/graph/utils.py

⏱️ Runtime: 168 microseconds → 128 microseconds (best of 155 runs)

📝 Explanation and details

To optimize the given Python program for faster execution, we can focus on reducing unnecessary operations and streamlining type checks. Here's the optimized version.

Changes Made.

  1. Removed the match-case statement: Although match-case (structural pattern matching) is elegant, it can introduce overhead compared to direct type checks with isinstance.
  2. Reordered checks for efficiency: The checks for JSON, str, dict, list, and Message now run before build_result is examined.
  3. Consolidated Generator checks: Only one final check for Generator is needed if none of the other types match.
  4. Immediate returns: Instead of setting a variable and checking again at the end, the function returns the result immediately when the type is determined.

This reduces the overhead of multiple checks and simplifies the logic flow, making the function more efficient.

Correctness verification report:

Test Status
⚙️ Existing Unit Tests 🔘 None Found
🌀 Generated Regression Tests 42 Passed
⏪ Replay Tests 🔘 None Found
🔎 Concolic Coverage Tests 🔘 None Found
📊 Tests Coverage
🌀 Generated Regression Tests Details
from __future__ import annotations

import json
from collections.abc import Generator
from enum import Enum
from typing import Any
from uuid import UUID

# imports
import pytest  # used for our unit tests
from langflow.graph.utils import get_artifact_type
from loguru import logger


class ArtifactType(str, Enum):
    """String-valued enumeration of artifact type labels.

    Because the class mixes in ``str``, every member is itself a string and
    compares equal to its raw value (e.g. ``ArtifactType.TEXT == "text"``).
    """

    # NOTE(review): presumably mirrors the enum used inside
    # langflow.graph.utils -- confirm against that module.
    TEXT = "text"
    RECORD = "record"
    OBJECT = "object"
    ARRAY = "array"
    STREAM = "stream"
    UNKNOWN = "unknown"
    MESSAGE = "message"

class JSON:
    """Minimal container that exposes the payload it is given as ``data``."""

    def __init__(self, data):
        """Keep a reference to *data* (no copy is made) on ``self.data``."""
        self.data = data

class Message(JSON):
    """JSON subclass that additionally carries ``text`` and ``sender``."""

    def __init__(self, text="", sender=None):
        """Initialise with an empty ``data`` dict, then record *text* and *sender*."""
        # A new dict literal is created on every call, so instances never
        # share the same ``data`` object.
        super().__init__(data={})
        self.text = text
        self.sender = sender
from langflow.graph.utils import get_artifact_type


# unit tests
def test_basic_types():
    """Call get_artifact_type once per basic input kind, in a fixed order.

    Each result is bound to ``codeflash_output``; nothing is asserted here.
    """
    samples = (
        JSON(data={"key": "value"}),  # JSON object
        "This is a string",  # string
        {"key": "value"},  # dictionary
        [1, 2, 3],  # list
        Message(text="Hello", sender="user"),  # Message object
    )
    for sample in samples:
        codeflash_output = get_artifact_type(sample, None)

def test_edge_cases():
    """Call get_artifact_type with empty containers and with None."""
    edge_inputs = (
        "",  # empty string
        {},  # empty dictionary
        [],  # empty list
        None,  # None value
    )
    for edge_input in edge_inputs:
        codeflash_output = get_artifact_type(edge_input, None)

def test_mixed_types():
    """Call get_artifact_type with a dict and a list holding mixed value types."""
    mixed_dict = {
        "key1": "value1",
        "key2": [1, 2, 3],
        "key3": {"nested_key": "nested_value"},
    }
    codeflash_output = get_artifact_type(mixed_dict, None)

    mixed_list = ["string", 123, {"key": "value"}, [1, 2, 3]]
    codeflash_output = get_artifact_type(mixed_list, None)


def test_complex_nested_structures():
    """Call get_artifact_type with a nested JSON payload and a list of dicts."""
    nested_json = JSON(data={"key": {"nested_key": "nested_value"}})
    codeflash_output = get_artifact_type(nested_json, None)

    dict_list = [{"key": "value"}, {"key2": "value2"}]
    codeflash_output = get_artifact_type(dict_list, None)

def test_large_scale():
    """Call get_artifact_type with a 1000-entry dict and a 1000-element list.

    Each result is bound to ``codeflash_output``; nothing is asserted here.
    """
    # Test large dictionary
    large_dict = {f"key_{i}": f"value_{i}" for i in range(1000)}
    codeflash_output = get_artifact_type(large_dict, None)
    # Test large list; list(range(...)) replaces the identity comprehension.
    large_list = list(range(1000))
    codeflash_output = get_artifact_type(large_list, None)

def test_invalid_and_unexpected_types():
    """Call get_artifact_type with an int, a float, a bool and a bare object."""
    unexpected_inputs = (
        123,  # integer
        123.45,  # float
        True,  # boolean
        object(),  # custom object
    )
    for unexpected in unexpected_inputs:
        codeflash_output = get_artifact_type(unexpected, None)



from __future__ import annotations

import json
from collections.abc import Generator
from enum import Enum
from typing import Any
from uuid import UUID

# imports
import pytest  # used for our unit tests
from langflow.graph.utils import get_artifact_type


class ArtifactType(str, Enum):
    """String-valued enumeration of artifact type labels.

    Mixing in ``str`` makes each member a string that compares equal to its
    raw value (e.g. ``ArtifactType.MESSAGE == "message"``).
    """

    # NOTE(review): presumably mirrors the enum used inside
    # langflow.graph.utils -- confirm against that module.
    TEXT = "text"
    RECORD = "record"
    OBJECT = "object"
    ARRAY = "array"
    STREAM = "stream"
    UNKNOWN = "unknown"
    MESSAGE = "message"

class JSON:
    """Simple holder that exposes its payload on the ``data`` attribute."""

    def __init__(self, data=None):
        """Store *data*; when it is ``None``, fall back to a new empty dict."""
        if data is None:
            # Built here rather than in the signature so that instances
            # never share one default dict.
            data = {}
        self.data = data

class Message(JSON):
    """JSON subclass that additionally carries ``text`` and ``sender``."""

    def __init__(self, text=None, sender=None):
        """Run the base initialiser with its defaults, then record *text* and *sender*."""
        # No argument is passed, so ``data`` takes whatever JSON.__init__
        # assigns for its default.
        super().__init__()
        self.text = text
        self.sender = sender
from langflow.graph.utils import get_artifact_type


# unit tests
def test_basic_json():
    """Call get_artifact_type with a default-constructed JSON instance."""
    json_value = JSON()
    codeflash_output = get_artifact_type(json_value, None)

def test_basic_string():
    """Call get_artifact_type with a plain string."""
    text_value = "example text"
    codeflash_output = get_artifact_type(text_value, None)

def test_basic_dict():
    """Call get_artifact_type with a one-entry dictionary."""
    dict_value = {"key": "value"}
    codeflash_output = get_artifact_type(dict_value, None)

def test_basic_list():
    """Call get_artifact_type with a short list of integers."""
    list_value = [1, 2, 3]
    codeflash_output = get_artifact_type(list_value, None)

def test_basic_message():
    """Call get_artifact_type with a default-constructed Message."""
    message_value = Message()
    codeflash_output = get_artifact_type(message_value, None)

def test_unknown_integer():
    """Call get_artifact_type with an integer."""
    int_value = 42
    codeflash_output = get_artifact_type(int_value, None)

def test_unknown_float():
    """Call get_artifact_type with a float."""
    float_value = 3.14
    codeflash_output = get_artifact_type(float_value, None)

def test_unknown_custom_object():
    """Call get_artifact_type with an instance of an ad-hoc local class."""

    class CustomObject:
        pass

    instance = CustomObject()
    codeflash_output = get_artifact_type(instance, None)



def test_edge_empty_string():
    """Call get_artifact_type with an empty string."""
    empty_text = ""
    codeflash_output = get_artifact_type(empty_text, None)

def test_edge_empty_dict():
    """Call get_artifact_type with an empty dictionary."""
    empty_dict = {}
    codeflash_output = get_artifact_type(empty_dict, None)

def test_edge_empty_list():
    """Call get_artifact_type with an empty list."""
    empty_list = []
    codeflash_output = get_artifact_type(empty_list, None)

def test_edge_none_value():
    """Call get_artifact_type with None as the value."""
    missing_value = None
    codeflash_output = get_artifact_type(missing_value, None)

def test_edge_boolean_value():
    """Call get_artifact_type with a boolean."""
    flag_value = True
    codeflash_output = get_artifact_type(flag_value, None)

def test_complex_nested_dict():
    """Call get_artifact_type with a dictionary that contains another dictionary."""
    nested_dict = {"nested": {"key": "value"}}
    codeflash_output = get_artifact_type(nested_dict, None)

def test_complex_list_of_dicts():
    """Call get_artifact_type with a list whose elements are dictionaries."""
    dict_list = [{"key1": "value1"}, {"key2": "value2"}]
    codeflash_output = get_artifact_type(dict_list, None)

def test_complex_message_with_nested_json():
    """Call get_artifact_type with a Message whose text is a JSON instance."""
    nested_json = JSON(data={"key": "value"})
    wrapper = Message(text=nested_json)
    codeflash_output = get_artifact_type(wrapper, None)

def test_large_scale_list():
    """Call get_artifact_type with a 1000-element list of integers."""
    big_list = list(range(1000))
    codeflash_output = get_artifact_type(big_list, None)

def test_large_scale_dict():
    """Call get_artifact_type with a 1000-entry dict mapping each int to itself."""
    big_dict = {number: number for number in range(1000)}
    codeflash_output = get_artifact_type(big_dict, None)

def test_large_scale_json():
    """Call get_artifact_type with a JSON instance wrapping a 1000-entry dict."""
    big_payload = {number: number for number in range(1000)}
    big_json = JSON(data=big_payload)
    codeflash_output = get_artifact_type(big_json, None)

def test_large_scale_message():
    """Call get_artifact_type with a Message whose text is one million characters."""
    long_text = "a" * 1_000_000
    big_message = Message(text=long_text)
    codeflash_output = get_artifact_type(big_message, None)

def test_mixed_list():
    """Call get_artifact_type with a list mixing an int, a str and a dict."""
    mixed_items = [1, "text", {"key": "value"}]
    codeflash_output = get_artifact_type(mixed_items, None)

def test_special_characters_string():
    """Call get_artifact_type with a string made only of punctuation symbols."""
    symbol_text = "!@#$%^&*()"
    codeflash_output = get_artifact_type(symbol_text, None)

def test_special_characters_dict():
    """Call get_artifact_type with a dict whose key and value are punctuation symbols."""
    symbol_dict = {"!@#": "$%^"}
    codeflash_output = get_artifact_type(symbol_dict, None)
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.

To edit these changes, run `git checkout codeflash/optimize-pr6981-2025-04-24T19.42.55` and push.

Codeflash

codeflash-ai[bot] avatar Apr 24 '25 19:04 codeflash-ai[bot]