langflow icon indicating copy to clipboard operation
langflow copied to clipboard

⚡️ Speed up function `data_to_text` by 47,222% in PR #6981 (`data-to-json`)

Open codeflash-ai[bot] opened this issue 7 months ago • 0 comments

⚡️ This pull request contains optimizations for PR #6981

If you approve this dependent PR, these changes will be merged into the original PR branch data-to-json.

This PR will be automatically closed if the original PR is merged.


📄 47,222% (472.22x) speedup for data_to_text in src/backend/base/langflow/helpers/data.py

⏱️ Runtime : 7.13 milliseconds → 15.1 microseconds (best of 24 runs)

📝 Explanation and details

Let's optimize the existing code for better performance. We will improve some parts for faster execution and less memory usage by avoiding unnecessary checks and simplifying the loop within the data_to_text_list function.

Optimizations.

  1. Removed unnecessary data_list creation and type checking by directly working with data and handling the JSON type check within the main loop.
  2. Simplified data extraction and format_dict updates.
  3. Updated error-raising to be more direct and inline, minimizing concatenation operations and redundant dictionary checks.
  4. Converted sep parameter handling to use a more concise approach within the data_to_text function.

By making these changes, we streamline the code, improve its readability, and eliminate some redundant operations, all of which contribute to better execution performance.

Correctness verification report:

Test Status
⚙️ Existing Unit Tests 🔘 None Found
🌀 Generated Regression Tests 7 Passed
⏪ Replay Tests 🔘 None Found
🔎 Concolic Coverage Tests 🔘 None Found
📊 Tests Coverage
🌀 Generated Regression Tests Details
from collections import defaultdict

# imports
import pytest  # used for our unit tests
from langflow.helpers.data import data_to_text


# function to test
class JSON:
    """Lightweight container that holds a JSON payload.

    Attributes:
        data (dict): The payload handed to the constructor, stored as-is.
    """

    def __init__(self, data):
        """Keep a reference to ``data`` without copying or validating it."""
        self.data = data
from langflow.helpers.data import data_to_text

# unit tests


def test_basic_list_json():
    """Format a two-element list of JSON records with a one-placeholder template."""
    greeting = "Hello {text}"
    records = [JSON(data={"text": name}) for name in ("Alice", "Bob")]
    # The result is captured rather than asserted: codeflash_output is what
    # gets compared between the original and the optimized implementation.
    codeflash_output = data_to_text(greeting, records)


def test_missing_key_list_json():
    """Format a list whose first record lacks the ``text`` key used by the template."""
    greeting = "Hello {text}"
    without_text = JSON(data={"name": "Alice"})
    with_text = JSON(data={"text": "Bob"})
    # Captured for comparison between original and optimized code; no assertion here.
    codeflash_output = data_to_text(greeting, [without_text, with_text])


def test_nested_dict_list_json():
    """Format records whose fields are nested under an inner ``data`` key."""
    sentence = "{name} is {age} years old"
    people = (("Alice", 25), ("Bob", 30))
    records = [
        JSON(data={"data": {"name": who, "age": years}})
        for who, years in people
    ]
    # Captured for comparison between original and optimized code; no assertion here.
    codeflash_output = data_to_text(sentence, records)

def test_template_none():
    """A ``None`` template is expected to make data_to_text raise ValueError."""
    record = JSON(data={"text": "world"})
    missing_template = None
    with pytest.raises(ValueError):
        data_to_text(missing_template, record)

def test_template_not_string():
    """An integer template is expected to make data_to_text raise TypeError."""
    record = JSON(data={"text": "world"})
    numeric_template = 12345
    with pytest.raises(TypeError):
        data_to_text(numeric_template, record)


def test_empty_data_list():
    """Call data_to_text with a valid template and an empty list of records."""
    greeting = "Hello {text}"
    no_records = []
    # Captured for comparison between original and optimized code; no assertion here.
    codeflash_output = data_to_text(greeting, no_records)

def test_data_none():
    """Call data_to_text with a valid template and ``None`` in place of the data."""
    greeting = "Hello {text}"
    absent_data = None
    # Captured for comparison between original and optimized code; no assertion here.
    codeflash_output = data_to_text(greeting, absent_data)




def test_large_list_json():
    """Format 1000 JSON records in a single data_to_text call."""
    greeting = "Hello {text}"
    count = 1000
    records = [JSON(data={"text": f"User{idx}"}) for idx in range(count)]
    # Built alongside the call but not asserted against in this test.
    expected_output = "\n".join(f"Hello User{idx}" for idx in range(count))
    # Captured for comparison between original and optimized code.
    codeflash_output = data_to_text(greeting, records)




def test_custom_separator():
    """Format two records while passing ``sep=", "`` to data_to_text."""
    greeting = "Hello {text}"
    separator = ", "
    records = [JSON(data={"text": name}) for name in ("Alice", "Bob")]
    # Captured for comparison between original and optimized code; no assertion here.
    codeflash_output = data_to_text(greeting, records, sep=separator)
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.

from collections import defaultdict

# imports
import pytest  # used for our unit tests
from langflow.helpers.data import data_to_text


# function to test
class Data:
    """Empty base class; it defines no attributes or methods of its own."""

class JSON(Data):
    """Container for a JSON payload; a subclass of ``Data``.

    ``Data`` is slated to be phased out in favor of this class. The two are
    meant to share the same interface and be interchangeable.

    Attributes:
        data (dict): The JSON data handed to the constructor, stored as-is.
    """

    def __init__(self, data):
        """Keep a reference to ``data`` without copying or validating it."""
        self.data = data
from langflow.helpers.data import data_to_text

# unit tests



def test_list_json_simple_template():
    """Format a two-element list of JSON records with a one-placeholder template."""
    greeting = "Hello {text}"
    alice = JSON(data={"text": "Alice"})
    bob = JSON(data={"text": "Bob"})
    # The result is captured rather than asserted: codeflash_output is what
    # gets compared between the original and the optimized implementation.
    codeflash_output = data_to_text(greeting, [alice, bob])

def test_list_json_nested_dict():
    """Format records whose fields are nested under an inner ``data`` key."""
    sentence = "{name} is {age}"
    people = (("Alice", 25), ("Bob", 30))
    records = [
        JSON(data={"data": {"name": who, "age": years}})
        for who, years in people
    ]
    # Captured for comparison between original and optimized code; no assertion here.
    codeflash_output = data_to_text(sentence, records)




def test_template_none():
    """A ``None`` template is expected to make data_to_text raise ValueError."""
    record = JSON(data={"text": "world"})
    missing_template = None
    with pytest.raises(ValueError):
        data_to_text(missing_template, record)

def test_template_not_string():
    """An integer template is expected to make data_to_text raise TypeError."""
    record = JSON(data={"text": "world"})
    numeric_template = 123
    with pytest.raises(TypeError):
        data_to_text(numeric_template, record)

def test_data_none():
    """Call data_to_text with a valid template and ``None`` in place of the data."""
    greeting = "Hello {text}"
    absent_data = None
    # Captured for comparison between original and optimized code; no assertion here.
    codeflash_output = data_to_text(greeting, absent_data)



def test_large_list_json_objects():
    """Format 1000 JSON records in a single data_to_text call."""
    greeting = "Hello {text}"
    count = 1000
    records = [JSON(data={"text": f"User{idx}"}) for idx in range(count)]
    # Built alongside the call but not asserted against in this test.
    expected_output = "\n".join(f"Hello User{idx}" for idx in range(count))
    # Captured for comparison between original and optimized code.
    codeflash_output = data_to_text(greeting, records)



def test_custom_separator():
    """Format two records while passing ``sep=", "`` to data_to_text."""
    greeting = "Hello {text}"
    separator = ", "
    records = [JSON(data={"text": name}) for name in ("Alice", "Bob")]
    # Captured for comparison between original and optimized code; no assertion here.
    codeflash_output = data_to_text(greeting, records, sep=separator)

def test_no_separator():
    """Format two records while passing an empty string as ``sep``."""
    greeting = "Hello {text}"
    empty_separator = ""
    records = [JSON(data={"text": name}) for name in ("Alice", "Bob")]
    # Captured for comparison between original and optimized code; no assertion here.
    codeflash_output = data_to_text(greeting, records, sep=empty_separator)
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.

To edit these changes, run `git checkout codeflash/optimize-pr6981-2025-04-24T20.09.18`, commit your edits, and push.

Codeflash

codeflash-ai[bot] avatar Apr 24 '25 20:04 codeflash-ai[bot]