langflow
langflow copied to clipboard
⚡️ Speed up function `data_to_text` by 47,222% in PR #6981 (`data-to-json`)
⚡️ This pull request contains optimizations for PR #6981
If you approve this dependent PR, these changes will be merged into the original PR branch `data-to-json`.
This PR will be automatically closed if the original PR is merged.
📄 47,222% (472.22x) speedup for data_to_text in src/backend/base/langflow/helpers/data.py
⏱️ Runtime : 7.13 milliseconds → 15.1 microseconds (best of 24 runs)
📝 Explanation and details
Let's optimize the existing code for better performance. We will improve some parts for faster execution and less memory usage by avoiding unnecessary checks and simplifying the loop within the data_to_text_list function.
Optimizations.
- Removed unnecessary `data_list` creation and type checking by directly working with `data` and handling the `JSON` type check within the main loop.
- Simplified data extraction and `format_dict` updates.
- Updated error-raising to be more direct and inline, minimizing concatenation operations and redundant dictionary checks.
- Converted `sep` parameter handling to use a more concise approach within the `data_to_text` function.
By making these changes, we streamline the code, improve its readability, and eliminate some redundant operations, all of which contribute to better execution performance.
✅ Correctness verification report:
| Test | Status |
|---|---|
| ⚙️ Existing Unit Tests | 🔘 None Found |
| 🌀 Generated Regression Tests | ✅ 7 Passed |
| ⏪ Replay Tests | 🔘 None Found |
| 🔎 Concolic Coverage Tests | 🔘 None Found |
| 📊 Tests Coverage |
🌀 Generated Regression Tests Details
from collections import defaultdict
# imports
import pytest # used for our unit tests
from langflow.helpers.data import data_to_text
# function to test
class JSON:
    """Represents a JSON object.

    Minimal stand-in used by the generated tests; it only stores the payload.

    Attributes:
        data (dict): The JSON data.
    """

    def __init__(self, data):
        # Keep the payload exactly as given; no validation or copying.
        self.data = data
from langflow.helpers.data import data_to_text
# unit tests
def test_basic_list_json():
    """Format a list of JSON objects with a simple one-placeholder template."""
    template = "Hello {text}"
    data = [JSON(data={"text": "Alice"}), JSON(data={"text": "Bob"})]
    # Captured, not asserted: the harness compares this value between the
    # original and the optimized implementation.
    codeflash_output = data_to_text(template, data)
def test_missing_key_list_json():
    """Format a list in which the first item lacks the template's ``text`` key."""
    template = "Hello {text}"
    # First payload uses "name" instead of "text", so the placeholder has no
    # matching key for that item.
    data = [JSON(data={"name": "Alice"}), JSON(data={"text": "Bob"})]
    # Captured, not asserted: compared across implementations by the harness.
    codeflash_output = data_to_text(template, data)
def test_nested_dict_list_json():
    """Format items whose template fields live under a nested ``data`` dict."""
    template = "{name} is {age} years old"
    data = [
        JSON(data={"data": {"name": "Alice", "age": 25}}),
        JSON(data={"data": {"name": "Bob", "age": 30}}),
    ]
    # Captured, not asserted: compared across implementations by the harness.
    codeflash_output = data_to_text(template, data)
def test_template_none():
    """A ``None`` template must be rejected with ``ValueError``."""
    template = None
    data = JSON(data={"text": "world"})
    with pytest.raises(ValueError):
        data_to_text(template, data)
def test_template_not_string():
    """A non-string template (here an int) must be rejected with ``TypeError``."""
    template = 12345
    data = JSON(data={"text": "world"})
    with pytest.raises(TypeError):
        data_to_text(template, data)
def test_empty_data_list():
    """Call ``data_to_text`` with an empty data list."""
    template = "Hello {text}"
    data = []
    # Captured, not asserted: compared across implementations by the harness.
    codeflash_output = data_to_text(template, data)
def test_data_none():
    """Call ``data_to_text`` with ``None`` as the data argument."""
    template = "Hello {text}"
    data = None
    # Captured, not asserted: compared across implementations by the harness.
    codeflash_output = data_to_text(template, data)
def test_large_list_json():
    """Format a 1000-item list of JSON objects."""
    template = "Hello {text}"
    data = [JSON(data={"text": f"User{i}"}) for i in range(1000)]
    # NOTE(review): built for reference only; the generated test never
    # compares it against the result.
    expected_output = "\n".join([f"Hello User{i}" for i in range(1000)])
    # Captured, not asserted: compared across implementations by the harness.
    codeflash_output = data_to_text(template, data)
def test_custom_separator():
    """Format a two-item list while passing ``sep=", "``."""
    template = "Hello {text}"
    data = [JSON(data={"text": "Alice"}), JSON(data={"text": "Bob"})]
    # Captured, not asserted: compared across implementations by the harness.
    codeflash_output = data_to_text(template, data, sep=", ")
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
from collections import defaultdict
# imports
import pytest # used for our unit tests
from langflow.helpers.data import data_to_text
# function to test
class Data:
    """Empty base class that the test-local ``JSON`` stub derives from."""
class JSON(Data):
    """Represents a JSON object.

    Attributes:
        data (dict): The JSON data.
    """

    # We will phase out the Data class in favor of the JSON class
    # But they'll have the same interface and will be interchangeable

    def __init__(self, data):
        # Keep the payload exactly as given; no validation or copying.
        self.data = data
from langflow.helpers.data import data_to_text
# unit tests
def test_list_json_simple_template():
    """Format a list of JSON objects with a simple one-placeholder template."""
    template = "Hello {text}"
    data = [JSON(data={"text": "Alice"}), JSON(data={"text": "Bob"})]
    # Captured, not asserted: the harness compares this value between the
    # original and the optimized implementation.
    codeflash_output = data_to_text(template, data)
def test_list_json_nested_dict():
    """Format items whose template fields live under a nested ``data`` dict."""
    template = "{name} is {age}"
    data = [
        JSON(data={"data": {"name": "Alice", "age": 25}}),
        JSON(data={"data": {"name": "Bob", "age": 30}}),
    ]
    # Captured, not asserted: compared across implementations by the harness.
    codeflash_output = data_to_text(template, data)
def test_template_none():
    """A ``None`` template must be rejected with ``ValueError``."""
    template = None
    data = JSON(data={"text": "world"})
    with pytest.raises(ValueError):
        data_to_text(template, data)
def test_template_not_string():
    """A non-string template (here an int) must be rejected with ``TypeError``."""
    template = 123
    data = JSON(data={"text": "world"})
    with pytest.raises(TypeError):
        data_to_text(template, data)
def test_data_none():
    """Call ``data_to_text`` with ``None`` as the data argument."""
    template = "Hello {text}"
    data = None
    # Captured, not asserted: compared across implementations by the harness.
    codeflash_output = data_to_text(template, data)
def test_large_list_json_objects():
    """Format a 1000-item list of JSON objects."""
    template = "Hello {text}"
    data = [JSON(data={"text": f"User{i}"}) for i in range(1000)]
    # NOTE(review): built for reference only; the generated test never
    # compares it against the result.
    expected_output = "\n".join([f"Hello User{i}" for i in range(1000)])
    # Captured, not asserted: compared across implementations by the harness.
    codeflash_output = data_to_text(template, data)
def test_custom_separator():
    """Format a two-item list while passing ``sep=", "``."""
    template = "Hello {text}"
    data = [JSON(data={"text": "Alice"}), JSON(data={"text": "Bob"})]
    # Captured, not asserted: compared across implementations by the harness.
    codeflash_output = data_to_text(template, data, sep=", ")
def test_no_separator():
    """Format a two-item list while passing an empty-string separator."""
    template = "Hello {text}"
    data = [JSON(data={"text": "Alice"}), JSON(data={"text": "Bob"})]
    # Captured, not asserted: compared across implementations by the harness.
    codeflash_output = data_to_text(template, data, sep="")
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
To edit these changes, run `git checkout codeflash/optimize-pr6981-2025-04-24T20.09.18` and push.