fix: enhance Filter Data with JQ Query, Column Selection and Index selection
Consolidates JSON parsing, data filtering, and key extraction into a single streamlined component. Adds support for JQ queries, column selection, and index-based filtering, eliminating the need for separate Parse JSON and Extract Key components.
If this is approved we can remove the following:
src/backend/base/langflow/components/processing/extract_key.py
src/backend/base/langflow/components/processing/filter_data_values.py
src/backend/base/langflow/components/processing/parse_json_data.py
Sure can do!
Hey @vasconceloscezar
Please, check 12bac03 (#4774) and see if that makes sense.
Essentially, since we strive for returning a data object to maintain compatibility between components, I made it so if the jq query returns a list, it puts it inside the Data object.
Why don't we keep it as a Data list? I'd rather have a list returned than a results field.
It's counterintuitive to have a list transformed into a results field, especially if the starting JSON is already a list.
I believe we soon will have more components to handle Data Lists and so on, so this whole List handling will be even better.
Example of not ideal results:
Start Data List:
Filtering with: .[2] | {"missionId": .missionId}
This is not ideal, because the user will need to add another component just to re-parse that list.
Thoughts @ogabrielluiz ?
It is unintuitive but it would be better to add a component that makes flow work rather than having them change the component they want to use because it does not work with a list of Data objects.
@vasconceloscezar I've just added a new type: langflow.schema.data_set.DataSet (we can improve it in this PR) that can deal with a list of Data so we can display that more clearly now.
⚡️ Codeflash found optimizations for this PR
📄 3,866% (38.66x) speedup for FilterDataComponent._apply_column_filter in src/backend/base/langflow/components/processing/filter_data.py
⏱️ Runtime : 11.1 milliseconds → 281 microseconds (best of 148 runs)
📝 Explanation and details
Sure, here is the optimized version of the program.
Changes made.
- Changed the `columns` parameter type to `set` for faster membership checking.
- Converted the `columns` list to a set `columns_set` inside the function if it was a list, to take advantage of `O(1)` average-time complexity for set membership checks compared to `O(n)` for lists.
✅ Correctness verification report:
| Test | Status |
|---|---|
| ⚙️ Existing Unit Tests | 🔘 None Found |
| 🌀 Generated Regression Tests | ✅ 29 Passed |
| ⏪ Replay Tests | 🔘 None Found |
| 🔎 Concolic Coverage Tests | 🔘 None Found |
| 📊 Tests Coverage | undefined |
🌀 Generated Regression Tests Details
from typing import Any
# imports
import pytest # used for our unit tests
from langflow.components.processing.filter_data import FilterDataComponent
# function to test
from langflow.custom import Component
# unit tests
# Initialize the class instance
filter_data_component = FilterDataComponent()
def test_basic_functionality():
# Basic functionality test case
data = {'a': 1, 'b': 2, 'c': 3}
columns = ['a', 'c']
expected = {'a': 1, 'c': 3}
codeflash_output = filter_data_component._apply_column_filter(data, columns)
def test_no_matching_columns():
# No matching columns test case
data = {'a': 1, 'b': 2, 'c': 3}
columns = ['d', 'e']
expected = {}
codeflash_output = filter_data_component._apply_column_filter(data, columns)
def test_empty_data():
# Empty data test case
data = {}
columns = ['a', 'b']
expected = {}
codeflash_output = filter_data_component._apply_column_filter(data, columns)
def test_empty_columns():
# Empty columns test case
data = {'a': 1, 'b': 2, 'c': 3}
columns = []
expected = {}
codeflash_output = filter_data_component._apply_column_filter(data, columns)
def test_non_dictionary_data():
# Non-dictionary data test cases
data = [1, 2, 3]
columns = ['a', 'b']
expected = [1, 2, 3]
codeflash_output = filter_data_component._apply_column_filter(data, columns)
data = "string"
columns = ['a', 'b']
expected = "string"
codeflash_output = filter_data_component._apply_column_filter(data, columns)
def test_non_string_keys():
# Non-string keys test case
data = {1: 'one', 2: 'two'}
columns = [1]
expected = {1: 'one'}
codeflash_output = filter_data_component._apply_column_filter(data, columns)
def test_mixed_data_types_in_columns():
# Mixed data types in columns test case
data = {'a': 1, 'b': 2, 3: 'three'}
columns = ['a', 3]
expected = {'a': 1, 3: 'three'}
codeflash_output = filter_data_component._apply_column_filter(data, columns)
def test_large_scale():
# Large scale test case
data = {f'key_{i}': i for i in range(1000)}
columns = [f'key_{i}' for i in range(1000)]
expected = {f'key_{i}': i for i in range(1000)}
codeflash_output = filter_data_component._apply_column_filter(data, columns)
def test_nested_dictionaries():
# Nested dictionaries test case
data = {'a': {'nested': 1}, 'b': 2}
columns = ['a']
expected = {'a': {'nested': 1}}
codeflash_output = filter_data_component._apply_column_filter(data, columns)
def test_columns_with_non_existent_keys():
# Columns with non-existent keys test case
data = {'a': 1, 'b': 2}
columns = ['a', 'c']
expected = {'a': 1}
codeflash_output = filter_data_component._apply_column_filter(data, columns)
def test_case_sensitivity():
# Case sensitivity test case
data = {'a': 1, 'A': 2}
columns = ['a']
expected = {'a': 1}
codeflash_output = filter_data_component._apply_column_filter(data, columns)
def test_immutable_data_types():
# Immutable data types test case
data = (1, 2, 3)
columns = ['a', 'b']
expected = (1, 2, 3)
codeflash_output = filter_data_component._apply_column_filter(data, columns)
def test_columns_with_duplicates():
# Columns with duplicates test case
data = {'a': 1, 'b': 2, 'c': 3}
columns = ['a', 'a', 'b']
expected = {'a': 1, 'b': 2}
codeflash_output = filter_data_component._apply_column_filter(data, columns)
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
from typing import Any
# imports
import pytest # used for our unit tests
from langflow.components.processing.filter_data import FilterDataComponent
# function to test
from langflow.custom import Component
# unit tests
# Basic Functionality
def test_standard_dict_with_matching_columns():
component = FilterDataComponent()
data = {'a': 1, 'b': 2, 'c': 3}
columns = ['a', 'c']
codeflash_output = component._apply_column_filter(data, columns)
def test_standard_dict_with_non_matching_columns():
component = FilterDataComponent()
data = {'a': 1, 'b': 2, 'c': 3}
columns = ['d', 'e']
codeflash_output = component._apply_column_filter(data, columns)
# Edge Cases
def test_empty_data_dict():
component = FilterDataComponent()
data = {}
columns = ['a', 'b']
codeflash_output = component._apply_column_filter(data, columns)
def test_empty_columns_list():
component = FilterDataComponent()
data = {'a': 1, 'b': 2}
columns = []
codeflash_output = component._apply_column_filter(data, columns)
def test_columns_list_with_all_keys():
component = FilterDataComponent()
data = {'a': 1, 'b': 2}
columns = ['a', 'b']
codeflash_output = component._apply_column_filter(data, columns)
# Non-Dictionary Data
def test_list_input():
component = FilterDataComponent()
data = [1, 2, 3]
columns = ['a', 'b']
codeflash_output = component._apply_column_filter(data, columns)
def test_string_input():
component = FilterDataComponent()
data = 'string'
columns = ['a', 'b']
codeflash_output = component._apply_column_filter(data, columns)
def test_integer_input():
component = FilterDataComponent()
data = 123
columns = ['a', 'b']
codeflash_output = component._apply_column_filter(data, columns)
# Mixed Data Types in Dictionary
def test_dict_with_mixed_value_types():
component = FilterDataComponent()
data = {'a': 1, 'b': 'string', 'c': [1, 2, 3]}
columns = ['a', 'c']
codeflash_output = component._apply_column_filter(data, columns)
# Large Scale Test Cases
def test_large_dict():
component = FilterDataComponent()
data = {f'key{i}': i for i in range(1000)}
columns = [f'key{i}' for i in range(5000, 6000)]
expected_output = {f'key{i}': i for i in range(5000, 6000)}
codeflash_output = component._apply_column_filter(data, columns)
def test_large_columns_list():
component = FilterDataComponent()
data = {f'key{i}': i for i in range(100)}
columns = [f'key{i}' for i in range(1000)]
expected_output = {f'key{i}': i for i in range(100)}
codeflash_output = component._apply_column_filter(data, columns)
# Duplicate Columns in List
def test_duplicate_columns():
component = FilterDataComponent()
data = {'a': 1, 'b': 2, 'c': 3}
columns = ['a', 'a', 'b']
codeflash_output = component._apply_column_filter(data, columns)
# Columns Not Strings
def test_non_string_columns():
component = FilterDataComponent()
data = {'a': 1, 'b': 2, 'c': 3}
columns = [1, 2, 3]
codeflash_output = component._apply_column_filter(data, columns)
# Case Sensitivity
def test_case_sensitivity_in_columns():
component = FilterDataComponent()
data = {'a': 1, 'A': 2}
columns = ['a']
codeflash_output = component._apply_column_filter(data, columns)
def test_case_sensitivity_with_mixed_case_columns():
component = FilterDataComponent()
data = {'a': 1, 'A': 2}
columns = ['a', 'A']
codeflash_output = component._apply_column_filter(data, columns)
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
⚡️ Codeflash found optimizations for this PR
📄 60% (0.60x) speedup for FilterDataComponent._apply_column_filter in src/backend/base/langflow/components/processing/filter_data.py
⏱️ Runtime : 323 microseconds → 201 microseconds (best of 94 runs)
📝 Explanation and details
Here's an optimized version of your program.
Optimizations.
- Avoiding set conversion: The original code converts the `columns` list to a set (`columns_set`), which has some overhead. Instead, we iterate directly over the `columns` list and check if each element exists in the `data` dictionary.
- Direct dictionary access: The optimized version directly accesses the dictionary with `data[k]` for any key in `columns`, only if it exists in `data`, reducing the need for the additional check of set membership.
✅ Correctness verification report:
| Test | Status |
|---|---|
| ⚙️ Existing Unit Tests | 🔘 None Found |
| 🌀 Generated Regression Tests | ✅ 34 Passed |
| ⏪ Replay Tests | 🔘 None Found |
| 🔎 Concolic Coverage Tests | 🔘 None Found |
| 📊 Tests Coverage | undefined |
🌀 Generated Regression Tests Details
from typing import Any
# imports
import pytest # used for our unit tests
from langflow.components.processing.filter_data import FilterDataComponent
# function to test
from langflow.custom import Component
# unit tests
# Basic Functionality
def test_standard_dictionary_matching_columns():
component = FilterDataComponent()
data = {'a': 1, 'b': 2, 'c': 3}
columns = ['a', 'b']
codeflash_output = component._apply_column_filter(data, columns)
def test_standard_dictionary_no_matching_columns():
component = FilterDataComponent()
data = {'a': 1, 'b': 2, 'c': 3}
columns = ['d', 'e']
codeflash_output = component._apply_column_filter(data, columns)
# Edge Cases
def test_empty_dictionary():
component = FilterDataComponent()
data = {}
columns = ['a', 'b']
codeflash_output = component._apply_column_filter(data, columns)
def test_empty_columns_list():
component = FilterDataComponent()
data = {'a': 1, 'b': 2}
columns = []
codeflash_output = component._apply_column_filter(data, columns)
def test_columns_list_with_non_existent_keys():
component = FilterDataComponent()
data = {'a': 1, 'b': 2}
columns = ['c', 'd']
codeflash_output = component._apply_column_filter(data, columns)
# Non-Dictionary Data
def test_list_input():
component = FilterDataComponent()
data = [1, 2, 3]
columns = ['a', 'b']
codeflash_output = component._apply_column_filter(data, columns)
def test_string_input():
component = FilterDataComponent()
data = 'hello'
columns = ['h', 'e']
codeflash_output = component._apply_column_filter(data, columns)
def test_integer_input():
component = FilterDataComponent()
data = 123
columns = ['1', '2']
codeflash_output = component._apply_column_filter(data, columns)
# Complex Data Structures
def test_nested_dictionary():
component = FilterDataComponent()
data = {'a': {'b': 2}, 'c': 3}
columns = ['a', 'c']
codeflash_output = component._apply_column_filter(data, columns)
def test_dictionary_with_list_values():
component = FilterDataComponent()
data = {'a': [1, 2], 'b': [3, 4]}
columns = ['a']
codeflash_output = component._apply_column_filter(data, columns)
# Large Scale Test Cases
def test_large_dictionary():
component = FilterDataComponent()
data = {f'key{i}': i for i in range(1000)}
columns = [f'key{i}' for i in range(500)]
expected_output = {f'key{i}': i for i in range(500)}
codeflash_output = component._apply_column_filter(data, columns)
def test_large_columns_list():
component = FilterDataComponent()
data = {'a': 1, 'b': 2, 'c': 3}
columns = [f'key{i}' for i in range(1000)]
codeflash_output = component._apply_column_filter(data, columns)
# Special Characters in Keys
def test_special_characters_in_keys():
component = FilterDataComponent()
data = {'a!': 1, 'b@': 2, 'c#': 3}
columns = ['a!', 'b@']
codeflash_output = component._apply_column_filter(data, columns)
# Mixed Data Types in Columns List
def test_mixed_data_types_in_columns_list():
component = FilterDataComponent()
data = {'a': 1, 'b': 2}
columns = ['a', 2]
codeflash_output = component._apply_column_filter(data, columns)
# Unicode Characters in Keys
def test_unicode_characters_in_keys():
component = FilterDataComponent()
data = {'你好': 1, '世界': 2}
columns = ['你好']
codeflash_output = component._apply_column_filter(data, columns)
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
from typing import Any
# imports
import pytest # used for our unit tests
from langflow.components.processing.filter_data import FilterDataComponent
# function to test
from langflow.custom import Component
# unit tests
@pytest.fixture
def filter_component():
return FilterDataComponent()
# Basic Functionality
def test_standard_dict_with_matching_columns(filter_component):
data = {'a': 1, 'b': 2, 'c': 3}
columns = ['a', 'b']
expected = {'a': 1, 'b': 2}
codeflash_output = filter_component._apply_column_filter(data, columns)
def test_standard_dict_with_non_matching_columns(filter_component):
data = {'a': 1, 'b': 2, 'c': 3}
columns = ['d', 'e']
expected = {}
codeflash_output = filter_component._apply_column_filter(data, columns)
# Edge Cases
def test_empty_dict(filter_component):
data = {}
columns = ['a', 'b']
expected = {}
codeflash_output = filter_component._apply_column_filter(data, columns)
def test_empty_columns_list(filter_component):
data = {'a': 1, 'b': 2, 'c': 3}
columns = []
expected = {}
codeflash_output = filter_component._apply_column_filter(data, columns)
def test_empty_dict_and_empty_columns_list(filter_component):
data = {}
columns = []
expected = {}
codeflash_output = filter_component._apply_column_filter(data, columns)
def test_columns_list_with_keys_not_in_dict(filter_component):
data = {'a': 1, 'b': 2}
columns = ['c', 'd']
expected = {}
codeflash_output = filter_component._apply_column_filter(data, columns)
# Data Type Variations
def test_non_dict_data_list(filter_component):
data = [1, 2, 3]
columns = ['a', 'b']
expected = [1, 2, 3]
codeflash_output = filter_component._apply_column_filter(data, columns)
def test_non_dict_data_string(filter_component):
data = 'string'
columns = ['a', 'b']
expected = 'string'
codeflash_output = filter_component._apply_column_filter(data, columns)
def test_non_dict_data_int(filter_component):
data = 123
columns = ['a', 'b']
expected = 123
codeflash_output = filter_component._apply_column_filter(data, columns)
def test_dict_with_non_string_keys(filter_component):
data = {1: 'a', 2: 'b'}
columns = ['1', '2']
expected = {}
codeflash_output = filter_component._apply_column_filter(data, columns)
# Nested Dictionaries
def test_nested_dict_with_matching_columns(filter_component):
data = {'a': {'nested': 1}, 'b': 2}
columns = ['a']
expected = {'a': {'nested': 1}}
codeflash_output = filter_component._apply_column_filter(data, columns)
def test_nested_dict_with_non_matching_columns(filter_component):
data = {'a': {'nested': 1}, 'b': 2}
columns = ['c']
expected = {}
codeflash_output = filter_component._apply_column_filter(data, columns)
# Large Scale Test Cases
def test_large_dict_with_partial_column_match(filter_component):
data = {f'key{i}': i for i in range(1000)}
columns = [f'key{i}' for i in range(500)]
expected = {f'key{i}': i for i in range(500)}
codeflash_output = filter_component._apply_column_filter(data, columns)
def test_large_dict_with_no_column_match(filter_component):
data = {f'key{i}': i for i in range(1000)}
columns = [f'key{i}' for i in range(1000, 2000)]
expected = {}
codeflash_output = filter_component._apply_column_filter(data, columns)
# Special Characters in Keys
def test_dict_with_special_character_keys(filter_component):
data = {'a!@#': 1, 'b$%^': 2}
columns = ['a!@#']
expected = {'a!@#': 1}
codeflash_output = filter_component._apply_column_filter(data, columns)
def test_dict_with_whitespace_in_keys(filter_component):
data = {'a key': 1, 'another key': 2}
columns = ['a key']
expected = {'a key': 1}
codeflash_output = filter_component._apply_column_filter(data, columns)
# Case Sensitivity
def test_dict_with_mixed_case_keys(filter_component):
data = {'Key': 1, 'key': 2}
columns = ['Key']
expected = {'Key': 1}
codeflash_output = filter_component._apply_column_filter(data, columns)
def test_columns_list_with_different_cases(filter_component):
data = {'Key': 1, 'key': 2}
columns = ['key']
expected = {'key': 2}
codeflash_output = filter_component._apply_column_filter(data, columns)
# Duplicate Columns in List
def test_columns_list_with_duplicates(filter_component):
data = {'a': 1, 'b': 2, 'c': 3}
columns = ['a', 'a', 'b']
expected = {'a': 1, 'b': 2}
codeflash_output = filter_component._apply_column_filter(data, columns)
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
⚡️ Codeflash found optimizations for this PR
📄 36% (0.36x) speedup for FilterDataComponent._filter_by_index in src/backend/base/langflow/components/processing/filter_data.py
⏱️ Runtime : 859 microseconds → 630 microseconds (best of 5 runs)
📝 Explanation and details
Here are some optimizations to improve the performance of the provided code.
✅ Correctness verification report:
| Test | Status |
|---|---|
| ⚙️ Existing Unit Tests | 🔘 None Found |
| 🌀 Generated Regression Tests | ✅ 10 Passed |
| ⏪ Replay Tests | 🔘 None Found |
| 🔎 Concolic Coverage Tests | 🔘 None Found |
| 📊 Tests Coverage | undefined |
🌀 Generated Regression Tests Details
from __future__ import annotations
from collections.abc import Callable
from typing import Any
import pandas as pd
# imports
import pytest # used for our unit tests
from cachetools import TTLCache
from langflow.components.processing.filter_data import FilterDataComponent
from langflow.custom import Component
from langflow.custom.custom_component.base_component import BaseComponent
from langflow.graph.vertex.base import Vertex
from langflow.schema import Data
from langflow.schema.schema import OutputValue
from langflow.services.tracing.schema import Log
from langflow.services.tracing.service import TracingService
# unit tests
class TestFilterDataComponent:
@pytest.fixture
def component(self):
# Create an instance of FilterDataComponent for testing
component = FilterDataComponent()
return component
from __future__ import annotations
import sys
import time
import tracemalloc
from collections.abc import Callable
from typing import Any
import pandas as pd
# imports
import pytest # used for our unit tests
from cachetools import TTLCache
from langflow.components.processing.filter_data import FilterDataComponent
from langflow.custom import Component
from langflow.custom.custom_component.base_component import BaseComponent
from langflow.graph.vertex.base import Vertex
from langflow.schema import Data
from langflow.schema.schema import OutputValue
from langflow.services.tracing.schema import Log
from langflow.services.tracing.service import TracingService
# unit tests
# Basic Functionality
def test_multiple_rows_dataframe():
df = pd.DataFrame({'A': [1, 2, 3]})
component = FilterDataComponent()
component.index = 1
codeflash_output = component._filter_by_index(df)
# Edge Cases
def test_index_out_of_range():
df = pd.DataFrame({'A': [1, 2, 3]})
component = FilterDataComponent()
component.index = 5
with pytest.raises(IndexError):
component._filter_by_index(df)
def test_index_none():
df = pd.DataFrame({'A': [1, 2, 3]})
component = FilterDataComponent()
component.index = None
codeflash_output = component._filter_by_index(df)
# DataFrame with Various Data Types
def test_different_data_types():
df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'c'], 'C': [1.1, 2.2, 3.3]})
component = FilterDataComponent()
component.index = 1
codeflash_output = component._filter_by_index(df)
# Large Scale Test Cases
def test_index_max_integer():
df = pd.DataFrame({'A': range(100)})
component = FilterDataComponent()
component.index = sys.maxsize
with pytest.raises(IndexError):
component._filter_by_index(df)
def test_index_min_integer():
df = pd.DataFrame({'A': range(100)})
component = FilterDataComponent()
component.index = -sys.maxsize - 1
with pytest.raises(IndexError):
component._filter_by_index(df)
# Performance and Scalability
⚡️ Codeflash found optimizations for this PR
📄 269% (2.69x) speedup for FilterDataComponent._is_safe_jq_query in src/backend/base/langflow/components/processing/filter_data.py
⏱️ Runtime : 614 microseconds → 166 microseconds (best of 97 runs)
📝 Explanation and details
Here is an optimized version of the given program.
Changes Made.
- Short-Circuit Check on Length.
  - Added a check for the length of the query string at the beginning of the method before performing the character validation. This avoids the unnecessary computation if the query is too long.
- Avoiding `set` Lookup for Characters.
  - Instead of using `set` for `safe_chars`, which incurs the overhead of constructing the set and checking membership against a `set`, I used a `str`. The `in` operator is more efficient for short strings as compared to sets in Python.
By implementing these changes, the program avoids unnecessary checks and constructs, potentially improving the overall performance of the function.
✅ Correctness verification report:
| Test | Status |
|---|---|
| ⚙️ Existing Unit Tests | 🔘 None Found |
| 🌀 Generated Regression Tests | ✅ 84 Passed |
| ⏪ Replay Tests | 🔘 None Found |
| 🔎 Concolic Coverage Tests | 🔘 None Found |
| 📊 Tests Coverage | undefined |
🌀 Generated Regression Tests Details
import pytest # used for our unit tests
from langflow.components.processing.filter_data import FilterDataComponent
# function to test
from langflow.custom import Component
# unit tests
@pytest.fixture
def component():
return FilterDataComponent(max_query_length=100)
def test_valid_queries_with_safe_characters(component):
codeflash_output = component._is_safe_jq_query(".")
codeflash_output = component._is_safe_jq_query("foo.bar")
codeflash_output = component._is_safe_jq_query("foo[0].bar")
codeflash_output = component._is_safe_jq_query("foo + bar - baz")
codeflash_output = component._is_safe_jq_query("foo * bar / baz")
def test_invalid_queries_with_unsafe_characters(component):
codeflash_output = not component._is_safe_jq_query("foo$bar")
codeflash_output = not component._is_safe_jq_query("foo@bar")
codeflash_output = not component._is_safe_jq_query("foo#bar")
codeflash_output = not component._is_safe_jq_query("foo!bar")
codeflash_output = not component._is_safe_jq_query("foo:bar")
def test_queries_exceeding_maximum_length(component):
codeflash_output = not component._is_safe_jq_query("a" * 101)
codeflash_output = not component._is_safe_jq_query("foo.bar.baz" * 11)
def test_empty_query(component):
codeflash_output = component._is_safe_jq_query("")
def test_queries_with_only_whitespace(component):
codeflash_output = component._is_safe_jq_query(" ")
codeflash_output = component._is_safe_jq_query("\t")
codeflash_output = component._is_safe_jq_query("\n")
codeflash_output = component._is_safe_jq_query(" ")
def test_queries_with_mixed_safe_and_unsafe_characters(component):
codeflash_output = not component._is_safe_jq_query("foo.bar$baz")
codeflash_output = not component._is_safe_jq_query("foo@bar.baz")
codeflash_output = not component._is_safe_jq_query("foo#bar.baz")
def test_queries_with_edge_case_characters(component):
codeflash_output = component._is_safe_jq_query("aZ0.[]() +-*/<>=|,")
codeflash_output = component._is_safe_jq_query("A1.[]() +-*/<>=|,")
def test_queries_with_nested_structures(component):
codeflash_output = component._is_safe_jq_query("foo[0][1]")
codeflash_output = component._is_safe_jq_query("foo[bar[baz]]")
codeflash_output = component._is_safe_jq_query("foo(bar(baz))")
def test_queries_with_complex_expressions(component):
codeflash_output = component._is_safe_jq_query("foo + bar - baz * qux / quux")
codeflash_output = component._is_safe_jq_query("foo > bar && baz < qux || quux == corge")
def test_queries_with_commas_and_pipes(component):
codeflash_output = component._is_safe_jq_query("foo, bar, baz")
codeflash_output = component._is_safe_jq_query("foo | bar | baz")
def test_queries_with_maximum_length(component):
codeflash_output = component._is_safe_jq_query("a" * 100)
codeflash_output = component._is_safe_jq_query("foo.bar" * 14 + "foo")
def test_queries_with_special_characters_at_start_or_end(component):
codeflash_output = component._is_safe_jq_query(".foo")
codeflash_output = component._is_safe_jq_query("foo.")
codeflash_output = component._is_safe_jq_query("[foo]")
codeflash_output = component._is_safe_jq_query("foo[]")
codeflash_output = component._is_safe_jq_query("foo()")
def test_queries_with_repeated_characters(component):
codeflash_output = component._is_safe_jq_query("foo...bar")
codeflash_output = component._is_safe_jq_query("foo+++bar")
codeflash_output = component._is_safe_jq_query("foo***bar")
def test_queries_with_mixed_case_sensitivity(component):
codeflash_output = component._is_safe_jq_query("Foo.Bar")
codeflash_output = component._is_safe_jq_query("foo.BAR")
codeflash_output = component._is_safe_jq_query("FOO.bar")
def test_queries_with_numeric_values(component):
codeflash_output = component._is_safe_jq_query("foo123")
codeflash_output = component._is_safe_jq_query("foo[123]")
codeflash_output = component._is_safe_jq_query("foo + 123")
def test_queries_with_edge_case_lengths(component):
codeflash_output = component._is_safe_jq_query("a" * 99)
codeflash_output = not component._is_safe_jq_query("a" * 101)
def test_large_scale_queries(component):
codeflash_output = component._is_safe_jq_query("a" * 50 + "b" * 50)
codeflash_output = not component._is_safe_jq_query("foo.bar" * 15)
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
import pytest # used for our unit tests
from langflow.components.processing.filter_data import FilterDataComponent
# function to test
from langflow.custom import Component
# unit tests
# Fixture to create an instance of FilterDataComponent with a specific max_query_length
@pytest.fixture
def filter_data_component():
return FilterDataComponent(max_query_length=50)
def test_basic_valid_input(filter_data_component):
# Basic valid inputs
codeflash_output = filter_data_component._is_safe_jq_query("a.b.c")
codeflash_output = filter_data_component._is_safe_jq_query("123[4](5)")
codeflash_output = filter_data_component._is_safe_jq_query("a + b - c * d / e")
codeflash_output = filter_data_component._is_safe_jq_query("x < y >= z")
codeflash_output = filter_data_component._is_safe_jq_query("foo|bar, baz")
def test_empty_query(filter_data_component):
# Empty query should be valid
codeflash_output = filter_data_component._is_safe_jq_query("")
def test_maximum_length_query(filter_data_component):
# Query exactly at the maximum length limit
codeflash_output = filter_data_component._is_safe_jq_query("a" * 50)
def test_exceeding_maximum_length_query(filter_data_component):
# Query exceeding the maximum length limit
codeflash_output = filter_data_component._is_safe_jq_query("a" * 51)
def test_invalid_characters(filter_data_component):
# Queries containing invalid characters
codeflash_output = filter_data_component._is_safe_jq_query("a$b")
codeflash_output = filter_data_component._is_safe_jq_query("query_with_@_symbol")
codeflash_output = filter_data_component._is_safe_jq_query("query_with_#_symbol")
codeflash_output = filter_data_component._is_safe_jq_query("query_with_!_symbol")
def test_edge_cases_for_special_characters(filter_data_component):
# Queries with special characters at the boundaries
codeflash_output = filter_data_component._is_safe_jq_query(".")
codeflash_output = filter_data_component._is_safe_jq_query("[]")
def test_mixed_valid_and_invalid_characters(filter_data_component):
# Queries containing a mix of valid and invalid characters
codeflash_output = filter_data_component._is_safe_jq_query("a.b$c")
codeflash_output = filter_data_component._is_safe_jq_query("x+y@z")
def test_whitespace_handling(filter_data_component):
# Queries with leading, trailing, or multiple consecutive spaces
codeflash_output = filter_data_component._is_safe_jq_query(" a.b ")
codeflash_output = filter_data_component._is_safe_jq_query("a + b")
def test_single_character_queries(filter_data_component):
# Queries with a single character, both valid and invalid
codeflash_output = filter_data_component._is_safe_jq_query("a")
codeflash_output = filter_data_component._is_safe_jq_query("1")
codeflash_output = filter_data_component._is_safe_jq_query("+")
codeflash_output = filter_data_component._is_safe_jq_query("@")
codeflash_output = filter_data_component._is_safe_jq_query("$")
def test_boundary_conditions(filter_data_component):
# Queries just below and just above the maximum length
codeflash_output = filter_data_component._is_safe_jq_query("a" * 49)
codeflash_output = filter_data_component._is_safe_jq_query("a" * 51)
def test_complex_valid_queries(filter_data_component):
# Complex valid queries
codeflash_output = filter_data_component._is_safe_jq_query("a.b[1](2) + c.d - e.f / g.h * i.j")
codeflash_output = filter_data_component._is_safe_jq_query("foo | bar, baz < qux >= quux")
def test_performance_and_scalability():
# Large queries to assess performance
large_component = FilterDataComponent(max_query_length=1000)
codeflash_output = large_component._is_safe_jq_query("a" * 1000)
codeflash_output = large_component._is_safe_jq_query("a" * 999)
def test_unicode_and_non_ascii_characters(filter_data_component):
# Queries containing Unicode or non-ASCII characters
codeflash_output = filter_data_component._is_safe_jq_query("你好")
codeflash_output = filter_data_component._is_safe_jq_query("こんにちは")
codeflash_output = filter_data_component._is_safe_jq_query("Привет")
def test_numeric_edge_cases(filter_data_component):
# Queries containing only numeric characters
codeflash_output = filter_data_component._is_safe_jq_query("1234567890")
def test_special_cases_with_operators(filter_data_component):
# Queries with only operators or a mix of operators and valid characters
codeflash_output = filter_data_component._is_safe_jq_query("+")
codeflash_output = filter_data_component._is_safe_jq_query("a+b-c*d/e")
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.