pypika
pypika copied to clipboard
Recursion Error while building large SQL statement
Hi, I'm trying to build a large SQL statement, but Python keeps shooting out of the play. So, basically, I need to fetch all information for a large number of items at once. This is the query:
items = [...] # list of > 10,000 keys
table = Table("MYTABLE")
query = Query \
.from_(table) \
.select("*") \
.where(
Criterion.any([table.ID == x for x in items])
) \
.where(
(table.DATETIMELOCAL >= f"{start_date:%Y-%m-%d}")
&
(table.DATETIMELOCAL < f"{end_date:%Y-%m-%d}")
) \
.orderby('NAME') \
.orderby('DATETIMEUTC')
This is the issue:
File "/opt/local/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/pypika/utils.py", line 50, in _copy
result = func(self_copy, *args, **kwargs)
File "/opt/local/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/pypika/queries.py", line 928, in where
if not self._validate_table(criterion):
File "/opt/local/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/pypika/queries.py", line 1153, in _validate_table
for field in term.fields_():
File "/opt/local/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/pypika/terms.py", line 57, in fields_
return set(self.find_(Field))
File "/opt/local/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/pypika/terms.py", line 37, in find_
return [node for node in self.nodes_() if isinstance(node, type)]
File "/opt/local/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/pypika/terms.py", line 37, in <listcomp>
return [node for node in self.nodes_() if isinstance(node, type)]
File "/opt/local/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/pypika/terms.py", line 743, in nodes_
yield from self.left.nodes_()
File "/opt/local/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/pypika/terms.py", line 743, in nodes_
yield from self.left.nodes_()
File "/opt/local/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/pypika/terms.py", line 743, in nodes_
yield from self.left.nodes_()
[Previous line repeated 986 more times]
File "/opt/local/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/pypika/terms.py", line 742, in nodes_
yield from self.right.nodes_()
File "/opt/local/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/pypika/terms.py", line 743, in nodes_
yield from self.left.nodes_()
File "/opt/local/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/pypika/terms.py", line 547, in nodes_
yield from self.table.nodes_()
RecursionError: maximum recursion depth exceeded
It looks like we have an issue in using Criterion.any([table.ID == x for x in items])
when items
is too large.
Please, let me know where I can help.
Thanks!
It's probably too late for @ceandrade, but here's a workaround for Criterion.any()
and Criterion.all()
that doesn't use recursion:
class ChainedComparatorCriterion(Criterion):
def __init__(
self, comparator: Comparator, terms: Iterable[Term], alias: Optional[str] = None
) -> None:
super().__init__(alias)
if not terms:
raise ValueError('You must specify at least one term.')
self.comparator = comparator
self.terms = terms
def nodes_(self) -> Iterator[NodeT]:
yield self
for term in self.terms:
yield from term.nodes_()
@property
def is_aggregate(self) -> Optional[bool]:
return resolve_is_aggregate([term.is_aggregate for term in [self.left, self.right]])
@builder
def replace_table(
self, current_table: Optional[Table], new_table: Optional[Table]
) -> 'ChainedComparatorCriterion':
self.terms = [term.replace_table(current_table, new_table) for term in self.terms]
def get_sql(self, quote_char: str = '"', with_alias: bool = False, **kwargs: Any) -> str:
sql = f' {self.comparator.value} '.join(
term.get_sql(quote_char=quote_char, **{**kwargs, 'subcriterion': True})
for term in self.terms
)
# `kwargs['subcriterion']` seems to be `False` when it shouldn't be, so always wrap it.
sql = f'({sql})'
if with_alias:
return format_alias_sql(sql, self.alias, **kwargs)
return sql
class AllCriterion(ChainedComparatorCriterion):
def __init__(self, terms: Iterable[Term], alias: Optional[str] = None) -> None:
super().__init__(Boolean.and_, terms, alias)
class AnyCriterion(ChainedComparatorCriterion):
def __init__(self, terms: Iterable[Term], alias: Optional[str] = None) -> None:
super().__init__(Boolean.or_, terms, alias)
Thanks @sangaline! This looks a very nice solution indeed. In my case, I ended up breaking the query into smaller subqueries and concatenated the results. It adds some bookkeeping and some load to the DB, though.
I will keep your solution for future use!
I will leave this thread open because the core issue has not been solved yet, I guess.