duckdb_engine
duckdb_engine copied to clipboard
Supported join types
Does duckdb_engine support joins like semi, anti, left join and other duckdb joins listed here? What is the syntax for using them?
FYI I worked around this by overriding the duckdb dialect to handle custom joins. Example usage is
fooTable.join(barTable, onclause=expr, full={'strictness': 'semi'}) to do a semi join.
class CustomDuckDBCompiler(PGCompiler):
def visit_join(self, join, asfrom=False, from_linter=None, **kwargs):
# Check for custom join strictness
full = getattr(join, 'full', None)
if full and isinstance(full, dict):
join_type = full.get('type', '').upper()
strictness = full.get('strictness', '').upper()
valid_types = {'INNER', 'LEFT', 'RIGHT', 'FULL', 'CROSS', ''}
valid_strictness = {'OUTER', 'SEMI', 'ANTI', 'ANY', 'ASOF', ''}
if strictness not in valid_strictness:
raise ValueError(f"Invalid join strictness: {strictness}")
if strictness == "ANY":
# 'ANY' is equivalent to 'OUTER' in DuckDB
warnings.warn("Join strictness 'ANY' is 'OUTER' in DuckDB: using 'OUTER' instead.")
strictness = 'OUTER'
if join_type == "INNER":
# 'ANY' is equivalent to 'OUTER' in DuckDB
warnings.warn(
"Join strictness 'INNER' is achieved by the absence of OUTER: e.g. 'LEFT JOIN', 'JOIN', etc."
)
join_type = ''
non_directional_strictness = {
"SEMI",
"ANTI",
"ASOF",
}
if join_type in {"INNER", "FULL", "CROSS"}:
assert strictness == "", "Join type INNER, FULL, CROSS cannot be used with strictness"
if strictness in non_directional_strictness:
if join_type != '':
warnings.warn(
f"Join type {join_type} is not applicable for strictness {strictness}: "
+ "order of tables in JOIN used to determine direction."
)
join_type = ''
if join_type not in valid_types:
raise ValueError(f"Invalid join type: {join_type}")
join_str = ""
if join_type:
join_str += f"{join_type} "
if strictness:
join_str += f"{strictness} "
join_str += "JOIN"
if from_linter:
from_linter.edges.update(itertools.product(join.left._from_objects, join.right._from_objects))
return (f"{self.process(join.left, asfrom=True, from_linter=from_linter, **kwargs)} " +
f"{join_str} {self.process(join.right, asfrom=True, from_linter=from_linter, **kwargs)} " +
f"ON {self.process(join.onclause, asfrom=asfrom, from_linter=from_linter, **kwargs)}")
# Fallback to default
return super().visit_join(join, asfrom=asfrom, from_linter=from_linter, **kwargs)
class CustomDuckDBDialect(module.Dialect):
statement_compiler = CustomDuckDBCompiler
sys.modules[__name__].CustomDuckDBDialect = CustomDuckDBDialect
registry.register("duckdb.custom", __name__, "CustomDuckDBDialect")