duckdb_engine icon indicating copy to clipboard operation
duckdb_engine copied to clipboard

Supported join types

Open zbs opened this issue 4 months ago • 1 comments

Does duckdb_engine support joins like semi, anti, left join and other duckdb joins listed here? What is the syntax for using them?

zbs avatar Aug 11 '25 04:08 zbs

FYI I worked around this by overriding the duckdb dialect to handle custom joins. Example usage is

fooTable.join(barTable, onclause=expr, full={'strictness': 'semi'}) to do a semi join.

    class CustomDuckDBCompiler(PGCompiler):
        def visit_join(self, join, asfrom=False, from_linter=None, **kwargs):
            # Check for custom join strictness
            full = getattr(join, 'full', None)
            if full and isinstance(full, dict):
                join_type = full.get('type', '').upper()
                strictness = full.get('strictness', '').upper()

                valid_types = {'INNER', 'LEFT', 'RIGHT', 'FULL', 'CROSS', ''}
                valid_strictness = {'OUTER', 'SEMI', 'ANTI', 'ANY', 'ASOF', ''}

                if strictness not in valid_strictness:
                    raise ValueError(f"Invalid join strictness: {strictness}")

                if strictness == "ANY":
                    # 'ANY' is equivalent to 'OUTER' in DuckDB
                    warnings.warn("Join strictness 'ANY' is 'OUTER' in DuckDB: using 'OUTER' instead.")
                    strictness = 'OUTER'

                if join_type == "INNER":
                    # 'ANY' is equivalent to 'OUTER' in DuckDB
                    warnings.warn(
                        "Join strictness 'INNER' is achieved by the absence of OUTER: e.g. 'LEFT JOIN', 'JOIN', etc."
                    )
                    join_type = ''

                non_directional_strictness = {
                    "SEMI",
                    "ANTI",
                    "ASOF",
                }

                if join_type in {"INNER", "FULL", "CROSS"}:
                    assert strictness == "", "Join type INNER, FULL, CROSS cannot be used with strictness"

                if strictness in non_directional_strictness:
                    if join_type != '':
                        warnings.warn(
                            f"Join type {join_type} is not applicable for strictness {strictness}: "
                            + "order of tables in JOIN used to determine direction."
                        )
                        join_type = ''

                if join_type not in valid_types:
                    raise ValueError(f"Invalid join type: {join_type}")

                join_str = ""
                if join_type:
                    join_str += f"{join_type} "
                if strictness:
                    join_str += f"{strictness} "
                join_str += "JOIN"

                if from_linter:
                    from_linter.edges.update(itertools.product(join.left._from_objects, join.right._from_objects))


                return (f"{self.process(join.left, asfrom=True, from_linter=from_linter, **kwargs)} " +
                        f"{join_str} {self.process(join.right, asfrom=True, from_linter=from_linter, **kwargs)} " +
                        f"ON {self.process(join.onclause, asfrom=asfrom, from_linter=from_linter, **kwargs)}")

            # Fallback to default
            return super().visit_join(join, asfrom=asfrom, from_linter=from_linter, **kwargs)

    class CustomDuckDBDialect(module.Dialect):
        statement_compiler = CustomDuckDBCompiler

    sys.modules[__name__].CustomDuckDBDialect = CustomDuckDBDialect
    registry.register("duckdb.custom", __name__, "CustomDuckDBDialect")

zbs avatar Aug 12 '25 17:08 zbs