ibis
ibis copied to clipboard
bug: validation error on geospatial subquery
xref: https://ibis-project.zulipchat.com/#narrow/stream/405330-duckdb/topic/spatial.20filtering.20joins/near/472566239
cc: @cboettig
(data from "https://data.source.coop/cboettig/ca30x30/ca_areas.parquet")
In [1]: import ibis
In [2]: import ibis
...: from ibis import _
...: con = ibis.duckdb.connect()
In [3]: t = (con
...: .read_parquet("ca_areas.parquet")
...: .cast({"SHAPE": "geometry"})
...: .mutate(area = _.SHAPE.area())
...: )
In [4]: geo23 = t.filter(_.Release_Year==2023)
In [5]: geo24 = t.filter(_.Release_Year==2024)
In [6]: geo24.filter(geo24.SHAPE.intersects(geo23.SHAPE))
File ~/mambaforge/envs/ibis-geo/lib/python3.11/site-packages/ibis/backends/duckdb/__init__.py:1377, in Backend._to_duckdb_relation(self, expr, params, limit)
1375 if table_expr.schema().geospatial:
1376 self._load_extensions(["spatial"])
-> 1377 return self.con.sql(sql)
BinderException: Binder Error: Referenced table "t1" not found!
Candidate tables: "t0"
This is the SQL
SELECT
*
REPLACE (ST_ASWKB("SHAPE") AS "SHAPE")
FROM (
SELECT
"t0"."OBJECTID",
"t0"."cpad_ACCESS_TYP",
"t0"."cpad_PARK_NAME",
"t0"."cpad_MNG_AGENCY",
"t0"."cpad_MNG_AG_LEV",
"t0"."reGAP",
"t0"."Easement",
"t0"."TYPE",
"t0"."CA_County_Name",
"t0"."CA_Region_Name",
"t0"."TerrMar",
"t0"."CA_Ecoregion_Name",
"t0"."ACCESS_TYP",
"t0"."MNG_AGNCY",
"t0"."MNG_AG_LEV",
"t0"."UNIT_NAME",
"t0"."DefaultSelection",
"t0"."CA_Ecoregion_Acres",
"t0"."CA_Region_Acres",
"t0"."CA_County_Acres",
"t0"."Acres",
"t0"."CA_Marine_Acres",
"t0"."Release_Year",
"t0"."mgmt_stack",
ST_GEOMFROMWKB("t0"."SHAPE") AS "SHAPE",
"t0"."SHAPE_bbox",
ST_AREA(ST_GEOMFROMWKB("t0"."SHAPE")) AS "area"
FROM "ibis_read_parquet_5hm2ugs3hncnzec3syb3gzdo5e" AS "t0"
WHERE
"t0"."Release_Year" = 2024
AND ST_INTERSECTS(ST_GEOMFROMWKB("t0"."SHAPE"), "t1"."SHAPE")
)
From @cpcloud:
Yeah, I think it's an issue with validation. We're letting this through, and we probably shouldn't.
Here's query that doesn't error (very slow to run)
from ibis.interactive import *
con = ibis.duckdb.connect()
ca30 = (
con.read_parquet("https://data.source.coop/cboettig/ca30x30/ca_areas.parquet")
.cast({"SHAPE": "geometry"})
.mutate(area=_.SHAPE.area())
)
t2 = ca30.filter(_.Release_Year == 2023)
expr = ca30.filter(
_.Release_Year == 2024,
lambda ca30: ~ca30.SHAPE.intersects(t2.SHAPE).any(),
)