sqlx
sqlx copied to clipboard
Double left join -> wrong NULL inference -> runtime exception.
Bug Description
Double left join leads to faulty Option / nullability inference. I have provided a minimal repro. There are two nullability inference issues:
- LEFT JOIN-ed table columns are inferred as not Optional
- The FROM table columns are inferred as Optional
Minimal Reproduction
https://github.com/Palmik/sqlx-issue-repro
The schema:
CREATE TABLE foo (
id BIGINT PRIMARY KEY,
name TEXT NOT NULL
);
CREATE TABLE baz (
id BIGINT PRIMARY KEY,
name TEXT NOT NULL
);
CREATE TABLE bar (
id BIGINT PRIMARY KEY,
foo_id BIGINT NOT NULL REFERENCES foo(id),
baz_id BIGINT REFERENCES baz(id),
name TEXT NOT NULL
);
The sql query:
SELECT
foo.id,
foo.name,
bar.id AS "bar_id",
bar.name AS "bar_name",
baz.id AS "baz_id",
baz.name AS "baz_name"
FROM foo
LEFT JOIN bar ON bar.foo_id = foo.id
LEFT JOIN baz ON baz.id = bar.baz_id
You would expect foo.id and foo.name to not be optional, and the other columns to be optional. SQLx infers foo.* and baz.* as optional, and bar.* as not optional. One can circumvent the issue by using the AS "bar_id?" syntax to mark the columns as optional manually.
Running the code produces:
thread 'main' panicked at 'called `Result::unwrap()` on an `Err` value: ColumnDecode { index: "2", source: UnexpectedNullError }', src/main.rs:77:6
If you make sure that all rows exist, e.g. commenting out line 27, you get:
Record { id: Some(1), name: Some("foo1"), bar_id: 1, bar_name: "bar1", baz_id: Some(1), baz_name: Some("baz1") }
Record { id: Some(2), name: Some("foo2"), bar_id: 2, bar_name: "bar2", baz_id: Some(2), baz_name: Some("baz2") }
Record { id: Some(1), name: Some("foo1"), bar_id: 3, bar_name: "bar3", baz_id: None, baz_name: None }
Record { id: Some(2), name: Some("foo2"), bar_id: 4, bar_name: "bar4", baz_id: None, baz_name: None }
Info
- SQLx version:
0.7.2 - SQLx features enabled:
sqlx = { version = "0.7.2", features = ["postgres", "uuid", "runtime-tokio-native-tls"] } - Database server and version: PostgreSQL 16, but also reproducible on 15.2
- Operating system: linux
rustc --version:rustc 1.71.1 (eb26296b5 2023-08-03)
@Palmik can you please post the output of EXPLAIN (VERBOSE, FORMAT JSON) <query>
QUERY PLAN
-------------------------------------------------------------------------------------
[ +
{ +
"Plan": { +
"Node Type": "Hash Join", +
"Parallel Aware": false, +
"Async Capable": false, +
"Join Type": "Left", +
"Startup Cost": 16.61, +
"Total Cost": 44.65, +
"Plan Rows": 1200, +
"Plan Width": 93, +
"Output": ["foo.id", "foo.name", "bar.id", "bar.name", "baz.id", "baz.name"],+
"Inner Unique": false, +
"Hash Cond": "(foo.id = bar.foo_id)", +
"Plans": [ +
{ +
"Node Type": "Seq Scan", +
"Parent Relationship": "Outer", +
"Parallel Aware": false, +
"Async Capable": false, +
"Relation Name": "foo", +
"Schema": "public", +
"Alias": "foo", +
"Startup Cost": 0.00, +
"Total Cost": 22.00, +
"Plan Rows": 1200, +
"Plan Width": 40, +
"Output": ["foo.id", "foo.name"] +
}, +
{ +
"Node Type": "Hash", +
"Parent Relationship": "Inner", +
"Parallel Aware": false, +
"Async Capable": false, +
"Startup Cost": 16.56, +
"Total Cost": 16.56, +
"Plan Rows": 4, +
"Plan Width": 61, +
"Output": ["bar.id", "bar.name", "bar.foo_id", "baz.id", "baz.name"], +
"Plans": [ +
{ +
"Node Type": "Nested Loop", +
"Parent Relationship": "Outer", +
"Parallel Aware": false, +
"Async Capable": false, +
"Join Type": "Left", +
"Startup Cost": 0.16, +
"Total Cost": 16.56, +
"Plan Rows": 4, +
"Plan Width": 61, +
"Output": ["bar.id", "bar.name", "bar.foo_id", "baz.id", "baz.name"],+
"Inner Unique": true, +
"Plans": [ +
{ +
"Node Type": "Seq Scan", +
"Parent Relationship": "Outer", +
"Parallel Aware": false, +
"Async Capable": false, +
"Relation Name": "bar", +
"Schema": "public", +
"Alias": "bar", +
"Startup Cost": 0.00, +
"Total Cost": 1.04, +
"Plan Rows": 4, +
"Plan Width": 29, +
"Output": ["bar.id", "bar.foo_id", "bar.baz_id", "bar.name"] +
}, +
{ +
"Node Type": "Memoize", +
"Parent Relationship": "Inner", +
"Parallel Aware": false, +
"Async Capable": false, +
"Startup Cost": 0.16, +
"Total Cost": 6.18, +
"Plan Rows": 1, +
"Plan Width": 40, +
"Output": ["baz.id", "baz.name"], +
"Cache Key": "bar.baz_id", +
"Cache Mode": "logical", +
"Plans": [ +
{ +
"Node Type": "Index Scan", +
"Parent Relationship": "Outer", +
"Parallel Aware": false, +
"Async Capable": false, +
"Scan Direction": "Forward", +
"Index Name": "baz_pkey", +
"Relation Name": "baz", +
"Schema": "public", +
"Alias": "baz", +
"Startup Cost": 0.15, +
"Total Cost": 6.17, +
"Plan Rows": 1, +
"Plan Width": 40, +
"Output": ["baz.id", "baz.name"], +
"Index Cond": "(baz.id = bar.baz_id)" +
} +
] +
} +
] +
} +
] +
} +
] +
} +
} +
]
(1 row)
Hi, are you able to reproduce?
I ran into this as well on my project with similar table structure & joins. What I discovered is using sqlx::query_as_unchecked! seemed to work properly. This was on sqlx v0.7.3. Feels like query_as should work properly though - but I'm new to sqlx so not sure what should really happen here.
Hello!
I ran into this issue with a single left join.
create table a (
id_a text primary key not null
);
create table b (
id_b text primary key not null,
id_a text references a (id_a)
);
insert into b (id_b) values ('1');
use serde::Deserialize;
use sqlx::{postgres::PgPoolOptions, query_as};
#[derive(Debug, Deserialize)]
pub struct Rows {
pub id_a: Option<String>,
}
#[tokio::main]
async fn main() {
let db = PgPoolOptions::new().connect("postgresql://127.0.0.1/labs").await.unwrap();
query_as!(
Rows,
r#"
select a.id_a
from b
left join a on a.id_a = b.id_a
where b.id_b = $1
"#,
"1"
)
.fetch_all(&db)
.await
.unwrap();
}
the query plan:
[
{
"Plan": {
"Node Type": "Nested Loop",
"Parallel Aware": false,
"Async Capable": false,
"Join Type": "Left",
"Startup Cost": 0.3,
"Total Cost": 16.35,
"Plan Rows": 1,
"Plan Width": 32,
"Output": ["a.id_a"],
"Inner Unique": true,
"Plans": [
{
"Node Type": "Index Scan",
"Parent Relationship": "Outer",
"Parallel Aware": false,
"Async Capable": false,
"Scan Direction": "Forward",
"Index Name": "b_pkey",
"Relation Name": "b",
"Schema": "public",
"Alias": "b",
"Startup Cost": 0.15,
"Total Cost": 8.17,
"Plan Rows": 1,
"Plan Width": 32,
"Output": ["b.id_b", "b.id_a"],
"Index Cond": "(b.id_b = '1'::text)"
},
{
"Node Type": "Index Only Scan",
"Parent Relationship": "Inner",
"Parallel Aware": false,
"Async Capable": false,
"Scan Direction": "Forward",
"Index Name": "a_pkey",
"Relation Name": "a",
"Schema": "public",
"Alias": "a",
"Startup Cost": 0.15,
"Total Cost": 8.17,
"Plan Rows": 1,
"Plan Width": 32,
"Output": ["a.id_a"],
"Index Cond": "(a.id_a = b.id_a)"
}
]
}
}
]
It works correctly if I force the id_a as optionnal :
query_as!(
Rows,
r#"
select a.id_a as "id_a?"
from b
left join a on a.id_a = b.id_a
where b.id_b = $1
"#,
"1"
)
.fetch_all(&db)
.await
.unwrap()
Strangely, it also works without the parameter binding:
query_as!(
Rows,
r#"
select a.id_a
from b
left join a on a.id_a = b.id_a
where b.id_b = '1'
"#,
)
.fetch_all(&db)
.await
.unwrap();
~Hi there I run into the same issue with a many-to-many left join query. Do you made progress? I get try decoding as an 'Option' when my left join is empty.~
Edit: I got my many-to-many work:
I had to filter the aggregated array to get a null value in the result set. Since I use Option<Vec<T>> it is now None ... I don't know though how I could maybe have it as an empty vector.