sqlx
sqlx copied to clipboard
query_as!/query! inference returns nullable entries on join when a bound parameter is absent
I've been debugging some joins recently and discovered that query_as! has different behavior when there are parameters bound or not.
This example case is with PostgreSQL and I've modified to use query! just so I could see what records were returned. Both macros seem to have the same erroneous behavior.
let records = sqlx::query!(r#"SELECT schemas.*, shares.name as share_name FROM schemas, shares WHERE share_id = shares.id AND shares.name = $1"#, "rtyler")
.fetch_all(db)
.await;
println!("RECORDS: {:?}", records);
let records = sqlx::query!(r#"SELECT schemas.*, shares.name as share_name FROM schemas, shares WHERE share_id = shares.id AND shares.name = 'rtyler'"#)
//let records = sqlx::query!(r#"SELECT schemas.*, shares.name as share_name FROM schemas, shares WHERE share_id = shares.id"#)
.fetch_all(db)
.await;
println!("RECORDS: {:?}", records);
Outputs the following:
INFO sqlx::query > SELECT schemas.*, shares.name as …; rows: 1, elapsed: 3.396ms
SELECT
schemas.*,
shares.name as share_name
FROM
schemas,
shares
WHERE
share_id = shares.id
AND shares.name = $1
RECORDS: Ok([Record { id: 970be392-5de7-479b-a6a0-b027368bcdf8, name: "samples", share_id: fcb12100-2590-496d-9578-d86e2d3ca831, created_at: 2021-05-30T21:52:44.279493Z, share_name: "rtyler" }])
INFO sqlx::query > /* SQLx ping */; rows: 0, elapsed: 309.032µs
INFO sqlx::query > /* SQLx ping */; rows: 0, elapsed: 419.070µs
INFO sqlx::query > SELECT schemas.*, shares.name as …; rows: 1, elapsed: 2.479ms
SELECT
schemas.*,
shares.name as share_name
FROM
schemas,
shares
WHERE
share_id = shares.id
AND shares.name = 'rtyler'
RECORDS: Ok([Record { id: Some(970be392-5de7-479b-a6a0-b027368bcdf8), name: Some("samples"), share_id: Some(fcb12100-2590-496d-9578-d86e2d3ca831), created_at: Some(2021-05-30T21:52:44.279493Z), share_name: Some("rtyler") }])
Something about binding a parameter seems to correct the behavior :confused:
This has been reported many times already. Postgres doesn't return any nullability information and SQLx'es additional analysis seems to be a bit buggy. See #367, #696, ~~#1126~~ for other issues about this (that haven't been closed yet).
@jplatte I've subscribed to those other tickets, shall I close this?
As-is I don't think this issue adds anything new, but it seems like your circumstances are a bit different so if you can provide some more information on your case (https://github.com/launchbadge/sqlx/issues/1126#issuecomment-845553872) that might help debug this issue.
Please include the output of
EXPLAIN (VERBOSE, FORMAT JSON) <query>(or that of a similar query plus relevant schema that reproduces the issue) as that is what is used for nullability detection in Postgres.
riverbank=# EXPLAIN SELECT schemas.*, shares.name as share_name FROM schemas, shares WHERE share_id = shares.id AND shares.name = 'rtyler';
QUERY PLAN
--------------------------------------------------------------------
Hash Join (cost=22.19..42.42 rows=4 width=104)
Hash Cond: (schemas.share_id = shares.id)
-> Seq Scan on schemas (cost=0.00..18.10 rows=810 width=72)
-> Hash (cost=22.12..22.12 rows=5 width=48)
-> Seq Scan on shares (cost=0.00..22.12 rows=5 width=48)
Filter: (name = 'rtyler'::text)
(6 rows)
CREATE TABLE public.schemas (
id uuid DEFAULT public.gen_random_uuid() NOT NULL,
name text NOT NULL,
share_id uuid NOT NULL,
created_at timestamp with time zone DEFAULT now() NOT NULL
);
CREATE TABLE public.shares (
id uuid DEFAULT public.gen_random_uuid() NOT NULL,
name text NOT NULL,
created_at timestamp with time zone DEFAULT now() NOT NULL
);
Specifically we need the output of EXPLAIN (VERBOSE, FORMAT JSON) <query>, not just EXPLAIN <query>.
Oops, I misunderstood, here ya goes
riverbank=# EXPLAIN (VERBOSE, FORMAT JSON) SELECT schemas.*, shares.name as share_name FROM schemas, shares WHERE share_id = shares.id AND shares.name = 'rtyler';
QUERY PLAN
----------------------------------------------------------------------------------------------------------
[ +
{ +
"Plan": { +
"Node Type": "Hash Join", +
"Parallel Aware": false, +
"Join Type": "Inner", +
"Startup Cost": 22.19, +
"Total Cost": 42.42, +
"Plan Rows": 4, +
"Plan Width": 104, +
"Output": ["schemas.id", "schemas.name", "schemas.share_id", "schemas.created_at", "shares.name"],+
"Inner Unique": true, +
"Hash Cond": "(schemas.share_id = shares.id)", +
"Plans": [ +
{ +
"Node Type": "Seq Scan", +
"Parent Relationship": "Outer", +
"Parallel Aware": false, +
"Relation Name": "schemas", +
"Schema": "public", +
"Alias": "schemas", +
"Startup Cost": 0.00, +
"Total Cost": 18.10, +
"Plan Rows": 810, +
"Plan Width": 72, +
"Output": ["schemas.id", "schemas.name", "schemas.share_id", "schemas.created_at"] +
}, +
{ +
"Node Type": "Hash", +
"Parent Relationship": "Inner", +
"Parallel Aware": false, +
"Startup Cost": 22.12, +
"Total Cost": 22.12, +
"Plan Rows": 5, +
"Plan Width": 48, +
"Output": ["shares.name", "shares.id"], +
"Plans": [ +
{ +
"Node Type": "Seq Scan", +
"Parent Relationship": "Outer", +
"Parallel Aware": false, +
"Relation Name": "shares", +
"Schema": "public", +
"Alias": "shares", +
"Startup Cost": 0.00, +
"Total Cost": 22.12, +
"Plan Rows": 5, +
"Plan Width": 48, +
"Output": ["shares.name", "shares.id"], +
"Filter": "(shares.name = 'rtyler'::text)" +
} +
] +
} +
] +
} +
} +
]
@rtyler yeah our heuristics haven't really been tested on cross joins. However, it looks like you probably want an inner join with shares being the primary table:
SELECT shares.name as share_name, schemas.* FROM shares INNER JOIN schemas on shares.id = share_id WHERE shares.name = 'jtyler'
You usually want to select the most specific data first (in this case shares as you're matching on shares.name), and then join related tables. If you have an index on shares.name and schemas.share_id it'll avoid those costly sequential scans.
@abonander in my experience, the type of join doesn't matter. I'm getting the same issue with inner joins as well.
(If I remove the u.id IS NULL part, it'll infer nullability correctly.)
struct Test {
id: i64,
}
sqlx::query_as!(
Test,
"
SELECT l.id
FROM levels l
INNER JOIN users AS u ON u.id = l.user_id
WHERE u.id IS NULL OR u.id = $1
",
author_id,
)
.fetch_all(connection)
.await;
# PREPARE test (int) AS
# SELECT l.id
# FROM levels l
# INNER JOIN users AS u ON u.id = l.user_id
# WHERE u.id IS NULL OR u.id = $1;
PREPARE
# EXPLAIN (VERBOSE, FORMAT JSON) EXECUTE test(1);
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------
[ +
{ +
"Plan": { +
"Node Type": "Hash Join", +
"Parallel Aware": false, +
"Join Type": "Inner", +
"Startup Cost": 10.89, +
"Total Cost": 22.54, +
"Plan Rows": 2, +
"Plan Width": 8, +
"Output": ["l.id"], +
"Inner Unique": true, +
"Hash Cond": "(l.user_id = u.id)", +
"Plans": [ +
{ +
"Node Type": "Seq Scan", +
"Parent Relationship": "Outer", +
"Parallel Aware": false, +
"Relation Name": "levels", +
"Schema": "public", +
"Alias": "l", +
"Startup Cost": 0.00, +
"Total Cost": 11.30, +
"Plan Rows": 130, +
"Plan Width": 16, +
"Output": ["l.id", "l.title", "l.user_id", "l.parent_id", "l.data", "l.is_autosaved", "l.created_at", "l.updated_at"]+
}, +
{ +
"Node Type": "Hash", +
"Parent Relationship": "Inner", +
"Parallel Aware": false, +
"Startup Cost": 10.88, +
"Total Cost": 10.88, +
"Plan Rows": 1, +
"Plan Width": 8, +
"Output": ["u.id"], +
"Plans": [ +
{ +
"Node Type": "Seq Scan", +
"Parent Relationship": "Outer", +
"Parallel Aware": false, +
"Relation Name": "users", +
"Schema": "public", +
"Alias": "u", +
"Startup Cost": 0.00, +
"Total Cost": 10.88, +
"Plan Rows": 1, +
"Plan Width": 8, +
"Output": ["u.id"], +
"Filter": "((u.id IS NULL) OR (u.id = 1))" +
} +
] +
} +
] +
} +
} +
]
(1 row)