sqlx
sqlx copied to clipboard
[query macros] faulty null inference involving left joins in Postgres
https://discordapp.com/channels/665528275556106240/694697474689859614/717103397848875049
CREATE TABLE IF NOT EXISTS colors (
color_uuid UUID NOT NULL DEFAULT uuid_generate_v4(),
color TEXT
);
insert into colors (color_uuid, color) values ('5ab49fea-1b9e-4ae1-ad7d-9fd9e0f48852','Red');
insert into colors (color_uuid, color) values ('b7bf2e01-6bdb-4620-b437-a5784a70380a','White');
insert into colors (color_uuid, color) values ('f07f0b82-7008-4d25-993b-3e2ba80d1131','Black');
CREATE TABLE IF NOT EXISTS objects (
object_uuid UUID NOT NULL DEFAULT uuid_generate_v4(),
colors UUID[]
);
insert into objects (colors) values ('{"5ab49fea-1b9e-4ae1-ad7d-9fd9e0f48852","b7bf2e01-6bdb-4620-b437-a5784a70380a"}');
insert into objects (colors) values ('{"5ab49fea-1b9e-4ae1-ad7d-9fd9e0f48852"}');
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let pool = PgPool::new(&env::var("DATABASE_URL")?).await?;
let recs = sqlx::query!(
r#"SELECT o.object_uuid, o.colors,
c1.color_uuid as color1_uuid, c1.color as color1_color,
c2.color_uuid as color2_uuid, c2.color as color2_color
FROM objects o
LEFT JOIN colors c1 ON c1.color_uuid = o.colors[1]
LEFT JOIN colors c2 ON c2.color_uuid = o.colors[2]
"#)
.fetch_all(&pool)
.await?;
for rec in recs {
println!("{}", rec.object_uuid);
}
Ok(())
}
Causes a Decode(UnexpectedNullError)
because the second column has a null even though it was inferred to be not-null.
This probably affects any query with a left join where the columns selected from the join have NOT NULL
constraints.
I don't think we can fix this without analyzing the SQL query ourselves, Postgres just doesn't tell us enough here.
All it tells us is that c2
comes from the colors
table and that it's the second column, and when we pull the schema of colors
we can see the second column has a not null
constraint.
We could try issuing a warning if the query contains a LEFT JOIN
but there's no stable interface for that yet.
@mehcode suggests using explain verbose
, which using JSON format gives us output like this:
postgres=# explain (verbose true, format json) select * from null_join_test left join null_join_test njt on false;
QUERY PLAN
----------------------------------------------------
[ +
{ +
"Plan": { +
"Node Type": "Nested Loop", +
"Parallel Aware": false, +
"Join Type": "Left", +
"Startup Cost": 0.00, +
"Total Cost": 61.00, +
"Plan Rows": 2550, +
"Plan Width": 8, +
"Output": ["null_join_test.value", "value"],+
"Inner Unique": false, +
"Join Filter": "false", +
"Plans": [ +
{ +
"Node Type": "Seq Scan", +
"Parent Relationship": "Outer", +
"Parallel Aware": false, +
"Relation Name": "null_join_test", +
"Schema": "public", +
"Alias": "null_join_test", +
"Startup Cost": 0.00, +
"Total Cost": 35.50, +
"Plan Rows": 2550, +
"Plan Width": 4, +
"Output": ["null_join_test.value"] +
}, +
{ +
"Node Type": "Result", +
"Parent Relationship": "Inner", +
"Parallel Aware": false, +
"Startup Cost": 0.00, +
"Total Cost": 0.00, +
"Plan Rows": 0, +
"Plan Width": 4, +
"Output": ["value"], +
"One-Time Filter": "false" +
} +
] +
} +
} +
]
(1 row)
This may be enough for simple cases but I'll need to evaluate this more.
For the above query:
[
{
"Plan": {
"Node Type": "Merge Join",
"Parallel Aware": false,
"Join Type": "Right",
"Startup Cost": 672.10,
"Total Cost": 1136.80,
"Plan Rows": 30623,
"Plan Width": 144,
"Output": ["o.object_uuid", "o.colors", "c1.color_uuid", "c1.color", "c2.color_uuid", "c2.color"],
"Inner Unique": false,
"Merge Cond": "(c2.color_uuid = (o.colors[2]))",
"Plans": [
{
"Node Type": "Sort",
"Parent Relationship": "Outer",
"Parallel Aware": false,
"Startup Cost": 74.54,
"Total Cost": 77.21,
"Plan Rows": 1070,
"Plan Width": 48,
"Output": ["c2.color_uuid", "c2.color"],
"Sort Key": ["c2.color_uuid"],
"Plans": [
{
"Node Type": "Seq Scan",
"Parent Relationship": "Outer",
"Parallel Aware": false,
"Relation Name": "colors",
"Schema": "public",
"Alias": "c2",
"Startup Cost": 0.00,
"Total Cost": 20.70,
"Plan Rows": 1070,
"Plan Width": 48,
"Output": ["c2.color_uuid", "c2.color"]
}
]
},
{
"Node Type": "Sort",
"Parent Relationship": "Inner",
"Parallel Aware": false,
"Startup Cost": 597.56,
"Total Cost": 611.87,
"Plan Rows": 5724,
"Plan Width": 96,
"Output": ["o.object_uuid", "o.colors", "c1.color_uuid", "c1.color", "(o.colors[2])"],
"Sort Key": ["(o.colors[2])"],
"Plans": [
{
"Node Type": "Merge Join",
"Parent Relationship": "Outer",
"Parallel Aware": false,
"Join Type": "Left",
"Startup Cost": 149.08,
"Total Cost": 240.30,
"Plan Rows": 5724,
"Plan Width": 96,
"Output": ["o.object_uuid", "o.colors", "c1.color_uuid", "c1.color", "o.colors[2]"],
"Inner Unique": false,
"Merge Cond": "((o.colors[1]) = c1.color_uuid)",
"Plans": [
{
"Node Type": "Sort",
"Parent Relationship": "Outer",
"Parallel Aware": false,
"Startup Cost": 74.54,
"Total Cost": 77.21,
"Plan Rows": 1070,
"Plan Width": 48,
"Output": ["o.object_uuid", "o.colors", "(o.colors[1])"],
"Sort Key": ["(o.colors[1])"],
"Plans": [
{
"Node Type": "Seq Scan",
"Parent Relationship": "Outer",
"Parallel Aware": false,
"Relation Name": "objects",
"Schema": "public",
"Alias": "o",
"Startup Cost": 0.00,
"Total Cost": 20.70,
"Plan Rows": 1070,
"Plan Width": 48,
"Output": ["o.object_uuid", "o.colors", "o.colors[1]"]
}
]
},
{
"Node Type": "Sort",
"Parent Relationship": "Inner",
"Parallel Aware": false,
"Startup Cost": 74.54,
"Total Cost": 77.21,
"Plan Rows": 1070,
"Plan Width": 48,
"Output": ["c1.color_uuid", "c1.color"],
"Sort Key": ["c1.color_uuid"],
"Plans": [
{
"Node Type": "Seq Scan",
"Parent Relationship": "Outer",
"Parallel Aware": false,
"Relation Name": "colors",
"Schema": "public",
"Alias": "c1",
"Startup Cost": 0.00,
"Total Cost": 20.70,
"Plan Rows": 1070,
"Plan Width": 48,
"Output": ["c1.color_uuid", "c1.color"]
}
]
}
]
}
]
}
]
}
}
]
Potential strategy:
- Given the outermost plan, use the
Outputs
key to map (potentially aliased) columns to their original names - Perform a depth-first traversal of the plan tree, adding all columns produced by a left or right join to a
HashSet
- Mark columns in the query as nullable if they appear in this set
Would it make sense if we were able to use a struct like:
users_details
would be None
if the LEFT JOIN
didn't return anything.
struct UsersDetails {
field1: String,
field2: Option<String>
}
struct Users {
user_id: i32,
users_details: Option<UsersDetails>
}
CREATE TABLE users (
user_id integer NOT NULL
);
CREATE TABLE users_details (
user_id integer NOT NULL,
field1 text NOT NULL
field2 text
);
SELECT
users.user_id,
users_details.field1,
users_details.field2
FROM users
LEFT JOIN users_details USING (user_id)
Am I correct in thinking this should be solved in 0.5? I am using 0.5.1 and still encountering this, should I raise a bug?
Am I correct in thinking this should be solved in 0.5? I am using 0.5.1 and still encountering this, should I raise a bug?
@cameron-martin Isn't a bug you have to implement what @abonander say, for example in your query you should add an alias ("Mark columns in the query as nullable if they appear in this set"):
async fn main() -> Result<(), Box<dyn std::error::Error>> { let pool = PgPool::new(&env::var("DATABASE_URL")?).await?; let recs = sqlx::query!( r#"SELECT o.object_uuid, o.colors, c1.color_uuid as "color_1?" c2.color_uuid as "color2?" FROM objects o LEFT JOIN colors c1 ON c1.color_uuid = o.colors[1] LEFT JOIN colors c2 ON c2.color_uuid = o.colors[2] "#) .fetch_all(&pool) .await?;
for rec in recs {
println!("{}", rec.object_uuid);
}
Ok(())
}
If you use as "color_1?" inference that is null, If you use as "color_1!" inference that is not null,