sqlx query_as!/query! inference returns nullable entries on join when a bound parameter is absent

I've been debugging some joins recently and discovered that query_as! has different behavior when there are parameters bound or not.

This example case is with PostgreSQL and I've modified to use query! just so I could see what records were returned. Both macros seem to have the same erroneous behavior.

        let records = sqlx::query!(r#"SELECT schemas.*, shares.name as share_name FROM schemas, shares WHERE share_id = shares.id AND shares.name = $1"#, "rtyler")
            .fetch_all(db)
            .await;

        println!("RECORDS: {:?}", records);
        let records = sqlx::query!(r#"SELECT schemas.*, shares.name as share_name FROM schemas, shares WHERE share_id = shares.id AND shares.name = 'rtyler'"#)
        //let records = sqlx::query!(r#"SELECT schemas.*, shares.name as share_name FROM schemas, shares WHERE share_id = shares.id"#)
            .fetch_all(db)
            .await;

        println!("RECORDS: {:?}", records);

Outputs the following:

 INFO  sqlx::query           > SELECT schemas.*, shares.name as …; rows: 1, elapsed: 3.396ms

SELECT
  schemas.*,
  shares.name as share_name
FROM
  schemas,
  shares
WHERE
  share_id = shares.id
  AND shares.name = $1

RECORDS: Ok([Record { id: 970be392-5de7-479b-a6a0-b027368bcdf8, name: "samples", share_id: fcb12100-2590-496d-9578-d86e2d3ca831, created_at: 2021-05-30T21:52:44.279493Z, share_name: "rtyler" }])
 INFO  sqlx::query           > /* SQLx ping */; rows: 0, elapsed: 309.032µs
 INFO  sqlx::query           > /* SQLx ping */; rows: 0, elapsed: 419.070µs
 INFO  sqlx::query           > SELECT schemas.*, shares.name as …; rows: 1, elapsed: 2.479ms

SELECT
  schemas.*,
  shares.name as share_name
FROM
  schemas,
  shares
WHERE
  share_id = shares.id
  AND shares.name = 'rtyler'

RECORDS: Ok([Record { id: Some(970be392-5de7-479b-a6a0-b027368bcdf8), name: Some("samples"), share_id: Some(fcb12100-2590-496d-9578-d86e2d3ca831), created_at: Some(2021-05-30T21:52:44.279493Z), share_name: Some("rtyler") }])

Something about binding a parameter seems to correct the behavior :confused:

May 31 '21 17:05 rtyler

This has been reported many times already. Postgres doesn't return any nullability information and SQLx'es additional analysis seems to be a bit buggy. See #367, #696, ~~#1126~~ for other issues about this (that haven't been closed yet).

May 31 '21 17:05 jplatte

@jplatte I've subscribed to those other tickets, shall I close this?

May 31 '21 17:05 rtyler

As-is I don't think this issue adds anything new, but it seems like your circumstances are a bit different so if you can provide some more information on your case (https://github.com/launchbadge/sqlx/issues/1126#issuecomment-845553872) that might help debug this issue.

Please include the output of EXPLAIN (VERBOSE, FORMAT JSON) <query> (or that of a similar query plus relevant schema that reproduces the issue) as that is what is used for nullability detection in Postgres.

May 31 '21 17:05 jplatte

riverbank=# EXPLAIN SELECT schemas.*, shares.name as share_name FROM schemas, shares WHERE share_id = shares.id AND shares.name = 'rtyler';
                             QUERY PLAN
--------------------------------------------------------------------
 Hash Join  (cost=22.19..42.42 rows=4 width=104)
   Hash Cond: (schemas.share_id = shares.id)
   ->  Seq Scan on schemas  (cost=0.00..18.10 rows=810 width=72)
   ->  Hash  (cost=22.12..22.12 rows=5 width=48)
         ->  Seq Scan on shares  (cost=0.00..22.12 rows=5 width=48)
               Filter: (name = 'rtyler'::text)
(6 rows)

CREATE TABLE public.schemas (
    id uuid DEFAULT public.gen_random_uuid() NOT NULL,
    name text NOT NULL,
    share_id uuid NOT NULL,
    created_at timestamp with time zone DEFAULT now() NOT NULL
);

CREATE TABLE public.shares (
    id uuid DEFAULT public.gen_random_uuid() NOT NULL,
    name text NOT NULL,
    created_at timestamp with time zone DEFAULT now() NOT NULL
);

May 31 '21 21:05 rtyler

Specifically we need the output of EXPLAIN (VERBOSE, FORMAT JSON) <query>, not just EXPLAIN <query>.

Jun 01 '21 05:06 abonander

Oops, I misunderstood, here ya goes

riverbank=#  EXPLAIN (VERBOSE, FORMAT JSON) SELECT schemas.*, shares.name as share_name FROM schemas, shares WHERE share_id = shares.id AND shares.name = 'rtyler';
                                                QUERY PLAN
----------------------------------------------------------------------------------------------------------
 [                                                                                                       +
   {                                                                                                     +
     "Plan": {                                                                                           +
       "Node Type": "Hash Join",                                                                         +
       "Parallel Aware": false,                                                                          +
       "Join Type": "Inner",                                                                             +
       "Startup Cost": 22.19,                                                                            +
       "Total Cost": 42.42,                                                                              +
       "Plan Rows": 4,                                                                                   +
       "Plan Width": 104,                                                                                +
       "Output": ["schemas.id", "schemas.name", "schemas.share_id", "schemas.created_at", "shares.name"],+
       "Inner Unique": true,                                                                             +
       "Hash Cond": "(schemas.share_id = shares.id)",                                                    +
       "Plans": [                                                                                        +
         {                                                                                               +
           "Node Type": "Seq Scan",                                                                      +
           "Parent Relationship": "Outer",                                                               +
           "Parallel Aware": false,                                                                      +
           "Relation Name": "schemas",                                                                   +
           "Schema": "public",                                                                           +
           "Alias": "schemas",                                                                           +
           "Startup Cost": 0.00,                                                                         +
           "Total Cost": 18.10,                                                                          +
           "Plan Rows": 810,                                                                             +
           "Plan Width": 72,                                                                             +
           "Output": ["schemas.id", "schemas.name", "schemas.share_id", "schemas.created_at"]            +
         },                                                                                              +
         {                                                                                               +
           "Node Type": "Hash",                                                                          +
           "Parent Relationship": "Inner",                                                               +
           "Parallel Aware": false,                                                                      +
           "Startup Cost": 22.12,                                                                        +
           "Total Cost": 22.12,                                                                          +
           "Plan Rows": 5,                                                                               +
           "Plan Width": 48,                                                                             +
           "Output": ["shares.name", "shares.id"],                                                       +
           "Plans": [                                                                                    +
             {                                                                                           +
               "Node Type": "Seq Scan",                                                                  +
               "Parent Relationship": "Outer",                                                           +
               "Parallel Aware": false,                                                                  +
               "Relation Name": "shares",                                                                +
               "Schema": "public",                                                                       +
               "Alias": "shares",                                                                        +
               "Startup Cost": 0.00,                                                                     +
               "Total Cost": 22.12,                                                                      +
               "Plan Rows": 5,                                                                           +
               "Plan Width": 48,                                                                         +
               "Output": ["shares.name", "shares.id"],                                                   +
               "Filter": "(shares.name = 'rtyler'::text)"                                                +
             }                                                                                           +
           ]                                                                                             +
         }                                                                                               +
       ]                                                                                                 +
     }                                                                                                   +
   }                                                                                                     +
 ]

Jun 01 '21 16:06 rtyler

@rtyler yeah our heuristics haven't really been tested on cross joins. However, it looks like you probably want an inner join with shares being the primary table:

SELECT shares.name as share_name, schemas.* FROM shares INNER JOIN schemas on shares.id = share_id WHERE shares.name = 'jtyler'

You usually want to select the most specific data first (in this case shares as you're matching on shares.name), and then join related tables. If you have an index on shares.name and schemas.share_id it'll avoid those costly sequential scans.

Jun 02 '21 10:06 abonander

@abonander in my experience, the type of join doesn't matter. I'm getting the same issue with inner joins as well. (If I remove the u.id IS NULL part, it'll infer nullability correctly.)

    struct Test {
        id: i64,
    }

    sqlx::query_as!(
        Test,
        "
SELECT l.id
FROM levels l
INNER JOIN users AS u ON u.id = l.user_id
WHERE u.id IS NULL OR u.id = $1
",
        author_id,
    )
        .fetch_all(connection)
        .await;

# PREPARE test (int) AS
# SELECT l.id
# FROM levels l
# INNER JOIN users AS u ON u.id = l.user_id
# WHERE u.id IS NULL OR u.id = $1;
PREPARE

# EXPLAIN (VERBOSE, FORMAT JSON) EXECUTE test(1);
                                                           QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------
 [                                                                                                                              +
   {                                                                                                                            +
     "Plan": {                                                                                                                  +
       "Node Type": "Hash Join",                                                                                                +
       "Parallel Aware": false,                                                                                                 +
       "Join Type": "Inner",                                                                                                    +
       "Startup Cost": 10.89,                                                                                                   +
       "Total Cost": 22.54,                                                                                                     +
       "Plan Rows": 2,                                                                                                          +
       "Plan Width": 8,                                                                                                         +
       "Output": ["l.id"],                                                                                                      +
       "Inner Unique": true,                                                                                                    +
       "Hash Cond": "(l.user_id = u.id)",                                                                                       +
       "Plans": [                                                                                                               +
         {                                                                                                                      +
           "Node Type": "Seq Scan",                                                                                             +
           "Parent Relationship": "Outer",                                                                                      +
           "Parallel Aware": false,                                                                                             +
           "Relation Name": "levels",                                                                                           +
           "Schema": "public",                                                                                                  +
           "Alias": "l",                                                                                                        +
           "Startup Cost": 0.00,                                                                                                +
           "Total Cost": 11.30,                                                                                                 +
           "Plan Rows": 130,                                                                                                    +
           "Plan Width": 16,                                                                                                    +
           "Output": ["l.id", "l.title", "l.user_id", "l.parent_id", "l.data", "l.is_autosaved", "l.created_at", "l.updated_at"]+
         },                                                                                                                     +
         {                                                                                                                      +
           "Node Type": "Hash",                                                                                                 +
           "Parent Relationship": "Inner",                                                                                      +
           "Parallel Aware": false,                                                                                             +
           "Startup Cost": 10.88,                                                                                               +
           "Total Cost": 10.88,                                                                                                 +
           "Plan Rows": 1,                                                                                                      +
           "Plan Width": 8,                                                                                                     +
           "Output": ["u.id"],                                                                                                  +
           "Plans": [                                                                                                           +
             {                                                                                                                  +
               "Node Type": "Seq Scan",                                                                                         +
               "Parent Relationship": "Outer",                                                                                  +
               "Parallel Aware": false,                                                                                         +
               "Relation Name": "users",                                                                                        +
               "Schema": "public",                                                                                              +
               "Alias": "u",                                                                                                    +
               "Startup Cost": 0.00,                                                                                            +
               "Total Cost": 10.88,                                                                                             +
               "Plan Rows": 1,                                                                                                  +
               "Plan Width": 8,                                                                                                 +
               "Output": ["u.id"],                                                                                              +
               "Filter": "((u.id IS NULL) OR (u.id = 1))"                                                                       +
             }                                                                                                                  +
           ]                                                                                                                    +
         }                                                                                                                      +
       ]                                                                                                                        +
     }                                                                                                                          +
   }                                                                                                                            +
 ]
(1 row)

Mar 29 '22 21:03 vladbat00

sqlx sqlx copied to clipboard

query_as!/query! inference returns nullable entries on join when a bound parameter is absent

sqlx
sqlx copied to clipboard