citus icon indicating copy to clipboard operation
citus copied to clipboard

Intermediate result doesn't accept pseudo-type anyarray

Open naisila opened this issue 3 months ago • 2 comments

Steps to reproduce:

CREATE TABLE dist_table (id int);
SELECT create_distributed_table('dist_table', 'id');

SELECT count(*) FROM dist_table
WHERE EXISTS (SELECT count(*) FROM pg_catalog.pg_stats_ext_exprs);

NOTICE:  executing the command locally: SELECT count(*) AS count FROM
(SELECT intermediate_result.schemaname, intermediate_result.tablename, intermediate_result.statistics_schemaname, 
intermediate_result.statistics_name, intermediate_result.statistics_owner, intermediate_result.expr, intermediate_result.inherited, 
intermediate_result.null_frac, intermediate_result.avg_width, intermediate_result.n_distinct, intermediate_result.most_common_vals, 
intermediate_result.most_common_freqs, intermediate_result.histogram_bounds, intermediate_result.correlation, 
intermediate_result.most_common_elems, intermediate_result.most_common_elem_freqs, intermediate_result.elem_count_histogram FROM 
read_intermediate_result('109_1'::text, 'binary'::citus_copy_format) intermediate_result(schemaname name, tablename name, statistics_schemaname name,
statistics_name name, statistics_owner name, expr text, inherited boolean, null_frac real, avg_width integer, n_distinct real,
most_common_vals anyarray, most_common_freqs real[], histogram_bounds anyarray, correlation real,
most_common_elems anyarray, most_common_elem_freqs real[], elem_count_histogram real[])) pg_stats_ext_exprs

ERROR:  column "most_common_elems" has pseudo-type anyarray

In general, one can't create a table with pseudo-type, so there may be no way to fix this after all:

CREATE TABLE local_anyarray AS SELECT most_common_elems FROM pg_catalog.pg_stats_ext_exprs;
ERROR:  column "most_common_elems" has pseudo-type anyarray

naisila avatar Aug 26 '25 19:08 naisila

For reference, this is the original SQLSMITH query:

select
  subq_1.c1 as c0,
  subq_1.c1 as c1,
  subq_1.c0 as c2,

  pg_catalog.max(
    cast(case when true then cast(null as "time") else cast(null as "time") end
       as "time")) over (partition by subq_1.c0 order by subq_1.c1) as c3,

  pg_catalog.bool_and(
    cast((select applied from pg_catalog.pg_file_settings limit 1 offset 2)
       as bool)) over (partition by subq_1.c0 order by subq_1.c1) as c4,

  pg_catalog.max(
    cast(cast(nullif(case when cast(null as name) !~~ cast(null as text) then (select stanullfrac from pg_catalog.pg_statistic limit 1 offset 5)
             else (select stanullfrac from pg_catalog.pg_statistic limit 1 offset 5)
             end
          ,
        pg_catalog.float4abs(
          cast(case when ((subq_1.c0 is NULL)
                or (((cast(null as float4) < cast(null as float4))
                    or (subq_1.c0 is NULL))
                  and (subq_1.c1 is NULL)))
              and ((EXISTS (
                  select
                      (select pg_catalog.stddev_samp(total_bytes) from pg_catalog.pg_backend_memory_contexts)
                         as c0,
                      ref_0.comments as c1,
                      subq_2.c0 as c2,
                      subq_1.c0 as c3,
                      subq_2.c0 as c4
                    from
                      information_schema.sql_sizing as ref_0,
                      lateral (select
                            ref_1.flushes as c0
                          from
                            pg_catalog.pg_stat_slru as ref_1
                          where cast(null as pg_lsn) >= (select srsublsn from pg_catalog.pg_subscription_rel limit 1 offset 6)
                              ) as subq_2
                    where subq_1.c1 ~* subq_1.c1
                    limit 86))
                or (subq_1.c0 is not NULL)) then cast(null as float4) else cast(null as float4) end
             as float4))) as float4) as float4)) over (partition by subq_1.c0 order by subq_1.c0) as c5,
  subq_1.c1 as c6,
  subq_1.c1 as c7,
  subq_1.c0 as c8
from
  (select
        sample_1.a as c0,
        subq_0.c1 as c1
      from
        pg_catalog.pg_parameter_acl as sample_0 tablesample system (3.1)
          left join public.par_4 as sample_1 tablesample system (3.7)
          on (sample_1.a is NULL),
        lateral (select
              sample_2.roident as c0,
              sample_2.roname as c1,
              sample_0.parname as c2,
              (select usename from pg_catalog.pg_user_mappings limit 1 offset 6)
                 as c3,
              sample_0.paracl as c4,
              sample_1.a as c5,
              72 as c6,
              sample_0.paracl as c7,
              sample_0.paracl as c8,
              sample_0.parname as c9,
              sample_0.parname as c10
            from
              pg_catalog.pg_replication_origin as sample_2 tablesample system (4.3)
            where (select pg_catalog.min(last_idx_scan) from pg_catalog.pg_stat_sys_indexes)
                 >= (select event_time from public.events limit 1 offset 1)
              ) as subq_0
      where (((select data from pg_catalog.pg_largeobject limit 1 offset 2)
               <= (select tgargs from pg_catalog.pg_trigger limit 1 offset 5)
              )
          and (cast(null as jsonb) ?& cast(null as _text)))
        and (pg_catalog.bpchar(
            cast(subq_0.c3 as name)) !~~* subq_0.c2)
      limit 137) as subq_1
where ((false)
    and (80 is not NULL))
  or (cast(nullif(case when subq_1.c0 is NULL then (select write_time from pg_catalog.pg_stat_checkpointer limit 1 offset 3)
           else (select write_time from pg_catalog.pg_stat_checkpointer limit 1 offset 3)
           end
        ,
      (select pg_catalog.stddev_samp(null_frac) from pg_catalog.pg_stats_ext_exprs)
        ) as float8) = case when cast(nullif(cast(null as record),
          cast(null as record)) as record) <> case when cast(null as pg_lsn) > (select subskiplsn from pg_catalog.pg_subscription limit 1 offset 1)
               then cast(null as record) else cast(null as record) end
           then (select total_time from pg_catalog.pg_stat_xact_user_functions limit 1 offset 4)
         else (select total_time from pg_catalog.pg_stat_xact_user_functions limit 1 offset 4)
         end
  )

naisila avatar Aug 26 '25 19:08 naisila

Used a catalog table that has an "anyarray" column and that even exists in PG13 and tested the following to make sure since when we have this bug:

DROP TABLE IF EXISTS dist_table;
CREATE TABLE dist_table (id int);
SET citus.shard_count = 4;
SELECT create_distributed_table('dist_table', 'id');

SELECT count(*) FROM dist_table
WHERE EXISTS (SELECT stavalues5 FROM pg_statistic); -- stavalues5 is an "anyarray"
ERROR:  column "stavalues5" has pseudo-type anyarray
CONTEXT:  while executing command on localhost:9702

We receive^ the same error both on Citus 9.5.12 and on main, so seems like quite an old bug.

onurctirtir avatar Sep 08 '25 14:09 onurctirtir