Crash raised by Delete statement.(PG17.2 citus 13.0.3)
citus version:
SELECT citus_version();
---Citus 13.0.3 on x86_64-pc-linux-gnu, compiled by gcc (Debian 12.2.0-14) 12.2.0, 64-bit
PG version: 17.2 schema:
---create 5 workers, and we also found the bug can be reproduced without workers
select * from citus_add_node('docker-worker1-1', 5432);
select * from citus_add_node('docker-worker2-1', 5432);
select * from citus_add_node('docker-worker3-1', 5432);
select * from citus_add_node('docker-worker4-1', 5432);
select * from citus_add_node('docker-worker5-1', 5432);
DROP TABLE IF EXISTS t6;
create table t6 (
vkey int4 ,
pkey int4 ,
c8 timestamp ,
c9 numeric ,
c10 numeric ,
c11 text ,
c12 text ,
c13 int4 ,
c14 int4 ,
c15 timestamp
);SELECT create_reference_table('t6');
DROP TABLE IF EXISTS t9;
create table t9 (
vkey int4 ,
pkey int4 ,
c25 numeric ,
c26 int4
);SELECT create_reference_table('t9');
DROP TABLE IF EXISTS t12;
create table t12 (
vkey int4 ,
pkey int4 ,
c16 int4 ,
c17 int4 ,
c18 timestamp ,
c19 numeric ,
c20 int4 ,
c21 text ,
c22 text ,
c23 text ,
colocated_key int4
);SELECT create_distributed_table('t12','colocated_key', colocate_with:='t10');
DROP TABLE IF EXISTS t18;
create table t18 (
vkey int4 ,
pkey int4 ,
c24 timestamp
);SELECT create_distributed_table('t18','vkey', colocate_with:='t17');
DROP TABLE IF EXISTS t20;
create table t20 (
vkey int4 ,
pkey int4 ,
c0 int4 ,
c1 int4 ,
c2 text ,
c3 numeric ,
c4 numeric ,
c5 timestamp ,
c6 int4 ,
c7 numeric
);SELECT create_distributed_table('t20','vkey', colocate_with:='t18');
query:
--- query
delete from t18
where
((false::bool) in (select
null::bool as c_0
from
t12 as ref_0
where (ref_0.c23) !~~* (ref_0.c22)
except
select
true::bool as c_0
from
t18 as ref_1
where exists (
select
ref_2.c1 as c_0,
ref_2.c6 as c_1,
ref_2.pkey as c_2
from
t20 as ref_2
where false::bool)))
or ((pg_catalog.citus_version()) <= (
select
'arKwIv+u}<7XL?*74+YE' as c_0
from
(t20 as ref_3
full outer join t6 as ref_4
on (ref_3.vkey = ref_4.vkey ))
where (false::bool)
or (exists (
select
ref_5.vkey as c_0,
(ref_3.c3) = (ref_4.c10) as c_1,
ref_5.c25 as c_2,
(select c2 from t20 order by c2 limit 1 offset 6)
as c_3,
ref_5.vkey as c_4,
(select vkey from t12 order by vkey limit 1 offset 1)
as c_5,
ref_5.pkey as c_6,
ref_5.c25 as c_7,
ref_5.c25 as c_8,
ref_5.vkey as c_9
from
t9 as ref_5
where (ref_4.c11) >= ('')))
order by c_0 desc
limit 1))
log from coordinator:
Reproducible with simpler query:
delete from t18
where
((false::bool) in (select
null::bool as c_0
from
t12 as ref_0))
or ((pg_catalog.citus_version()) <= (
select
'arKwIv+u}<7XL?*74+YE' as c_0
from
(t20 as ref_3
full outer join t6 as ref_4
on (ref_3.vkey = ref_4.vkey ))));
BT:
#0 __pthread_kill_implementation (no_tid=0, signo=6, threadid=135666315147072)
at ./nptl/pthread_kill.c:44
#1 __pthread_kill_internal (signo=6, threadid=135666315147072) at ./nptl/pthread_kill.c:78
#2 __GI___pthread_kill (threadid=135666315147072, signo=signo@entry=6) at ./nptl/pthread_kill.c:89
#3 0x00007b6345a42476 in __GI_raise (sig=sig@entry=6) at ../sysdeps/posix/raise.c:26
#4 0x00007b6345a287f3 in __GI_abort () at ./stdlib/abort.c:79
#5 0x00005e3e358c98f0 in ExceptionalCondition (
conditionName=conditionName@entry=0x7b6345d83ef8 "ptr == NULL || nodeTag(ptr) == type",
fileName=fileName@entry=0x7b6345d83eb8 "/home/naisila/.pgenv/pgsql-17.6/include/server/nodes/nodes.h", lineNumber=lineNumber@entry=171) at assert.c:66
#6 0x00007b6345cc2da5 in castNodeImpl (ptr=0x5e3e7048c250, type=T_OpExpr)
at /home/naisila/.pgenv/pgsql-17.6/include/server/nodes/nodes.h:171
#7 get_sublink_expr (sublink=sublink@entry=0x5e3e7048c1a0, context=context@entry=0x7ffea4512080)
at deparser/ruleutils_17.c:8283
#8 0x00007b6345cb7936 in get_rule_expr (node=node@entry=0x5e3e7048c1a0,
context=context@entry=0x7ffea4512080, showimplicit=showimplicit@entry=false)
at deparser/ruleutils_17.c:5930
#9 0x00007b6345cbd686 in get_rule_expr_paren (node=node@entry=0x5e3e7048c1a0,
context=context@entry=0x7ffea4512080, showimplicit=showimplicit@entry=false,
parentNode=parentNode@entry=0x5e3e7048c120) at deparser/ruleutils_17.c:5583
#10 0x00007b6345cb77fd in get_rule_expr (node=0x5e3e7048c120, context=context@entry=0x7ffea4512080,
showimplicit=showimplicit@entry=false) at deparser/ruleutils_17.c:5900
#11 0x00007b6345cbf691 in get_delete_query_def (query=query@entry=0x5e3e7048bb48,
context=context@entry=0x7ffea4512080) at deparser/ruleutils_17.c:3744
#12 0x00007b6345cbf9f4 in get_query_def_extended (query=query@entry=0x5e3e7048bb48,
buf=buf@entry=0x5e3e704a6538, parentnamespace=parentnamespace@entry=0x0,
distrelid=distrelid@entry=0, shardid=shardid@entry=0, resultDesc=resultDesc@entry=0x0,
colNamesVisible=false, prettyFlags=0, wrapColumn=0, startIndent=0) at deparser/ruleutils_17.c:2129
#13 0x00007b6345cbfa9f in get_query_def (query=query@entry=0x5e3e7048bb48,
buf=buf@entry=0x5e3e704a6538, parentnamespace=parentnamespace@entry=0x0,
resultDesc=resultDesc@entry=0x0, colNamesVisible=colNamesVisible@entry=false,
prettyFlags=prettyFlags@entry=0, wrapColumn=0, startIndent=0) at deparser/ruleutils_17.c:2048
#14 0x00007b6345cbfacc in pg_get_query_def (query=query@entry=0x5e3e7048bb48,
buffer=buffer@entry=0x5e3e704a6538) at deparser/ruleutils_17.c:503
#15 0x00007b6345d0a3f0 in DeparseTaskQuery (task=task@entry=0x5e3e7048ba20,
query=query@entry=0x5e3e7048bb48) at planner/deparse_shard_query.c:735
#16 0x00007b6345d0ad45 in TaskQueryString (task=task@entry=0x5e3e7048ba20)
at planner/deparse_shard_query.c:839
#17 0x00007b6345d0b1fd in TaskQueryStringAtIndex (task=task@entry=0x5e3e7048ba20, index=0)
at planner/deparse_shard_query.c:767
#18 0x00007b6345cc41ec in SendNextQuery (placementExecution=placementExecution@entry=0x5e3e70492508,
session=session@entry=0x5e3e704a6238) at executor/adaptive_executor.c:3927
#19 0x00007b6345cc4918 in StartPlacementExecutionOnSession (placementExecution=0x5e3e70492508,
session=session@entry=0x5e3e704a6238) at executor/adaptive_executor.c:3886
#20 0x00007b6345cc6266 in TransactionStateMachine (session=session@entry=0x5e3e704a6238)
at executor/adaptive_executor.c:3556
#21 0x00007b6345cc677a in ConnectionStateMachine (session=0x5e3e704a6238)
at executor/adaptive_executor.c:3100
#22 0x00007b6345cc6c26 in OpenNewConnections (workerPool=workerPool@entry=0x5e3e70492108,
newConnectionCount=1, transactionProperties=0x7ffea45125ec) at executor/adaptive_executor.c:2734
#23 0x00007b6345cc6e8f in ManageWorkerPool (workerPool=0x5e3e70492108)
at executor/adaptive_executor.c:2246
#24 0x00007b6345cc7141 in RunDistributedExecution (execution=execution@entry=0x5e3e70467718)
at executor/adaptive_executor.c:1940
#25 0x00007b6345cc763b in AdaptiveExecutor (scanState=scanState@entry=0x5e3e704596b0)
--Type <RET> for more, q to quit, c to continue without paging--
at executor/adaptive_executor.c:882
#26 0x00007b6345cc8082 in CitusExecScan (node=0x5e3e704596b0) at executor/citus_custom_scan.c:268
#27 0x00005e3e35579857 in ExecCustomScan (pstate=0x5e3e704596b0) at nodeCustom.c:121
#28 0x00005e3e355642f3 in ExecProcNodeFirst (node=0x5e3e704596b0) at execProcnode.c:464
#29 0x00005e3e3555c520 in ExecProcNode (node=0x5e3e704596b0)
at ../../../src/include/executor/executor.h:275
#30 ExecutePlan (queryDesc=queryDesc@entry=0x5e3e70431160, operation=operation@entry=CMD_DELETE,
sendTuples=sendTuples@entry=false, numberTuples=numberTuples@entry=0,
direction=direction@entry=ForwardScanDirection, dest=dest@entry=0x5e3e704474b8) at execMain.c:1648
#31 0x00005e3e3555c749 in standard_ExecutorRun (queryDesc=queryDesc@entry=0x5e3e70431160,
direction=direction@entry=ForwardScanDirection, count=count@entry=0,
execute_once=execute_once@entry=true) at execMain.c:360
#32 0x00007b6345ccec4f in CitusExecutorRun (queryDesc=0x5e3e70431160, direction=ForwardScanDirection,
count=0, execute_once=<optimized out>) at executor/multi_executor.c:247
#33 0x00007b6345c5cfcb in citus_executor_run_adapter (queryDesc=<optimized out>,
direction=<optimized out>, count=<optimized out>, run_once=<optimized out>)
at shared_library_init.c:402
#34 0x00007b6342b50e82 in pgss_ExecutorRun (queryDesc=0x5e3e70431160, direction=ForwardScanDirection,
count=0, execute_once=<optimized out>) at pg_stat_statements.c:1023
#35 0x00005e3e3555c77f in ExecutorRun (queryDesc=queryDesc@entry=0x5e3e70431160,
direction=direction@entry=ForwardScanDirection, count=count@entry=0,
execute_once=execute_once@entry=true) at execMain.c:304
#36 0x00005e3e35765df4 in ProcessQuery (plan=plan@entry=0x5e3e704466d8, sourceText=<optimized out>,
params=0x0, queryEnv=0x0, dest=dest@entry=0x5e3e704474b8, qc=qc@entry=0x7ffea4512c50)
at pquery.c:160
#37 0x00005e3e357669eb in PortalRunMulti (portal=portal@entry=0x5e3e7039b440,
isTopLevel=isTopLevel@entry=true, setHoldSnapshot=setHoldSnapshot@entry=false,
dest=dest@entry=0x5e3e704474b8, altdest=altdest@entry=0x5e3e704474b8, qc=qc@entry=0x7ffea4512c50)
at pquery.c:1275
#38 0x00005e3e35766f79 in PortalRun (portal=portal@entry=0x5e3e7039b440,
count=count@entry=9223372036854775807, isTopLevel=isTopLevel@entry=true,
run_once=run_once@entry=true, dest=dest@entry=0x5e3e704474b8,
altdest=altdest@entry=0x5e3e704474b8, qc=0x7ffea4512c50) at pquery.c:789
#39 0x00005e3e35762e76 in exec_simple_query (
query_string=query_string@entry=0x5e3e70278ea0 "delete from t18\nwhere \n((false::bool) in (select \n null::bool as c_0\n from \n t12 as ref_0))\n or ((pg_catalog.citus_version()) <= ( \n select \n 'arKwIv+u}<7XL?*74+Y"...) at postgres.c:1278
#40 0x00005e3e35764f78 in PostgresMain (dbname=<optimized out>, username=<optimized out>)
at postgres.c:4767
#41 0x00005e3e3575e025 in BackendMain (startup_data=<optimized out>, startup_data_len=<optimized out>)
at backend_startup.c:105
#42 0x00005e3e356aa238 in postmaster_child_launch (child_type=child_type@entry=B_BACKEND,
startup_data=startup_data@entry=0x7ffea4512eb4 "", startup_data_len=startup_data_len@entry=4,
client_sock=client_sock@entry=0x7ffea4512ef0) at launch_backend.c:277
#43 0x00005e3e356aea6a in BackendStartup (client_sock=client_sock@entry=0x7ffea4512ef0)
at postmaster.c:3594
#44 0x00005e3e356aeced in ServerLoop () at postmaster.c:1676
#45 0x00005e3e356b0355 in PostmasterMain (argc=argc@entry=3, argv=argv@entry=0x5e3e701dfe70)
at postmaster.c:1374
#46 0x00005e3e355c2d55 in main (argc=3, argv=0x5e3e701dfe70) at main.c:199
A couple of observations that may aid in resolving this; using @naisila 's simpler query, the crash occurs in get_sublink_expr():
foreach(l, ((BoolExpr *) sublink->testexpr)->args)
{
OpExpr *opexpr = lfirst_node(OpExpr, l); <== assert fail in here
The function lfirst_node() asserts that its argument is of type OpExpr but it is actually a PARAM node. At this point, ruletutils is looking at the lefthand side of the IN expression (false::bool) IN (select null::bool as c_0 from t12 as ref_0).
- The following SELECT statement executes without issues; it uses the same quals as the simpler query, with a minor modification of a
limit 1to ensure the scalar subquery executes ok. The query representation also has aPARAMnode for the LHS of theINsubquery, so why is this able to deparse successfully ?
select * from t18
where
((false::bool) in (select
null::bool as c_0
from
t12 as ref_0))
or ((pg_catalog.citus_version()) <= (
select
'arKwIv+u}<7XL?*74+YE' as c_0
from
(t20 as ref_3
full outer join t6 as ref_4
on (ref_3.vkey = ref_4.vkey )) limit 1));
- Its possible to create and display a rule with the problem query, suggesting its not an upstream issue:
-- Create a rule with the simplified repro query:
create rule r_foo as on insert to t18 do instead delete from t18
where
((false::bool) in (select
null::bool as c_0
from
t12 as ref_0))
or ((pg_catalog.citus_version()) <= (
select
'arKwIv+u}<7XL?*74+YE' as c_0
from
(t20 as ref_3
full outer join t6 as ref_4
on (ref_3.vkey = ref_4.vkey ))));
CREATE RULE
Time: 20.290 ms
-- Ruleutils can successfully deparse the rule:
SELECT pg_get_ruledef(oid) FROM pg_rewrite WHERE rulename='r_foo';
┌─────────────────────────────────────────────────────────────────────────────────────────────────┐
│ pg_get_ruledef │
├─────────────────────────────────────────────────────────────────────────────────────────────────┤
│ CREATE RULE r_foo AS ↵│
│ ON INSERT TO citus.t18 DO INSTEAD DELETE FROM t18 ↵│
│ WHERE ((false IN ( SELECT NULL::boolean AS c_0 ↵│
│ FROM t12 ref_0)) OR (citus_version() <= ( SELECT 'arKwIv+u}<7XL?*74+YE'::text AS c_0↵│
│ FROM (t20 ref_3 ↵│
│ FULL JOIN t6 ref_4 ON ((ref_3.vkey = ref_4.vkey)))))); │
└─────────────────────────────────────────────────────────────────────────────────────────────────┘
(1 row)
@colm-mchugh
Its possible to create and display a rule with the problem query, suggesting its not an upstream issue:
This is also reproducible on citus this way, because this deparsing here is done on the original query not the further modified/evaluated query tree. This bug appears when some one tries to deparse on a modified/evaluated query tree.
postgres=# SELECT pg_get_ruledef(oid) FROM pg_rewrite WHERE rulename='r_foo';
pg_get_ruledef
-------------------------------------------------------------------------------------------------
CREATE RULE r_foo AS +
ON INSERT TO public.t18 DO INSTEAD DELETE FROM t18 +
WHERE ((false IN ( SELECT NULL::boolean AS c_0 +
FROM t12 ref_0)) OR (citus_version() <= ( SELECT 'arKwIv+u}<7XL?*74+YE'::text AS c_0+
FROM (t20 ref_3 +
FULL JOIN t6 ref_4 ON ((ref_3.vkey = ref_4.vkey))))));
(1 row)
postgres=# select pg_catalog.citus_version();
citus_version
-------------------------------------------------------------------------------------------------------------------------------------------------------------------
Citus 14.0devel on arm-apple-darwin24.6.0, compiled by Apple clang version 17.0.0 (clang-1700.0.13.3), 64-bit gitref: local_plan_cache_multishard(sha: a42a183dc)
(1 row)
- The error is actually also reproducible with the upstream deparse, when using
pg_get_querydefinstead of pg_get_query_def (citus version).
TRAP: failed Assert("ptr == NULL || nodeTag(ptr) == type"), File: "../../../../src/include/nodes/nodes.h", Line: 171, PID: 16372
0 postgres 0x000000010286cfdc ExceptionalCondition + 108
1 postgres 0x0000000102814000 get_sublink_expr + 1088
2 postgres 0x0000000102810488 get_rule_expr + 5584
3 postgres 0x0000000102808db0 get_query_def + 1396
4 postgres 0x0000000102808828 pg_get_querydef + 80
5 citus.dylib 0x00000001039874bc DeparseTaskQuery + 64
6 citus.dylib 0x0000000103986e80 TaskQueryString + 200
7 citus.dylib 0x000000010394f4bc SendNextQuery + 76
8 citus.dylib 0x000000010394ef00 StartPlacementExecutionOnSession + 172
9 citus.dylib 0x000000010394dedc ConnectionStateMachine + 936
10 citus.dylib 0x000000010394ca3c RunDistributedExecution + 3348
11 citus.dylib 0x000000010394ba00 AdaptiveExecutor + 640
12 citus.dylib 0x000000010394f950 CitusExecScan + 48
13 postgres 0x000000010256c9e8 standard_ExecutorRun + 292
14 citus.dylib 0x000000010395560c CitusExecutorRun + 444
15 postgres 0x000000010273e938 ProcessQuery + 160
and normally with citus deparse (pg_get_query_def)
TRAP: failed Assert("ptr == NULL || nodeTag(ptr) == type"), File: "/Users/imranzaheer/Desktop/work/postgres/installs/pg17-full-icu/include/server/nodes/nodes.h", Line: 171, PID: 8659
0 postgres 0x0000000104700fdc ExceptionalCondition + 108
1 citus.dylib 0x000000010582170c get_sublink_expr + 1088
2 citus.dylib 0x0000000105816a50 get_rule_expr + 5600
3 citus.dylib 0x000000010581834c get_query_def_extended + 4340
4 citus.dylib 0x0000000105815078 pg_get_query_def + 44
5 citus.dylib 0x000000010585ee84 TaskQueryString + 284
6 citus.dylib 0x00000001058274b8 SendNextQuery + 76
7 citus.dylib 0x0000000105826efc StartPlacementExecutionOnSession + 172
8 citus.dylib 0x0000000105825ed8 ConnectionStateMachine + 936
9 citus.dylib 0x0000000105824a38 RunDistributedExecution + 3348
10 citus.dylib 0x00000001058239fc AdaptiveExecutor + 640
11 citus.dylib 0x000000010582794c CitusExecScan + 48
12 postgres 0x00000001044009e8 standard_ExecutorRun + 292
13 citus.dylib 0x000000010582d608 CitusExecutorRun + 444
14 postgres 0x00000001045d2938 ProcessQuery + 160
- But I think we cannot say that this is an upstream issue, because the rule utils deparsing always expects deparsing on the oprignal parsed qurey tree, but in case of citus we are passing an modified/evaluated query tree, see here
The following SELECT statement executes without issues;
- In case of the select stmt version mentioned, the query tree remain same as the orignal.
select * from t18
where
((false::bool) in (select
null::bool as c_0
from
t12 as ref_0))
or ((pg_catalog.citus_version()) <= (
select
'arKwIv+u}<7XL?*74+YE' as c_0
from
(t20 as ref_3
full outer join t6 as ref_4
on (ref_3.vkey = ref_4.vkey ))));
# the sublink->testexpr will gen the following tree
{OPEXPR
:opno 91 ---> '='
:opfuncid 60
:opresulttype 16
:opretset false
:opcollid 0
:inputcollid 0
:args (
{CONST
:consttype 16 --> bool
:consttypmod -1
:constcollid 0
:constlen 1
:constbyval true
:constisnull false
:location 25
:constvalue 1 [ 0 0 0 0 0 0 0 0 ]
}
{PARAM
:paramkind 2 --> PARAM_SUBLINK
:paramid 1
:paramtype 16
:paramtypmod -1
:paramcollid 0
:location -1
}
)
:location 38
}
- And the for version
deletestmt, the test expression is being evaluated further byeval_const_expressions
delete from t18
where
((false::bool) in (select
null::bool as c_0
from
t12 as ref_0))
or ((pg_catalog.citus_version()) <= (
select
'arKwIv+u}<7XL?*74+YE' as c_0
from
(t20 as ref_3
full outer join t6 as ref_4
on (ref_3.vkey = ref_4.vkey ))));
# the sublink->testexpr will gen following tree
{BOOLEXPR
:boolop not
:args (
{PARAM
:paramkind 2 --> sublink
:paramid 1
:paramtype 16 --> bool
:paramtypmod -1
:paramcollid 0
:location -1
}
)
:location -1
}
-
You can see that only the
deletestmt was modified by citus before the deparsing happens. -
As far as I noticed citus is pre evaluating the query before sending in to the other workers, here
ExecuteCoordinatorEvaluableExpressionsby callingeval_const_expressions. Here the original queryOPEXPRis being evaluated toBOOLEXPRin thesublink->testexrpbut postgres always expectsublink->testexrpto be an expr of combining operators. -
For solving this may be we can add an check here that will skip the eveluation of
SUBLINK, like this
diff --git a/src/backend/distributed/utils/citus_clauses.c b/src/backend/distributed/utils/citus_clauses.c
index 7086cec4e..ce3f1939a 100644
--- a/src/backend/distributed/utils/citus_clauses.c
+++ b/src/backend/distributed/utils/citus_clauses.c
@@ -572,6 +572,15 @@ CheckExprExecutorSafe(Node *expr)
}
}
+ /* If it's a SubLink, its testexpr should not be further evaluated */
+ else if (IsA(expr, SubLink))
+ {
+ SubLink *sublink = (SubLink *) expr;
+
+ if (sublink->testexpr)
+ return false;
+ }
+
/* If it's a FuncExpr, search in arguments */
else if (IsA(expr, FuncExpr))
{
But I am not sure whether this is a valid solution. Or may bye we can change citus rule_utils to parse such expression.
@imranzaheer612 thanks for the diagnosis, it makes total sense. Does UPDATE have the same issue do you know, or it is just impacting DELETE ?
deparsing here is done on the original query not the further modified/evaluated query tree.
yes - Postgres's ruleutils implicitly expects a parsed, but not rewritten, query tree, but Citus' distributed_planner() hook receives a parsed and rewritten query tree, and Citus further evaluates the query tree on certain codepaths, as you have identified. This difference in ruleutils usage has impacted Citus before - issues #4092, #7674 and #5621 for example. Those cases arose because of a property of the rewritten query tree. The solution (#7675) modifies citus_ruleutils. Passing in a rewritten query tree is risky on the part of Citus and arguably a misuse of the ruleutils API, with the consequence that we occasionally hit issues like this.
It's generally preferable to fix problems of this category in citus_ruleutils , by inspecting an expression and walking back the rewrite if possible, or at least deducing that a rewrite has happened and avoiding the problematic ruleutils call. In this case, because the problem is caused by an action that Citus takes and not by a property of the rewritten query tree, its worth a) investigating why that action was put in place by Citus and b) if it makes sense, reduce the scope of that action, and avoid the problem. From a quick peek at that change (#7914), it looks like it prevents PARAM types that the Postgres executor cannot handle from leaking into the executor. Skipping sublink evaluation as you suggest, or limiting the check in some other way sounds promising, but obviously we'd need to ensure that the fix #7914 is not compromised in some way.