Slow, expensive query during sync.
PGSync version: 2.1.11 Postgres version: 12.7 Elasticsearch version: 7.10 Redis version: 5.0.6 Python version: 3.8.7 Problem Description:
I have six tables:
voycapi_conversations
id - int (pk)
name - str
users_user
id - int (pk)
name - str
voyc_review_conversationreview
id - int (pk)
conversation_id - int (foreign key to voycapi_conversations)
assigned_by_id - int (foreign key to users_user)
assigned_to_id - int (foreign key to users_user)
coached_by_id - int (foreign key to users_user)
graded_by_id - int (foreign key to users_user)
resolved_by_id - int (foreign key to users_user)
reviewed_by_id - int (foreign key to users_user)
voyc_transcript_transcript
id - int (pk)
conversation_id - int (foreign key to voycapi_conversations)
voyc_transcript_utterance
id - int (pk)
transcript_id - int (foreign key to voyc_transcript_transcript)
voyc_transcript_sentence
id - int (pk)
utterance_id - int (foreign key to voyc_transcript_utterance)
I need the username of each user assigned to each of the foreign keys in the voyc_review_conversationreview table.
The only way I've managed to sync all of these fields over through PGSync is by using a schema that looks like this:
[
{
"database": "webserver",
"index": "webserver_es",
"nodes": {
"table": "voycapi_conversations",
"columns": [
"id",
"name"
],
"children": [
{
"table": "voyc_review_conversationreview",
"columns": [
"reviewed_status",
"resolved_status",
"coached_status",
"graded_status",
"date_reviewed",
"date_resolved",
"date_coached",
"date_graded",
"date_assigned"
],
"label": "review",
"relationship": {
"variant": "object",
"type": "one_to_one"
},
"children": [
{
"table": "users_user",
"columns": [
"id",
"username"
],
"label": "assigned_by",
"relationship": {
"variant": "object",
"type": "one_to_one",
"foreign_key": {
"parent": [
"assigned_by_id"
],
"child": [
"id"
]
}
}
},
{
"table": "users_user",
"columns": [
"id",
"username"
],
"label": "assigned_to",
"relationship": {
"variant": "object",
"type": "one_to_one",
"foreign_key": {
"parent": [
"assigned_to_id"
],
"child": [
"id"
]
}
}
},
{
"table": "users_user",
"columns": [
"id",
"username"
],
"label": "coached_by",
"relationship": {
"variant": "object",
"type": "one_to_one",
"foreign_key": {
"parent": [
"coached_by_id"
],
"child": [
"id"
]
}
}
},
{
"table": "users_user",
"columns": [
"id",
"username"
],
"label": "graded_by",
"relationship": {
"variant": "object",
"type": "one_to_one",
"foreign_key": {
"parent": [
"graded_by_id"
],
"child": [
"id"
]
}
}
},
{
"table": "users_user",
"columns": [
"id",
"username"
],
"label": "resolved_by",
"relationship": {
"variant": "object",
"type": "one_to_one",
"foreign_key": {
"parent": [
"resolved_by_id"
],
"child": [
"id"
]
}
}
},
{
"table": "users_user",
"columns": [
"id",
"username"
],
"label": "reviewed_by",
"relationship": {
"variant": "object",
"type": "one_to_one",
"foreign_key": {
"parent": [
"reviewed_by_id"
],
"child": [
"id"
]
}
}
}
]
},
{
"table": "voyc_transcript_transcript",
"columns": [
"id"
],
"label": "transcript",
"relationship": {
"variant": "object",
"type": "one_to_one"
},
"children": [
{
"table": "voyc_transcript_utterance",
"columns": [
"id"
],
"label": "utterance",
"relationship": {
"variant": "object",
"type": "one_to_many"
},
"children": [
{
"table": "voyc_transcript_sentence",
"columns": [
"id",
"text",
"words#>{0,start}"
],
"label": "sentence",
"relationship": {
"variant": "object",
"type": "one_to_many",
"foreign_key": {
"parent": [
"id"
],
"child": [
"utterance_id"
]
}
}
}
]
}
]
}
]
}
}
]
When running PGSync:
- voycapi_conversations
|- voyc_review_conversationreview
| |- users_user
| |- users_user
| |- users_user
| |- users_user
| |- users_user
| - users_user
|- voyc_transcript_transcript
| - voyc_transcript_utterance
| - voyc_transcript_sentence
This works upon the initial sync. The issue, however, is that some changes to the database causes a query that looks like this to run:
SELECT JSON_BUILD_ARRAY(anon_5._keys, anon_8._keys) AS "JSON_BUILD_ARRAY_1",
CAST(JSON_BUILD_OBJECT('id', voycapi_conversations_1.id,
'review',
anon_5.review,
'transcript',
anon_8.transcript) AS JSONB) AS "JSON_BUILD_OBJECT_1",
voycapi_conversations_1.id
FROM public.voycapi_conversations AS voycapi_conversations_1
LEFT OUTER JOIN LATERAL
(SELECT CAST(JSON_BUILD_OBJECT('voyc_review_conversationreview', CAST(JSON_BUILD_OBJECT('id', JSON_BUILD_ARRAY(voyc_review_conversationreview_1.id)) AS JSONB)) AS JSONB) || CAST(JSON_BUILD_ARRAY(anon_12._keys) AS JSONB) || CAST(JSON_BUILD_ARRAY(anon_13._keys) AS JSONB) || CAST(JSON_BUILD_ARRAY(anon_14._keys) AS JSONB) || CAST(JSON_BUILD_ARRAY(anon_15._keys) AS JSONB) || CAST(JSON_BUILD_ARRAY(anon_16._keys) AS JSONB) || CAST(JSON_BUILD_ARRAY(anon_17._keys) AS JSONB) AS _keys,
CAST(JSON_BUILD_OBJECT('reviewed_status', voyc_review_conversationreview_1.reviewed_status, 'resolved_status', voyc_review_conversationreview_1.resolved_status, 'coached_status', voyc_review_conversationreview_1.coached_status, 'graded_status', voyc_review_conversationreview_1.graded_status, 'date_reviewed', voyc_review_conversationreview_1.date_reviewed, 'date_resolved', voyc_review_conversationreview_1.date_resolved, 'date_coached', voyc_review_conversationreview_1.date_coached, 'date_graded', voyc_review_conversationreview_1.date_graded, 'date_assigned', voyc_review_conversationreview_1.date_assigned, 'assigned_by', anon_12.assigned_by, 'assigned_to', anon_13.assigned_to, 'coached_by', anon_14.coached_by, 'graded_by', anon_15.graded_by, 'resolved_by', anon_16.resolved_by, 'reviewed_by', anon_17.reviewed_by) AS JSONB) AS review,
voyc_review_conversationreview_1.conversation_id AS conversation_id
FROM public.voyc_review_conversationreview AS voyc_review_conversationreview_1
LEFT OUTER JOIN LATERAL
(SELECT CAST(JSON_BUILD_OBJECT('users_user', CAST(JSON_BUILD_OBJECT('id', users_user_1.id) AS JSONB)) AS JSONB) AS _keys,
CAST(JSON_BUILD_OBJECT('id', users_user_1.id, 'username', users_user_1.username) AS JSONB) AS assigned_by,
users_user_1.id AS id
FROM public.users_user AS users_user_1
WHERE users_user_1.id = voyc_review_conversationreview_1.assigned_by_id) AS anon_12 ON anon_12.id = voyc_review_conversationreview_1.assigned_by_id
LEFT OUTER JOIN LATERAL
(SELECT CAST(JSON_BUILD_OBJECT('users_user', CAST(JSON_BUILD_OBJECT('id', users_user_1.id) AS JSONB)) AS JSONB) AS _keys,
CAST(JSON_BUILD_OBJECT('id', users_user_1.id, 'username', users_user_1.username) AS JSONB) AS assigned_to,
users_user_1.id AS id
FROM public.users_user AS users_user_1
WHERE users_user_1.id = voyc_review_conversationreview_1.assigned_to_id) AS anon_13 ON anon_13.id = voyc_review_conversationreview_1.assigned_to_id
LEFT OUTER JOIN LATERAL
(SELECT CAST(JSON_BUILD_OBJECT('users_user', CAST(JSON_BUILD_OBJECT('id', users_user_1.id) AS JSONB)) AS JSONB) AS _keys,
CAST(JSON_BUILD_OBJECT('id', users_user_1.id, 'username', users_user_1.username) AS JSONB) AS coached_by,
users_user_1.id AS id
FROM public.users_user AS users_user_1
WHERE users_user_1.id = voyc_review_conversationreview_1.coached_by_id) AS anon_14 ON anon_14.id = voyc_review_conversationreview_1.coached_by_id
LEFT OUTER JOIN LATERAL
(SELECT CAST(JSON_BUILD_OBJECT('users_user', CAST(JSON_BUILD_OBJECT('id', users_user_1.id) AS JSONB)) AS JSONB) AS _keys,
CAST(JSON_BUILD_OBJECT('id', users_user_1.id, 'username', users_user_1.username) AS JSONB) AS graded_by,
users_user_1.id AS id
FROM public.users_user AS users_user_1
WHERE users_user_1.id = voyc_review_conversationreview_1.graded_by_id) AS anon_15 ON anon_15.id = voyc_review_conversationreview_1.graded_by_id
LEFT OUTER JOIN LATERAL
(SELECT CAST(JSON_BUILD_OBJECT('users_user', CAST(JSON_BUILD_OBJECT('id', users_user_1.id) AS JSONB)) AS JSONB) AS _keys,
CAST(JSON_BUILD_OBJECT('id', users_user_1.id, 'username', users_user_1.username) AS JSONB) AS resolved_by,
users_user_1.id AS id
FROM public.users_user AS users_user_1
WHERE users_user_1.id = voyc_review_conversationreview_1.resolved_by_id) AS anon_16 ON anon_16.id = voyc_review_conversationreview_1.resolved_by_id
LEFT OUTER JOIN LATERAL
(SELECT CAST(JSON_BUILD_OBJECT('users_user', CAST(JSON_BUILD_OBJECT('id', users_user_1.id) AS JSONB)) AS JSONB) AS _keys,
CAST(JSON_BUILD_OBJECT('id', users_user_1.id, 'username', users_user_1.username) AS JSONB) AS reviewed_by,
users_user_1.id AS id
FROM public.users_user AS users_user_1
WHERE users_user_1.id = voyc_review_conversationreview_1.reviewed_by_id) AS anon_17 ON anon_17.id = voyc_review_conversationreview_1.reviewed_by_id
WHERE voyc_review_conversationreview_1.conversation_id = voycapi_conversations_1.id) AS anon_5 ON anon_5.conversation_id = voycapi_conversations_1.id
JOIN LATERAL
(SELECT CAST(JSON_BUILD_OBJECT('voyc_transcript_transcript', CAST(JSON_BUILD_OBJECT('id', JSON_BUILD_ARRAY(voyc_transcript_transcript_1.id)) AS JSONB)) AS JSONB) || CAST(JSON_BUILD_ARRAY(anon_19._keys) AS JSONB) AS _keys,
CAST(JSON_BUILD_OBJECT('id', voyc_transcript_transcript_1.id, 'confidence', voyc_transcript_transcript_1.confidence, 'utterance', anon_19.utterance) AS JSONB) AS transcript,
voyc_transcript_transcript_1.conversation_id AS conversation_id
FROM public.voyc_transcript_transcript AS voyc_transcript_transcript_1
JOIN LATERAL
(SELECT CAST(JSON_BUILD_OBJECT('voyc_transcript_utterance', JSON_AGG(CAST(JSON_BUILD_OBJECT('id', voyc_transcript_utterance_1.id) AS JSONB))) AS JSONB) || CAST(JSON_AGG(anon_20._keys) AS JSONB) AS _keys,
JSON_AGG(CAST(JSON_BUILD_OBJECT('id', voyc_transcript_utterance_1.id, 'emotion', voyc_transcript_utterance_1.emotion, 'sentence', anon_20.sentence) AS JSONB)) AS utterance,
voyc_transcript_utterance_1.transcript_id AS transcript_id
FROM public.voyc_transcript_utterance AS voyc_transcript_utterance_1
JOIN LATERAL
(SELECT CAST(JSON_BUILD_OBJECT('voyc_transcript_sentence', JSON_AGG(CAST(JSON_BUILD_OBJECT('id', voyc_transcript_sentence_1.id) AS JSONB))) AS JSONB) AS _keys,
JSON_AGG(CAST(JSON_BUILD_OBJECT('id', voyc_transcript_sentence_1.id, 'text', voyc_transcript_sentence_1.text, 'words_{0,start}', voyc_transcript_sentence_1.words #> '{0,start}') AS JSONB)) AS sentence,
voyc_transcript_sentence_1.utterance_id AS utterance_id
FROM public.voyc_transcript_sentence AS voyc_transcript_sentence_1
WHERE voyc_transcript_sentence_1.utterance_id = voyc_transcript_utterance_1.id
GROUP BY voyc_transcript_sentence_1.utterance_id) AS anon_20 ON anon_20.utterance_id = voyc_transcript_utterance_1.id
WHERE voyc_transcript_utterance_1.transcript_id = voyc_transcript_transcript_1.id
GROUP BY voyc_transcript_utterance_1.transcript_id) AS anon_19 ON anon_19.transcript_id = voyc_transcript_transcript_1.id
WHERE voyc_transcript_transcript_1.conversation_id = voycapi_conversations_1.id
AND (voyc_transcript_transcript_1.id = 4273419
OR voyc_transcript_transcript_1.id = 4273419
OR voyc_transcript_transcript_1.id = 4273419
OR voyc_transcript_transcript_1.id = 4273419
OR voyc_transcript_transcript_1.id = 4273419
OR voyc_transcript_transcript_1.id = 4273419
OR voyc_transcript_transcript_1.id = 4273419
OR voyc_transcript_transcript_1.id = 4273419
OR voyc_transcript_transcript_1.id = 4273419
OR voyc_transcript_transcript_1.id = 4273419
OR voyc_transcript_transcript_1.id = 4273419
OR voyc_transcript_transcript_1.id = 4273419
OR voyc_transcript_transcript_1.id = 4273419
OR voyc_transcript_transcript_1.id = 4273419
OR voyc_transcript_transcript_1.id = 4273419
OR voyc_transcript_transcript_1.id = 4273419
OR voyc_transcript_transcript_1.id = 4273419
OR voyc_transcript_transcript_1.id = 4273419
OR voyc_transcript_transcript_1.id = 4273419
OR voyc_transcript_transcript_1.id = 4273419
OR voyc_transcript_transcript_1.id = 4273419
OR voyc_transcript_transcript_1.id = 4273419)) AS anon_8 ON anon_8.conversation_id = voycapi_conversations_1.id
This query takes up to two minutes to complete and returns one result. See the explain here.
ps. The query and schema I posted above is only a portion of my full schema and query. I've redacted the rest and only kept the part of the query that is problematic.
I'm seeing a few seq scans in that query analysis.
Did you already run pgsync -c schema.json -a to see if there are any missing indices?
Yes I have, all indices are present.
- This is as a result of the query plan.
- Initially I had a query plan similar to yours with lots of sequential scans.
- Then I created the indices below and ran vacuum --full
- CREATE INDEX idx_review_conversationreview_conversation_id ON review_conversationreview (conversation_id)
- CREATE INDEX idx_transcript_conversation_id ON transcript (conversation_id)
- CREATE INDEX idx_user_id ON public.user (id)
- CREATE INDEX idx_utterance_transcript_id ON utterance (transcript_id)
- CREATE INDEX idx_sentence_utterance_id ON sentence (utterance_id)
- Can you try with a vacuum?
- Also/alternatively try and set
SET enable_seqscan = OFF;and re-run the query - If you have all indices and vacuum'd, can you provide the table distribution count.
- i think i have 10000 row for all tables
- pgsync -c schema.json -a
I've had a similar problems with query performance after the first sync which might be related to this problem. However in my case the generated query is so large (over 6 million characters) it causes Postgres to crash. This problem started after adding a "through_tables" relationship to the schema. Most of the generated query is in a WHERE clause as a huge number of OR statements. Here's a snippet of how part of the query looks WHERE manga_1.manga_id = %(manga_id_1)s OR manga_1.manga_id = %(manga_id_2)s OR manga_1.manga_id = %(manga_id_3)s OR ...
Here's how my schema looks. The manga and manga_service tables have both around 50k rows while services table has just 8 rows.
[
{
"database": "manga",
"index": "manga",
"setting": {
"analysis": {
"analyzer": {
"trigram_analyzer": {
"filter": [
"lowercase"
],
"type": "custom",
"tokenizer": "trigram_tokenizer"
}
},
"tokenizer": {
"trigram_tokenizer": {
"token_chars": [
"letter",
"digit"
],
"min_gram": "3",
"type": "ngram",
"max_gram": "3"
}
}
}
},
"nodes": {
"table": "manga",
"columns": [
"title",
"views",
"manga_id"
],
"transform": {
"mapping": {
"title": {
"type": "text",
"fields": {
"ngram": {
"type": "text",
"analyzer": "trigram_analyzer"
}
}
}
}
},
"children": [
{
"table": "manga_alias",
"columns": [
"title",
"manga_id"
],
"relationship": {
"variant": "object",
"type": "one_to_many"
},
"transform": {
"mapping": {
"title": {
"type": "text",
"fields": {
"ngram": {
"type": "text",
"analyzer": "trigram_analyzer"
}
}
}
}
}
},
{
"table": "services",
"columns": [
"service_name",
"service_id"
],
"relationship": {
"variant": "object",
"type": "one_to_many",
"through_tables": [
"manga_service"
]
}
}
]
}
}
]