pgsync icon indicating copy to clipboard operation
pgsync copied to clipboard

Slow, expensive query during sync.

Open voyc-jean opened this issue 3 years ago • 4 comments

PGSync version: 2.1.11 Postgres version: 12.7 Elasticsearch version: 7.10 Redis version: 5.0.6 Python version: 3.8.7 Problem Description:

I have six tables:

voycapi_conversations
    id - int (pk)
    name - str

users_user
    id - int (pk)
    name - str   

voyc_review_conversationreview
    id - int (pk)
    conversation_id - int (foreign key to voycapi_conversations)
    assigned_by_id - int (foreign key to users_user)
    assigned_to_id - int (foreign key to users_user)
    coached_by_id  - int (foreign key to users_user)
    graded_by_id  - int (foreign key to users_user)
    resolved_by_id  - int (foreign key to users_user)
    reviewed_by_id  - int (foreign key to users_user)

voyc_transcript_transcript
    id - int (pk)
    conversation_id - int (foreign key to voycapi_conversations)

voyc_transcript_utterance
    id - int (pk)
    transcript_id - int (foreign key to voyc_transcript_transcript)

voyc_transcript_sentence
    id - int (pk)
    utterance_id - int (foreign key to voyc_transcript_utterance)

I need the username of each user assigned to each of the foreign keys in the voyc_review_conversationreview table. The only way I've managed to sync all of these fields over through PGSync is by using a schema that looks like this:

[
    {
        "database": "webserver",
        "index": "webserver_es",
        "nodes": {
            "table": "voycapi_conversations",
            "columns": [
                "id",
                "name"
            ],
            "children": [
                {
                    "table": "voyc_review_conversationreview",
                    "columns": [
                        "reviewed_status",
                        "resolved_status",
                        "coached_status",
                        "graded_status",
                        "date_reviewed",
                        "date_resolved",
                        "date_coached",
                        "date_graded",
                        "date_assigned"
                    ],
                    "label": "review",
                    "relationship": {
                        "variant": "object",
                        "type": "one_to_one"
                    },
                    "children": [
                        {
                            "table": "users_user",
                            "columns": [
                                "id",
                                "username"
                            ],
                            "label": "assigned_by",
                            "relationship": {
                                "variant": "object",
                                "type": "one_to_one",
                                "foreign_key": {
                                    "parent": [
                                        "assigned_by_id"
                                    ],
                                    "child": [
                                        "id"
                                    ]
                                }
                            }
                        },
                        {
                            "table": "users_user",
                            "columns": [
                                "id",
                                "username"
                            ],
                            "label": "assigned_to",
                            "relationship": {
                                "variant": "object",
                                "type": "one_to_one",
                                "foreign_key": {
                                    "parent": [
                                        "assigned_to_id"
                                    ],
                                    "child": [
                                        "id"
                                    ]
                                }
                            }
                        },
                        {
                            "table": "users_user",
                            "columns": [
                                "id",
                                "username"
                            ],
                            "label": "coached_by",
                            "relationship": {
                                "variant": "object",
                                "type": "one_to_one",
                                "foreign_key": {
                                    "parent": [
                                        "coached_by_id"
                                    ],
                                    "child": [
                                        "id"
                                    ]
                                }
                            }
                        },
                        {
                            "table": "users_user",
                            "columns": [
                                "id",
                                "username"
                            ],
                            "label": "graded_by",
                            "relationship": {
                                "variant": "object",
                                "type": "one_to_one",
                                "foreign_key": {
                                    "parent": [
                                        "graded_by_id"
                                    ],
                                    "child": [
                                        "id"
                                    ]
                                }
                            }
                        },
                        {
                            "table": "users_user",
                            "columns": [
                                "id",
                                "username"
                            ],
                            "label": "resolved_by",
                            "relationship": {
                                "variant": "object",
                                "type": "one_to_one",
                                "foreign_key": {
                                    "parent": [
                                        "resolved_by_id"
                                    ],
                                    "child": [
                                        "id"
                                    ]
                                }
                            }
                        },
                        {
                            "table": "users_user",
                            "columns": [
                                "id",
                                "username"
                            ],
                            "label": "reviewed_by",
                            "relationship": {
                                "variant": "object",
                                "type": "one_to_one",
                                "foreign_key": {
                                    "parent": [
                                        "reviewed_by_id"
                                    ],
                                    "child": [
                                        "id"
                                    ]
                                }
                            }
                        }
                    ]
                },
                {
                    "table": "voyc_transcript_transcript",
                    "columns": [
                        "id"
                    ],
                    "label": "transcript",
                    "relationship": {
                        "variant": "object",
                        "type": "one_to_one"
                    },
                    "children": [
                        {
                            "table": "voyc_transcript_utterance",
                            "columns": [
                                "id"
                            ],
                            "label": "utterance",
                            "relationship": {
                                "variant": "object",
                                "type": "one_to_many"
                            },
                            "children": [
                                {
                                    "table": "voyc_transcript_sentence",
                                    "columns": [
                                        "id",
                                        "text",
                                        "words#>{0,start}"
                                    ],
                                    "label": "sentence",
                                    "relationship": {
                                        "variant": "object",
                                        "type": "one_to_many",
                                        "foreign_key": {
                                            "parent": [
                                                "id"
                                            ],
                                            "child": [
                                                "utterance_id"
                                            ]
                                        }
                                    }
                                }
                            ]
                        }
                    ]
                }
            ]
        }
    }
]

When running PGSync:

 - voycapi_conversations
   |- voyc_review_conversationreview
   |  |- users_user
   |  |- users_user
   |  |- users_user
   |  |- users_user
   |  |- users_user
   |   - users_user
   |- voyc_transcript_transcript
   |   - voyc_transcript_utterance
   |      - voyc_transcript_sentence

This works upon the initial sync. The issue, however, is that some changes to the database causes a query that looks like this to run:

SELECT JSON_BUILD_ARRAY(anon_5._keys, anon_8._keys) AS "JSON_BUILD_ARRAY_1",
       CAST(JSON_BUILD_OBJECT('id', voycapi_conversations_1.id,
'review',
anon_5.review,
'transcript',
anon_8.transcript) AS JSONB) AS "JSON_BUILD_OBJECT_1",
       voycapi_conversations_1.id
FROM public.voycapi_conversations AS voycapi_conversations_1
LEFT OUTER JOIN LATERAL
  (SELECT CAST(JSON_BUILD_OBJECT('voyc_review_conversationreview', CAST(JSON_BUILD_OBJECT('id', JSON_BUILD_ARRAY(voyc_review_conversationreview_1.id)) AS JSONB)) AS JSONB) || CAST(JSON_BUILD_ARRAY(anon_12._keys) AS JSONB) || CAST(JSON_BUILD_ARRAY(anon_13._keys) AS JSONB) || CAST(JSON_BUILD_ARRAY(anon_14._keys) AS JSONB) || CAST(JSON_BUILD_ARRAY(anon_15._keys) AS JSONB) || CAST(JSON_BUILD_ARRAY(anon_16._keys) AS JSONB) || CAST(JSON_BUILD_ARRAY(anon_17._keys) AS JSONB) AS _keys,
          CAST(JSON_BUILD_OBJECT('reviewed_status', voyc_review_conversationreview_1.reviewed_status, 'resolved_status', voyc_review_conversationreview_1.resolved_status, 'coached_status', voyc_review_conversationreview_1.coached_status, 'graded_status', voyc_review_conversationreview_1.graded_status, 'date_reviewed', voyc_review_conversationreview_1.date_reviewed, 'date_resolved', voyc_review_conversationreview_1.date_resolved, 'date_coached', voyc_review_conversationreview_1.date_coached, 'date_graded', voyc_review_conversationreview_1.date_graded, 'date_assigned', voyc_review_conversationreview_1.date_assigned, 'assigned_by', anon_12.assigned_by, 'assigned_to', anon_13.assigned_to, 'coached_by', anon_14.coached_by, 'graded_by', anon_15.graded_by, 'resolved_by', anon_16.resolved_by, 'reviewed_by', anon_17.reviewed_by) AS JSONB) AS review,
          voyc_review_conversationreview_1.conversation_id AS conversation_id
   FROM public.voyc_review_conversationreview AS voyc_review_conversationreview_1
   LEFT OUTER JOIN LATERAL
     (SELECT CAST(JSON_BUILD_OBJECT('users_user', CAST(JSON_BUILD_OBJECT('id', users_user_1.id) AS JSONB)) AS JSONB) AS _keys,
             CAST(JSON_BUILD_OBJECT('id', users_user_1.id, 'username', users_user_1.username) AS JSONB) AS assigned_by,
             users_user_1.id AS id
      FROM public.users_user AS users_user_1
      WHERE users_user_1.id = voyc_review_conversationreview_1.assigned_by_id) AS anon_12 ON anon_12.id = voyc_review_conversationreview_1.assigned_by_id
   LEFT OUTER JOIN LATERAL
     (SELECT CAST(JSON_BUILD_OBJECT('users_user', CAST(JSON_BUILD_OBJECT('id', users_user_1.id) AS JSONB)) AS JSONB) AS _keys,
             CAST(JSON_BUILD_OBJECT('id', users_user_1.id, 'username', users_user_1.username) AS JSONB) AS assigned_to,
             users_user_1.id AS id
      FROM public.users_user AS users_user_1
      WHERE users_user_1.id = voyc_review_conversationreview_1.assigned_to_id) AS anon_13 ON anon_13.id = voyc_review_conversationreview_1.assigned_to_id
   LEFT OUTER JOIN LATERAL
     (SELECT CAST(JSON_BUILD_OBJECT('users_user', CAST(JSON_BUILD_OBJECT('id', users_user_1.id) AS JSONB)) AS JSONB) AS _keys,
             CAST(JSON_BUILD_OBJECT('id', users_user_1.id, 'username', users_user_1.username) AS JSONB) AS coached_by,
             users_user_1.id AS id
      FROM public.users_user AS users_user_1
      WHERE users_user_1.id = voyc_review_conversationreview_1.coached_by_id) AS anon_14 ON anon_14.id = voyc_review_conversationreview_1.coached_by_id
   LEFT OUTER JOIN LATERAL
     (SELECT CAST(JSON_BUILD_OBJECT('users_user', CAST(JSON_BUILD_OBJECT('id', users_user_1.id) AS JSONB)) AS JSONB) AS _keys,
             CAST(JSON_BUILD_OBJECT('id', users_user_1.id, 'username', users_user_1.username) AS JSONB) AS graded_by,
             users_user_1.id AS id
      FROM public.users_user AS users_user_1
      WHERE users_user_1.id = voyc_review_conversationreview_1.graded_by_id) AS anon_15 ON anon_15.id = voyc_review_conversationreview_1.graded_by_id
   LEFT OUTER JOIN LATERAL
     (SELECT CAST(JSON_BUILD_OBJECT('users_user', CAST(JSON_BUILD_OBJECT('id', users_user_1.id) AS JSONB)) AS JSONB) AS _keys,
             CAST(JSON_BUILD_OBJECT('id', users_user_1.id, 'username', users_user_1.username) AS JSONB) AS resolved_by,
             users_user_1.id AS id
      FROM public.users_user AS users_user_1
      WHERE users_user_1.id = voyc_review_conversationreview_1.resolved_by_id) AS anon_16 ON anon_16.id = voyc_review_conversationreview_1.resolved_by_id
   LEFT OUTER JOIN LATERAL
     (SELECT CAST(JSON_BUILD_OBJECT('users_user', CAST(JSON_BUILD_OBJECT('id', users_user_1.id) AS JSONB)) AS JSONB) AS _keys,
             CAST(JSON_BUILD_OBJECT('id', users_user_1.id, 'username', users_user_1.username) AS JSONB) AS reviewed_by,
             users_user_1.id AS id
      FROM public.users_user AS users_user_1
      WHERE users_user_1.id = voyc_review_conversationreview_1.reviewed_by_id) AS anon_17 ON anon_17.id = voyc_review_conversationreview_1.reviewed_by_id
   WHERE voyc_review_conversationreview_1.conversation_id = voycapi_conversations_1.id) AS anon_5 ON anon_5.conversation_id = voycapi_conversations_1.id
JOIN LATERAL
  (SELECT CAST(JSON_BUILD_OBJECT('voyc_transcript_transcript', CAST(JSON_BUILD_OBJECT('id', JSON_BUILD_ARRAY(voyc_transcript_transcript_1.id)) AS JSONB)) AS JSONB) || CAST(JSON_BUILD_ARRAY(anon_19._keys) AS JSONB) AS _keys,
          CAST(JSON_BUILD_OBJECT('id', voyc_transcript_transcript_1.id, 'confidence', voyc_transcript_transcript_1.confidence, 'utterance', anon_19.utterance) AS JSONB) AS transcript,
          voyc_transcript_transcript_1.conversation_id AS conversation_id
   FROM public.voyc_transcript_transcript AS voyc_transcript_transcript_1
   JOIN LATERAL
     (SELECT CAST(JSON_BUILD_OBJECT('voyc_transcript_utterance', JSON_AGG(CAST(JSON_BUILD_OBJECT('id', voyc_transcript_utterance_1.id) AS JSONB))) AS JSONB) || CAST(JSON_AGG(anon_20._keys) AS JSONB) AS _keys,
             JSON_AGG(CAST(JSON_BUILD_OBJECT('id', voyc_transcript_utterance_1.id, 'emotion', voyc_transcript_utterance_1.emotion, 'sentence', anon_20.sentence) AS JSONB)) AS utterance,
             voyc_transcript_utterance_1.transcript_id AS transcript_id
      FROM public.voyc_transcript_utterance AS voyc_transcript_utterance_1
      JOIN LATERAL
        (SELECT CAST(JSON_BUILD_OBJECT('voyc_transcript_sentence', JSON_AGG(CAST(JSON_BUILD_OBJECT('id', voyc_transcript_sentence_1.id) AS JSONB))) AS JSONB) AS _keys,
                JSON_AGG(CAST(JSON_BUILD_OBJECT('id', voyc_transcript_sentence_1.id, 'text', voyc_transcript_sentence_1.text, 'words_{0,start}', voyc_transcript_sentence_1.words #> '{0,start}') AS JSONB)) AS sentence,
                voyc_transcript_sentence_1.utterance_id AS utterance_id
         FROM public.voyc_transcript_sentence AS voyc_transcript_sentence_1
         WHERE voyc_transcript_sentence_1.utterance_id = voyc_transcript_utterance_1.id
         GROUP BY voyc_transcript_sentence_1.utterance_id) AS anon_20 ON anon_20.utterance_id = voyc_transcript_utterance_1.id
      WHERE voyc_transcript_utterance_1.transcript_id = voyc_transcript_transcript_1.id
      GROUP BY voyc_transcript_utterance_1.transcript_id) AS anon_19 ON anon_19.transcript_id = voyc_transcript_transcript_1.id
   WHERE voyc_transcript_transcript_1.conversation_id = voycapi_conversations_1.id
     AND (voyc_transcript_transcript_1.id = 4273419
          OR voyc_transcript_transcript_1.id = 4273419
          OR voyc_transcript_transcript_1.id = 4273419
          OR voyc_transcript_transcript_1.id = 4273419
          OR voyc_transcript_transcript_1.id = 4273419
          OR voyc_transcript_transcript_1.id = 4273419
          OR voyc_transcript_transcript_1.id = 4273419
          OR voyc_transcript_transcript_1.id = 4273419
          OR voyc_transcript_transcript_1.id = 4273419
          OR voyc_transcript_transcript_1.id = 4273419
          OR voyc_transcript_transcript_1.id = 4273419
          OR voyc_transcript_transcript_1.id = 4273419
          OR voyc_transcript_transcript_1.id = 4273419
          OR voyc_transcript_transcript_1.id = 4273419
          OR voyc_transcript_transcript_1.id = 4273419
          OR voyc_transcript_transcript_1.id = 4273419
          OR voyc_transcript_transcript_1.id = 4273419
          OR voyc_transcript_transcript_1.id = 4273419
          OR voyc_transcript_transcript_1.id = 4273419
          OR voyc_transcript_transcript_1.id = 4273419
          OR voyc_transcript_transcript_1.id = 4273419
          OR voyc_transcript_transcript_1.id = 4273419)) AS anon_8 ON anon_8.conversation_id = voycapi_conversations_1.id

This query takes up to two minutes to complete and returns one result. See the explain here.

ps. The query and schema I posted above is only a portion of my full schema and query. I've redacted the rest and only kept the part of the query that is problematic.

voyc-jean avatar Mar 22 '22 15:03 voyc-jean

I'm seeing a few seq scans in that query analysis. Did you already run pgsync -c schema.json -a to see if there are any missing indices?

toluaina avatar Mar 24 '22 16:03 toluaina

Yes I have, all indices are present.

voyc-jean avatar Mar 25 '22 08:03 voyc-jean

  • This is as a result of the query plan.
  • Initially I had a query plan similar to yours with lots of sequential scans.
  • Then I created the indices below and ran vacuum --full
    • CREATE INDEX idx_review_conversationreview_conversation_id ON review_conversationreview (conversation_id)
    • CREATE INDEX idx_transcript_conversation_id ON transcript (conversation_id)
    • CREATE INDEX idx_user_id ON public.user (id)
    • CREATE INDEX idx_utterance_transcript_id ON utterance (transcript_id)
    • CREATE INDEX idx_sentence_utterance_id ON sentence (utterance_id)
  • Can you try with a vacuum?
  • Also/alternatively try and set SET enable_seqscan = OFF; and re-run the query
  • If you have all indices and vacuum'd, can you provide the table distribution count.
  • i think i have 10000 row for all tables
  • pgsync -c schema.json -a

toluaina avatar Mar 31 '22 22:03 toluaina

I've had a similar problems with query performance after the first sync which might be related to this problem. However in my case the generated query is so large (over 6 million characters) it causes Postgres to crash. This problem started after adding a "through_tables" relationship to the schema. Most of the generated query is in a WHERE clause as a huge number of OR statements. Here's a snippet of how part of the query looks WHERE manga_1.manga_id = %(manga_id_1)s OR manga_1.manga_id = %(manga_id_2)s OR manga_1.manga_id = %(manga_id_3)s OR ...

Here's how my schema looks. The manga and manga_service tables have both around 50k rows while services table has just 8 rows.

[
    {
        "database": "manga",
        "index": "manga",
        "setting": {
            "analysis": {
                "analyzer": {
                    "trigram_analyzer": {
                        "filter": [
                            "lowercase"
                        ],
                        "type": "custom",
                        "tokenizer": "trigram_tokenizer"
                    }
                },
                "tokenizer": {
                    "trigram_tokenizer": {
                        "token_chars": [
                            "letter",
                            "digit"
                        ],
                        "min_gram": "3",
                        "type": "ngram",
                        "max_gram": "3"
                    }
                }
            }
        },
        "nodes": {
            "table": "manga",
            "columns": [
                "title",
                "views",
                "manga_id"
            ],
            "transform": {
                "mapping": {
                    "title": {
                        "type": "text",
                        "fields": {
                            "ngram": {
                                "type": "text",
                                "analyzer": "trigram_analyzer"
                            }
                        }
                    }
                }
            },
            "children": [
                {
                    "table": "manga_alias",
                    "columns": [
                        "title",
                        "manga_id"
                    ],
                    "relationship": {
                        "variant": "object",
                        "type": "one_to_many"
                    },
                    "transform": {
                        "mapping": {
                            "title": {
                                "type": "text",
                                "fields": {
                                    "ngram": {
                                        "type": "text",
                                        "analyzer": "trigram_analyzer"
                                    }
                                }
                            }
                        }
                    }
                },
                {
                    "table": "services",
                    "columns": [
                        "service_name",
                        "service_id"
                    ],
                    "relationship": {
                        "variant": "object",
                        "type": "one_to_many",
                        "through_tables": [
                            "manga_service"
                        ]
                    }
                }
            ]
        }
    }
]

s0hv avatar Apr 01 '22 13:04 s0hv