SMF icon indicating copy to clipboard operation
SMF copied to clipboard

Slow query in the action to find forum errors

Open live627 opened this issue 3 years ago • 4 comments

https://github.com/SimpleMachines/SMF2.1/blob/0f375f1b9b6682e9cf8415624296281b1559441e/Sources/RepairBoards.php#L603-L618

ANALYZE format=json SELECT
	t.id_topic, t.id_first_msg, t.id_last_msg,
	CASE WHEN MIN(ma.id_msg) > 0 THEN
		CASE WHEN MIN(mu.id_msg) > 0 THEN
			CASE WHEN MIN(mu.id_msg) < MIN(ma.id_msg) THEN MIN(mu.id_msg) ELSE MIN(ma.id_msg) END ELSE
		MIN(ma.id_msg) END ELSE
	MIN(mu.id_msg) END AS myid_first_msg,
	CASE WHEN MAX(ma.id_msg) > 0 THEN MAX(ma.id_msg) ELSE MIN(mu.id_msg) END AS myid_last_msg,
	t.approved, mf.approved, mf.approved AS firstmsg_approved
FROM smf_topics AS t
	LEFT JOIN smf_messages AS ma ON (ma.id_topic = t.id_topic AND ma.approved = 1)
	LEFT JOIN smf_messages AS mu ON (mu.id_topic = t.id_topic AND mu.approved = 0)
	LEFT JOIN smf_messages AS mf ON (mf.id_msg = t.id_first_msg)
WHERE t.id_topic BETWEEN 2 AND 3
GROUP BY t.id_topic, t.id_first_msg, t.id_last_msg, t.approved, mf.approved
ORDER BY t.id_topic

I had to tightly limit the range because otherwise it would timeout.

{
  "query_block": {
    "select_id": 1,
    "r_loops": 1,
    "r_total_time_ms": 8154.2,
    "filesort": {
      "sort_key": "t.id_topic, t.id_first_msg, t.id_last_msg, t.approved, mf.approved",
      "r_loops": 1,
      "r_total_time_ms": 0.0109,
      "r_used_priority_queue": false,
      "r_output_rows": 2,
      "r_buffer_size": "360",
      "temporary_table": {
        "table": {
          "table_name": "t",
          "access_type": "range",
          "possible_keys": ["PRIMARY"],
          "key": "PRIMARY",
          "key_length": "3",
          "used_key_parts": ["id_topic"],
          "r_loops": 1,
          "rows": 2,
          "r_rows": 2,
          "r_total_time_ms": 0.0099,
          "filtered": 100,
          "r_filtered": 100,
          "attached_condition": "t.id_topic between 2 and 3"
        },
        "block-nl-join": {
          "table": {
            "table_name": "ma",
            "access_type": "ALL",
            "r_loops": 1,
            "rows": 645486,
            "r_rows": 658956,
            "r_total_time_ms": 323.82,
            "filtered": 100,
            "r_filtered": 100
          },
          "buffer_type": "flat",
          "buffer_size": "256Kb",
          "join_type": "BNL",
          "attached_condition": "trigcond(ma.id_topic = t.id_topic and ma.approved = 1)",
          "r_filtered": 0.0072
        },
        "block-nl-join": {
          "table": {
            "table_name": "mu",
            "access_type": "ALL",
            "r_loops": 1,
            "rows": 645486,
            "r_rows": 658956,
            "r_total_time_ms": 325.84,
            "filtered": 100,
            "r_filtered": 100
          },
          "buffer_type": "incremental",
          "buffer_size": "256Kb",
          "join_type": "BNL",
          "attached_condition": "trigcond(mu.id_topic = t.id_topic and mu.approved = 0)",
          "r_filtered": 1.5e-4
        },
        "table": {
          "table_name": "mf",
          "access_type": "eq_ref",
          "possible_keys": ["PRIMARY", "id_member"],
          "key": "PRIMARY",
          "key_length": "4",
          "used_key_parts": ["id_msg"],
          "ref": ["smf21.t.id_first_msg"],
          "r_loops": 95,
          "rows": 1,
          "r_rows": 1,
          "r_total_time_ms": 0.0411,
          "filtered": 100,
          "r_filtered": 100
        }
      }
    }
  }
}

With new index on

ALTER TABLE smf_messages ADD INDEX idx_id_topic (id_topic, approved)

it becomes much faster

ANALYZE format=json SELECT
	t.id_topic, t.id_first_msg, t.id_last_msg,
	CASE WHEN MIN(ma.id_msg) > 0 THEN
		CASE WHEN MIN(mu.id_msg) > 0 THEN
			CASE WHEN MIN(mu.id_msg) < MIN(ma.id_msg) THEN MIN(mu.id_msg) ELSE MIN(ma.id_msg) END ELSE
		MIN(ma.id_msg) END ELSE
	MIN(mu.id_msg) END AS myid_first_msg,
	CASE WHEN MAX(ma.id_msg) > 0 THEN MAX(ma.id_msg) ELSE MIN(mu.id_msg) END AS myid_last_msg,
	t.approved, mf.approved, mf.approved AS firstmsg_approved
FROM smf_topics AS t
	LEFT JOIN smf_messages AS ma ON (ma.id_topic = t.id_topic AND ma.approved = 1)
	LEFT JOIN smf_messages AS mu ON (mu.id_topic = t.id_topic AND mu.approved = 0)
	LEFT JOIN smf_messages AS mf ON (mf.id_msg = t.id_first_msg)
WHERE t.id_topic BETWEEN 2 AND 3
GROUP BY t.id_topic, t.id_first_msg, t.id_last_msg, t.approved, mf.approved
ORDER BY t.id_topic
{
  "query_block": {
    "select_id": 1,
    "r_loops": 1,
    "r_total_time_ms": 0.4024,
    "filesort": {
      "sort_key": "t.id_topic, t.id_first_msg, t.id_last_msg, t.approved, mf.approved",
      "r_loops": 1,
      "r_total_time_ms": 0.0073,
      "r_used_priority_queue": false,
      "r_output_rows": 2,
      "r_buffer_size": "360",
      "temporary_table": {
        "table": {
          "table_name": "t",
          "access_type": "range",
          "possible_keys": ["PRIMARY"],
          "key": "PRIMARY",
          "key_length": "3",
          "used_key_parts": ["id_topic"],
          "r_loops": 1,
          "rows": 2,
          "r_rows": 2,
          "r_total_time_ms": 0.0105,
          "filtered": 100,
          "r_filtered": 100,
          "attached_condition": "t.id_topic between 2 and 3"
        },
        "table": {
          "table_name": "ma",
          "access_type": "ref",
          "possible_keys": ["id_topic"],
          "key": "id_topic",
          "key_length": "4",
          "used_key_parts": ["id_topic", "approved"],
          "ref": ["smf21.t.id_topic", "const"],
          "r_loops": 2,
          "rows": 5,
          "r_rows": 47.5,
          "r_total_time_ms": 0.0398,
          "filtered": 100,
          "r_filtered": 100,
          "using_index": true
        },
        "table": {
          "table_name": "mu",
          "access_type": "ref",
          "possible_keys": ["id_topic"],
          "key": "id_topic",
          "key_length": "4",
          "used_key_parts": ["id_topic", "approved"],
          "ref": ["smf21.t.id_topic", "const"],
          "r_loops": 95,
          "rows": 5,
          "r_rows": 0.4947,
          "r_total_time_ms": 0.1425,
          "filtered": 100,
          "r_filtered": 100,
          "using_index": true
        },
        "table": {
          "table_name": "mf",
          "access_type": "eq_ref",
          "possible_keys": ["PRIMARY", "id_member"],
          "key": "PRIMARY",
          "key_length": "4",
          "used_key_parts": ["id_msg"],
          "ref": ["smf21.t.id_first_msg"],
          "r_loops": 95,
          "rows": 1,
          "r_rows": 1,
          "r_total_time_ms": 0.009,
          "filtered": 100,
          "r_filtered": 100
        }
      }
    }
  }
}

The query coluld probably be rewritten but I want to save that for the future.

live627 avatar Aug 09 '21 10:08 live627

Could it be, that you implement the pr #6893 and so deleted the approved index on the message table? this index by his own shoujld improved your performance.

albertlast avatar Aug 09 '21 12:08 albertlast

I restored that index: query still slow

ANALYZE format=json SELECT
	t.id_topic, t.id_first_msg, t.id_last_msg,
	CASE WHEN MIN(ma.id_msg) > 0 THEN
		CASE WHEN MIN(mu.id_msg) > 0 THEN
			CASE WHEN MIN(mu.id_msg) < MIN(ma.id_msg) THEN MIN(mu.id_msg) ELSE MIN(ma.id_msg) END ELSE
		MIN(ma.id_msg) END ELSE
	MIN(mu.id_msg) END AS myid_first_msg,
	CASE WHEN MAX(ma.id_msg) > 0 THEN MAX(ma.id_msg) ELSE MIN(mu.id_msg) END AS myid_last_msg,
	t.approved, mf.approved, mf.approved AS firstmsg_approved
FROM smf_topics AS t
	LEFT JOIN smf_messages AS ma ON (ma.id_topic = t.id_topic AND ma.approved = 1)
	LEFT JOIN smf_messages AS mu ON (mu.id_topic = t.id_topic AND mu.approved = 0)
	LEFT JOIN smf_messages AS mf ON (mf.id_msg = t.id_first_msg)
WHERE t.id_topic BETWEEN 1 AND 3
GROUP BY t.id_topic, t.id_first_msg, t.id_last_msg, t.approved, mf.approved
ORDER BY t.id_topic
{
  "query_block": {
    "select_id": 1,
    "r_loops": 1,
    "r_total_time_ms": 4995.1,
    "filesort": {
      "sort_key": "t.id_topic, t.id_first_msg, t.id_last_msg, t.approved, mf.approved",
      "r_loops": 1,
      "r_total_time_ms": 0.0112,
      "r_used_priority_queue": false,
      "r_output_rows": 3,
      "r_buffer_size": "390",
      "temporary_table": {
        "table": {
          "table_name": "t",
          "access_type": "range",
          "possible_keys": ["PRIMARY"],
          "key": "PRIMARY",
          "key_length": "3",
          "used_key_parts": ["id_topic"],
          "r_loops": 1,
          "rows": 3,
          "r_rows": 3,
          "r_total_time_ms": 0.0161,
          "filtered": 100,
          "r_filtered": 100,
          "attached_condition": "t.id_topic between 1 and 3"
        },
        "table": {
          "table_name": "ma",
          "access_type": "ref",
          "possible_keys": ["idx_approved"],
          "key": "idx_approved",
          "key_length": "1",
          "used_key_parts": ["approved"],
          "ref": ["const"],
          "r_loops": 3,
          "rows": 322743,
          "r_rows": 658952,
          "r_total_time_ms": 4164.1,
          "filtered": 100,
          "r_filtered": 7.5699,
          "attached_condition": "trigcond(ma.id_topic = t.id_topic)"
        },
        "table": {
          "table_name": "mu",
          "access_type": "ref",
          "possible_keys": ["idx_approved"],
          "key": "idx_approved",
          "key_length": "1",
          "used_key_parts": ["approved"],
          "ref": ["const"],
          "r_loops": 149647,
          "rows": 4,
          "r_rows": 4,
          "r_total_time_ms": 572.44,
          "filtered": 100,
          "r_filtered": 24.992,
          "attached_condition": "trigcond(mu.id_topic = t.id_topic)"
        },
        "table": {
          "table_name": "mf",
          "access_type": "eq_ref",
          "possible_keys": ["PRIMARY", "id_member"],
          "key": "PRIMARY",
          "key_length": "4",
          "used_key_parts": ["id_msg"],
          "ref": ["smf21.t.id_first_msg"],
          "r_loops": 149647,
          "rows": 1,
          "r_rows": 1,
          "r_total_time_ms": 0.0055,
          "filtered": 100,
          "r_filtered": 100
        }
      }
    }
  }
}	

live627 avatar Aug 09 '21 13:08 live627

The topics table has a similar index structure to messages, with a standalone index on approved. Not good to have a standalone index on a boolean. #6983 addresses the issue on the messages table only, and did not change topics.

I left topics alone because I wasn't seeing issues there...

I'm still not - this db has 3M rows: image

Questions:

  • What is your value of innodb_buffer_pool_size? show variables like 'innodb_buffer_pool_size'

innodb_buffer_pool_size is supposed to be set to ~80% of your db server's memory. Problem is distributions don't know how much memory you have, so you often have ridiculously low initial values (128mb or lower) and you are expected to set it based on your needs. Unfortunately, InnoDB doesn't work well with with ridiculously low values... https://www.percona.com/blog/2015/06/02/80-ram-tune-innodb_buffer_pool_size/

sbulen avatar Aug 09 '21 14:08 sbulen

@live627 - Did you look at your innodb_buffer_pool_size?

I cannot reproduce this, not sure if it should be a 2.1.0 priority.

sbulen avatar Dec 29 '21 22:12 sbulen

Screen Shot 2023-04-21 at 4 22 59 PM

Continuing over here. I tested @live627 changes, I still see the temp tables.

jdarwood007 avatar Apr 21 '23 23:04 jdarwood007

@jdarwood007 since you use explain instead of analyze you output doesn't show anything userfull.

albertlast avatar Apr 22 '23 04:04 albertlast

Not much help from it either. I'm running it on a small board, but we know that big boards will suffer because of copying data temporary tables is always a performance hit.

-> Sort: t.id_topic  (actual time=0.399..0.403 rows=63 loops=1)
    -> Stream results  (cost=76.26 rows=0) (actual time=0.222..0.375 rows=63 loops=1)
        -> Nested loop inner join  (cost=76.26 rows=0) (actual time=0.221..0.363 rows=63 loops=1)
            -> Nested loop inner join  (cost=33.76 rows=17) (actual time=0.093..0.179 rows=63 loops=1)
                -> Table scan on ml  (cost=25.26..27.81 rows=17) (actual time=0.085..0.092 rows=63 loops=1)
                    -> Materialize  (cost=25.10..25.10 rows=17) (actual time=0.084..0.084 rows=63 loops=1)
                        -> Group aggregate: max(smf_messages.id_msg)  (cost=23.40 rows=17) (actual time=0.014..0.070 rows=63 loops=1)
                            -> Filter: (smf_messages.approved = 1)  (cost=21.70 rows=17) (actual time=0.010..0.047 rows=170 loops=1)
                                -> Covering index scan on smf_messages using current_topic  (cost=21.70 rows=170) (actual time=0.009..0.034 rows=170 loops=1)
                -> Single-row index lookup on t using PRIMARY (id_topic=ml.id_topic)  (cost=0.26 rows=1) (actual time=0.001..0.001 rows=1 loops=63)
            -> Index lookup on mf using <auto_key0> (id_topic=ml.id_topic)  (cost=0.26..2.56 rows=10) (actual time=0.003..0.003 rows=1 loops=63)
                -> Materialize  (cost=0.00..0.00 rows=0) (actual time=0.125..0.125 rows=63 loops=1)
                    -> Table scan on <temporary>  (actual time=0.093..0.100 rows=63 loops=1)
                        -> Aggregate using temporary table  (actual time=0.093..0.093 rows=63 loops=1)
                            -> Covering index scan on smf_messages using current_topic  (cost=21.70 rows=170) (actual time=0.001..0.023 rows=170 loops=1)

jdarwood007 avatar Apr 22 '23 04:04 jdarwood007

since sql get different execute by a database based on settings and size of the table, it make no sense to take a small setup to try a optimization for a big one.

albertlast avatar Apr 22 '23 04:04 albertlast