`right join` may return error result when using `HashJoin`
Bug Report
Please answer these questions before submitting your issue. Thanks!
1. Minimal reproduce step (Required)
CREATE TABLE `lrr_test` ( `COL102` double DEFAULT NULL, `COL1` double GENERATED ALWAYS AS (`COL102` + 10) STORED NOT NULL, PRIMARY KEY (`COL1`) /*T![clustered_index] CLUSTERED */ ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
The following datasets can be usedï¼
mysql> select * from lrr_test; +-------------------------+-------------------------+ | COL102 | COL1 | +-------------------------+-------------------------+ | -1.704648925036604e308 | -1.704648925036604e308 | | -1.6888619680353582e308 | -1.6888619680353582e308 | | -1.6685908644498436e308 | -1.6685908644498436e308 | | -1.6311134967437805e308 | -1.6311134967437805e308 | | -1.6128280680807152e308 | -1.6128280680807152e308 | | -1.5899713947158026e308 | -1.5899713947158026e308 | | -1.5709457594070477e308 | -1.5709457594070477e308 | | -1.4925714566991343e308 | -1.4925714566991343e308 | | -1.4705985087370154e308 | -1.4705985087370154e308 | | -1.4451316666300039e308 | -1.4451316666300039e308 | | -1.3946576985986583e308 | -1.3946576985986583e308 | | -1.3695679630646804e308 | -1.3695679630646804e308 | | -1.3208992137984086e308 | -1.3208992137984086e308 | | -1.2887981369134862e308 | -1.2887981369134862e308 | | -1.2119996449796167e308 | -1.2119996449796167e308 | | -1.195172956104992e308 | -1.195172956104992e308 | | -1.1929781068369925e308 | -1.1929781068369925e308 | | -1.1746351299417647e308 | -1.1746351299417647e308 | | -1.1237012620945195e308 | -1.1237012620945195e308 | | -1.1223448185004882e308 | -1.1223448185004882e308 | | -1.0974439629672084e308 | -1.0974439629672084e308 | | -1.0657654808610821e308 | -1.0657654808610821e308 | | -1.0582598945271716e308 | -1.0582598945271716e308 | | -1.0565276887850733e308 | -1.0565276887850733e308 | | -1.0416104832981696e308 | -1.0416104832981696e308 | | -1.0368741532690337e308 | -1.0368741532690337e308 | | -1.033521479407133e308 | -1.033521479407133e308 | | -1.0232269544119505e308 | -1.0232269544119505e308 | | -9.31943312515408e307 | -9.31943312515408e307 | | -9.05107332838438e307 | -9.05107332838438e307 | | -8.276443475796885e307 | -8.276443475796885e307 | | -7.845086666145396e307 | -7.845086666145396e307 | | -7.664543340054255e307 | -7.664543340054255e307 | | -7.235369799352141e307 | -7.235369799352141e307 | | -7.047280050755922e307 | -7.047280050755922e307 | | -6.62205033356235e307 | -6.62205033356235e307 | | -6.35964999739255e307 | -6.35964999739255e307 | | -5.989391229038818e307 | -5.989391229038818e307 | | -5.974526205854541e307 | -5.974526205854541e307 | | -5.798684586589338e307 | -5.798684586589338e307 | | -4.98047732376121e307 | -4.98047732376121e307 | | -4.4623979626128605e307 | -4.4623979626128605e307 | | -4.3248436443381234e307 | -4.3248436443381234e307 | | -3.3391152928792773e307 | -3.3391152928792773e307 | | -3.2694282487729395e307 | -3.2694282487729395e307 | | -3.2461091065368577e307 | -3.2461091065368577e307 | | -2.8613054009714654e307 | -2.8613054009714654e307 | | -2.7176814604572905e307 | -2.7176814604572905e307 | | -2.1301127705458223e307 | -2.1301127705458223e307 | | -1.7280065154718344e307 | -1.7280065154718344e307 | | -1.6743061442642827e307 | -1.6743061442642827e307 | | -4.862812928655648e306 | -4.862812928655648e306 | | -3.3262533560429795e305 | -3.3262533560429795e305 | | 4.124952267435051e305 | 4.124952267435051e305 | | 5.4576487694211726e306 | 5.4576487694211726e306 | | 1.1237742400537221e307 | 1.1237742400537221e307 | | 1.569984332645614e307 | 1.569984332645614e307 | | 1.7966188405412235e307 | 1.7966188405412235e307 | | 1.8619233341238355e307 | 1.8619233341238355e307 | | 2.1152066540419881e307 | 2.1152066540419881e307 | | 2.1764927570795164e307 | 2.1764927570795164e307 | | 2.99416682762135e307 | 2.99416682762135e307 | | 3.0545414962788647e307 | 3.0545414962788647e307 | | 3.262967770716021e307 | 3.262967770716021e307 | | 3.288944887183685e307 | 3.288944887183685e307 | | 4.9025219351381e307 | 4.9025219351381e307 | | 5.250864486081297e307 | 5.250864486081297e307 | | 5.52054372134351e307 | 5.52054372134351e307 | | 6.311436996747818e307 | 6.311436996747818e307 | | 6.870852232080436e307 | 6.870852232080436e307 | | 7.501871137935436e307 | 7.501871137935436e307 | | 7.925709054822421e307 | 7.925709054822421e307 | | 8.438195254661318e307 | 8.438195254661318e307 | | 8.446731596918706e307 | 8.446731596918706e307 | | 9.43580947190119e307 | 9.43580947190119e307 | | 9.66735866233596e307 | 9.66735866233596e307 | | 1.0022043827847664e308 | 1.0022043827847664e308 | | 1.020869767928594e308 | 1.020869767928594e308 | | 1.0327408606815872e308 | 1.0327408606815872e308 | | 1.0402383684235906e308 | 1.0402383684235906e308 | | 1.0690255622829305e308 | 1.0690255622829305e308 | | 1.1623306052784659e308 | 1.1623306052784659e308 | | 1.1906116361044565e308 | 1.1906116361044565e308 | | 1.2221839628780758e308 | 1.2221839628780758e308 | | 1.3112927565356536e308 | 1.3112927565356536e308 | | 1.3307364382402157e308 | 1.3307364382402157e308 | | 1.3646958839720612e308 | 1.3646958839720612e308 | | 1.425066345632827e308 | 1.425066345632827e308 | | 1.4433864261103511e308 | 1.4433864261103511e308 | | 1.5038532858735658e308 | 1.5038532858735658e308 | | 1.5079450808097928e308 | 1.5079450808097928e308 | | 1.553628680980576e308 | 1.553628680980576e308 | | 1.6241456663280369e308 | 1.6241456663280369e308 | | 1.6295729949930798e308 | 1.6295729949930798e308 | | 1.6328703529666413e308 | 1.6328703529666413e308 | | 1.6832354056195887e308 | 1.6832354056195887e308 | | 1.7017315016390902e308 | 1.7017315016390902e308 | | 1.7134206410400048e308 | 1.7134206410400048e308 | | 1.7240829054261275e308 | 1.7240829054261275e308 | | 1.7257738639648862e308 | 1.7257738639648862e308 | | 1.7262297095455299e308 | 1.7262297095455299e308 | | 1.7905151735809062e308 | 1.7905151735809062e308 | +-------------------------+-------------------------+ 102 rows in set (0.12 sec)
first step:
select t1. col1, t2.col1 from lrr_test as t1 right join lrr_test as t2 on t1.col1 = t2.col1 where t1.col1 >=0ï¼
explain select t1. col1, t2.col1 from lrr_test as t1 right join lrr_test as t2 on t1.col1 = t2.col1 where t1.col1 >=0ï¼
second step:
analyze table lrr_test;
select t1. col1, t2.col1 from lrr_test as t1 right join lrr_test as t2 on t1.col1 = t2.col1 where t1.col1 >=0ï¼
explain select t1. col1, t2.col1 from lrr_test as t1 right join lrr_test as t2 on t1.col1 = t2.col1 where t1.col1 >=0ï¼
2. What did you expect to see? (Required)
The second step should return 49 rows!
3. What did you see instead (Required)
First step return 49 rows which is correct, and its execution plan is merge_join.
After analyze table, second step return 2401 rows which is wrong, and its execution plan is hash_join.
+-----------------------------+---------+-----------+---------------+----------------------------------+
| id | estRows | task | access object | operator info |
+-----------------------------+---------+-----------+---------------+----------------------------------+
| HashJoin_9 | NaN | root | | CARTESIAN inner join |
| ââTableReader_14(Build) | 49.00 | root | | data:TableRangeScan_13 |
| â ââTableRangeScan_13 | 49.00 | cop[tikv] | table:t2 | range:[0,+inf], keep order:false |
| ââTableReader_12(Probe) | 49.00 | root | | data:TableRangeScan_11 |
| ââTableRangeScan_11 | 49.00 | cop[tikv] | table:t1 | range:[0,+inf], keep order:false |
+-----------------------------+---------+-----------+---------------+----------------------------------+
5 rows in set (0.04 sec)
Some returned result fragmentsï¼
| 1.7905151735809062e308 | 6.311436996747818e307 | | 1.7905151735809062e308 | 5.52054372134351e307 | | 1.7905151735809062e308 | 5.250864486081297e307 | | 1.7905151735809062e308 | 4.9025219351381e307 | | 1.7905151735809062e308 | 3.288944887183685e307 | | 1.7905151735809062e308 | 3.262967770716021e307 | | 1.7905151735809062e308 | 3.0545414962788647e307 | | 1.7905151735809062e308 | 2.99416682762135e307 | | 1.7905151735809062e308 | 2.1764927570795164e307 | | 1.7905151735809062e308 | 2.1152066540419881e307 | | 1.7905151735809062e308 | 1.8619233341238355e307 | | 1.7905151735809062e308 | 1.7966188405412235e307 | | 1.7905151735809062e308 | 1.569984332645614e307 | | 1.7905151735809062e308 | 1.1237742400537221e307 | | 1.7905151735809062e308 | 5.4576487694211726e306 | | 1.7905151735809062e308 | 4.124952267435051e305 | +------------------------+------------------------+ 2401 rows in set (0.15 sec)
4. What is your TiDB version? (Required)
Release Version: v8.4.0 Edition: Community Git Commit Hash: 1832b2728d7ae494684bdeb32b4af0a5a927bf89 Git Branch: HEAD UTC Build Time: 2024-10-15 03:47:37 GoVersion: go1.23.2 Race Enabled: false Check Table Before Drop: false Store: tikv
/severity moderate /component planner /impact wrong-result
@apollodafoni: The label(s) component/planner cannot be applied, because the repository doesn't have them.
In response to this:
/severity moderate /component planner /impact wrong-result
Instructions for interacting with me using PR comments are available here. If you have questions or suggestions related to my behavior, please file an issue against the ti-community-infra/tichi repository.
inner join also return wrong result : select t1.col1, t2.col1 from lrr_test as t1 inner join lrr_test as t2 on t1.col1 = t2.col1 where t1.col1 != 3.538547054120427e307
| 1.7905151735809062e308 | -1.6311134967437805e308 |
| 1.7905151735809062e308 | -1.6685908644498436e308 |
| 1.7905151735809062e308 | -1.6888619680353582e308 |
| 1.7905151735809062e308 | -1.704648925036604e308 |
+-------------------------+-------------------------+
10404 rows in set (0.36 sec)
left join also return wrong result : select t1. col1, t2.col1 from lrr_test as t1 left join lrr_test as t2 on t1.col1 = t2.col1 where t1.col1 <= 0;
| -3.3262533560429795e305 | -1.5899713947158026e308 |
| -3.3262533560429795e305 | -1.6128280680807152e308 |
| -3.3262533560429795e305 | -1.6311134967437805e308 |
| -3.3262533560429795e305 | -1.6685908644498436e308 |
| -3.3262533560429795e305 | -1.6888619680353582e308 |
| -3.3262533560429795e305 | -1.704648925036604e308 |
+-------------------------+-------------------------+
2809 rows in set (0.28 sec)
@apollodafoni Can you change the table data into INSERT INTO or csv? It's hard to use.
And I could not reproduce the problem currently.
@winoros I can reproduce this bug. I think there is 2 unexpected thing:
- the hash join use
CARTESIAN inner join, but it actually has an equal join condition
mysql> explain select t1. col1, t2.col1 from lrr_test as t1 join lrr_test as t2 on t1.col1 = t2.col1 where t1.col1 >=0;
+-----------------------------+---------+-----------+---------------+----------------------------------+
| id | estRows | task | access object | operator info |
+-----------------------------+---------+-----------+---------------+----------------------------------+
| HashJoin_10 | NaN | root | | CARTESIAN inner join |
| ├─TableReader_15(Build) | 49.00 | root | | data:TableRangeScan_14 |
| │ └─TableRangeScan_14 | 49.00 | cop[tikv] | table:t2 | range:[0,+inf], keep order:false |
| └─TableReader_13(Probe) | 49.00 | root | | data:TableRangeScan_12 |
| └─TableRangeScan_12 | 49.00 | cop[tikv] | table:t1 | range:[0,+inf], keep order:false |
+-----------------------------+---------+-----------+---------------+----------------------------------+
- The worst thing is the join condition is lost, you can see in the plan
HashJoin_10does not have other condtion, this makes the join result wrong.
NaN is got because the data of the table is too wide(It has values near MinFlaot64 and values near MaxFloat64), the histogram width of the column exceeds MaxFloat64, becoming +inf, then leading to NaN.