oceanbase icon indicating copy to clipboard operation
oceanbase copied to clipboard

value changed after restrict where conditio

Open ammmkilo opened this issue 1 year ago • 3 comments

Describe the bug When I use a more restrictive condition, the query results should be a subset of the previous one, but now there are values outside the set. Environment

ubuntu:~/Desktop$ uname -a
Linux ubuntu 5.15.0-91-generic #101~20.04.1-Ubuntu SMP Thu Nov 16 14:22:28 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux

obclient [d1]> select version();
+------------------------------+
| version()                    |
+------------------------------+
| 5.7.25-OceanBase_CE-v4.2.1.0 |
+------------------------------+
1 row in set (0.000 sec)

Fast Reproduce Steps(Required)


create table table_3_utf8_undef (
`pk` int primary key,
`col_bigint_undef_signed` bigint  ,
`col_bigint_undef_unsigned` bigint unsigned ,
`col_bigint_key_signed` bigint  ,
`col_bigint_key_unsigned` bigint unsigned ,
`col_float_undef_signed` float  ,
`col_float_undef_unsigned` float unsigned ,
`col_float_key_signed` float  ,
`col_float_key_unsigned` float unsigned ,
`col_double_undef_signed` double  ,
`col_double_undef_unsigned` double unsigned ,
`col_double_key_signed` double  ,
`col_double_key_unsigned` double unsigned ,
`col_decimal(40, 20)_undef_signed` decimal(40, 20)  ,
`col_decimal(40, 20)_undef_unsigned` decimal(40, 20) unsigned ,
`col_decimal(40, 20)_key_signed` decimal(40, 20)  ,
`col_decimal(40, 20)_key_unsigned` decimal(40, 20) unsigned ,
`col_char(20)_undef_signed` char(20)  ,
`col_char(20)_key_signed` char(20)  ,
`col_varchar(20)_undef_signed` varchar(20)  ,
`col_varchar(20)_key_signed` varchar(20)  ,
key (`col_bigint_key_signed`),
key (`col_bigint_key_unsigned`),
key (`col_float_key_signed`),
key (`col_float_key_unsigned`),
key (`col_double_key_signed`),
key (`col_double_key_unsigned`),
key (`col_decimal(40, 20)_key_signed`),
key (`col_decimal(40, 20)_key_unsigned`),
key (`col_char(20)_key_signed`),
key (`col_varchar(20)_key_signed`)
) character set utf8 ;
insert into table_3_utf8_undef values (0,82.1847,1,39.0425,38.1089,-1,1,94.1106,1.009,12.991,19755,-13064,0,1,79.1429,-2,1,"well",'3
','-0','e'),(1,1,20.0078,-9.183,68.1957,1,2,1,0.0001,12.991,2,71.0510,1,-1,2,12.991,12.991,'3	','1','3	','-0'),(2,-2,1,-21247,1.009,2,1.009,0.0001,36.0002,-2,2,-0,0.0001,-2,0.1598,47.1515,1.009,'3	','w','-1','e');
create table table_7_utf8_undef (
`pk` int primary key,
`col_bigint_undef_signed` bigint  ,
`col_bigint_undef_unsigned` bigint unsigned ,
`col_bigint_key_signed` bigint  ,
`col_bigint_key_unsigned` bigint unsigned ,
`col_float_undef_signed` float  ,
`col_float_undef_unsigned` float unsigned ,
`col_float_key_signed` float  ,
`col_float_key_unsigned` float unsigned ,
`col_double_undef_signed` double  ,
`col_double_undef_unsigned` double unsigned ,
`col_double_key_signed` double  ,
`col_double_key_unsigned` double unsigned ,
`col_decimal(40, 20)_undef_signed` decimal(40, 20)  ,
`col_decimal(40, 20)_undef_unsigned` decimal(40, 20) unsigned ,
`col_decimal(40, 20)_key_signed` decimal(40, 20)  ,
`col_decimal(40, 20)_key_unsigned` decimal(40, 20) unsigned ,
`col_char(20)_undef_signed` char(20)  ,
`col_char(20)_key_signed` char(20)  ,
`col_varchar(20)_undef_signed` varchar(20)  ,
`col_varchar(20)_key_signed` varchar(20)  ,
key (`col_bigint_key_signed`),
key (`col_bigint_key_unsigned`),
key (`col_float_key_signed`),
key (`col_float_key_unsigned`),
key (`col_double_key_signed`),
key (`col_double_key_unsigned`),
key (`col_decimal(40, 20)_key_signed`),
key (`col_decimal(40, 20)_key_unsigned`),
key (`col_char(20)_key_signed`),
key (`col_varchar(20)_key_signed`)
) character set utf8 ;
insert into table_7_utf8_undef values (0,-9.183,1,1.1384,2,15.1271,12.991,-2,0.0001,36.1270,79.1819,0.0001,0.0001,3.1387,52.0818,-0,0.0001,'1','3	','0','0'),(1,79,12.991,107,2,-0.0001,0,1.009,1.009,34,1,-1,69.0208,1,2,120,12.991,'3	','-1',"if",'b'),(2,-2,1,-9.183,1,12.991,0.0001,53,12.991,1.009,12.991,12.991,0.0001,-0.0001,12.991,0.0001,2,'3
','p','0','3	'),(3,-0.0001,12.991,1.009,1.009,-9.183,2,0,1,-2,1,2,1,2,1.009,2,12.991,'3
','0','k','0'),(4,1.009,0.0001,-1,12.991,2,47,2,0,12.991,12.991,1.009,0,1.009,1.009,-0.0001,6949,'-1','	3','1','m'),(5,-0,1,0,0,0.0001,28.1237,12.991,0,12.991,12.991,-0,12.991,2,2,2,1.009,'0','	3','0','	3'),(6,45.0855,1,38.1166,1,1.009,80.0284,2,122,0.0001,0,-1,11130,0,1,1,0,"know",'-0','
3','3
');


query1:


WITH `MYWITH` AS ((SELECT (~`f4`) AS `f1`,(DATE_ADD(`f4`, INTERVAL 1 WEEK)) AS `f2`,(`f5`) AS `f3` FROM (SELECT `col_double_key_signed` AS `f4`,`col_bigint_undef_signed` AS `f5`,`col_char(20)_key_signed` AS `f6` FROM `table_3_utf8_undef` USE INDEX (`col_decimal(40, 20)_key_unsigned`, `col_double_key_unsigned`)) AS `t1` 
WHERE ((BINARY `f6`*BINARY SUBSTRING(`f6`, 9)<<`f6`) OR (ROW(DAYOFMONTH(_UTF8MB4'2009-04-27'),DATE_SUB(LEFT(`f5`, 5), INTERVAL 1 DAY_MICROSECOND)) NOT IN (SELECT `col_decimal(40, 20)_key_unsigned`,`col_char(20)_undef_signed` FROM `table_3_utf8_undef` USE INDEX (`col_bigint_key_signed`))) OR ((`f6`)=(`f4`))) IS FALSE 

HAVING (NOT ((BINARY 4295699608480015836-`f2`)<=(CHARSET(`f3`)%`f1`))) OR (NOT (CAST((`f2`) AS CHAR) NOT LIKE _UTF8MB4'%0%')) AND (CAST((`f2`) AS CHAR) NOT LIKE _UTF8MB4'%0%'))
 UNION ALL (SELECT (CHARSET(1)) AS `f1`,(LOG10(0.9669438884457238)) AS `f2`,(UNIX_TIMESTAMP(_UTF8MB4'2009-08-27 08:40:08')) AS `f3` FROM (SELECT `col_bigint_key_signed` AS `f7`,`col_float_undef_unsigned` AS `f10`,`col_varchar(20)_undef_signed` AS `f9` FROM `table_7_utf8_undef`) AS `t2` JOIN (SELECT `col_double_undef_unsigned` AS `f11`,`col_decimal(40, 20)_undef_unsigned` AS `f8`,`col_char(20)_key_signed` AS `f12` FROM `table_3_utf8_undef` USE INDEX (`col_decimal(40, 20)_key_unsigned`)) AS `t3`)) SELECT * FROM `MYWITH`;

query2:


WITH `MYWITH` AS ((SELECT (~`f4`) AS `f1`,(DATE_ADD(`f4`, INTERVAL 1 WEEK)) AS `f2`,(`f5`) AS `f3` FROM (SELECT `col_double_key_signed` AS `f4`,`col_bigint_undef_signed` AS `f5`,`col_char(20)_key_signed` AS `f6` FROM `table_3_utf8_undef` USE INDEX (`col_decimal(40, 20)_key_unsigned`, `col_double_key_unsigned`)) AS `t1` 
WHERE 0 
HAVING (NOT ((BINARY 4295699608480015836-`f2`)<=(CHARSET(`f3`)%`f1`))) OR (NOT (CAST((`f2`) AS CHAR) NOT LIKE _UTF8MB4'%0%')) AND (CAST((`f2`) AS CHAR) NOT LIKE _UTF8MB4'%0%')) 
UNION ALL (SELECT (CHARSET(1)) AS `f1`,(LOG10(0.9669438884457238)) AS `f2`,(UNIX_TIMESTAMP(_UTF8MB4'2009-08-27 08:40:08')) AS `f3` FROM (SELECT `col_bigint_key_signed` AS `f7`,`col_float_undef_unsigned` AS `f10`,`col_varchar(20)_undef_signed` AS `f9` FROM `table_7_utf8_undef`) AS `t2` JOIN (SELECT `col_double_undef_unsigned` AS `f11`,`col_decimal(40, 20)_undef_unsigned` AS `f8`,`col_char(20)_key_signed` AS `f12` FROM `table_3_utf8_undef` USE INDEX (`col_decimal(40, 20)_key_unsigned`)) AS `t3`)) SELECT * FROM `MYWITH`;

Expected behavior In theory, the result of sql2 ⊆ the result of sql1. Because the condition "where 0 " in sql2 is always false, but the condition in sql1 may not be false. Actual Behavior However, the value 7532969986 changed to 1251333608 after changing the condition, seems like a logical bug:

result1:

+--------+-----------------------+------------+
| f1     | f2                    | f3         |
+--------+-----------------------+------------+
| binary | -0.014598727204942215 | 7532969986 |
| binary | -0.014598727204942215 | 7532969986 |
| binary | -0.014598727204942215 | 7532969986 |
| binary | -0.014598727204942215 | 7532969986 |
| binary | -0.014598727204942215 | 7532969986 |
| binary | -0.014598727204942215 | 7532969986 |
| binary | -0.014598727204942215 | 7532969986 |
| binary | -0.014598727204942215 | 7532969986 |
| binary | -0.014598727204942215 | 7532969986 |
| binary | -0.014598727204942215 | 7532969986 |
| binary | -0.014598727204942215 | 7532969986 |
| binary | -0.014598727204942215 | 7532969986 |
| binary | -0.014598727204942215 | 7532969986 |
| binary | -0.014598727204942215 | 7532969986 |
| binary | -0.014598727204942215 | 7532969986 |
| binary | -0.014598727204942215 | 7532969986 |
| binary | -0.014598727204942215 | 7532969986 |
| binary | -0.014598727204942215 | 7532969986 |
| binary | -0.014598727204942215 | 7532969986 |
| binary | -0.014598727204942215 | 7532969986 |
| binary | -0.014598727204942215 | 7532969986 |
+--------+-----------------------+------------+
21 rows in set, 2 warnings (0.019 sec)

result2:

+--------+-----------------------+------------+
| f1     | f2                    | f3         |
+--------+-----------------------+------------+
| binary | -0.014598727204942215 | 1251333608 |
| binary | -0.014598727204942215 | 1251333608 |
| binary | -0.014598727204942215 | 1251333608 |
| binary | -0.014598727204942215 | 1251333608 |
| binary | -0.014598727204942215 | 1251333608 |
| binary | -0.014598727204942215 | 1251333608 |
| binary | -0.014598727204942215 | 1251333608 |
| binary | -0.014598727204942215 | 1251333608 |
| binary | -0.014598727204942215 | 1251333608 |
| binary | -0.014598727204942215 | 1251333608 |
| binary | -0.014598727204942215 | 1251333608 |
| binary | -0.014598727204942215 | 1251333608 |
| binary | -0.014598727204942215 | 1251333608 |
| binary | -0.014598727204942215 | 1251333608 |
| binary | -0.014598727204942215 | 1251333608 |
| binary | -0.014598727204942215 | 1251333608 |
| binary | -0.014598727204942215 | 1251333608 |
| binary | -0.014598727204942215 | 1251333608 |
| binary | -0.014598727204942215 | 1251333608 |
| binary | -0.014598727204942215 | 1251333608 |
| binary | -0.014598727204942215 | 1251333608 |
+--------+-----------------------+------------+
21 rows in set (0.003 sec)


ammmkilo avatar Dec 15 '23 08:12 ammmkilo

@akaError please take a look

caifeizhi avatar Dec 15 '23 08:12 caifeizhi

Could you please confirm whether the simplification process we applied has been helpful for developers in diagnosing and addressing this issue?

query1:

(SELECT (`f5`) AS `f3` FROM (SELECT `col_double_key_signed` AS `f4`,`col_bigint_undef_signed` AS `f5` FROM `table_3_utf8_undef` ) AS `t1` 
WHERE ( ((`f4`))) IS FALSE )
 UNION ALL (SELECT (UNIX_TIMESTAMP(_UTF8MB4'2009-08-27 08:40:08')) AS `f3` FROM  `table_7_utf8_undef` );

result1:

+------------+
| f3         |
+------------+
|         -2 |
| 7532969986 |
| 7532969986 |
| 7532969986 |
| 7532969986 |
| 7532969986 |
| 7532969986 |
| 7532969986 |
+------------+
8 rows in set (0.001 sec)



(SELECT (`f5`) AS `f3` FROM (SELECT `col_double_key_signed` AS `f4`,`col_bigint_undef_signed` AS `f5` FROM `table_3_utf8_undef` ) AS `t1` 
WHERE 0 ) 
UNION ALL (SELECT (UNIX_TIMESTAMP(_UTF8MB4'2009-08-27 08:40:08')) AS `f3` FROM `table_7_utf8_undef` );

result2:

+------------+
| f3         |
+------------+
| 1251333608 |
| 1251333608 |
| 1251333608 |
| 1251333608 |
| 1251333608 |
| 1251333608 |
| 1251333608 |
+------------+
7 rows in set (0.000 sec)

akuluasan avatar Dec 15 '23 08:12 akuluasan

Actually, positive zero and negative zero have distinct differences. Here is a MySQL bug report for reference: https://bugs.mysql.com/bug.php?id=9037. It mainly discusses how negative zero is not considered a bug. In the context of floating-point numbers, -0 represents a very small negative number, such as -0.00000000000000000000001.

For OceanBase, the issue primarily lies in the incorrect handling of data types. Currently all float type is converted to double when calculated, here is a simplified case help to understand. Take a took about the 'cast' logic.

drop table t1;
create table t1(`col_float_undef_signed` float);
insert into t1 values (-1),(1),(2);
select distinct (0*col_float_undef_signed) from t1;
+----------------------------+
| (0*col_float_undef_signed) |
+----------------------------+
|                         -0 |
|                          0 |
|                          0 |
+----------------------------+
3 rows in set (0.002 sec)
+----------------------------+
| (0*col_float_undef_signed) |
+----------------------------+
|                         -0 |
+----------------------------+
1 row in set (0.002 sec)

OceanBase(admin@test)>explain select distinct (0*col_float_undef_signed) from t1;
+-------------------------------------------------------------------------------------------------------------------+
| Query Plan                                                                                                        |
+-------------------------------------------------------------------------------------------------------------------+
| =================================================                                                                 |
| |ID|OPERATOR         |NAME|EST.ROWS|EST.TIME(us)|                                                                 |
| -------------------------------------------------                                                                 |
| |0 |HASH DISTINCT    |    |3       |5           |                                                                 |
| |1 |└─TABLE FULL SCAN|t1  |3       |4           |                                                                 |
| =================================================                                                                 |
| Outputs & filters:                                                                                                |
| -------------------------------------                                                                             |
|   0 - output([cast(0, DOUBLE(-1, -1)) * cast(t1.col_float_undef_signed, DOUBLE(-1, -1))]), filter(nil), rowset=16 |
|       distinct([cast(0, DOUBLE(-1, -1)) * cast(t1.col_float_undef_signed, DOUBLE(-1, -1))])                       |
|   1 - output([t1.col_float_undef_signed]), filter(nil), rowset=16                                                 |
|       access([t1.col_float_undef_signed]), partitions(p0)                                                         |
|       is_index_back=false, is_global_index=false,                                                                 |
|       range_key([t1.__pk_increment]), range(MIN ; MAX)always true                                                 |
+-------------------------------------------------------------------------------------------------------------------+
14 rows in set (0.004 sec)

OceanBase(admin@test)>select hex(binary(0*col_float_undef_signed)) from t1;
+---------------------------------------+
| hex(binary(0*col_float_undef_signed)) |
+---------------------------------------+
| 2D30                                  |
| 30                                    |
| 30                                    |
+---------------------------------------+
3 rows in set (0.002 sec)
OceanBase(admin@test)>explain select distinct (binary(0*col_float_undef_signed)) from t1;
+-------------------------------------------------------------------------------------------------------------------------------------------+
| Query Plan                                                                                                                                |
+-------------------------------------------------------------------------------------------------------------------------------------------+
| =================================================                                                                                         |
| |ID|OPERATOR         |NAME|EST.ROWS|EST.TIME(us)|                                                                                         |
| -------------------------------------------------                                                                                         |
| |0 |HASH DISTINCT    |    |3       |5           |                                                                                         |
| |1 |└─TABLE FULL SCAN|t1  |3       |4           |                                                                                         |
| =================================================                                                                                         |
| Outputs & filters:                                                                                                                        |
| -------------------------------------                                                                                                     |
|   0 - output([cast(cast(0, DOUBLE(-1, -1)) * cast(t1.col_float_undef_signed, DOUBLE(-1, -1)), VARCHAR(1048576))]), filter(nil), rowset=16 |
|       distinct([cast(cast(0, DOUBLE(-1, -1)) * cast(t1.col_float_undef_signed, DOUBLE(-1, -1)), VARCHAR(1048576))])                       |
|   1 - output([t1.col_float_undef_signed]), filter(nil), rowset=16                                                                         |
|       access([t1.col_float_undef_signed]), partitions(p0)                                                                                 |
|       is_index_back=false, is_global_index=false,                                                                                         |
|       range_key([t1.__pk_increment]), range(MIN ; MAX)always true                                                                         |
+-------------------------------------------------------------------------------------------------------------------------------------------+
14 rows in set (0.003 sec)
OceanBase(admin@test)>select distinct (binary(0*col_float_undef_signed)) from t1;
+------------------------------------+
| (binary(0*col_float_undef_signed)) |
+------------------------------------+
| -0                                 |
| 0                                  |
+------------------------------------+
2 rows in set (0.002 sec)

@akuluasan @ammmkilo by the way, could I know which tools is used to detect this bug? we may collaborate to test for OceanBase.

akaError avatar Dec 15 '23 09:12 akaError