dble icon indicating copy to clipboard operation
dble copied to clipboard

某种collation场景下,因为数据的大小写问题,分片表在对非分片键自动做Group by后结果集不正确

Open zhangjig opened this issue 4 years ago • 1 comments

  • **dble version:master
  • preconditions :

mysql-8.0.23-winx64

my.ini配置如下:

[mysql] #设置mysql客户端默认字符集 #default-character-set = utf8mb4 [mysqld] #设置3306端口 port = 3306 #设置mysql的安装目录 basedir=D:\soft\mysql-8.0.23-winx64 #设置mysql数据库的数据的存放目录 datadir=D:\soft\mysql-8.0.23-winx64\data #允许最大连接数 max_connections=200 #服务端使用的字符集默认为UTF8 #character-set-server = utf8mb4 #collation-server = utf8mb4_unicode_ci #init_connect='SET NAMES utf8mb4' #创建新表时将使用的默认存储引擎 default-storage-engine=INNODB #等待超时时间 wait_timeout=172800 #交互式连接超时时间 interactive-timeout=172800 #日志 log-error=D:\soft\mysql-8.0.23-winx64\logs\error.log [client] #default-character-set = utf8mb4

mysql> show variables like 'character%'; +--------------------------+---------------------------------------------+ | Variable_name | Value | +--------------------------+---------------------------------------------+ | character_set_client | utf8mb4 | | character_set_connection | utf8mb4 | | character_set_database | utf8mb4 | | character_set_filesystem | binary | | character_set_results | utf8mb4 | | character_set_server | utf8mb4 | | character_set_system | utf8 | | character_sets_dir | D:\soft\mysql-8.0.23-winx64\share\charsets\ | +--------------------------+---------------------------------------------+ 8 rows in set, 1 warning (0.00 sec)

  • configs:

cluster.cnf

clusterEnable=false
clusterMode=ucore
clusterIP=127.0.0.1
clusterPort=25700
rootPath=universe/dble
clusterId=cluster-1
needSyncHa=true
showBinlogStatusTimeout=60000
sequenceHandlerType=2
# valid for sequenceHandlerType=2 or 3
#sequenceStartTime=2010-11-04 09:42:54
# valid for sequenceHandlerType=3 and clusterMode is zk, default true
sequenceInstanceByZk=false

bootstrap.cnf

#encoding=UTF-8
-agentlib:jdwp=transport=dt_socket,server=y,address=8088,suspend=n
-server
-XX:+AggressiveOpts
-Dfile.encoding=UTF-8
-Dcom.sun.management.jmxremote
-Dcom.sun.management.jmxremote.port=1984
-Dcom.sun.management.jmxremote.authenticate=false
-Dcom.sun.management.jmxremote.ssl=false
-Dcom.sun.management.jmxremote.host=127.0.0.1
-Xmx4G
-Xms1G
-Xss256k
-XX:MaxDirectMemorySize=1G
-XX:MetaspaceSize=100M
# GC Log
-XX:+PrintHeapAtGC
-XX:+PrintGCDateStamps
-Xloggc:./logs/gc_%WRAPPER_TIME_YYYYMMDDHHIISS%_%p.log
-XX:+PrintGCTimeStamps
-XX:+PrintGCDetails
-XX:+PrintTenuringDistribution
# CMS
-XX:+UseConcMarkSweepGC
-XX:+UseParNewGC
-XX:+CMSParallelRemarkEnabled
-XX:+UseCMSCompactAtFullCollection
-XX:CMSFullGCsBeforeCompaction=0
-XX:+CMSClassUnloadingEnabled
-XX:LargePageSizeInBytes=128M
-XX:+UseFastAccessorMethods
-XX:+UseCMSInitiatingOccupancyOnly
-XX:CMSInitiatingOccupancyFraction=70
#  base config
-DhomePath=.
-DinstanceName=1
# valid for sequenceHandlerType=2 or 3
-DinstanceId=1
-DserverId=xxx1
#-DbindIp=0.0.0.0
#-DserverPort=8066
#-DmanagerPort=9066
#-DmaxCon=1024
#-Dprocessors=4
#-DbackendProcessors=12
#-DprocessorExecutor=4
#-DbackendProcessorExecutor=12
#-DcomplexExecutor=8
#-DwriteToBackendExecutor=4


-DfakeMySQLVersion=5.7.11
#-DtraceEndPoint=http://10.186.60.96:14268/api/traces

# serverBacklog size,default 2048
-DserverBacklog=2048

#-DusePerformanceMode=0
# if need out HA
-DuseOuterHa=true

# connection
#-Dcharset=utf8mb4
-DmaxPacketSize=167772160
-DtxIsolation=2
#-Dautocommit=1
#-DidleTimeout=60000

# option
#-DuseCompression=1
#-DcapClientFoundRows=false
-DusingAIO=0

-DuseThreadUsageStat=1
#  query time cost statistics
#-DuseCostTimeStat=0
#-DmaxCostStatSize=100
#-DcostSamplePercent=1


# consistency
#  check the consistency of table structure between nodes,default not
-DcheckTableConsistency=0
#  check period, he default period is 60000 milliseconds
-DcheckTableConsistencyPeriod=60000

#  processor check conn
-DprocessorCheckPeriod=1000
-DsqlExecuteTimeout=3000


#-DbackSocket unit:bytes
#-DbackSocketSoRcvbuf=4194304
#-DbackSocketSoSndbuf=1048576
#-DbackSocketNoDelay=1

#  frontSocket
#-DfrontSocketSoRcvbuf=1048576
#-DfrontSocketSoSndbuf=4194304
#-DfrontSocketNoDelay=1


#  query memory used for per session,unit is M
-DotherMemSize=4
-DorderMemSize=4
-DjoinMemSize=4


#  off Heap unit:bytes
-DbufferPoolChunkSize=32767
-DbufferPoolPageNumber=512
-DbufferPoolPageSize=2097152
#-DmappedFileSize=2097152


#  sql statistics
#  1 means use SQL statistics, 0 means not
-DuseSqlStat=1
#-DbufferUsagePercent=80
-DclearBigSQLResultSetMapMs=600000
#-DsqlRecordCount=10
#-DmaxResultSet=524288


#  transaction log
#  1 enable record the transaction log, 0 disable ,the unit of transactionRotateSize is M
-DrecordTxn=0
#-DtransactionLogBaseDir=/txlogs
#-DtransactionLogBaseName=server-tx
#-DtransactionRotateSize=16
#  XA transaction
#  use XA transaction ,if the mysql service crash,the unfinished XA commit/rollback will retry for several times , it is the check period for ,default is 1000 milliseconds
-DxaSessionCheckPeriod=1000
#  use XA transaction ,the finished XA log will removed. the default period is 1000 milliseconds
-DxaLogCleanPeriod=1000
#  XA Recovery Log path
# -DxaRecoveryLogBaseDir=/xalogs/
#  XA Recovery Log name
#-DxaRecoveryLogBaseName=xalog
#  XA Retry count, retry times in backend, 0 means always retry until success
#-DxaRetryCount=0

#-DviewPersistenceConfBaseDir=/viewPath
#-DviewPersistenceConfBaseName=viewJson

#  for join tmp results
#-DmergeQueueSize=1024
#-DorderByQueueSize=1024
#-DjoinQueueSize=1024


#  true is use JoinStrategy, default false
#-DuseJoinStrategy=true
-DnestLoopConnSize=4
-DnestLoopRowsSize=2000


#  if enable the slow query log
-DenableSlowLog=1
#  the slow query log location
#-DslowLogBaseDir=./slowlogs
#-DslowLogBaseName=slow-query
#  the max period for flushing the slow query log from memory to disk  after last time , unit is second
-DflushSlowLogPeriod=1
#  the max records for flushing the slow query log from memory to disk after last time
-DflushSlowLogSize=1000
#  the threshold for judging if the query is slow , unit is millisecond
-DsqlSlowTime=100

#  used for load data,maxCharsPerColumn means max chars length for per column when load data
#-DmaxCharsPerColumn=65535
#  used for load data, because dble need save to disk if loading file contains large size
#-DmaxRowSizeToFile=10000

#-DenableFlowControl=false
#-DflowControlHighLevel=4194304
#-DflowControlLowLevel=262144

db.xml


<dbGroup name="localhost1" rwSplitMode="0">
        <heartbeat>select user()</heartbeat>
        <dbInstance name="hostM1" url="localhost:3306" user="root" password="root" maxCon="500" minCon="10"  primary="true">
        </dbInstance>
    </dbGroup>

user.xml

<managerUser name="man1" password="654321" whiteIPs="127.0.0.1,0:0:0:0:0:0:0:1" readOnly="false"/>
    <shardingUser name="test" password="test" schemas="dbtest">
    </shardingUser>

sharding.xml

<schema name="dbtest">
        <shardingTable name="test" shardingNode="dn1,dn2" function="func0" shardingColumn="id"/>
    </schema>

    <shardingNode name="dn1" dbGroup="localhost1" database="db1"/>
    <shardingNode name="dn2" dbGroup="localhost1" database="db2"/>

    <!-- 路由函数定义 -->
    <function name="func0" class="com.actiontech.dble.route.function.PartitionByLong">
        <property name="partitionCount">2</property>
        <property name="partitionLength">1</property>
    </function>

  • steps:
    step1.
    create table test (id int, ctr_num varchar(50) );

insert into test values(1,'111A'); insert into test values(2,'111a');

select ctr_num,count(1) from test group by ctr_num having count(1) >1 ;

  • expect result:
    1. +---------+----------+ | ctr_num | count(1) | +---------+----------+ | 111A | 1 | | 111a | 1 | +---------+----------+
  • real result:
    1. +---------+----------+ | ctr_num | count(1) | +---------+----------+ | 111a | 2 | +---------+----------+
  • supplements:
    1.

zhangjig avatar Dec 12 '21 23:12 zhangjig

你好,针对你的建表语句,默认的数据是不区分大小写的,详情可以看 https://cnodejs.org/topic/5cbf2d0037faec0ce1d05663 image

针对你的场景,是正常现象,不是 bug。

理论上,建表的时候设置COLLATE为 XXX_cs或者 XXX_bin ,即可实现大小写敏感,此时可以做到你期望的结果。不过由于 COLLATE的规则实在太多,dble 并没有做相关的支持。故目前还不能实现这个你期望的结果。

dcy10000 avatar Dec 13 '21 10:12 dcy10000