某种collation场景下,因为数据的大小写问题,分片表在对非分片键自动做Group by后结果集不正确
- **dble versionï¼master
- preconditions ï¼
mysql-8.0.23-winx64
my.inié ç½®å¦ä¸ï¼
[mysql] #设置mysql客æ·ç«¯é»è®¤å符é #default-character-set = utf8mb4 [mysqld] #设置3306ç«¯å£ port = 3306 #设置mysqlçå®è£ ç®å½ basedir=D:\soft\mysql-8.0.23-winx64 #设置mysqlæ°æ®åºçæ°æ®çåæ¾ç®å½ datadir=D:\soft\mysql-8.0.23-winx64\data #å 许æå¤§è¿æ¥æ° max_connections=200 #æå¡ç«¯ä½¿ç¨çå符éé»è®¤ä¸ºUTF8 #character-set-server = utf8mb4 #collation-server = utf8mb4_unicode_ci #init_connect='SET NAMES utf8mb4' #å建æ°è¡¨æ¶å°ä½¿ç¨çé»è®¤åå¨å¼æ default-storage-engine=INNODB #çå¾ è¶ æ¶æ¶é´ wait_timeout=172800 #交äºå¼è¿æ¥è¶ æ¶æ¶é´ interactive-timeout=172800 #æ¥å¿ log-error=D:\soft\mysql-8.0.23-winx64\logs\error.log [client] #default-character-set = utf8mb4
mysql> show variables like 'character%'; +--------------------------+---------------------------------------------+ | Variable_name | Value | +--------------------------+---------------------------------------------+ | character_set_client | utf8mb4 | | character_set_connection | utf8mb4 | | character_set_database | utf8mb4 | | character_set_filesystem | binary | | character_set_results | utf8mb4 | | character_set_server | utf8mb4 | | character_set_system | utf8 | | character_sets_dir | D:\soft\mysql-8.0.23-winx64\share\charsets\ | +--------------------------+---------------------------------------------+ 8 rows in set, 1 warning (0.00 sec)
- configsï¼
cluster.cnf
clusterEnable=false
clusterMode=ucore
clusterIP=127.0.0.1
clusterPort=25700
rootPath=universe/dble
clusterId=cluster-1
needSyncHa=true
showBinlogStatusTimeout=60000
sequenceHandlerType=2
# valid for sequenceHandlerType=2 or 3
#sequenceStartTime=2010-11-04 09:42:54
# valid for sequenceHandlerType=3 and clusterMode is zk, default true
sequenceInstanceByZk=false
bootstrap.cnf
#encoding=UTF-8
-agentlib:jdwp=transport=dt_socket,server=y,address=8088,suspend=n
-server
-XX:+AggressiveOpts
-Dfile.encoding=UTF-8
-Dcom.sun.management.jmxremote
-Dcom.sun.management.jmxremote.port=1984
-Dcom.sun.management.jmxremote.authenticate=false
-Dcom.sun.management.jmxremote.ssl=false
-Dcom.sun.management.jmxremote.host=127.0.0.1
-Xmx4G
-Xms1G
-Xss256k
-XX:MaxDirectMemorySize=1G
-XX:MetaspaceSize=100M
# GC Log
-XX:+PrintHeapAtGC
-XX:+PrintGCDateStamps
-Xloggc:./logs/gc_%WRAPPER_TIME_YYYYMMDDHHIISS%_%p.log
-XX:+PrintGCTimeStamps
-XX:+PrintGCDetails
-XX:+PrintTenuringDistribution
# CMS
-XX:+UseConcMarkSweepGC
-XX:+UseParNewGC
-XX:+CMSParallelRemarkEnabled
-XX:+UseCMSCompactAtFullCollection
-XX:CMSFullGCsBeforeCompaction=0
-XX:+CMSClassUnloadingEnabled
-XX:LargePageSizeInBytes=128M
-XX:+UseFastAccessorMethods
-XX:+UseCMSInitiatingOccupancyOnly
-XX:CMSInitiatingOccupancyFraction=70
# base config
-DhomePath=.
-DinstanceName=1
# valid for sequenceHandlerType=2 or 3
-DinstanceId=1
-DserverId=xxx1
#-DbindIp=0.0.0.0
#-DserverPort=8066
#-DmanagerPort=9066
#-DmaxCon=1024
#-Dprocessors=4
#-DbackendProcessors=12
#-DprocessorExecutor=4
#-DbackendProcessorExecutor=12
#-DcomplexExecutor=8
#-DwriteToBackendExecutor=4
-DfakeMySQLVersion=5.7.11
#-DtraceEndPoint=http://10.186.60.96:14268/api/traces
# serverBacklog size,default 2048
-DserverBacklog=2048
#-DusePerformanceMode=0
# if need out HA
-DuseOuterHa=true
# connection
#-Dcharset=utf8mb4
-DmaxPacketSize=167772160
-DtxIsolation=2
#-Dautocommit=1
#-DidleTimeout=60000
# option
#-DuseCompression=1
#-DcapClientFoundRows=false
-DusingAIO=0
-DuseThreadUsageStat=1
# query time cost statistics
#-DuseCostTimeStat=0
#-DmaxCostStatSize=100
#-DcostSamplePercent=1
# consistency
# check the consistency of table structure between nodes,default not
-DcheckTableConsistency=0
# check period, he default period is 60000 milliseconds
-DcheckTableConsistencyPeriod=60000
# processor check conn
-DprocessorCheckPeriod=1000
-DsqlExecuteTimeout=3000
#-DbackSocket unit:bytes
#-DbackSocketSoRcvbuf=4194304
#-DbackSocketSoSndbuf=1048576
#-DbackSocketNoDelay=1
# frontSocket
#-DfrontSocketSoRcvbuf=1048576
#-DfrontSocketSoSndbuf=4194304
#-DfrontSocketNoDelay=1
# query memory used for per session,unit is M
-DotherMemSize=4
-DorderMemSize=4
-DjoinMemSize=4
# off Heap unit:bytes
-DbufferPoolChunkSize=32767
-DbufferPoolPageNumber=512
-DbufferPoolPageSize=2097152
#-DmappedFileSize=2097152
# sql statistics
# 1 means use SQL statistics, 0 means not
-DuseSqlStat=1
#-DbufferUsagePercent=80
-DclearBigSQLResultSetMapMs=600000
#-DsqlRecordCount=10
#-DmaxResultSet=524288
# transaction log
# 1 enable record the transaction log, 0 disable ,the unit of transactionRotateSize is M
-DrecordTxn=0
#-DtransactionLogBaseDir=/txlogs
#-DtransactionLogBaseName=server-tx
#-DtransactionRotateSize=16
# XA transaction
# use XA transaction ,if the mysql service crash,the unfinished XA commit/rollback will retry for several times , it is the check period for ,default is 1000 milliseconds
-DxaSessionCheckPeriod=1000
# use XA transaction ,the finished XA log will removed. the default period is 1000 milliseconds
-DxaLogCleanPeriod=1000
# XA Recovery Log path
# -DxaRecoveryLogBaseDir=/xalogs/
# XA Recovery Log name
#-DxaRecoveryLogBaseName=xalog
# XA Retry count, retry times in backend, 0 means always retry until success
#-DxaRetryCount=0
#-DviewPersistenceConfBaseDir=/viewPath
#-DviewPersistenceConfBaseName=viewJson
# for join tmp results
#-DmergeQueueSize=1024
#-DorderByQueueSize=1024
#-DjoinQueueSize=1024
# true is use JoinStrategy, default false
#-DuseJoinStrategy=true
-DnestLoopConnSize=4
-DnestLoopRowsSize=2000
# if enable the slow query log
-DenableSlowLog=1
# the slow query log location
#-DslowLogBaseDir=./slowlogs
#-DslowLogBaseName=slow-query
# the max period for flushing the slow query log from memory to disk after last time , unit is second
-DflushSlowLogPeriod=1
# the max records for flushing the slow query log from memory to disk after last time
-DflushSlowLogSize=1000
# the threshold for judging if the query is slow , unit is millisecond
-DsqlSlowTime=100
# used for load data,maxCharsPerColumn means max chars length for per column when load data
#-DmaxCharsPerColumn=65535
# used for load data, because dble need save to disk if loading file contains large size
#-DmaxRowSizeToFile=10000
#-DenableFlowControl=false
#-DflowControlHighLevel=4194304
#-DflowControlLowLevel=262144
db.xml
<dbGroup name="localhost1" rwSplitMode="0">
<heartbeat>select user()</heartbeat>
<dbInstance name="hostM1" url="localhost:3306" user="root" password="root" maxCon="500" minCon="10" primary="true">
</dbInstance>
</dbGroup>
user.xml
<managerUser name="man1" password="654321" whiteIPs="127.0.0.1,0:0:0:0:0:0:0:1" readOnly="false"/>
<shardingUser name="test" password="test" schemas="dbtest">
</shardingUser>
sharding.xml
<schema name="dbtest">
<shardingTable name="test" shardingNode="dn1,dn2" function="func0" shardingColumn="id"/>
</schema>
<shardingNode name="dn1" dbGroup="localhost1" database="db1"/>
<shardingNode name="dn2" dbGroup="localhost1" database="db2"/>
<!-- è·¯ç±å½æ°å®ä¹ -->
<function name="func0" class="com.actiontech.dble.route.function.PartitionByLong">
<property name="partitionCount">2</property>
<property name="partitionLength">1</property>
</function>
-
stepsï¼
step1.
create table test (id int, ctr_num varchar(50) );
insert into test values(1,'111A'); insert into test values(2,'111a');
select ctr_num,count(1) from test group by ctr_num having count(1) >1 ;
-
expect resultï¼
1. +---------+----------+ | ctr_num | count(1) | +---------+----------+ | 111A | 1 | | 111a | 1 | +---------+----------+ -
real resultï¼
1. +---------+----------+ | ctr_num | count(1) | +---------+----------+ | 111a | 2 | +---------+----------+ -
supplementsï¼
1.
你好,针对你的建表语句,默认的数据是不区分大小写的,详情可以看
https://cnodejs.org/topic/5cbf2d0037faec0ce1d05663

针对你的场景,是正常现象,不是 bug。
理论上,建表的时候设置COLLATE为 XXX_cs或者 XXX_bin ,即可实现大小写敏感,此时可以做到你期望的结果。不过由于 COLLATE的规则实在太多,dble 并没有做相关的支持。故目前还不能实现这个你期望的结果。