pd
pd copied to clipboard
Scheduler: `balance-leader-scheduler` always picks the same region as the scheduling region
Enhancement Task
The balance-leader-scheduler should pick the scheduling region randomly, but in our POC we found that the scheduler always picks the same region (1289).
version: v7.5.2
related code : https://github.com/tikv/pd/blob/78f4254e3f5adb48e3e1e2489065f5ccf6cf1815/pkg/schedule/schedulers/balance_leader.go#L451-L452
unit test for the RandLeaderRegions
// TestRandLeaderRegions populates a mock cluster with regions 1000..1999
// (each created with store 1 as the second NewTestRegionInfo argument) and
// samples RandLeaderRegions for store 1 twice over the unbounded key range.
// It asserts that each sample has 10 entries and that the first entry of
// the two samples differs.
//
// NOTE(review): the final assertion depends on random sampling, so an
// unlucky draw that returns the same first region twice would fail this
// test even if the sampling is correct — confirm the acceptable flake rate.
func TestRandLeaderRegions(t *testing.T) {
	re := require.New(t)
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	cluster := mockcluster.NewCluster(ctx, mockconfig.NewTestOptions())
	for i := uint64(1000); i < 2000; i++ {
		// Region i covers the key span [decimal(i), decimal(i+1)).
		startKey := []byte(strconv.FormatUint(i, 10))
		endKey := []byte(strconv.FormatUint(i+1, 10))
		cluster.PutRegion(core.NewTestRegionInfo(i, 1, startKey, endKey))
	}

	// An empty start and end key is used as the range for both samples.
	ranges := []core.KeyRange{core.NewKeyRange("", "")}

	rs := cluster.RandLeaderRegions(1, ranges)
	re.Len(rs, 10)

	rs1 := cluster.RandLeaderRegions(1, ranges)
	// Check the second sample itself; the original re-checked rs here, which
	// left rs1 unvalidated before it is indexed below.
	re.Len(rs1, 10)

	re.NotEqual(rs[0], rs1[0])
}
conf:
{
"schedule": {
"max-snapshot-count": 64,
"max-pending-peer-count": 64,
"max-merge-region-size": 20,
"max-merge-region-keys": 0,
"split-merge-interval": "1h0m0s",
"switch-witness-interval": "1h0m0s",
"enable-one-way-merge": "false",
"enable-cross-table-merge": "true",
"patrol-region-interval": "100ms",
"max-store-down-time": "30m0s",
"max-store-preparing-time": "48h0m0s",
"leader-schedule-limit": 4,
"leader-schedule-policy": "count",
"region-schedule-limit": 2048,
"witness-schedule-limit": 4,
"replica-schedule-limit": 1024,
"merge-schedule-limit": 16,
"hot-region-schedule-limit": 4,
"hot-region-cache-hits-threshold": 1,
"store-limit": {
"37440995": {
"add-peer": 64,
"remove-peer": 64
},
"37441002": {
"add-peer": 64,
"remove-peer": 64
},
"37441128": {
"add-peer": 64,
"remove-peer": 64
},
"37441138": {
"add-peer": 64,
"remove-peer": 64
},
"37441201": {
"add-peer": 64,
"remove-peer": 64
},
"37441426": {
"add-peer": 64,
"remove-peer": 64
},
"434387": {
"add-peer": 64,
"remove-peer": 64
},
"434554": {
"add-peer": 64,
"remove-peer": 64
},
"434631": {
"add-peer": 64,
"remove-peer": 64
},
"434937": {
"add-peer": 64,
"remove-peer": 64
},
"780154": {
"add-peer": 64,
"remove-peer": 64
}
},
"tolerant-size-ratio": 0,
"low-space-ratio": 0.8,
"high-space-ratio": 0.7,
"region-score-formula-version": "v2",
"scheduler-max-waiting-operator": 5,
"enable-remove-down-replica": "true",
"enable-replace-offline-replica": "true",
"enable-make-up-replica": "true",
"enable-remove-extra-replica": "true",
"enable-location-replacement": "true",
"enable-debug-metrics": "false",
"enable-joint-consensus": "true",
"enable-tikv-split-region": "true",
"enable-heartbeat-breakdown-metrics": "true",
"schedulers-v2": [
{
"type": "balance-region",
"args": null,
"disable": false,
"args-payload": ""
},
{
"type": "balance-leader",
"args": null,
"disable": false,
"args-payload": ""
},
{
"type": "hot-region",
"args": null,
"disable": false,
"args-payload": ""
},
{
"type": "evict-slow-store",
"args": null,
"disable": false,
"args-payload": ""
}
],
"schedulers-payload": null,
"hot-regions-write-interval": "10m0s",
"hot-regions-reserved-days": 7,
"max-movable-hot-peer-size": 512,
"enable-diagnostic": "true",
"enable-witness": "false",
"slow-store-evicting-affected-store-ratio-threshold": 0.3,
"store-limit-version": "v1"
},
"replication": {
"max-replicas": 3,
"location-labels": "topology.kubernetes.io/region,topology.kubernetes.io/zone,kubernetes.io/hostname,serverless.tidbcloud.com/partition",
"strictly-match-label": "false",
"enable-placement-rules": "true",
"enable-placement-rules-cache": "false",
"isolation-level": "serverless.tidbcloud.com/partition"
},
"store": {
"coprocessor": {
"region-max-size": "750MiB",
"region-split-size": "500MiB",
"region-max-keys": 75000000,
"region-split-keys": 50000000,
"enable-region-bucket": true,
"region-bucket-size": "96MiB"
},
"storage": {
"engine": ""
}
}
}
metrics: