pd icon indicating copy to clipboard operation
pd copied to clipboard

Scheduler: `balance-leader-scheduler` always picks the same region as the scheduling region

Open bufferflies opened this issue 9 months ago • 0 comments

Enhancement Task

The balance-leader-scheduler should pick the scheduling region randomly, but in a POC we found that the scheduler always picks the same region (1289).

Image

version: v7.5.2

Related code: https://github.com/tikv/pd/blob/78f4254e3f5adb48e3e1e2489065f5ccf6cf1815/pkg/schedule/schedulers/balance_leader.go#L451-L452

Unit test for `RandLeaderRegions`:

// TestRandLeaderRegions checks that two consecutive calls to
// cluster.RandLeaderRegions over the full key range do not start with the
// same region.
//
// It registers 1000 regions (IDs 1000..1999) in a mock cluster, each covering
// the key range [strconv(i), strconv(i+1)), then samples leader regions twice
// with the same arguments and compares the first element of each sample.
func TestRandLeaderRegions(t *testing.T) {
	re := require.New(t)
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	cluster := mockcluster.NewCluster(ctx, mockconfig.NewTestOptions())

	for i := uint64(1000); i < 2000; i++ {
		// NOTE(review): the second argument is presumably the store ID that
		// holds the leader, matching the store queried below — confirm
		// against core.NewTestRegionInfo.
		region := core.NewTestRegionInfo(i, 1, []byte(strconv.FormatUint(i, 10)), []byte(strconv.FormatUint(i+1, 10)))
		cluster.PutRegion(region)
	}

	// An empty start/end key pair is passed as the only key range.
	rs := cluster.RandLeaderRegions(1, []core.KeyRange{core.NewKeyRange("", "")})
	re.Len(rs, 10)
	rs1 := cluster.RandLeaderRegions(1, []core.KeyRange{core.NewKeyRange("", "")})
	// Validate the second sample itself; the original re-checked rs here,
	// leaving rs1 unverified before rs1[0] is indexed.
	re.Len(rs1, 10)
	// If the selection were random, two independent samples would almost
	// never start with the same region.
	re.NotEqual(rs[0], rs1[0])
}

conf:

{
"schedule": {
    "max-snapshot-count": 64,
    "max-pending-peer-count": 64,
    "max-merge-region-size": 20,
    "max-merge-region-keys": 0,
    "split-merge-interval": "1h0m0s",
    "switch-witness-interval": "1h0m0s",
    "enable-one-way-merge": "false",
    "enable-cross-table-merge": "true",
    "patrol-region-interval": "100ms",
    "max-store-down-time": "30m0s",
    "max-store-preparing-time": "48h0m0s",
    "leader-schedule-limit": 4,
    "leader-schedule-policy": "count",
    "region-schedule-limit": 2048,
    "witness-schedule-limit": 4,
    "replica-schedule-limit": 1024,
    "merge-schedule-limit": 16,
    "hot-region-schedule-limit": 4,
    "hot-region-cache-hits-threshold": 1,
    "store-limit": {
        "37440995": {
            "add-peer": 64,
            "remove-peer": 64
        },
        "37441002": {
            "add-peer": 64,
            "remove-peer": 64
        },
        "37441128": {
            "add-peer": 64,
            "remove-peer": 64
        },
        "37441138": {
            "add-peer": 64,
            "remove-peer": 64
        },
        "37441201": {
            "add-peer": 64,
            "remove-peer": 64
        },
        "37441426": {
            "add-peer": 64,
            "remove-peer": 64
        },
        "434387": {
            "add-peer": 64,
            "remove-peer": 64
        },
        "434554": {
            "add-peer": 64,
            "remove-peer": 64
        },
        "434631": {
            "add-peer": 64,
            "remove-peer": 64
        },
        "434937": {
            "add-peer": 64,
            "remove-peer": 64
        },
        "780154": {
            "add-peer": 64,
            "remove-peer": 64
        }
    },
    "tolerant-size-ratio": 0,
    "low-space-ratio": 0.8,
    "high-space-ratio": 0.7,
    "region-score-formula-version": "v2",
    "scheduler-max-waiting-operator": 5,
    "enable-remove-down-replica": "true",
    "enable-replace-offline-replica": "true",
    "enable-make-up-replica": "true",
    "enable-remove-extra-replica": "true",
    "enable-location-replacement": "true",
    "enable-debug-metrics": "false",
    "enable-joint-consensus": "true",
    "enable-tikv-split-region": "true",
    "enable-heartbeat-breakdown-metrics": "true",
    "schedulers-v2": [
        {
            "type": "balance-region",
            "args": null,
            "disable": false,
            "args-payload": ""
        },
        {
            "type": "balance-leader",
            "args": null,
            "disable": false,
            "args-payload": ""
        },
        {
            "type": "hot-region",
            "args": null,
            "disable": false,
            "args-payload": ""
        },
        {
            "type": "evict-slow-store",
            "args": null,
            "disable": false,
            "args-payload": ""
        }
    ],
    "schedulers-payload": null,
    "hot-regions-write-interval": "10m0s",
    "hot-regions-reserved-days": 7,
    "max-movable-hot-peer-size": 512,
    "enable-diagnostic": "true",
    "enable-witness": "false",
    "slow-store-evicting-affected-store-ratio-threshold": 0.3,
    "store-limit-version": "v1"
},
"replication": {
    "max-replicas": 3,
    "location-labels": "topology.kubernetes.io/region,topology.kubernetes.io/zone,kubernetes.io/hostname,serverless.tidbcloud.com/partition",
    "strictly-match-label": "false",
    "enable-placement-rules": "true",
    "enable-placement-rules-cache": "false",
    "isolation-level": "serverless.tidbcloud.com/partition"
},
"store": {
    "coprocessor": {
        "region-max-size": "750MiB",
        "region-split-size": "500MiB",
        "region-max-keys": 75000000,
        "region-split-keys": 50000000,
        "enable-region-bucket": true,
        "region-bucket-size": "96MiB"
    },
    "storage": {
        "engine": ""
    }
}
}

metrics:

Image Image

bufferflies avatar Mar 24 '25 09:03 bufferflies