pd icon indicating copy to clipboard operation
pd copied to clipboard

More effective structure to deal with scatter regions operators

Open Leavrth opened this issue 1 year ago • 2 comments

Enhancement Task

Speed up operator creation and region heartbeat handling.

image image image image image

1 @ 0xf9670e 0xfa8378 0xfa834f 0xfc72e5 0x20d9db0 0x20d9d88 0x20f2235 0x21756c8 0x2174d9f 0x2174929 0xfcb5e1
#	0xfc72e4	sync.runtime_SemacquireRWMutexR+0x24						/usr/local/go/src/runtime/sema.go:82
#	0x20d9daf	sync.(*RWMutex).RLock+0x4f							/usr/local/go/src/sync/rwmutex.go:71
#	0x20d9d87	github.com/tikv/pd/pkg/schedule/operator.(*Controller).OperatorCount+0x27	/root/pd/pkg/schedule/operator/operator_controller.go:754
#	0x20f2234	github.com/tikv/pd/pkg/schedule/checker.(*Controller).CheckRegion+0x594		/root/pd/pkg/schedule/checker/checker_controller.go:134
#	0x21756c7	github.com/tikv/pd/pkg/schedule.(*Coordinator).tryAddOperators+0x67		/root/pd/pkg/schedule/coordinator.go:301
#	0x2174d9e	github.com/tikv/pd/pkg/schedule.(*Coordinator).checkSuspectRegions+0x5e		/root/pd/pkg/schedule/coordinator.go:212
#	0x2174928	github.com/tikv/pd/pkg/schedule.(*Coordinator).PatrolRegions+0x248		/root/pd/pkg/schedule/coordinator.go:170


1 @ 0xf65c10 0xf65429 0xfac069 0x102d99f 0x18df847 0x18df819 0x18df759 0x1fa60e8 0x1fa61c7 0x1fa606b 0x1fa6508 0x20d979c 0x20d610d 0x20d3b0f 0x216cca5 0x216c909 0x22777f3 0x2270959 0x174afb5 0x1b3a5da 0x14c7e83 0x1b3a5c3 0x1b3a7f5 0x1b3a5c3 0x1b3a95c 0x1b3a503 0x174ae95 0x14b20e7 0x14b602c 0x14b036d 0xfcb5e1
#	0x102d99e	strings.genSplit+0x7e											/usr/local/go/src/strings/strings.go:254
#	0x18df846	strings.SplitN+0xa6											/usr/local/go/src/strings/strings.go:283
#	0x18df818	github.com/coreos/go-semver/semver.(*Version).Set+0x78							/root/.gopath/pkg/mod/github.com/coreos/[email protected]/semver/semver.go:74
#	0x18df758	github.com/coreos/go-semver/semver.NewVersion+0x38							/root/.gopath/pkg/mod/github.com/coreos/[email protected]/semver/semver.go:55
#	0x1fa60e7	github.com/tikv/pd/pkg/versioninfo.ParseVersion+0x47							/root/pd/pkg/versioninfo/versioninfo.go:59
#	0x1fa61c6	github.com/tikv/pd/pkg/versioninfo.MustParseVersion+0x26						/root/pd/pkg/versioninfo/versioninfo.go:68
#	0x1fa606a	github.com/tikv/pd/pkg/versioninfo.MinSupportedVersion+0x20a						/root/pd/pkg/versioninfo/feature.go:71
#	0x1fa6507	github.com/tikv/pd/pkg/versioninfo.IsFeatureSupported+0x67						/root/pd/pkg/versioninfo/versioninfo.go:89
#	0x20d979b	github.com/tikv/pd/pkg/schedule/operator.(*Controller).SendScheduleCommand+0x2bb			/root/pd/pkg/schedule/operator/operator_controller.go:713
#	0x20d610c	github.com/tikv/pd/pkg/schedule/operator.(*Controller).addOperatorLocked+0xbcc				/root/pd/pkg/schedule/operator/operator_controller.go:513
#	0x20d3b0e	github.com/tikv/pd/pkg/schedule/operator.(*Controller).AddOperator+0x30e				/root/pd/pkg/schedule/operator/operator_controller.go:346
#	0x216cca4	github.com/tikv/pd/pkg/schedule/scatter.(*RegionScatterer).scatterRegions+0x304				/root/pd/pkg/schedule/scatter/region_scatterer.go:252
#	0x216c908	github.com/tikv/pd/pkg/schedule/scatter.(*RegionScatterer).ScatterRegionsByID+0x568			/root/pd/pkg/schedule/scatter/region_scatterer.go:215
#	0x22777f2	github.com/tikv/pd/server.scatterRegions+0x92								/root/pd/server/grpc_service.go:2600
#	0x2270958	github.com/tikv/pd/server.(*GrpcServer).ScatterRegion+0x878						/root/pd/server/grpc_service.go:1971
#	0x174afb4	github.com/pingcap/kvproto/pkg/pdpb._PD_ScatterRegion_Handler.func1+0x74				/root/.gopath/pkg/mod/github.com/pingcap/[email protected]/pkg/pdpb/pdpb.pb.go:9984
#	0x1b3a5d9	go.etcd.io/etcd/etcdserver/api/v3rpc.Server.ChainUnaryServer.func5.1+0x79				/root/.gopath/pkg/mod/github.com/grpc-ecosystem/[email protected]/chain.go:31
#	0x14c7e82	github.com/grpc-ecosystem/go-grpc-prometheus.init.(*ServerMetrics).UnaryServerInterceptor.func3+0x82	/root/.gopath/pkg/mod/github.com/grpc-ecosystem/[email protected]/server_metrics.go:107
#	0x1b3a5c2	go.etcd.io/etcd/etcdserver/api/v3rpc.Server.ChainUnaryServer.func5.1+0x62				/root/.gopath/pkg/mod/github.com/grpc-ecosystem/[email protected]/chain.go:34
#	0x1b3a7f4	go.etcd.io/etcd/etcdserver/api/v3rpc.Server.newUnaryInterceptor.func4+0x1d4				/root/.gopath/pkg/mod/go.etcd.io/[email protected]/etcdserver/api/v3rpc/interceptor.go:70
#	0x1b3a5c2	go.etcd.io/etcd/etcdserver/api/v3rpc.Server.ChainUnaryServer.func5.1+0x62				/root/.gopath/pkg/mod/github.com/grpc-ecosystem/[email protected]/chain.go:34
#	0x1b3a95b	go.etcd.io/etcd/etcdserver/api/v3rpc.Server.newLogUnaryInterceptor.func3+0xbb				/root/.gopath/pkg/mod/go.etcd.io/[email protected]/etcdserver/api/v3rpc/interceptor.go:77
#	0x1b3a502	go.etcd.io/etcd/etcdserver/api/v3rpc.Server.ChainUnaryServer.func5+0x182				/root/.gopath/pkg/mod/github.com/grpc-ecosystem/[email protected]/chain.go:39
#	0x174ae94	github.com/pingcap/kvproto/pkg/pdpb._PD_ScatterRegion_Handler+0x134					/root/.gopath/pkg/mod/github.com/pingcap/[email protected]/pkg/pdpb/pdpb.pb.go:9986
#	0x14b20e6	google.golang.org/grpc.(*Server).processUnaryRPC+0xd26							/root/.gopath/pkg/mod/google.golang.org/[email protected]/server.go:1024
#	0x14b602b	google.golang.org/grpc.(*Server).handleStream+0x9cb							/root/.gopath/pkg/mod/google.golang.org/[email protected]/server.go:1313
#	0x14b036c	google.golang.org/grpc.(*Server).serveStreams.func1.1+0x8c						/root/.gopath/pkg/mod/google.golang.org/[email protected]/server.go:722


1 @ 0x27d5769 0x1003366 0x17fc18d 0x17f85bb 0x17f1217 0x17f11bc 0x17f0d77 0x17e8cd3 0x17e7b34 0x17e6ed4 0x21c3eb2 0x21d5045 0x2266b05 0x17490b4 0x1b3a2ba 0x14c7d72 0x1b3a2a3 0x1b3c7ab 0x1b3a1e3 0x14b486c 0x14b5feb 0x14b036d 0xfcb5e1
#	0x27d5768	github.com/tikv/pd/pkg/btree.items[...].find.func1+0x48							/root/pd/pkg/btree/btree_generic.go:219
#	0x1003365	sort.Search+0x45											/usr/local/go/src/sort/search.go:65
#	0x17fc18c	github.com/tikv/pd/pkg/btree.items[...].find+0x6c							/root/pd/pkg/btree/btree_generic.go:219
#	0x17f85ba	github.com/tikv/pd/pkg/btree.(*node[...]).iterate+0x7a							/root/pd/pkg/btree/btree_generic.go:750
#	0x17f1216	github.com/tikv/pd/pkg/btree.(*BTreeG[...]).DescendLessOrEqual+0x76					/root/pd/pkg/btree/btree_generic.go:1000
#	0x17f11bb	github.com/tikv/pd/pkg/core.(*regionTree).find+0x1b							/root/pd/pkg/core/region_tree.go:233
#	0x17f0d76	github.com/tikv/pd/pkg/core.(*regionTree).remove+0x76							/root/pd/pkg/core/region_tree.go:188
#	0x17e8cd2	github.com/tikv/pd/pkg/core.(*RegionsInfo).removeRegionFromSubTreeLocked+0x112				/root/pd/pkg/core/region.go:1170
#	0x17e7b33	github.com/tikv/pd/pkg/core.(*RegionsInfo).UpdateSubTree+0xf3						/root/pd/pkg/core/region.go:1029
#	0x17e6ed3	github.com/tikv/pd/pkg/core.(*RegionsInfo).AtomicCheckAndPutRegion+0x253				/root/pd/pkg/core/region.go:920
#	0x21c3eb1	github.com/tikv/pd/server/cluster.(*RaftCluster).processRegionHeartbeat+0x2b1				/root/pd/server/cluster/cluster.go:1035
#	0x21d5044	github.com/tikv/pd/server/cluster.(*RaftCluster).HandleRegionHeartbeat+0x24				/root/pd/server/cluster/cluster_worker.go:37
#	0x2266b04	github.com/tikv/pd/server.(*GrpcServer).RegionHeartbeat+0xfc4						/root/pd/server/grpc_service.go:1291
#	0x17490b3	github.com/pingcap/kvproto/pkg/pdpb._PD_RegionHeartbeat_Handler+0x93					/root/.gopath/pkg/mod/github.com/pingcap/[email protected]/pkg/pdpb/pdpb.pb.go:9766
#	0x1b3a2b9	go.etcd.io/etcd/etcdserver/api/v3rpc.Server.ChainStreamServer.func8.1+0x79				/root/.gopath/pkg/mod/github.com/grpc-ecosystem/[email protected]/chain.go:71
#	0x14c7d71	github.com/grpc-ecosystem/go-grpc-prometheus.init.(*ServerMetrics).StreamServerInterceptor.func4+0xd1	/root/.gopath/pkg/mod/github.com/grpc-ecosystem/[email protected]/server_metrics.go:121
#	0x1b3a2a2	go.etcd.io/etcd/etcdserver/api/v3rpc.Server.ChainStreamServer.func8.1+0x62				/root/.gopath/pkg/mod/github.com/grpc-ecosystem/[email protected]/chain.go:74
#	0x1b3c7aa	go.etcd.io/etcd/etcdserver/api/v3rpc.newStreamInterceptor.func1+0x46a					/root/.gopath/pkg/mod/go.etcd.io/[email protected]/etcdserver/api/v3rpc/interceptor.go:237
#	0x1b3a1e2	go.etcd.io/etcd/etcdserver/api/v3rpc.Server.ChainStreamServer.func8+0x182				/root/.gopath/pkg/mod/github.com/grpc-ecosystem/[email protected]/chain.go:79
#	0x14b486b	google.golang.org/grpc.(*Server).processStreamingRPC+0xd4b						/root/.gopath/pkg/mod/google.golang.org/[email protected]/server.go:1244
#	0x14b5fea	google.golang.org/grpc.(*Server).handleStream+0x98a							/root/.gopath/pkg/mod/google.golang.org/[email protected]/server.go:1317
#	0x14b036c	google.golang.org/grpc.(*Server).serveStreams.func1.1+0x8c						/root/.gopath/pkg/mod/google.golang.org/[email protected]/server.go:722

2 @ 0xf9670e 0xfa8378 0xfa834f 0xfc72e5 0x20d8cb1 0x20d8c88 0x20d14bd 0x21d50f8 0x2266b05 0x17490b4 0x1b3a2ba 0x14c7d72 0x1b3a2a3 0x1b3c7ab 0x1b3a1e3 0x14b486c 0x14b5feb 0x14b036d 0xfcb5e1
#	0xfc72e4	sync.runtime_SemacquireRWMutexR+0x24									/usr/local/go/src/runtime/sema.go:82
#	0x20d8cb0	sync.(*RWMutex).RLock+0x50										/usr/local/go/src/sync/rwmutex.go:71
#	0x20d8c87	github.com/tikv/pd/pkg/schedule/operator.(*Controller).GetOperator+0x27					/root/pd/pkg/schedule/operator/operator_controller.go:666
#	0x20d14bc	github.com/tikv/pd/pkg/schedule/operator.(*Controller).Dispatch+0x5c					/root/pd/pkg/schedule/operator/operator_controller.go:109
#	0x21d50f7	github.com/tikv/pd/server/cluster.(*RaftCluster).HandleRegionHeartbeat+0xd7				/root/pd/server/cluster/cluster_worker.go:44
#	0x2266b04	github.com/tikv/pd/server.(*GrpcServer).RegionHeartbeat+0xfc4						/root/pd/server/grpc_service.go:1291
#	0x17490b3	github.com/pingcap/kvproto/pkg/pdpb._PD_RegionHeartbeat_Handler+0x93					/root/.gopath/pkg/mod/github.com/pingcap/[email protected]/pkg/pdpb/pdpb.pb.go:9766
#	0x1b3a2b9	go.etcd.io/etcd/etcdserver/api/v3rpc.Server.ChainStreamServer.func8.1+0x79				/root/.gopath/pkg/mod/github.com/grpc-ecosystem/[email protected]/chain.go:71
#	0x14c7d71	github.com/grpc-ecosystem/go-grpc-prometheus.init.(*ServerMetrics).StreamServerInterceptor.func4+0xd1	/root/.gopath/pkg/mod/github.com/grpc-ecosystem/[email protected]/server_metrics.go:121
#	0x1b3a2a2	go.etcd.io/etcd/etcdserver/api/v3rpc.Server.ChainStreamServer.func8.1+0x62				/root/.gopath/pkg/mod/github.com/grpc-ecosystem/[email protected]/chain.go:74
#	0x1b3c7aa	go.etcd.io/etcd/etcdserver/api/v3rpc.newStreamInterceptor.func1+0x46a					/root/.gopath/pkg/mod/go.etcd.io/[email protected]/etcdserver/api/v3rpc/interceptor.go:237
#	0x1b3a1e2	go.etcd.io/etcd/etcdserver/api/v3rpc.Server.ChainStreamServer.func8+0x182				/root/.gopath/pkg/mod/github.com/grpc-ecosystem/[email protected]/chain.go:79
#	0x14b486b	google.golang.org/grpc.(*Server).processStreamingRPC+0xd4b						/root/.gopath/pkg/mod/google.golang.org/[email protected]/server.go:1244
#	0x14b5fea	google.golang.org/grpc.(*Server).handleStream+0x98a							/root/.gopath/pkg/mod/google.golang.org/[email protected]/server.go:1317
#	0x14b036c	google.golang.org/grpc.(*Server).serveStreams.func1.1+0x8c						/root/.gopath/pkg/mod/google.golang.org/[email protected]/server.go:722

0xf9670e 0xfa8378 0xfa834f 0xfc7285 0xfd9d3d 0xfdb251 0xfdb232 0x17e7aac 0x17e6ed4 0x21c3eb2 0x21d5045 0x2266b05 0x17490b4 0x1b3a2ba 0x14c7d72 0x1b3a2a3 0x1b3c7ab 0x1b3a1e3 0x14b486c 0x14b5feb 0x14b036d 0xfcb5e1
#	0xfc7284	sync.runtime_SemacquireMutex+0x24									/usr/local/go/src/runtime/sema.go:77
#	0xfd9d3c	sync.(*Mutex).lockSlow+0x15c										/usr/local/go/src/sync/mutex.go:171
#	0xfdb250	sync.(*Mutex).Lock+0x30											/usr/local/go/src/sync/mutex.go:90
#	0xfdb231	sync.(*RWMutex).Lock+0x11										/usr/local/go/src/sync/rwmutex.go:147
#	0x17e7aab	github.com/tikv/pd/pkg/core.(*RegionsInfo).UpdateSubTree+0x6b						/root/pd/pkg/core/region.go:1023
#	0x17e6ed3	github.com/tikv/pd/pkg/core.(*RegionsInfo).AtomicCheckAndPutRegion+0x253				/root/pd/pkg/core/region.go:920
#	0x21c3eb1	github.com/tikv/pd/server/cluster.(*RaftCluster).processRegionHeartbeat+0x2b1				/root/pd/server/cluster/cluster.go:1035
#	0x21d5044	github.com/tikv/pd/server/cluster.(*RaftCluster).HandleRegionHeartbeat+0x24				/root/pd/server/cluster/cluster_worker.go:37
#	0x2266b04	github.com/tikv/pd/server.(*GrpcServer).RegionHeartbeat+0xfc4						/root/pd/server/grpc_service.go:1291
#	0x17490b3	github.com/pingcap/kvproto/pkg/pdpb._PD_RegionHeartbeat_Handler+0x93					/root/.gopath/pkg/mod/github.com/pingcap/[email protected]/pkg/pdpb/pdpb.pb.go:9766
#	0x1b3a2b9	go.etcd.io/etcd/etcdserver/api/v3rpc.Server.ChainStreamServer.func8.1+0x79				/root/.gopath/pkg/mod/github.com/grpc-ecosystem/[email protected]/chain.go:71
#	0x14c7d71	github.com/grpc-ecosystem/go-grpc-prometheus.init.(*ServerMetrics).StreamServerInterceptor.func4+0xd1	/root/.gopath/pkg/mod/github.com/grpc-ecosystem/[email protected]/server_metrics.go:121
#	0x1b3a2a2	go.etcd.io/etcd/etcdserver/api/v3rpc.Server.ChainStreamServer.func8.1+0x62				/root/.gopath/pkg/mod/github.com/grpc-ecosystem/[email protected]/chain.go:74
#	0x1b3c7aa	go.etcd.io/etcd/etcdserver/api/v3rpc.newStreamInterceptor.func1+0x46a					/root/.gopath/pkg/mod/go.etcd.io/[email protected]/etcdserver/api/v3rpc/interceptor.go:237
#	0x1b3a1e2	go.etcd.io/etcd/etcdserver/api/v3rpc.Server.ChainStreamServer.func8+0x182				/root/.gopath/pkg/mod/github.com/grpc-ecosystem/[email protected]/chain.go:79
#	0x14b486b	google.golang.org/grpc.(*Server).processStreamingRPC+0xd4b						/root/.gopath/pkg/mod/google.golang.org/[email protected]/server.go:1244
#	0x14b5fea	google.golang.org/grpc.(*Server).handleStream+0x98a							/root/.gopath/pkg/mod/google.golang.org/[email protected]/server.go:1317
#	0x14b036c	google.golang.org/grpc.(*Server).serveStreams.func1.1+0x8c						/root/.gopath/pkg/mod/google.golang.org/[email protected]/server.go:722

Leavrth avatar Mar 06 '24 04:03 Leavrth

This would be fixed by https://github.com/pingcap/tidb/issues/53532. Actually, it splits too many regions.

Leavrth avatar Aug 08 '24 09:08 Leavrth

Actually, it is inaccurate to assume that splitting fewer regions is the correct approach: splitting fewer regions doesn’t always work. As a large-scale distributed database, our primary focus should be on improving the efficiency of handling millions of regions.

Additionally, we need to ensure full utilization of the scatter phase.

Please reopen the issue and keep it open until we reach a clear conclusion that there is either no room for improvement or no feasible action we can take to address it.

3pointer avatar Dec 05 '24 07:12 3pointer