From f5d4636f44af151e91cfb57babf8f1094405d096 Mon Sep 17 00:00:00 2001 From: Shirly Date: Thu, 14 Jul 2022 10:35:06 +0800 Subject: [PATCH 1/3] schedule/placement/fit: remove the recursion in pick peers for on rule (#5269) close tikv/pd#5268 Signed-off-by: shirly Co-authored-by: Ti Chi Robot --- server/schedule/placement/fit.go | 59 +++++++++++++++++++-------- server/schedule/placement/fit_test.go | 32 +++++++++++++++ 2 files changed, 75 insertions(+), 16 deletions(-) diff --git a/server/schedule/placement/fit.go b/server/schedule/placement/fit.go index 8b7dac1c922..9879d434720 100644 --- a/server/schedule/placement/fit.go +++ b/server/schedule/placement/fit.go @@ -16,6 +16,7 @@ package placement import ( "math" + "math/bits" "sort" "github.com/pingcap/kvproto/pkg/metapb" @@ -227,27 +228,29 @@ func (w *fitWorker) fitRule(index int) bool { if len(candidates) < count { count = len(candidates) } - return w.enumPeers(candidates, nil, index, count) + + return w.fixRuleWithCandidates(candidates, index, count) } -// Recursively traverses all feasible peer combinations. -// For each combination, call `compareBest` to determine whether it is better -// than the existing option. +// Pick the most suitable peer combination for the rule with candidates. // Returns true if it replaces `bestFit` with a better alternative. -func (w *fitWorker) enumPeers(candidates, selected []*fitPeer, index int, count int) bool { - if len(selected) == count { - // We collect enough peers. End recursive. - return w.compareBest(selected, index) - } +func (w *fitWorker) fixRuleWithCandidates(candidates []*fitPeer, index int, count int) bool { + // map the candidates to binary numbers with len(candidates) bits, + // each bit can be 1 or 0, 1 means a picked candidate + // the binary numbers with `count` 1 means a choose for the current rule. var better bool - // make sure the left number of candidates should be enough. 
- indexLimit := len(candidates) - (count - len(selected)) - for i := 0; i <= indexLimit; i++ { - p := candidates[i] - p.selected = true - better = w.enumPeers(candidates[i+1:], append(selected, p), index, count) || better - p.selected = false + limit := uint(1<>= 1 + if binaryNumber == 0 { + break + } + } + return selected +} + +func unSelectPeers(seleted []*fitPeer) { + for _, p := range seleted { + p.selected = false + } +} + // compareBest checks if the selected peers is better then previous best. // Returns true if it replaces `bestFit` with a better alternative. func (w *fitWorker) compareBest(selected []*fitPeer, index int) bool { diff --git a/server/schedule/placement/fit_test.go b/server/schedule/placement/fit_test.go index 6325dadb30a..7c4c9147ac5 100644 --- a/server/schedule/placement/fit_test.go +++ b/server/schedule/placement/fit_test.go @@ -187,3 +187,35 @@ func TestIsolationScore(t *testing.T) { testCase.checker(score1, score2) } } + +func TestPickPeersFromBinaryInt(t *testing.T) { + re := require.New(t) + var candidates []*fitPeer + for id := uint64(1); id <= 10; id++ { + candidates = append(candidates, &fitPeer{ + Peer: &metapb.Peer{Id: id}, + }) + } + testCases := []struct { + binary string + expectedPeers []uint64 + }{ + {"0", []uint64{}}, + {"1", []uint64{1}}, + {"101", []uint64{1, 3}}, + {"111", []uint64{1, 2, 3}}, + {"1011", []uint64{1, 2, 4}}, + {"100011", []uint64{1, 2, 6}}, + {"1000001111", []uint64{1, 2, 3, 4, 10}}, + } + + for _, c := range testCases { + binaryNumber, err := strconv.ParseUint(c.binary, 2, 64) + re.NoError(err) + selected := pickPeersFromBinaryInt(candidates, uint(binaryNumber)) + re.Len(selected, len(c.expectedPeers)) + for id := 0; id < len(selected); id++ { + re.Equal(selected[id].Id, c.expectedPeers[id]) + } + } +} From 8a96807fc8be156169c05ebd24a1b800482dbb78 Mon Sep 17 00:00:00 2001 From: Hu# Date: Thu, 14 Jul 2022 11:43:06 +0800 Subject: [PATCH 2/3] api: add an interface to drop all region caches (#5305) close 
tikv/pd#5282 Currently, the interface "/admin/cache/region/{id}" can only drop a single region when needed. This new interface provides an API to clear all region caches in PD. Signed-off-by: husharp Co-authored-by: Ti Chi Robot --- server/api/admin.go | 11 +++++++ server/api/admin_test.go | 68 ++++++++++++++++++++++++++++++++++++++++ server/api/router.go | 1 + server/tso/tso.go | 2 +- 4 files changed, 81 insertions(+), 1 deletion(-) diff --git a/server/api/admin.go b/server/api/admin.go index 1fa63c8ad9a..42aac1247bb 100644 --- a/server/api/admin.go +++ b/server/api/admin.go @@ -58,6 +58,17 @@ func (h *adminHandler) DeleteRegionCache(w http.ResponseWriter, r *http.Request) h.rd.JSON(w, http.StatusOK, "The region is removed from server cache.") } +// @Tags admin +// @Summary Drop all regions from cache. +// @Produce json +// @Success 200 {string} string "All regions are removed from server cache." +// @Router /admin/cache/regions [delete] +func (h *adminHandler) DeleteAllRegionCache(w http.ResponseWriter, r *http.Request) { + rc := getCluster(r) + rc.DropCacheAllRegion() + h.rd.JSON(w, http.StatusOK, "All regions are removed from server cache.") +} + // FIXME: details of input json body params // @Tags admin // @Summary Reset the ts. diff --git a/server/api/admin_test.go b/server/api/admin_test.go index f8fd0bcf74f..1ea5a4f9ec7 100644 --- a/server/api/admin_test.go +++ b/server/api/admin_test.go @@ -94,6 +94,74 @@ func (suite *adminTestSuite) TestDropRegion() { suite.Equal(uint64(50), region.GetRegionEpoch().Version) } +func (suite *adminTestSuite) TestDropRegions() { + cluster := suite.svr.GetRaftCluster() + + n := uint64(10000) + np := uint64(3) + + regions := make([]*core.RegionInfo, 0, n) + for i := uint64(0); i < n; i++ { + peers := make([]*metapb.Peer, 0, np) + for j := uint64(0); j < np; j++ { + peer := &metapb.Peer{ + Id: i*np + j, + } + peer.StoreId = (i + j) % n + peers = append(peers, peer) + } + // initialize region's epoch to (100, 100). 
+ region := cluster.GetRegionByKey([]byte(fmt.Sprintf("%d", i))).Clone( + core.SetPeers(peers), + core.SetRegionConfVer(100), + core.SetRegionVersion(100), + ) + regions = append(regions, region) + + err := cluster.HandleRegionHeartbeat(region) + suite.NoError(err) + } + + // Region epoch cannot decrease. + for i := uint64(0); i < n; i++ { + region := regions[i].Clone( + core.SetRegionConfVer(50), + core.SetRegionVersion(50), + ) + regions[i] = region + err := cluster.HandleRegionHeartbeat(region) + suite.Error(err) + } + + for i := uint64(0); i < n; i++ { + region := cluster.GetRegionByKey([]byte(fmt.Sprintf("%d", i))) + + suite.Equal(uint64(100), region.GetRegionEpoch().ConfVer) + suite.Equal(uint64(100), region.GetRegionEpoch().Version) + } + + // After drop all regions from cache, lower version is accepted. + url := fmt.Sprintf("%s/admin/cache/regions", suite.urlPrefix) + req, err := http.NewRequest(http.MethodDelete, url, nil) + suite.NoError(err) + res, err := testDialClient.Do(req) + suite.NoError(err) + suite.Equal(http.StatusOK, res.StatusCode) + res.Body.Close() + + for _, region := range regions { + err := cluster.HandleRegionHeartbeat(region) + suite.NoError(err) + } + + for i := uint64(0); i < n; i++ { + region := cluster.GetRegionByKey([]byte(fmt.Sprintf("%d", i))) + + suite.Equal(uint64(50), region.GetRegionEpoch().ConfVer) + suite.Equal(uint64(50), region.GetRegionEpoch().Version) + } +} + func (suite *adminTestSuite) TestPersistFile() { data := []byte("#!/bin/sh\nrm -rf /") re := suite.Require() diff --git a/server/api/router.go b/server/api/router.go index 81b8f9d83bc..b062ff78f65 100644 --- a/server/api/router.go +++ b/server/api/router.go @@ -286,6 +286,7 @@ func createRouter(prefix string, svr *server.Server) *mux.Router { adminHandler := newAdminHandler(svr, rd) registerFunc(clusterRouter, "/admin/cache/region/{id}", adminHandler.DeleteRegionCache, setMethods(http.MethodDelete), setAuditBackend(localLog)) + registerFunc(clusterRouter, 
"/admin/cache/regions", adminHandler.DeleteAllRegionCache, setMethods(http.MethodDelete), setAuditBackend(localLog)) registerFunc(clusterRouter, "/admin/reset-ts", adminHandler.ResetTS, setMethods(http.MethodPost), setAuditBackend(localLog)) registerFunc(apiRouter, "/admin/persist-file/{file_name}", adminHandler.SavePersistFile, setMethods(http.MethodPost), setAuditBackend(localLog)) diff --git a/server/tso/tso.go b/server/tso/tso.go index 8e1f7009657..b7a00c3f9ab 100644 --- a/server/tso/tso.go +++ b/server/tso/tso.go @@ -123,7 +123,7 @@ func (t *timestampOracle) generateTSO(count int64, suffixBits int) (physical int // For example, we have three DCs: dc-1, dc-2 and dc-3. The bits of suffix is defined by // the const suffixBits. Then, for dc-2, the suffix may be 1 because it's persisted // in etcd with the value of 1. -// Once we get a noramal TSO like this (18 bits): xxxxxxxxxxxxxxxxxx. We will make the TSO's +// Once we get a normal TSO like this (18 bits): xxxxxxxxxxxxxxxxxx. We will make the TSO's // low bits of logical part from each DC looks like: // global: xxxxxxxxxx00000000 // dc-1: xxxxxxxxxx00000001 From 34a4cce2cb07f8aa4283e28ba40a667e1321a1fb Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 14 Jul 2022 13:13:05 +0800 Subject: [PATCH 3/3] metrics: make hot split region more readable (#5306) ref tikv/pd#4399 Signed-off-by: lhy1024 --- server/schedulers/hot_region.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/schedulers/hot_region.go b/server/schedulers/hot_region.go index 07e45751ae3..3f98b267a63 100644 --- a/server/schedulers/hot_region.go +++ b/server/schedulers/hot_region.go @@ -513,7 +513,7 @@ func (bs *balanceSolver) solve() []*operator.Operator { if bs.cur.region = bs.getRegion(srcPeerStat, srcStoreID); bs.cur.region == nil { continue } else if bs.opTy == movePeer && bs.cur.region.GetApproximateSize() > bs.GetOpts().GetMaxMovableHotPeerSize() { - schedulerCounter.WithLabelValues(fmt.Sprintf("hot-region-%s", bs.rwTy), 
"hot_region_split").Inc() + schedulerCounter.WithLabelValues(bs.sche.GetName(), "need_split_before_move_peer").Inc() continue } bs.cur.srcPeerStat = srcPeerStat