diff --git a/pkg/kv/kvserver/BUILD.bazel b/pkg/kv/kvserver/BUILD.bazel index 27bcb9e855b3..5033a24c8f49 100644 --- a/pkg/kv/kvserver/BUILD.bazel +++ b/pkg/kv/kvserver/BUILD.bazel @@ -196,7 +196,6 @@ go_library( "//pkg/util/quotapool", "//pkg/util/retry", "//pkg/util/shuffle", - "//pkg/util/slidingwindow", "//pkg/util/stop", "//pkg/util/syncutil", "//pkg/util/syncutil/singleflight", diff --git a/pkg/kv/kvserver/allocator/allocatorimpl/BUILD.bazel b/pkg/kv/kvserver/allocator/allocatorimpl/BUILD.bazel index b8053a68bacb..04f19b9dc06f 100644 --- a/pkg/kv/kvserver/allocator/allocatorimpl/BUILD.bazel +++ b/pkg/kv/kvserver/allocator/allocatorimpl/BUILD.bazel @@ -22,6 +22,7 @@ go_library( "//pkg/roachpb", "//pkg/settings", "//pkg/settings/cluster", + "//pkg/util/admission/admissionpb", "//pkg/util/log", "//pkg/util/metric", "//pkg/util/stop", diff --git a/pkg/kv/kvserver/allocator/allocatorimpl/allocator.go b/pkg/kv/kvserver/allocator/allocatorimpl/allocator.go index 8a3777749582..58febeb5e84c 100644 --- a/pkg/kv/kvserver/allocator/allocatorimpl/allocator.go +++ b/pkg/kv/kvserver/allocator/allocatorimpl/allocator.go @@ -2036,16 +2036,16 @@ func (a *Allocator) leaseholderShouldMoveDueToPreferences( } // StoreHealthOptions returns the store health options, currently only -// considering the threshold for L0 sub-levels. This threshold is not +// considering the threshold for io overload. This threshold is not // considered in allocation or rebalancing decisions (excluding candidate // stores as targets) when enforcementLevel is set to storeHealthNoAction or // storeHealthLogOnly. By default storeHealthBlockRebalanceTo is the action taken. When // there is a mixed version cluster, storeHealthNoAction is set instead. func (a *Allocator) StoreHealthOptions(_ context.Context) StoreHealthOptions { - enforcementLevel := StoreHealthEnforcement(l0SublevelsThresholdEnforce.Get(&a.st.SV)) + enforcementLevel := StoreHealthEnforcement(IOOverloadThresholdEnforce.Get(&a.st.SV)) return StoreHealthOptions{ - EnforcementLevel: enforcementLevel, - L0SublevelThreshold: l0SublevelsThreshold.Get(&a.st.SV), + EnforcementLevel: enforcementLevel, + IOThreshold: IOOverloadThreshold.Get(&a.st.SV), } } diff --git a/pkg/kv/kvserver/allocator/allocatorimpl/allocator_scorer.go b/pkg/kv/kvserver/allocator/allocatorimpl/allocator_scorer.go index 1d5b07ef653a..f7b77955f143 100644 --- a/pkg/kv/kvserver/allocator/allocatorimpl/allocator_scorer.go +++ b/pkg/kv/kvserver/allocator/allocatorimpl/allocator_scorer.go @@ -16,7 +16,6 @@ import ( "fmt" "math" "sort" - "time" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/allocator" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/allocator/load" @@ -24,6 +23,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/kv/kvserver/constraint" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/settings" + "github.com/cockroachdb/cockroach/pkg/util/admission/admissionpb" "github.com/cockroachdb/cockroach/pkg/util/log" ) @@ -70,26 +70,16 @@ const ( // away from the mean. minRangeRebalanceThreshold = 2 - // MaxL0SublevelThreshold is the number of L0 sub-levels of a store - // descriptor, that when greater than this value and in excees of the - // average L0 sub-levels in the cluster - will have the action defined by - // l0SublevelsThresholdEnforce taken. 
This value does not affect the
-	// allocator in deciding to remove replicas from it's store, only
+	// DefaultIOOverloadThreshold is the IO overload score of a store
+	// descriptor that, when greater than this value and in excess of the
+	// average IO overload of comparable candidates, will have the action
+	// defined by StoreHealthEnforcement taken. This value does not affect
+	// the allocator in deciding to remove replicas from its store, only
	// potentially block adding or moving replicas to other stores.
-	MaxL0SublevelThreshold = 20
-
-	// L0SublevelInterval is the period over which to accumulate statistics on
-	// the number of L0 sublevels within a store.
-	L0SublevelInterval = time.Minute * 2
-
-	// L0SublevelMaxSampled is maximum number of L0 sub-levels that may exist
-	// in a sample. This setting limits the extreme skew that could occur by
-	// capping the highest possible value considered.
-	L0SublevelMaxSampled = 500
-
-	// l0SubLevelWaterMark is the percentage above the mean after which a store
+	DefaultIOOverloadThreshold = 0.8
+	// IOOverloadWaterMark is the percentage above the mean after which a store
	// could be considered unhealthy if also exceeding the threshold.
-	l0SubLevelWaterMark = 1.10
+	IOOverloadWaterMark = 1.1
)

// StoreHealthEnforcement represents the level of action that may be taken or
@@ -98,18 +88,18 @@ type StoreHealthEnforcement int64

const (
	// StoreHealthNoAction will take no action upon candidate stores when they
-	// exceed l0SublevelThreshold.
+	// exceed the IOOverloadThreshold.
	StoreHealthNoAction StoreHealthEnforcement = iota
	// StoreHealthLogOnly will take no action upon candidate stores when they
-	// exceed l0SublevelThreshold except log an event.
+	// exceed IOOverloadThreshold except log an event.
	StoreHealthLogOnly
	// StoreHealthBlockRebalanceTo will take action to exclude candidate stores
-	// when they exceed l0SublevelThreshold and mean from being considered
+	// when they exceed IOOverloadThreshold and the mean from being considered
	// targets for rebalance actions only. Allocation actions such as adding
	// upreplicaing an range will not be affected.
	StoreHealthBlockRebalanceTo
	// StoreHealthBlockAll will take action to exclude candidate stores when
-	// they exceed l0SublevelThreshold and mean from being candidates for all
+	// they exceed IOOverloadThreshold and the mean from being candidates for all
	// replica allocation and rebalancing. When enabled and stores exceed the
	// threshold, they will not receive any new replicas.
	StoreHealthBlockAll
@@ -130,29 +120,16 @@ var RangeRebalanceThreshold = func() *settings.FloatSetting {
	return s
}()
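Reviewer note: DefaultIOOverloadThreshold and IOOverloadWaterMark form a two-part gate — a candidate is excluded only when its score clears the absolute threshold and also sits above the watermark-scaled mean of the comparable candidates. A minimal sketch of that gate in plain Go (not CockroachDB code; the constants mirror the two values above and the condition mirrors storeIsHealthy later in this diff):

```go
// Sketch only: mirrors DefaultIOOverloadThreshold (0.8) and
// IOOverloadWaterMark (1.1) from the hunk above. Not CockroachDB code.
package main

import "fmt"

const (
	ioOverloadThreshold = 0.8 // DefaultIOOverloadThreshold
	ioOverloadWaterMark = 1.1 // IOOverloadWaterMark
)

// wouldExclude reports whether a candidate with the given IO overload
// score would be gated, given the mean score of comparable candidates.
// Both conditions must hold, matching storeIsHealthy below.
func wouldExclude(score, candidateMean float64) bool {
	return score >= ioOverloadThreshold && score >= candidateMean*ioOverloadWaterMark
}

func main() {
	// Over the threshold but close to its peers: still a valid target.
	fmt.Println(wouldExclude(0.9, 0.85)) // false: 0.9 < 0.85 * 1.1
	// Over the threshold and well above its peers: excluded.
	fmt.Println(wouldExclude(0.9, 0.5)) // true
}
```

The watermark keeps the allocator from excluding every store at once when the whole cluster is uniformly overloaded, which is what the "below average" log-only branches below are about.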
-// l0SublevelsThreshold is the maximum number of sub-levels within level 0 that
-// may exist on candidate store descriptors before they are considered
-// unhealthy. Once considered unhealthy, the action taken will be dictated by
-// l0SublevelsThresholdEnforce cluster setting defined below. The rationale for
-// using L0 sub-levels as opposed to read amplification is that it is more
-// generally the variable component that makes up read amplification. When
-// L0 sub-levels is high, it is an indicator of poor LSM health as L0 is usually
-// in memory and must be first visited before traversing any further level. See
-// this issue for additional information:
-// https://github.com/cockroachdb/pebble/issues/609
-var l0SublevelsThreshold = settings.RegisterIntSetting(
+// IOOverloadThreshold is the maximum IO overload score a candidate store may
+// have before the action defined in `kv.allocator.io_overload_threshold_enforce`
+// is taken, if the score also exceeds the average of comparable candidates.
+var IOOverloadThreshold = settings.RegisterFloatSetting(
	settings.SystemOnly,
-	"kv.allocator.l0_sublevels_threshold",
-	"the maximum number of l0 sublevels within a store that may exist "+
-		"before the action defined in "+
-		"`kv.allocator.l0_sublevels_threshold_enforce` will be taken "+
-		"if also exceeding the cluster average",
-	MaxL0SublevelThreshold,
+	"kv.allocator.io_overload_threshold",
+	"the maximum io overload score a candidate store may have before the "+
+		"action defined in `kv.allocator.io_overload_threshold_enforce` is "+
+		"taken, if also exceeding the cluster average",
+	DefaultIOOverloadThreshold,
)

-// l0SublevelsThresholdEnforce is the level of enforcement taken upon candidate
-// stores when their L0-sublevels exceeds the threshold defined in
-// l0SublevelThreshold. Under disabled and log enforcement, no action is taken
+// IOOverloadThresholdEnforce is the level of enforcement taken upon candidate
+// stores when their io overload exceeds the threshold defined in
+// IOOverloadThreshold. Under disabled and log enforcement, no action is taken
// to exclude the candidate store either as a potential allocation nor
// rebalance target by the replicate queue and store rebalancer. When the
// enforcement level is rebalance, candidate stores will be excluded as targets
@@ -160,11 +137,11 @@ var l0SublevelsThreshold = settings.RegisterIntSetting(
// for allocation of voters and non-voters. When allocate is set, candidates
// are excluded as targets for all rebalancing and also allocation of voters
// and non-voters.
-var l0SublevelsThresholdEnforce = settings.RegisterEnumSetting(
+var IOOverloadThresholdEnforce = settings.RegisterEnumSetting(
	settings.SystemOnly,
-	"kv.allocator.l0_sublevels_threshold_enforce",
-	"the level of enforcement when a candidate disk has L0 sub-levels "+
-		"exceeding `kv.allocator.l0_sublevels_threshold` and above the "+
+	"kv.allocator.io_overload_threshold_enforce",
+	"the level of enforcement when a candidate store has an io overload score "+
+		"exceeding `kv.allocator.io_overload_threshold` and above the "+
		"cluster average:`block_none` will exclude "+
		"no candidate stores, `block_none_log` will exclude no candidates but log an "+
		"event, `block_rebalance_to` will exclude candidates stores from being "+
@@ -571,24 +548,24 @@ func (o *LoadScorerOptions) removalMaximallyConvergesScore(
// candidate store for allocation. These are ordered by importance.
type candidate struct {
-	store          roachpb.StoreDescriptor
-	valid          bool
-	fullDisk       bool
-	necessary      bool
-	diversityScore float64
-	highReadAmp    bool
-	l0SubLevels    int
-	convergesScore int
-	balanceScore   balanceStatus
-	hasNonVoter    bool
-	rangeCount     int
-	details        string
+	store           roachpb.StoreDescriptor
+	valid           bool
+	fullDisk        bool
+	necessary       bool
+	diversityScore  float64
+	ioOverloaded    bool
+	ioOverloadScore float64
+	convergesScore  int
+	balanceScore    balanceStatus
+	hasNonVoter     bool
+	rangeCount      int
+	details         string
}

func (c candidate) String() string {
-	str := fmt.Sprintf("s%d, valid:%t, fulldisk:%t, necessary:%t, diversity:%.2f, highReadAmp: %t, l0SubLevels: %d, converges:%d, "+
+	str := fmt.Sprintf("s%d, valid:%t, fulldisk:%t, necessary:%t, diversity:%.2f, ioOverloaded: %t, ioOverload: %.2f, converges:%d, "+
		"balance:%d, hasNonVoter:%t, rangeCount:%d, queriesPerSecond:%.2f",
-		c.store.StoreID, c.valid, c.fullDisk, c.necessary, c.diversityScore, c.highReadAmp, c.l0SubLevels, c.convergesScore,
+		c.store.StoreID, c.valid, c.fullDisk, c.necessary, c.diversityScore, c.ioOverloaded, c.ioOverloadScore, c.convergesScore,
		c.balanceScore, c.hasNonVoter, c.rangeCount, c.store.Capacity.QueriesPerSecond)
	if c.details != "" {
		return fmt.Sprintf("%s, details:(%s)", str, c.details)
@@ -611,11 +588,11 @@ func (c candidate) compactString() string {
	if c.diversityScore != 0 {
		fmt.Fprintf(&buf, ", diversity:%.2f", c.diversityScore)
	}
-	if c.highReadAmp {
-		fmt.Fprintf(&buf, ", highReadAmp:%t", c.highReadAmp)
+	if c.ioOverloaded {
		fmt.Fprintf(&buf, ", ioOverloaded:%t", c.ioOverloaded)
	}
-	if c.l0SubLevels > 0 {
-		fmt.Fprintf(&buf, ", l0SubLevels:%d", c.l0SubLevels)
+	if c.ioOverloadScore > 0 {
+		fmt.Fprintf(&buf, ", ioOverload:%.2f", c.ioOverloadScore)
	}
	fmt.Fprintf(&buf, ", converges:%d, balance:%d, rangeCount:%d",
		c.convergesScore, c.balanceScore, c.rangeCount)
@@ -660,17 +637,17 @@ func (c candidate) compare(o candidate) float64 {
		}
		return -300
	}
-	// If both o and c have high read amplification, then we prefer the
-	// canidate with lower read amp.
-	if o.highReadAmp && c.highReadAmp {
-		if o.l0SubLevels > c.l0SubLevels {
+	// If both o and c are IO overloaded, then we prefer the
+	// candidate with the lower IO overload score.
+	if o.ioOverloaded && c.ioOverloaded {
		if o.ioOverloadScore > c.ioOverloadScore {
			return 250
		}
	}
-	if c.highReadAmp {
+	if c.ioOverloaded {
		return -250
	}
-	if o.highReadAmp {
+	if o.ioOverloaded {
		return 250
	}
@@ -743,7 +720,7 @@ func (c byScoreAndID) Less(i, j int) bool {
		c[i].rangeCount == c[j].rangeCount &&
		c[i].necessary == c[j].necessary &&
		c[i].fullDisk == c[j].fullDisk &&
-		c[i].highReadAmp == c[j].highReadAmp &&
+		c[i].ioOverloaded == c[j].ioOverloaded &&
		c[i].valid == c[j].valid {
		return c[i].store.StoreID < c[j].store.StoreID
	}
}
func (c byScoreAndID) Swap(i, j int) { c[i], c[j] = c[j], c[i] }
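Reviewer note: the banded return values in candidate.compare are easier to see in isolation. A simplified, self-contained model for illustration (an assumption, not the allocator's actual code; only the ±250 IO overload band and its position below the necessity band are taken from the hunk above, the rest is schematic):

```go
// Simplified model of compare's band ordering; not the allocator's code.
package main

import "fmt"

type cand struct {
	necessary    bool
	ioOverloaded bool
	ioOverload   float64
	rangeCount   int
}

// compare returns a positive value when c is the better candidate. A
// non-zero result from a higher band short-circuits every band below it.
func compare(c, o cand) float64 {
	if c.necessary != o.necessary {
		if c.necessary {
			return 300
		}
		return -300
	}
	// Both IO overloaded: prefer the candidate with the lower score.
	if c.ioOverloaded && o.ioOverloaded && o.ioOverload > c.ioOverload {
		return 250
	}
	if c.ioOverloaded {
		return -250
	}
	if o.ioOverloaded {
		return 250
	}
	// Lowest band: fewer ranges wins.
	return float64(o.rangeCount - c.rangeCount)
}

func main() {
	healthy := cand{rangeCount: 900}
	overloaded := cand{ioOverloaded: true, ioOverload: 1.4, rangeCount: 100}
	// Health outranks range count: the fuller but healthy store wins.
	fmt.Println(compare(healthy, overloaded) > 0) // true
}
```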
// onlyValidAndHealthyDisk returns all the elements in a sorted (by score
-// reversed) candidate list that are valid and not nearly full or with high
-// read amplification.
+// reversed) candidate list that are valid and not nearly full or IO
+// overloaded.
func (cl candidateList) onlyValidAndHealthyDisk() candidateList {
	for i := len(cl) - 1; i >= 0; i-- {
-		if cl[i].valid && !cl[i].fullDisk && !cl[i].highReadAmp {
+		if cl[i].valid && !cl[i].fullDisk && !cl[i].ioOverloaded {
			return cl[:i+1]
		}
	}
@@ -826,10 +803,10 @@ func (cl candidateList) worst() candidateList {
			}
		}
	}
-	// Are there candidates with high read amplification? If so, pick those.
-	if cl[len(cl)-1].highReadAmp {
+	// Are there candidates with high io overload? If so, pick those.
+	if cl[len(cl)-1].ioOverloaded {
		for i := len(cl) - 2; i >= 0; i-- {
-			if !cl[i].highReadAmp {
+			if !cl[i].ioOverloaded {
				return cl[i+1:]
			}
		}
@@ -978,10 +955,10 @@ func rankedCandidateListForAllocation(
			continue
		}

-		if !allocator.MaxCapacityCheck(s) || !options.getStoreHealthOptions().readAmpIsHealthy(
+		if !allocator.MaxCapacityCheck(s) || !options.getStoreHealthOptions().storeIsHealthy(
			ctx,
			s,
-			candidateStores.CandidateL0Sublevels.Mean,
+			candidateStores.CandidateIOOverload.Mean,
		) {
			continue
		}
@@ -1049,10 +1026,10 @@ func candidateListForRemoval(
			necessary: necessary,
			fullDisk:  !allocator.MaxCapacityCheck(s),
			// When removing a replica from a store, we do not want to include
-			// high amplification in ranking stores. This would submit already
-			// high read amplification stores to additional load of moving a
+			// IO overload in ranking stores. This would subject already
+			// overloaded stores to the additional load of moving a
			// replica.
-			highReadAmp: false,
+			ioOverloaded:   false,
			diversityScore: diversityScore,
		})
	}
@@ -1366,10 +1343,10 @@ func rankedCandidateListForRebalancing(
				necessary: necessary,
				fullDisk:  fullDisk,
				// When rebalancing a replica away from a store, we do not want
-				// to include high amplification in ranking stores. This would
-				// submit already high read amplification stores to additional
-				// load of moving a replica.
-				highReadAmp: false,
+				// to include IO overload in ranking stores. This would
+				// subject already overloaded stores to the additional load of
+				// moving a replica.
+				ioOverloaded:   false,
				diversityScore: curDiversityScore,
			}
		}
@@ -1561,14 +1538,15 @@
			// above, but recompute fullDisk using special rebalanceTo logic for
			// rebalance candidates.
			s := cand.store
+			candIOOverloadScore, _ := s.Capacity.IOThreshold.Score()
			cand.fullDisk = !rebalanceToMaxCapacityCheck(s)
-			cand.l0SubLevels = int(s.Capacity.L0Sublevels)
-			cand.highReadAmp = !options.getStoreHealthOptions().rebalanceToReadAmpIsHealthy(
+			cand.ioOverloadScore = candIOOverloadScore
+			cand.ioOverloaded = !options.getStoreHealthOptions().rebalanceToStoreIsHealthy(
				ctx,
				s,
-				// We only wish to compare the read amplification to the
+				// We only wish to compare the io overload to the
				// comparable stores average and not the cluster.
-				comparable.candidateSL.CandidateL0Sublevels.Mean,
+				comparable.candidateSL.CandidateIOOverload.Mean,
			)
			cand.balanceScore = options.balanceScore(comparable.candidateSL, s.Capacity)
			cand.convergesScore = options.rebalanceToConvergesScore(comparable, s)
@@ -2093,62 +2071,68 @@ func convergesOnMean(oldVal, newVal, mean float64) bool {

// StoreHealthOptions is the scorer options for store health. It is
// used to inform scoring based on the health of a store.
type StoreHealthOptions struct { - EnforcementLevel StoreHealthEnforcement - L0SublevelThreshold int64 + EnforcementLevel StoreHealthEnforcement + IOThreshold float64 } -// readAmpIsHealthy returns true if the store read amplification does not exceed +// storeIsHealthy returns true if the store io overload does not exceed // the cluster threshold and mean, or the enforcement level does not include // excluding candidates from being allocation targets. -func (o StoreHealthOptions) readAmpIsHealthy( +func (o StoreHealthOptions) storeIsHealthy( ctx context.Context, store roachpb.StoreDescriptor, avg float64, ) bool { + ioOverloadScore, _ := store.Capacity.IOThreshold.Score() if o.EnforcementLevel == StoreHealthNoAction || - store.Capacity.L0Sublevels < o.L0SublevelThreshold { + ioOverloadScore < o.IOThreshold { return true } - // Still log an event when the L0 sub-levels exceeds the threshold, however + // Still log an event when the IO overload score exceeds the threshold, however // does not exceed the cluster average. This is enabled to avoid confusion // where candidate stores are still targets, despite exeeding the // threshold. - if float64(store.Capacity.L0Sublevels) < avg*l0SubLevelWaterMark { - log.KvDistribution.VEventf(ctx, 5, "s%d, allocate check l0 sublevels %d exceeds threshold %d, but below average: %f, action enabled %d", - store.StoreID, store.Capacity.L0Sublevels, - o.L0SublevelThreshold, avg, o.EnforcementLevel) + if ioOverloadScore < avg*IOOverloadWaterMark { + log.KvDistribution.VEventf(ctx, 5, "s%d, allocate check io overload %.2f exceeds threshold %.2f, but below average: %.2f, action enabled %d", + store.StoreID, ioOverloadScore, + o.IOThreshold, avg, o.EnforcementLevel) return true } - log.KvDistribution.VEventf(ctx, 5, "s%d, allocate check l0 sublevels %d exceeds threshold %d, above average: %f, action enabled %d", - store.StoreID, store.Capacity.L0Sublevels, - o.L0SublevelThreshold, avg, o.EnforcementLevel) + log.KvDistribution.VEventf(ctx, 5, "s%d, allocate check io overload %.2f exceeds threshold %.2f, above average: %.2f, action enabled %d", + store.StoreID, ioOverloadScore, + o.IOThreshold, avg, o.EnforcementLevel) // The store is only considered unhealthy when the enforcement level is // storeHealthBlockAll. return o.EnforcementLevel < StoreHealthBlockAll } -// rebalanceToReadAmpIsHealthy returns true if the store read amplification does -// not exceed the cluster threshold and mean, or the enforcement level does not +// rebalanceToStoreIsHealthy returns true if the store io overload does not +// exceed the cluster threshold and mean, or the enforcement level does not // include excluding candidates from being rebalancing targets. 
-func (o StoreHealthOptions) rebalanceToReadAmpIsHealthy( +func (o StoreHealthOptions) rebalanceToStoreIsHealthy( ctx context.Context, store roachpb.StoreDescriptor, avg float64, ) bool { + ioOverloadScore, _ := store.Capacity.IOThreshold.Score() if o.EnforcementLevel == StoreHealthNoAction || - store.Capacity.L0Sublevels < o.L0SublevelThreshold { + ioOverloadScore < o.IOThreshold { return true } - if float64(store.Capacity.L0Sublevels) < avg*l0SubLevelWaterMark { - log.KvDistribution.VEventf(ctx, 5, "s%d, allocate check l0 sublevels %d exceeds threshold %d, but below average watermark: %f, action enabled %d", - store.StoreID, store.Capacity.L0Sublevels, - o.L0SublevelThreshold, avg*l0SubLevelWaterMark, o.EnforcementLevel) + if ioOverloadScore < avg*IOOverloadWaterMark { + log.KvDistribution.VEventf(ctx, 5, + "s%d, allocate check io overload %.2f exceeds threshold %.2f, but "+ + "below average watermark: %.2f, action enabled %d", + store.StoreID, ioOverloadScore, o.IOThreshold, + avg*IOOverloadWaterMark, o.EnforcementLevel) return true } - log.KvDistribution.VEventf(ctx, 5, "s%d, allocate check l0 sublevels %d exceeds threshold %d, above average watermark: %f, action enabled %d", - store.StoreID, store.Capacity.L0Sublevels, - o.L0SublevelThreshold, avg*l0SubLevelWaterMark, o.EnforcementLevel) + log.KvDistribution.VEventf(ctx, 5, + "s%d, allocate check io overload %.2f exceeds threshold %.2f, above average "+ + "watermark: %.2f, action enabled %d", + store.StoreID, ioOverloadScore, o.IOThreshold, + avg*IOOverloadWaterMark, o.EnforcementLevel) // The store is only considered unhealthy when the enforcement level is // storeHealthBlockRebalanceTo or storeHealthBlockAll. @@ -2165,3 +2149,14 @@ func rebalanceToMaxCapacityCheck(store roachpb.StoreDescriptor) bool { func scoresAlmostEqual(score1, score2 float64) bool { return math.Abs(score1-score2) < epsilon } + +// TestingIOThresholdWithScore returns an IOThreshold where the score will be +// equal to the value provided. This is suitable for testing only. 
+func TestingIOThresholdWithScore(score float64) admissionpb.IOThreshold { + return admissionpb.IOThreshold{ + L0NumSubLevels: int64(20 * score), + L0NumSubLevelsThreshold: 20, + L0NumFiles: int64(1000 * score), + L0NumFilesThreshold: 1000, + } +} diff --git a/pkg/kv/kvserver/allocator/allocatorimpl/allocator_scorer_test.go b/pkg/kv/kvserver/allocator/allocatorimpl/allocator_scorer_test.go index ecf1a10fc02a..94e8a002144d 100644 --- a/pkg/kv/kvserver/allocator/allocatorimpl/allocator_scorer_test.go +++ b/pkg/kv/kvserver/allocator/allocatorimpl/allocator_scorer_test.go @@ -52,7 +52,7 @@ func TestOnlyValidAndHealthyDisk(t *testing.T) { defer log.Scope(t).Close(t) testCases := []struct { - valid, invalid, full, readAmpHigh int + valid, invalid, full, ioOverloaded int }{ {0, 0, 0, 0}, {1, 0, 0, 0}, @@ -80,8 +80,8 @@ func TestOnlyValidAndHealthyDisk(t *testing.T) { for i := 0; i < tc.full; i++ { cl = append(cl, candidate{fullDisk: true}) } - for i := 0; i < tc.readAmpHigh; i++ { - cl = append(cl, candidate{highReadAmp: true}) + for i := 0; i < tc.ioOverloaded; i++ { + cl = append(cl, candidate{ioOverloaded: true}) } sort.Sort(sort.Reverse(byScore(cl))) @@ -89,7 +89,7 @@ func TestOnlyValidAndHealthyDisk(t *testing.T) { if a, e := len(valid), tc.valid; a != e { t.Errorf("expected %d valid, actual %d", e, a) } - if a, e := len(cl)-len(valid), tc.invalid+tc.full+tc.readAmpHigh; a != e { + if a, e := len(cl)-len(valid), tc.invalid+tc.full+tc.ioOverloaded; a != e { t.Errorf("expected %d invalid, actual %d", e, a) } }) diff --git a/pkg/kv/kvserver/allocator/allocatorimpl/allocator_test.go b/pkg/kv/kvserver/allocator/allocatorimpl/allocator_test.go index 3977d8f2bb9a..57728385c733 100644 --- a/pkg/kv/kvserver/allocator/allocatorimpl/allocator_test.go +++ b/pkg/kv/kvserver/allocator/allocatorimpl/allocator_test.go @@ -344,90 +344,90 @@ var oneStoreWithFullDisk = []*roachpb.StoreDescriptor{ }, } -var oneStoreHighReadAmp = []*roachpb.StoreDescriptor{ +var oneStoreHighIOOverload = []*roachpb.StoreDescriptor{ { StoreID: 1, Node: roachpb.NodeDescriptor{NodeID: 1}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 600, L0Sublevels: MaxL0SublevelThreshold - 5}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 600, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold - 5)}, }, { StoreID: 2, Node: roachpb.NodeDescriptor{NodeID: 2}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 1800, L0Sublevels: MaxL0SublevelThreshold - 5}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 1800, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold - 5)}, }, { StoreID: 3, Node: roachpb.NodeDescriptor{NodeID: 3}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 600, L0Sublevels: MaxL0SublevelThreshold + 5}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 600, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold + 5)}, }, { StoreID: 4, Node: roachpb.NodeDescriptor{NodeID: 4}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 1200, L0Sublevels: MaxL0SublevelThreshold - 5}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 1200, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold - 5)}, }, } -var allStoresHighReadAmp = []*roachpb.StoreDescriptor{ +var allStoresHighIOOverload = []*roachpb.StoreDescriptor{ { StoreID: 1, Node: roachpb.NodeDescriptor{NodeID: 
1}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 1200, L0Sublevels: MaxL0SublevelThreshold + 1}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 1200, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold + 1)}, }, { StoreID: 2, Node: roachpb.NodeDescriptor{NodeID: 2}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 800, L0Sublevels: MaxL0SublevelThreshold + 1}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 800, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold + 1)}, }, { StoreID: 3, Node: roachpb.NodeDescriptor{NodeID: 3}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 600, L0Sublevels: MaxL0SublevelThreshold + 1}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 600, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold + 1)}, }, } -var allStoresHighReadAmpSkewed = []*roachpb.StoreDescriptor{ +var allStoresHighIOOverloadSkewed = []*roachpb.StoreDescriptor{ { StoreID: 1, Node: roachpb.NodeDescriptor{NodeID: 1}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 1200, L0Sublevels: MaxL0SublevelThreshold + 1}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 1200, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold + 1)}, }, { StoreID: 2, Node: roachpb.NodeDescriptor{NodeID: 2}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 800, L0Sublevels: MaxL0SublevelThreshold + 50}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 800, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold + 50)}, }, { StoreID: 3, Node: roachpb.NodeDescriptor{NodeID: 3}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 600, L0Sublevels: MaxL0SublevelThreshold + 55}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 600, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold + 55)}, }, } -var threeStoresHighReadAmpAscRangeCount = []*roachpb.StoreDescriptor{ +var threeStoresHighIOOverloadAscRangeCount = []*roachpb.StoreDescriptor{ { StoreID: 1, Node: roachpb.NodeDescriptor{NodeID: 1}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 100, L0Sublevels: MaxL0SublevelThreshold + 10}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 100, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold + 10)}, }, { StoreID: 2, Node: roachpb.NodeDescriptor{NodeID: 2}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 400, L0Sublevels: MaxL0SublevelThreshold + 10}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 400, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold + 10)}, }, { StoreID: 3, Node: roachpb.NodeDescriptor{NodeID: 3}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 1600, L0Sublevels: MaxL0SublevelThreshold + 10}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 1600, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold + 10)}, }, { StoreID: 4, Node: roachpb.NodeDescriptor{NodeID: 4}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 6400, L0Sublevels: MaxL0SublevelThreshold - 10}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 
6400, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold - 10)},
	},
	{
		StoreID:  5,
		Node:     roachpb.NodeDescriptor{NodeID: 5},
-		Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 25000, L0Sublevels: MaxL0SublevelThreshold - 10},
+		Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 25000, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold - 10)},
	},
}

@@ -595,7 +595,7 @@ func TestAllocatorNoAvailableDisks(t *testing.T) {
	}
}

-func TestAllocatorReadAmpCheck(t *testing.T) {
+func TestAllocatorIOOverloadCheck(t *testing.T) {
	defer leaktest.AfterTest(t)()
	defer log.Scope(t).Close(t)

@@ -615,10 +615,10 @@ func TestAllocatorReadAmpCheck(t *testing.T) {
	}
	tests := []testCase{
		{
-			name:   "ignore read amp on allocation when StoreHealthNoAction enforcement",
-			stores: allStoresHighReadAmp,
+			name:   "ignore io overload on allocation when StoreHealthNoAction enforcement",
+			stores: allStoresHighIOOverload,
			conf:   emptySpanConfig(),
-			// NB: All stores have high read amp, this should be ignored and
+			// NB: All stores have high io overload, this should be ignored and
			// allocate to the store with the lowest range count.
			expectedTargetIfAlive: roachpb.StoreID(3),
			// Recovery of a dead node can pick any valid store, not necessarily the
@@ -627,10 +627,10 @@ func TestAllocatorReadAmpCheck(t *testing.T) {
			enforcement: StoreHealthNoAction,
		},
		{
-			name: "ignore read amp on allocation when storeHealthLogOnly enforcement",
-			// NB: All stores have high read amp, this should be ignored and
+			name: "ignore io overload on allocation when storeHealthLogOnly enforcement",
+			// NB: All stores have high io overload, this should be ignored and
			// allocate to the store with the lowest range count.
-			stores: allStoresHighReadAmp,
+			stores: allStoresHighIOOverload,
			conf:   emptySpanConfig(),
			expectedTargetIfAlive: roachpb.StoreID(3),
			// Recovery of a dead node can pick any valid store, not necessarily the
@@ -639,10 +639,10 @@ func TestAllocatorReadAmpCheck(t *testing.T) {
			enforcement: StoreHealthLogOnly,
		},
		{
-			name: "ignore read amp on allocation when StoreHealthBlockRebalanceTo enforcement",
-			// NB: All stores have high read amp, this should be ignored and
+			name: "ignore io overload on allocation when StoreHealthBlockRebalanceTo enforcement",
+			// NB: All stores have high io overload, this should be ignored and
			// allocate to the store with the lowest range count.
-			stores: allStoresHighReadAmp,
+			stores: allStoresHighIOOverload,
			conf:   emptySpanConfig(),
			expectedTargetIfAlive: roachpb.StoreID(3),
			// Recovery of a dead node can pick any valid store, not necessarily the
@@ -651,9 +651,9 @@ func TestAllocatorReadAmpCheck(t *testing.T) {
			enforcement: StoreHealthBlockRebalanceTo,
		},
		{
-			name: "don't allocate to stores when all have high read amp and StoreHealthBlockAll",
-			// NB: All stores have high read amp (limit + 1), none are above the watermark, select the lowest range count.
-			stores: allStoresHighReadAmp,
+			name: "don't allocate to stores when all have high io overload and StoreHealthBlockAll",
+			// NB: All stores have high io overload (limit + 1), none are above the watermark, select the lowest range count.
+ stores: allStoresHighIOOverload, conf: emptySpanConfig(), expectedTargetIfAlive: roachpb.StoreID(3), // Recovery of a dead node can pick any valid store, not necessarily the @@ -662,19 +662,19 @@ func TestAllocatorReadAmpCheck(t *testing.T) { enforcement: StoreHealthBlockAll, }, { - name: "allocate to store below the mean when all have high read amp and StoreHealthBlockAll", - // NB: All stores have high read amp, however store 1 is below the watermark mean read amp. - stores: allStoresHighReadAmpSkewed, + name: "allocate to store below the mean when all have high io overload and StoreHealthBlockAll", + // NB: All stores have high io overload, however store 1 is below the watermark mean io overload. + stores: allStoresHighIOOverloadSkewed, conf: emptySpanConfig(), expectedTargetIfAlive: roachpb.StoreID(1), expectedTargetIfDead: roachpb.StoreID(1), enforcement: StoreHealthBlockAll, }, { - name: "allocate to lowest range count store without high read amp when StoreHealthBlockAll enforcement", - // NB: Store 1, 2 and 3 have high read amp and are above the watermark, the lowest range count (4) + name: "allocate to lowest range count store without high io overload when StoreHealthBlockAll enforcement", + // NB: Store 1, 2 and 3 have high io overload and are above the watermark, the lowest range count (4) // should be selected. - stores: threeStoresHighReadAmpAscRangeCount, + stores: threeStoresHighIOOverloadAscRangeCount, conf: emptySpanConfig(), expectedTargetIfAlive: roachpb.StoreID(4), expectedTargetIfDead: roachpb.StoreID(4), @@ -694,7 +694,7 @@ func TestAllocatorReadAmpCheck(t *testing.T) { sg.GossipStores(test.stores, t) // Enable read disk health checking in candidate exclusion. - l0SublevelsThresholdEnforce.Override(ctx, &a.st.SV, int64(test.enforcement)) + IOOverloadThresholdEnforce.Override(ctx, &a.st.SV, int64(test.enforcement)) // Allocate a voter where all replicas are alive (e.g. up-replicating a valid range). add, _, err := a.AllocateVoter( @@ -4272,19 +4272,14 @@ func TestAllocatorRebalanceNonVoters(t *testing.T) { } } -// TestAllocatorRebalanceReadAmpCheck ensures that rebalancing voters: -// (1) Respects storeHealthEnforcement setting, by ignoring L0 Sublevels in -// -// rebalancing decisions when disabled or set to log only. -// -// (2) Considers L0 sublevels when set to rebalanceOnly or allocate in -// -// conjunction with the mean. -// -// (3) Does not attempt to rebalance off of the store when read amplification -// -// is high, as this setting is only used for filtering candidates. -func TestAllocatorRebalanceReadAmpCheck(t *testing.T) { +// TestAllocatorRebalanceStoreHealthCheck ensures that rebalancing voters: +// (1) Respects storeHealthEnforcement setting, by ignoring IO overload in +// rebalancing decisions when disabled or set to log only. +// (2) Considers IO overload when set to rebalanceOnly or allocate in +// conjunction with the mean. +// (3) Does not attempt to rebalance off of the store when io overload +// is high, as this setting is only used for filtering candidates. +func TestAllocatorRebalanceStoreHealthCheck(t *testing.T) { defer leaktest.AfterTest(t)() ctx := context.Background() @@ -4299,22 +4294,22 @@ func TestAllocatorRebalanceReadAmpCheck(t *testing.T) { } tests := []testCase{ { - name: "don't move off of nodes with high read amp when StoreHealthBlockRebalanceTo", - // NB: Store 1,2, 4 have okay read amp. Store 3 has high read amp. 
- // We expect high read amplifaction to only be considered for + name: "don't move off of nodes with high io overload when StoreHealthBlockRebalanceTo", + // NB: Store 1,2, 4 have okay io overload. Store 3 has high io overload. + // We expect high io overload to only be considered for // exlcuding targets, not for triggering rebalancing. - stores: threeStoresHighReadAmpAscRangeCount, + stores: threeStoresHighIOOverloadAscRangeCount, conf: emptySpanConfig(), existingVoters: replicas(3, 1), expectNoAction: true, enforcement: StoreHealthBlockRebalanceTo, }, { - name: "don't move off of nodes with high read amp when StoreHealthBlockAll", - // NB: Store 1,2, 4 have okay read amp. Store 3 has high read amp. - // We expect high read amplifaction to only be considered for + name: "don't move off of nodes with high io overload when StoreHealthBlockAll", + // NB: Store 1,2, 4 have okay io overload. Store 3 has high io overload. + // We expect high io overload to only be considered for // exlcuding targets, not for triggering rebalancing. - stores: threeStoresHighReadAmpAscRangeCount, + stores: threeStoresHighIOOverloadAscRangeCount, conf: emptySpanConfig(), existingVoters: replicas(3, 1), expectNoAction: true, @@ -4322,11 +4317,11 @@ func TestAllocatorRebalanceReadAmpCheck(t *testing.T) { }, { name: "don't take action when enforcement is not StoreHealthNoAction", - // NB: Store 3 has L0Sublevels > threshold. Store 2 has 3 x higher + // NB: Store 3 has IOOverload > threshold. Store 2 has 3 x higher // ranges as other stores. Should move to candidate to 4, however // enforcement for rebalancing is not enabled so will pick // candidate 3 which has a lower range count. - stores: oneStoreHighReadAmp, + stores: oneStoreHighIOOverload, conf: emptySpanConfig(), existingVoters: replicas(1, 2), expectedRemoveTargets: []roachpb.StoreID{2}, @@ -4334,11 +4329,11 @@ func TestAllocatorRebalanceReadAmpCheck(t *testing.T) { enforcement: StoreHealthNoAction, }, { - name: "don't rebalance to nodes with high read amp when StoreHealthBlockRebalanceTo enforcement", - // NB: Store 3 has L0Sublevels > threshold. Store 2 has 3 x higher + name: "don't rebalance to nodes with high io overload when StoreHealthBlockRebalanceTo enforcement", + // NB: Store 3 has IOOverload > threshold. Store 2 has 3 x higher // ranges as other stores. Should move to candidate to 4, which - // doesn't have high read amp. - stores: oneStoreHighReadAmp, + // doesn't have high io overload. + stores: oneStoreHighIOOverload, conf: emptySpanConfig(), existingVoters: replicas(1, 2), expectedRemoveTargets: []roachpb.StoreID{2}, @@ -4346,11 +4341,11 @@ func TestAllocatorRebalanceReadAmpCheck(t *testing.T) { enforcement: StoreHealthBlockRebalanceTo, }, { - name: "don't rebalance to nodes with high read amp when StoreHealthBlockAll enforcement", - // NB: Store 3 has L0Sublevels > threshold. Store 2 has 3 x higher + name: "don't rebalance to nodes with high io overload when StoreHealthBlockAll enforcement", + // NB: Store 3 has IOOverload > threshold. Store 2 has 3 x higher // ranges as other stores. Should move to candidate to 4, which - // doesn't have high read amp. - stores: oneStoreHighReadAmp, + // doesn't have high io overload. + stores: oneStoreHighIOOverload, conf: emptySpanConfig(), existingVoters: replicas(1, 2), expectedRemoveTargets: []roachpb.StoreID{2}, @@ -4377,7 +4372,7 @@ func TestAllocatorRebalanceReadAmpCheck(t *testing.T) { sg.GossipStores(test.stores, t) // Enable read disk health checking in candidate exclusion. 
options := a.ScorerOptions(ctx) - options.StoreHealthOptions = StoreHealthOptions{EnforcementLevel: test.enforcement, L0SublevelThreshold: 20} + options.StoreHealthOptions = StoreHealthOptions{EnforcementLevel: test.enforcement, IOThreshold: 1} add, remove, _, ok := a.RebalanceVoter( ctx, sp, diff --git a/pkg/kv/kvserver/allocator/storepool/BUILD.bazel b/pkg/kv/kvserver/allocator/storepool/BUILD.bazel index 73c8adc436fc..1b10fddbfad8 100644 --- a/pkg/kv/kvserver/allocator/storepool/BUILD.bazel +++ b/pkg/kv/kvserver/allocator/storepool/BUILD.bazel @@ -25,6 +25,7 @@ go_library( "//pkg/util/log", "//pkg/util/metric", "//pkg/util/shuffle", + "//pkg/util/slidingwindow", "//pkg/util/stop", "//pkg/util/syncutil", "//pkg/util/timeutil", @@ -45,6 +46,7 @@ go_test( "//pkg/roachpb", "//pkg/settings/cluster", "//pkg/testutils/gossiputil", + "//pkg/util/admission/admissionpb", "//pkg/util/hlc", "//pkg/util/leaktest", "//pkg/util/log", diff --git a/pkg/kv/kvserver/allocator/storepool/store_pool.go b/pkg/kv/kvserver/allocator/storepool/store_pool.go index ba7ba32131df..190d540bd8c2 100644 --- a/pkg/kv/kvserver/allocator/storepool/store_pool.go +++ b/pkg/kv/kvserver/allocator/storepool/store_pool.go @@ -29,6 +29,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/humanizeutil" "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/cockroach/pkg/util/shuffle" + "github.com/cockroachdb/cockroach/pkg/util/slidingwindow" "github.com/cockroachdb/cockroach/pkg/util/syncutil" "github.com/cockroachdb/errors" ) @@ -41,6 +42,10 @@ const ( // TestTimeUntilStoreDeadOff is the test value for TimeUntilStoreDead that // prevents the store pool from marking stores as dead. TestTimeUntilStoreDeadOff = 24 * time.Hour + + // IOOverloadTrackedRetention is the period over which to accumulate + // statistics on IO overload within a store. + IOOverloadTrackedRetention = time.Minute * 10 ) // FailedReservationsTimeout specifies a duration during which the local @@ -198,6 +203,14 @@ type StoreDetail struct { // LastAvailable is set when it's detected that a store was available, // i.e. we got a liveness heartbeat. LastAvailable time.Time + // storeHealthTracker tracks the store health over the last + // IOOverloadTrackedRetention period, which is 10 minutes. This serves to + // exclude stores based on historical information and not just + // point-in-time information. + storeHealthTracker struct { + NumL0FilesTracker *slidingwindow.Swag + NumL0SublevelsTracker *slidingwindow.Swag + } } // isThrottled returns whether the store is currently throttled. @@ -558,26 +571,50 @@ func (sp *StorePool) statusString(nl NodeLivenessFunc) string { // storeGossipUpdate is the Gossip callback used to keep the StorePool up to date. func (sp *StorePool) storeGossipUpdate(_ string, content roachpb.Value) { var storeDesc roachpb.StoreDescriptor - // We keep copies of the capacity and storeID to pass into the - // capacityChanged callback. - var oldCapacity, curCapacity roachpb.StoreCapacity - var storeID roachpb.StoreID if err := content.GetProto(&storeDesc); err != nil { ctx := sp.AnnotateCtx(context.TODO()) log.Errorf(ctx, "%v", err) return } - storeID = storeDesc.StoreID - curCapacity = storeDesc.Capacity + + sp.storeDescriptorUpdate(storeDesc) +} + +// storeDescriptorUpdate takes a store descriptor and updates the corresponding +// details for the store in the storepool. 
+func (sp *StorePool) storeDescriptorUpdate(storeDesc roachpb.StoreDescriptor) { + // We keep copies of the capacity and storeID to pass into the + // capacityChanged callback. + var oldCapacity roachpb.StoreCapacity + storeID := storeDesc.StoreID + curCapacity := storeDesc.Capacity + + now := sp.clock.PhysicalTime() sp.DetailsMu.Lock() detail := sp.GetStoreDetailLocked(storeID) + + // We update the store descriptor to reflect the maximum IO Overload values + // that have been seen in the past tracking interval, updating the trackers + // with the new values prior to querying. This means that the most recent + // value for IOThreshold.*Threshold is always used, together with the + // tracked recent maximum IOThreshold.L0NumFiles/L0NumSubLevels to + // calculate the score. + detail.storeHealthTracker.NumL0FilesTracker.Record( + now, float64(curCapacity.IOThreshold.L0NumFiles)) + detail.storeHealthTracker.NumL0SublevelsTracker.Record( + now, float64(curCapacity.IOThreshold.L0NumSubLevels)) + maxL0NumFiles, _ := detail.storeHealthTracker.NumL0FilesTracker.Query(now) + maxL0NumSublevels, _ := detail.storeHealthTracker.NumL0SublevelsTracker.Query(now) + storeDesc.Capacity.IOThreshold.L0NumFiles = int64(maxL0NumFiles) + storeDesc.Capacity.IOThreshold.L0NumSubLevels = int64(maxL0NumSublevels) + if detail.Desc != nil { oldCapacity = detail.Desc.Capacity } detail.Desc = &storeDesc - detail.LastUpdatedTime = sp.clock.PhysicalTime() + detail.LastUpdatedTime = now sp.DetailsMu.Unlock() sp.localitiesMu.Lock() @@ -751,8 +788,16 @@ func (sp *StorePool) UpdateLocalStoresAfterLeaseTransfer( // newStoreDetail makes a new StoreDetail struct. It sets index to be -1 to // ensure that it will be processed by a queue immediately. -func newStoreDetail() *StoreDetail { - return &StoreDetail{} +func newStoreDetail(now time.Time) *StoreDetail { + sd := &StoreDetail{} + // Track the store health, specifically IO overload for the + // IOOverloadTrackedRetention period, using a sliding window with 5 + // buckets. + sd.storeHealthTracker.NumL0FilesTracker = slidingwindow.NewMaxSwag( + now, IOOverloadTrackedRetention/5, 5) + sd.storeHealthTracker.NumL0SublevelsTracker = slidingwindow.NewMaxSwag( + now, IOOverloadTrackedRetention/5, 5) + return sd } // GetStores returns information on all the stores with descriptor in the pool. @@ -781,7 +826,7 @@ func (sp *StorePool) GetStoreDetailLocked(storeID roachpb.StoreID) *StoreDetail // network). The first time this occurs, presume the store is // alive, but start the clock so it will become dead if enough // time passes without updates from gossip. - detail = newStoreDetail() + detail = newStoreDetail(sp.clock.Now().GoTime()) detail.LastUpdatedTime = sp.startTime sp.DetailsMu.StoreDetails[storeID] = detail } @@ -1063,9 +1108,9 @@ type StoreList struct { // eligible to be rebalance targets. candidateWritesPerSecond Stat - // candidateWritesPerSecond tracks L0 sub-level stats for Stores that are - // eligible to be rebalance targets. - CandidateL0Sublevels Stat + // CandidateIOOverload tracks the IO overload stats for Stores that are + // eligible to be rebalance candidates. + CandidateIOOverload Stat } // MakeStoreList constructs a new store list based on the passed in descriptors. 
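Reviewer note: the storeHealthTracker wiring above leans entirely on the Record/Query semantics of util/slidingwindow's rolling max. A self-contained toy model of those semantics (an assumption for illustration; the real implementation lives in pkg/util/slidingwindow and differs in detail, e.g. Query also returns a validity flag):

```go
// Toy model of the Record/Query behavior used above; an assumption for
// illustration, not pkg/util/slidingwindow itself.
package main

import (
	"fmt"
	"time"
)

type maxSwag struct {
	start    time.Time // start of the newest bucket
	interval time.Duration
	buckets  []float64
}

func newMaxSwag(now time.Time, interval time.Duration, n int) *maxSwag {
	return &maxSwag{start: now, interval: interval, buckets: make([]float64, n)}
}

// rotate drops buckets that have aged out of the window.
func (s *maxSwag) rotate(now time.Time) {
	for now.Sub(s.start) >= s.interval {
		s.buckets = append(s.buckets[1:], 0)
		s.start = s.start.Add(s.interval)
	}
}

// Record folds a sample into the newest bucket, keeping the maximum.
func (s *maxSwag) Record(now time.Time, v float64) {
	s.rotate(now)
	if last := len(s.buckets) - 1; v > s.buckets[last] {
		s.buckets[last] = v
	}
}

// Query returns the maximum across all retained buckets.
func (s *maxSwag) Query(now time.Time) float64 {
	s.rotate(now)
	var max float64
	for _, v := range s.buckets {
		if v > max {
			max = v
		}
	}
	return max
}

func main() {
	now := time.Now()
	retention := 10 * time.Minute // IOOverloadTrackedRetention
	sw := newMaxSwag(now, retention/5, 5)
	sw.Record(now, 40) // a spike in L0 sub-levels
	sw.Record(now.Add(time.Minute), 10)
	fmt.Println(sw.Query(now.Add(time.Minute)))      // 40: spike still retained
	fmt.Println(sw.Query(now.Add(11 * time.Minute))) // 0: spike rotated out
}
```

This is why storeDescriptorUpdate can overwrite L0NumFiles/L0NumSubLevels with the queried maxima: a brief compaction backlog keeps influencing allocation for up to ten minutes instead of vanishing on the next gossip update.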
@@ -1080,8 +1125,9 @@ func MakeStoreList(descriptors []roachpb.StoreDescriptor) StoreList {
		sl.candidateLogicalBytes.update(float64(desc.Capacity.LogicalBytes))
		sl.CandidateQueriesPerSecond.update(desc.Capacity.QueriesPerSecond)
		sl.candidateWritesPerSecond.update(desc.Capacity.WritesPerSecond)
-		sl.CandidateL0Sublevels.update(float64(desc.Capacity.L0Sublevels))
		sl.CandidateCPU.update(desc.Capacity.CPUPerSecond)
+		score, _ := desc.Capacity.IOThreshold.Score()
+		sl.CandidateIOOverload.update(score)
	}
	return sl
}
@@ -1102,12 +1148,13 @@ func (sl StoreList) String() string {
		fmt.Fprintf(&buf, " ")
	}
	for _, desc := range sl.Stores {
-		fmt.Fprintf(&buf, " %d: ranges=%d leases=%d disk-usage=%s queries-per-second=%.2f store-cpu-per-second=%s l0-sublevels=%d\n",
+		ioScore, _ := desc.Capacity.IOThreshold.Score()
+		fmt.Fprintf(&buf, " %d: ranges=%d leases=%d disk-usage=%s queries-per-second=%.2f store-cpu-per-second=%s io-overload=%.2f\n",
			desc.StoreID, desc.Capacity.RangeCount,
			desc.Capacity.LeaseCount, humanizeutil.IBytes(desc.Capacity.LogicalBytes),
			desc.Capacity.QueriesPerSecond,
			humanizeutil.Duration(time.Duration(int64(desc.Capacity.CPUPerSecond))),
-			desc.Capacity.L0Sublevels,
+			ioScore,
		)
	}
	return buf.String()
diff --git a/pkg/kv/kvserver/allocator/storepool/store_pool_test.go b/pkg/kv/kvserver/allocator/storepool/store_pool_test.go
index 51c456d5390e..7d61865638ab 100644
--- a/pkg/kv/kvserver/allocator/storepool/store_pool_test.go
+++ b/pkg/kv/kvserver/allocator/storepool/store_pool_test.go
@@ -23,6 +23,7 @@ import (
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
	"github.com/cockroachdb/cockroach/pkg/testutils/gossiputil"
+	"github.com/cockroachdb/cockroach/pkg/util/admission/admissionpb"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
	"github.com/cockroachdb/cockroach/pkg/util/log"
@@ -1029,3 +1030,84 @@ func TestNodeLivenessLivenessStatus(t *testing.T) {
		})
	}
}
+
+func TestStorePoolStoreHealthTracker(t *testing.T) {
+	defer leaktest.AfterTest(t)()
+	defer log.Scope(t).Close(t)
+	ctx := context.Background()
+	st := cluster.MakeTestingClusterSettings()
+	stopper, _, clock, sp, _ := CreateTestStorePool(ctx, st,
+		TestTimeUntilStoreDead, false, /* deterministic */
+		func() int { return 1 }, /* nodeCount */
+		livenesspb.NodeLivenessStatus_LIVE)
+	defer stopper.Stop(ctx)
+	desc := roachpb.StoreDescriptor{
+		StoreID: 1,
+		Node:    roachpb.NodeDescriptor{NodeID: 1},
+		Capacity: roachpb.StoreCapacity{
+			IOThreshold: admissionpb.IOThreshold{
+				L0NumSubLevels:          5,
+				L0NumSubLevelsThreshold: 20,
+				L0NumFiles:              5,
+				L0NumFilesThreshold:     1000,
+			},
+		},
+	}
+
+	checkIOOverloadScore := func() float64 {
+		detail, ok := sp.GetStoreDescriptor(1)
+		require.True(t, ok)
+		score, _ := detail.Capacity.IOThreshold.Score()
+		return score
+	}
+
+	// Initially the sub-level ratio is 5/20 = 0.25; expect that score.
+	sp.storeDescriptorUpdate(desc)
+	require.Equal(t, 0.25, checkIOOverloadScore())
+	// Now increase the numfiles to be greater (500/1000) so the score should
+	// update to 0.5.
+	desc.Capacity.IOThreshold.L0NumFiles = 500
+	sp.storeDescriptorUpdate(desc)
+	require.Equal(t, 0.5, checkIOOverloadScore())
+	// Despite the numfiles now decreasing again to 0, the tracked maximum
+	// should still be 500 and therefore the score unchanged.
+ desc.Capacity.IOThreshold.L0NumFiles = 0 + sp.storeDescriptorUpdate(desc) + require.Equal(t, 0.5, checkIOOverloadScore()) + // Update the threshold, which will be used with the max tracked score + // (500/500). + desc.Capacity.IOThreshold.L0NumFilesThreshold = 500 + sp.storeDescriptorUpdate(desc) + require.Equal(t, 1.0, checkIOOverloadScore()) + // Update the sublevels to be greater than the num files score. + desc.Capacity.IOThreshold.L0NumSubLevels = 40 + sp.storeDescriptorUpdate(desc) + require.Equal(t, 2.0, checkIOOverloadScore()) + // Decreasing the sublevels doesn't affect the max tracked since they are + // recorded at the same time. + desc.Capacity.IOThreshold.L0NumSubLevels = 20 + sp.storeDescriptorUpdate(desc) + require.Equal(t, 2.0, checkIOOverloadScore()) + // Advance past the retention period, so that previous values are + // rotated out. + clock.Advance(IOOverloadTrackedRetention) + desc.Capacity.IOThreshold.L0NumSubLevels = 10 + desc.Capacity.IOThreshold.L0NumSubLevelsThreshold = 20 + desc.Capacity.IOThreshold.L0NumFiles = 0 + desc.Capacity.IOThreshold.L0NumFilesThreshold = 1000 + sp.storeDescriptorUpdate(desc) + require.Equal(t, 0.5, checkIOOverloadScore()) + // Rotate 1/2 the retention period and update sublevels with a lower value, + // this shouldn't be included in the score as it is lower than the maximum + // seen in the retention period. + clock.Advance(IOOverloadTrackedRetention / 2) + desc.Capacity.IOThreshold.L0NumSubLevels = 5 + sp.storeDescriptorUpdate(desc) + require.Equal(t, 0.5, checkIOOverloadScore()) + // Rotate 1/2 the retention period again, this time the sublevels added + // above should now be the maximum (5/20). + clock.Advance(IOOverloadTrackedRetention / 2) + desc.Capacity.IOThreshold.L0NumSubLevels = 0 + sp.storeDescriptorUpdate(desc) + require.Equal(t, 0.25, checkIOOverloadScore()) +} diff --git a/pkg/kv/kvserver/metrics.go b/pkg/kv/kvserver/metrics.go index 1742838b7e03..6fe01d0d50d4 100644 --- a/pkg/kv/kvserver/metrics.go +++ b/pkg/kv/kvserver/metrics.go @@ -18,7 +18,6 @@ import ( "time" "unsafe" - "github.com/cockroachdb/cockroach/pkg/kv/kvserver/allocator/allocatorimpl" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/batcheval/result" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/rangefeed" @@ -30,9 +29,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/cockroach/pkg/util/metric" "github.com/cockroachdb/cockroach/pkg/util/metric/aggmetric" - "github.com/cockroachdb/cockroach/pkg/util/slidingwindow" "github.com/cockroachdb/cockroach/pkg/util/syncutil" - "github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/cockroachdb/pebble" "go.etcd.io/raft/v3/raftpb" ) @@ -1765,13 +1762,6 @@ type StoreMetrics struct { AverageWriteBytesPerSecond *metric.GaugeFloat64 AverageReadBytesPerSecond *metric.GaugeFloat64 AverageCPUNanosPerSecond *metric.GaugeFloat64 - // l0SublevelsWindowedMax doesn't get recorded to metrics itself, it maintains - // an ad-hoc history for gosipping information for allocator use. - l0SublevelsWindowedMax syncutil.AtomicFloat64 - l0SublevelsTracker struct { - syncutil.Mutex - swag *slidingwindow.Swag - } // Follower read metrics. 
FollowerReadsCount *metric.Counter
@@ -2593,19 +2583,6 @@ func newStoreMetrics(histogramWindow time.Duration) *StoreMetrics {
		ReplicaReadBatchWithoutInterleavingIter: metric.NewCounter(metaReplicaReadBatchWithoutInterleavingIter),
	}

-	{
-		// Track the maximum L0 sublevels seen in the last 10 minutes. backed
-		// by a sliding window, which we record and query indirectly in
-		// L0SublevelsMax. this is not exported to as metric.
-		sm.l0SublevelsTracker.swag = slidingwindow.NewMaxSwag(
-			timeutil.Now(),
-			allocatorimpl.L0SublevelInterval,
-			// 5 sliding windows, by the default interval (2 mins) will track the
-			// maximum for up to 10 minutes. Selected experimentally.
-			5,
-		)
-	}
-
	storeRegistry.AddMetricStruct(sm)
	storeRegistry.AddMetricStruct(sm.LoadSplitterMetrics)
	return sm
@@ -2684,14 +2661,6 @@ func (sm *StoreMetrics) updateEngineMetrics(m storage.Metrics) {
	sm.DiskStalled.Update(m.DiskStallCount)
	sm.SharedStorageBytesRead.Update(m.SharedStorageReadBytes)
	sm.SharedStorageBytesWritten.Update(m.SharedStorageWriteBytes)
-
-	// Update the maximum number of L0 sub-levels seen.
-	sm.l0SublevelsTracker.Lock()
-	sm.l0SublevelsTracker.swag.Record(timeutil.Now(), float64(m.Levels[0].Sublevels))
-	curMax, _ := sm.l0SublevelsTracker.swag.Query(timeutil.Now())
-	sm.l0SublevelsTracker.Unlock()
-	syncutil.StoreFloat64(&sm.l0SublevelsWindowedMax, curMax)
-
	sm.RdbL0Sublevels.Update(int64(m.Levels[0].Sublevels))
	sm.RdbL0NumFiles.Update(m.Levels[0].NumFiles)
	sm.RdbL0BytesFlushed.Update(int64(m.Levels[0].BytesFlushed))
diff --git a/pkg/kv/kvserver/store.go b/pkg/kv/kvserver/store.go
index c2be10157628..78e0038de418 100644
--- a/pkg/kv/kvserver/store.go
+++ b/pkg/kv/kvserver/store.go
@@ -2631,7 +2631,6 @@ func (s *Store) Capacity(ctx context.Context, useCached bool) (roachpb.StoreCapa
	var leaseCount int32
	var rangeCount int32
	var logicalBytes int64
-	var l0SublevelsMax int64
	var totalQueriesPerSecond float64
	var totalWritesPerSecond float64
	var totalStoreCPUTimePerSecond float64
@@ -2642,7 +2641,5 @@ func (s *Store) Capacity(ctx context.Context, useCached bool) (roachpb.StoreCapa
	rankingsByTenantAccumulator := NewTenantReplicaAccumulator()

-	// Query the current L0 sublevels and record the updated maximum to metrics.
-	l0SublevelsMax = int64(syncutil.LoadFloat64(&s.metrics.l0SublevelsWindowedMax))
	newStoreReplicaVisitor(s).Visit(func(r *Replica) bool {
		rangeCount++
		if r.OwnsValidLease(ctx, now) {
@@ -2687,7 +2685,6 @@ func (s *Store) Capacity(ctx context.Context, useCached bool) (roachpb.StoreCapa
	capacity.CPUPerSecond = totalStoreCPUTimePerSecond
	capacity.QueriesPerSecond = totalQueriesPerSecond
	capacity.WritesPerSecond = totalWritesPerSecond
-	capacity.L0Sublevels = l0SublevelsMax
	{
		s.ioThreshold.Lock()
		capacity.IOThreshold = *s.ioThreshold.t
diff --git a/pkg/kv/kvserver/store_pool_test.go b/pkg/kv/kvserver/store_pool_test.go
index d528247b0977..069839277ab9 100644
--- a/pkg/kv/kvserver/store_pool_test.go
+++ b/pkg/kv/kvserver/store_pool_test.go
@@ -25,6 +25,7 @@ import (
	"github.com/cockroachdb/cockroach/pkg/storage"
	"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
	"github.com/cockroachdb/cockroach/pkg/testutils/gossiputil"
+	"github.com/cockroachdb/cockroach/pkg/util/admission/admissionpb"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
	"github.com/cockroachdb/cockroach/pkg/util/log"
@@ -60,7 +61,12 @@ func TestStorePoolUpdateLocalStore(t *testing.T) {
				LogicalBytes:     30,
				QueriesPerSecond: 100,
				WritesPerSecond:  30,
-				L0Sublevels:      4,
+				IOThreshold: admissionpb.IOThreshold{
+					L0NumSubLevels:          5,
+					L0NumSubLevelsThreshold: 20,
+					L0NumFiles:              5,
+					L0NumFilesThreshold:     1000,
+				},
			},
		},
		{
@@ -74,7 +80,12 @@ func TestStorePoolUpdateLocalStore(t *testing.T) {
				LogicalBytes:     25,
				QueriesPerSecond: 50,
				WritesPerSecond:  25,
-				L0Sublevels:      8,
+				IOThreshold: admissionpb.IOThreshold{
+					L0NumSubLevels:          10,
+					L0NumSubLevelsThreshold: 20,
+					L0NumFiles:              10,
+					L0NumFilesThreshold:     1000,
+				},
			},
		},
	}
@@ -130,8 +141,8 @@ func TestStorePoolUpdateLocalStore(t *testing.T) {
	if expectedWPS := 30 + WPS; desc.Capacity.WritesPerSecond != expectedWPS {
		t.Errorf("expected WritesPerSecond %f, but got %f", expectedWPS, desc.Capacity.WritesPerSecond)
	}
-	if expectedL0Sublevels := int64(4); desc.Capacity.L0Sublevels != expectedL0Sublevels {
-		t.Errorf("expected L0 Sub-Levels %d, but got %d", expectedL0Sublevels, desc.Capacity.L0Sublevels)
+	if expectedNumL0Sublevels := int64(5); desc.Capacity.IOThreshold.L0NumSubLevels != expectedNumL0Sublevels {
+		t.Errorf("expected L0 Sub-Levels %d, but got %d", expectedNumL0Sublevels, desc.Capacity.IOThreshold.L0NumSubLevels)
	}

	sp.UpdateLocalStoreAfterRebalance(roachpb.StoreID(2), rangeUsageInfo, roachpb.REMOVE_VOTER)
@@ -151,8 +162,8 @@ func TestStorePoolUpdateLocalStore(t *testing.T) {
	if expectedWPS := 25 - WPS; desc.Capacity.WritesPerSecond != expectedWPS {
		t.Errorf("expected WritesPerSecond %f, but got %f", expectedWPS, desc.Capacity.WritesPerSecond)
	}
-	if expectedL0Sublevels := int64(8); desc.Capacity.L0Sublevels != expectedL0Sublevels {
-		t.Errorf("expected L0 Sub-Levels %d, but got %d", expectedL0Sublevels, desc.Capacity.L0Sublevels)
+	if expectedNumL0Sublevels := int64(10); desc.Capacity.IOThreshold.L0NumSubLevels != expectedNumL0Sublevels {
+		t.Errorf("expected L0 Sub-Levels %d, but got %d", expectedNumL0Sublevels, desc.Capacity.IOThreshold.L0NumSubLevels)
	}

	sp.UpdateLocalStoresAfterLeaseTransfer(roachpb.StoreID(1), roachpb.StoreID(2), rangeUsageInfo)
diff --git a/pkg/kv/kvserver/store_rebalancer_test.go b/pkg/kv/kvserver/store_rebalancer_test.go
index 8578fdf6c77b..72bdbaea7941 100644
--- a/pkg/kv/kvserver/store_rebalancer_test.go
+++ b/pkg/kv/kvserver/store_rebalancer_test.go
@@ -59,7 +59,8 @@ var (
			Capacity: roachpb.StoreCapacity{
QueriesPerSecond: 3000, CPUPerSecond: 3000 * float64(time.Millisecond), - L0Sublevels: allocatorimpl.MaxL0SublevelThreshold - 10, + IOThreshold: allocatorimpl.TestingIOThresholdWithScore( + allocatorimpl.DefaultIOOverloadThreshold - 10), }, }, { @@ -78,7 +79,8 @@ var ( Capacity: roachpb.StoreCapacity{ QueriesPerSecond: 2800, CPUPerSecond: 2800 * float64(time.Millisecond), - L0Sublevels: allocatorimpl.MaxL0SublevelThreshold - 5, + IOThreshold: allocatorimpl.TestingIOThresholdWithScore( + allocatorimpl.DefaultIOOverloadThreshold - 5), }, }, { @@ -97,7 +99,8 @@ var ( Capacity: roachpb.StoreCapacity{ QueriesPerSecond: 2600, CPUPerSecond: 2600 * float64(time.Millisecond), - L0Sublevels: allocatorimpl.MaxL0SublevelThreshold + 2, + IOThreshold: allocatorimpl.TestingIOThresholdWithScore( + allocatorimpl.DefaultIOOverloadThreshold + 2), }, }, { @@ -116,7 +119,8 @@ var ( Capacity: roachpb.StoreCapacity{ QueriesPerSecond: 2400, CPUPerSecond: 2400 * float64(time.Millisecond), - L0Sublevels: allocatorimpl.MaxL0SublevelThreshold - 10, + IOThreshold: allocatorimpl.TestingIOThresholdWithScore( + allocatorimpl.DefaultIOOverloadThreshold - 10), }, }, { @@ -135,7 +139,8 @@ var ( Capacity: roachpb.StoreCapacity{ QueriesPerSecond: 2200, CPUPerSecond: 2200 * float64(time.Millisecond), - L0Sublevels: allocatorimpl.MaxL0SublevelThreshold - 3, + IOThreshold: allocatorimpl.TestingIOThresholdWithScore( + allocatorimpl.DefaultIOOverloadThreshold - 3), }, }, { @@ -154,7 +159,8 @@ var ( Capacity: roachpb.StoreCapacity{ QueriesPerSecond: 2000, CPUPerSecond: 2000 * float64(time.Millisecond), - L0Sublevels: allocatorimpl.MaxL0SublevelThreshold + 2, + IOThreshold: allocatorimpl.TestingIOThresholdWithScore( + allocatorimpl.DefaultIOOverloadThreshold + 2), }, }, { @@ -173,7 +179,8 @@ var ( Capacity: roachpb.StoreCapacity{ QueriesPerSecond: 1800, CPUPerSecond: 1800 * float64(time.Millisecond), - L0Sublevels: allocatorimpl.MaxL0SublevelThreshold - 10, + IOThreshold: allocatorimpl.TestingIOThresholdWithScore( + allocatorimpl.DefaultIOOverloadThreshold - 10), }, }, { @@ -192,7 +199,8 @@ var ( Capacity: roachpb.StoreCapacity{ QueriesPerSecond: 1600, CPUPerSecond: 1600 * float64(time.Millisecond), - L0Sublevels: allocatorimpl.MaxL0SublevelThreshold - 5, + IOThreshold: allocatorimpl.TestingIOThresholdWithScore( + allocatorimpl.DefaultIOOverloadThreshold - 5), }, }, { @@ -211,7 +219,8 @@ var ( Capacity: roachpb.StoreCapacity{ QueriesPerSecond: 1400, CPUPerSecond: 1400 * float64(time.Millisecond), - L0Sublevels: allocatorimpl.MaxL0SublevelThreshold + 3, + IOThreshold: allocatorimpl.TestingIOThresholdWithScore( + allocatorimpl.DefaultIOOverloadThreshold + 3), }, }, } @@ -273,7 +282,8 @@ var ( Capacity: roachpb.StoreCapacity{ QueriesPerSecond: 1500, CPUPerSecond: 1500 * float64(time.Millisecond), - L0Sublevels: allocatorimpl.MaxL0SublevelThreshold - 15, + IOThreshold: allocatorimpl.TestingIOThresholdWithScore( + allocatorimpl.DefaultIOOverloadThreshold - 15), }, }, { @@ -282,7 +292,8 @@ var ( Capacity: roachpb.StoreCapacity{ QueriesPerSecond: 1300, CPUPerSecond: 1300 * float64(time.Millisecond), - L0Sublevels: allocatorimpl.MaxL0SublevelThreshold - 10, + IOThreshold: allocatorimpl.TestingIOThresholdWithScore( + allocatorimpl.DefaultIOOverloadThreshold - 10), }, }, { @@ -291,7 +302,8 @@ var ( Capacity: roachpb.StoreCapacity{ QueriesPerSecond: 1000, CPUPerSecond: 1000 * float64(time.Millisecond), - L0Sublevels: allocatorimpl.MaxL0SublevelThreshold - 5, + IOThreshold: allocatorimpl.TestingIOThresholdWithScore( + 
@@ -291,7 +302,8 @@ var (
 			Capacity: roachpb.StoreCapacity{
 				QueriesPerSecond: 1000,
 				CPUPerSecond:     1000 * float64(time.Millisecond),
-				L0Sublevels:      allocatorimpl.MaxL0SublevelThreshold - 5,
+				IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
+					allocatorimpl.DefaultIOOverloadThreshold - 5),
 			},
 		},
 		{
@@ -300,7 +312,8 @@ var (
 			Capacity: roachpb.StoreCapacity{
 				QueriesPerSecond: 900,
 				CPUPerSecond:     900 * float64(time.Millisecond),
-				L0Sublevels:      allocatorimpl.MaxL0SublevelThreshold + 20,
+				IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
+					allocatorimpl.DefaultIOOverloadThreshold + 20),
 			},
 		},
 		{
@@ -309,7 +322,8 @@ var (
 			Capacity: roachpb.StoreCapacity{
 				QueriesPerSecond: 500,
 				CPUPerSecond:     500 * float64(time.Millisecond),
-				L0Sublevels:      allocatorimpl.MaxL0SublevelThreshold + 25,
+				IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
+					allocatorimpl.DefaultIOOverloadThreshold + 25),
 			},
 		},
 	}
@@ -323,7 +337,8 @@ var (
 			Capacity: roachpb.StoreCapacity{
 				QueriesPerSecond: 1000,
 				CPUPerSecond:     1000 * float64(time.Millisecond),
-				L0Sublevels:      allocatorimpl.MaxL0SublevelThreshold + 100,
+				IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
+					allocatorimpl.DefaultIOOverloadThreshold + 100),
 			},
 		},
 		{
@@ -332,7 +347,8 @@ var (
 			Capacity: roachpb.StoreCapacity{
 				QueriesPerSecond: 1000,
 				CPUPerSecond:     1000 * float64(time.Millisecond),
-				L0Sublevels:      allocatorimpl.MaxL0SublevelThreshold - 15,
+				IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
+					allocatorimpl.DefaultIOOverloadThreshold - 15),
 			},
 		},
 		{
@@ -341,7 +357,8 @@ var (
 			Capacity: roachpb.StoreCapacity{
 				QueriesPerSecond: 1000,
 				CPUPerSecond:     1000 * float64(time.Millisecond),
-				L0Sublevels:      allocatorimpl.MaxL0SublevelThreshold + 100,
+				IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
+					allocatorimpl.DefaultIOOverloadThreshold + 100),
 			},
 		},
 		{
@@ -350,7 +367,8 @@ var (
 			Capacity: roachpb.StoreCapacity{
 				QueriesPerSecond: 1000,
 				CPUPerSecond:     1000 * float64(time.Millisecond),
-				L0Sublevels:      allocatorimpl.MaxL0SublevelThreshold - 15,
+				IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
+					allocatorimpl.DefaultIOOverloadThreshold - 15),
 			},
 		},
 		{
@@ -359,7 +377,8 @@ var (
 			Capacity: roachpb.StoreCapacity{
 				QueriesPerSecond: 1000,
 				CPUPerSecond:     1000 * float64(time.Millisecond),
-				L0Sublevels:      allocatorimpl.MaxL0SublevelThreshold + 100,
+				IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
+					allocatorimpl.DefaultIOOverloadThreshold + 100),
 			},
 		},
 	}
@@ -373,7 +392,8 @@ var (
 			Capacity: roachpb.StoreCapacity{
 				QueriesPerSecond: 1500,
 				CPUPerSecond:     1500 * float64(time.Millisecond),
-				L0Sublevels:      allocatorimpl.MaxL0SublevelThreshold + 1,
+				IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
+					allocatorimpl.DefaultIOOverloadThreshold + 1),
 			},
 		},
 		{
@@ -382,7 +402,8 @@ var (
 			Capacity: roachpb.StoreCapacity{
 				QueriesPerSecond: 1300,
 				CPUPerSecond:     1300 * float64(time.Millisecond),
-				L0Sublevels:      allocatorimpl.MaxL0SublevelThreshold + 1,
+				IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
+					allocatorimpl.DefaultIOOverloadThreshold + 1),
 			},
 		},
 		{
@@ -391,7 +412,8 @@ var (
 			Capacity: roachpb.StoreCapacity{
 				QueriesPerSecond: 1000,
 				CPUPerSecond:     1000 * float64(time.Millisecond),
-				L0Sublevels:      allocatorimpl.MaxL0SublevelThreshold + 1,
+				IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
+					allocatorimpl.DefaultIOOverloadThreshold + 1),
 			},
 		},
 		{
@@ -400,7 +422,8 @@ var (
 			Capacity: roachpb.StoreCapacity{
 				QueriesPerSecond: 900,
 				CPUPerSecond:     900 * float64(time.Millisecond),
-				L0Sublevels:      allocatorimpl.MaxL0SublevelThreshold + 1,
+				IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
+					allocatorimpl.DefaultIOOverloadThreshold + 1),
 			},
 		},
 		{
@@ -409,7 +432,8 @@ var (
 			Capacity: roachpb.StoreCapacity{
 				QueriesPerSecond: 500,
 				CPUPerSecond:     500 * float64(time.Millisecond),
-				L0Sublevels:      allocatorimpl.MaxL0SublevelThreshold + 1,
+				IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
+					allocatorimpl.DefaultIOOverloadThreshold + 1),
 			},
 		},
 	}
@@ -423,7 +447,8 @@ var (
 			Capacity: roachpb.StoreCapacity{
 				QueriesPerSecond: 1500,
 				CPUPerSecond:     1500 * float64(time.Millisecond),
-				L0Sublevels:      allocatorimpl.MaxL0SublevelThreshold + 1,
+				IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
+					allocatorimpl.DefaultIOOverloadThreshold + 1),
 			},
 		},
 		{
@@ -432,7 +457,8 @@ var (
 			Capacity: roachpb.StoreCapacity{
 				QueriesPerSecond: 1300,
 				CPUPerSecond:     1300 * float64(time.Millisecond),
-				L0Sublevels:      allocatorimpl.MaxL0SublevelThreshold + 10,
+				IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
+					allocatorimpl.DefaultIOOverloadThreshold + 10),
 			},
 		},
 		{
@@ -441,7 +467,8 @@ var (
 			Capacity: roachpb.StoreCapacity{
 				QueriesPerSecond: 1000,
 				CPUPerSecond:     1000 * float64(time.Millisecond),
-				L0Sublevels:      allocatorimpl.MaxL0SublevelThreshold + 50,
+				IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
+					allocatorimpl.DefaultIOOverloadThreshold + 50),
 			},
 		},
 		{
@@ -450,7 +477,8 @@ var (
 			Capacity: roachpb.StoreCapacity{
 				QueriesPerSecond: 900,
 				CPUPerSecond:     900 * float64(time.Millisecond),
-				L0Sublevels:      allocatorimpl.MaxL0SublevelThreshold + 100,
+				IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
+					allocatorimpl.DefaultIOOverloadThreshold + 100),
 			},
 		},
 		{
@@ -459,7 +487,8 @@ var (
 			Capacity: roachpb.StoreCapacity{
 				QueriesPerSecond: 500,
 				CPUPerSecond:     500 * float64(time.Millisecond),
-				L0Sublevels:      allocatorimpl.MaxL0SublevelThreshold + 100,
+				IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
+					allocatorimpl.DefaultIOOverloadThreshold + 100),
 			},
 		},
 	}
@@ -1783,7 +1812,7 @@ func TestStoreRebalancerReadAmpCheck(t *testing.T) {
 	require.Greater(t, len(rctx.hottestRanges), 0)
 
 	rctx.options.StoreHealthOptions = allocatorimpl.StoreHealthOptions{
-		EnforcementLevel: test.enforcement, L0SublevelThreshold: allocatorimpl.MaxL0SublevelThreshold}
+		EnforcementLevel: test.enforcement, IOThreshold: allocatorimpl.DefaultIOOverloadThreshold}
 	rctx.options.LoadThreshold = allocatorimpl.WithAllDims(0.05)
 
 	_, targetVoters, _ := sr.chooseRangeToRebalance(ctx, rctx)
diff --git a/pkg/roachpb/metadata.go b/pkg/roachpb/metadata.go
index 68482a57bc68..ad5952a2d077 100644
--- a/pkg/roachpb/metadata.go
+++ b/pkg/roachpb/metadata.go
@@ -508,11 +508,11 @@ func (sc StoreCapacity) String() string {
 func (sc StoreCapacity) SafeFormat(w redact.SafePrinter, _ rune) {
 	w.Printf("disk (capacity=%s, available=%s, used=%s, logicalBytes=%s), "+
 		"ranges=%d, leases=%d, queries=%.2f, writes=%.2f, "+
-		"l0Sublevels=%d, ioThreshold={%v} bytesPerReplica={%s}, writesPerReplica={%s}",
+		"ioThreshold={%v} bytesPerReplica={%s}, writesPerReplica={%s}",
 		humanizeutil.IBytes(sc.Capacity), humanizeutil.IBytes(sc.Available),
 		humanizeutil.IBytes(sc.Used), humanizeutil.IBytes(sc.LogicalBytes),
 		sc.RangeCount, sc.LeaseCount, sc.QueriesPerSecond, sc.WritesPerSecond,
-		sc.L0Sublevels, sc.IOThreshold, sc.BytesPerReplica, sc.WritesPerReplica)
+		sc.IOThreshold, sc.BytesPerReplica, sc.WritesPerReplica)
 }
 
 // FractionUsed computes the fraction of storage capacity that is in use.
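Reviewer note: TestStoreRebalancerReadAmpCheck above drives StoreHealthOptions with an EnforcementLevel and the new float IOThreshold. Below is a simplified, self-contained sketch of the gate this configures, assuming a candidate is treated as unhealthy only when its score exceeds both the absolute threshold and a watermark fraction above the mean of comparable candidates (1.1 here); the real scorer logic is more involved, and all names are illustrative.

package main

import "fmt"

// enforcement mirrors the StoreHealthEnforcement levels exercised by the
// test above (hypothetical local copy for illustration).
type enforcement int

const (
	noAction enforcement = iota
	logOnly
	blockRebalanceTo
	blockAll
)

// rebalanceToHealthy reports whether a candidate store may receive a
// rebalanced replica, given its IO overload score, the mean score of the
// comparable candidates, the configured threshold, and the enforcement level.
func rebalanceToHealthy(score, mean, threshold float64, level enforcement) bool {
	const waterMark = 1.1 // fraction above the mean before a store is suspect
	if score < threshold || score < mean*waterMark {
		return true // healthy enough, regardless of enforcement
	}
	// Unhealthy: the enforcement level decides whether to act on it.
	switch level {
	case blockRebalanceTo, blockAll:
		return false
	default: // noAction and logOnly never exclude a candidate
		return true
	}
}

func main() {
	fmt.Println(rebalanceToHealthy(0.9, 0.5, 0.8, blockRebalanceTo)) // false
	fmt.Println(rebalanceToHealthy(0.9, 0.5, 0.8, logOnly))          // true
}

Under blockRebalanceTo only rebalance targets are filtered; blockAll would also filter allocation targets, while noAction and logOnly leave candidates untouched, matching the enforcement levels the allocator scorer defines.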
diff --git a/pkg/roachpb/metadata.proto b/pkg/roachpb/metadata.proto
index a3ab852f85f2..5e3828625b18 100644
--- a/pkg/roachpb/metadata.proto
+++ b/pkg/roachpb/metadata.proto
@@ -318,11 +318,6 @@ message StoreCapacity {
   // by ranges in the store. The stat is tracked over the time period defined
   // in storage/replica_stats.go, which as of July 2018 is 30 minutes.
   optional double writes_per_second = 5 [(gogoproto.nullable) = false];
-  // l0_sublevels tracks the current number of l0 sublevels in the store.
-  // TODO(kvoli): Use of this field will need to be version-gated, to avoid
-  // instances where overlapping node-binary versions within a cluster result
-  // in this this field missing.
-  optional int64 l0_sublevels = 12 [(gogoproto.nullable) = false];
   // cpu_per_second tracks the average store cpu use (ns) per second.
   // This is the sum of all the replica's cpu time on this store, which is
   // tracked in replica stats.
@@ -334,6 +329,7 @@ message StoreCapacity {
   optional Percentiles bytes_per_replica = 6 [(gogoproto.nullable) = false];
   optional Percentiles writes_per_replica = 7 [(gogoproto.nullable) = false];
   reserved 11;
+  reserved 12;
 }
 
 // StoreProperties contains configuration and OS-level details for a storage device.