diff --git a/pkg/storage/allocator_scorer.go b/pkg/storage/allocator_scorer.go
index d88f582aed5d..d7aee00b0861 100644
--- a/pkg/storage/allocator_scorer.go
+++ b/pkg/storage/allocator_scorer.go
@@ -217,6 +217,12 @@ func (c candidate) compare(o candidate) float64 {
 		}
 		return -(1 + (o.balanceScore.totalScore()-c.balanceScore.totalScore())/10.0)
 	}
+	// Sometimes we compare partially filled-in candidates, e.g. those with
+	// diversity scores filled in but not balance scores or range counts. This
+	// avoids returning NaN in such cases.
+	if c.rangeCount == 0 && o.rangeCount == 0 {
+		return 0
+	}
 	if c.rangeCount < o.rangeCount {
 		return float64(o.rangeCount-c.rangeCount) / float64(o.rangeCount)
 	}
@@ -303,8 +309,7 @@ func (cl candidateList) best() candidateList {
 	for i := 1; i < len(cl); i++ {
 		if cl[i].necessary != cl[0].necessary ||
 			cl[i].diversityScore < cl[0].diversityScore ||
-			(cl[i].diversityScore == cl[len(cl)-1].diversityScore &&
-				cl[i].convergesScore < cl[len(cl)-1].convergesScore) {
+			cl[i].convergesScore < cl[0].convergesScore {
 			return cl[:i]
 		}
 	}
@@ -623,7 +628,7 @@ func rebalanceCandidates(
 			comparableCands = append(comparableCands, cand)
 			if !needRebalanceFrom && !needRebalanceTo && existing.cand.less(cand) {
 				needRebalanceTo = true
-				log.VEventf(ctx, 2, "s%dshould-rebalance(necessary/diversity=s%d): oldNecessary:%t, newNecessary:%t, oldDiversity:%f, newDiversity:%f, locality:%q",
+				log.VEventf(ctx, 2, "s%d: should-rebalance(necessary/diversity=s%d): oldNecessary:%t, newNecessary:%t, oldDiversity:%f, newDiversity:%f, locality:%q",
					existing.cand.store.StoreID, store.StoreID, existing.cand.necessary, cand.necessary,
 					existing.cand.diversityScore, cand.diversityScore, store.Node.Locality)
 			}
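Aside: the NaN guard in the first hunk is easiest to see in isolation. The standalone sketch below is hypothetical -- compareRangeCounts and its symmetric negative branch are invented for illustration, and the real candidate.compare also weighs diversity and balance scores before range counts:

package main

import (
	"fmt"
	"math"
)

// compareRangeCounts mirrors the shape of the range-count comparison in
// candidate.compare. Without the zero guard, two candidates with unset
// (zero) range counts fall through to a 0.0/0.0 division, which is NaN.
func compareRangeCounts(c, o int) float64 {
	if c == 0 && o == 0 {
		return 0 // partially filled-in candidates tie rather than produce NaN
	}
	if c < o {
		return float64(o-c) / float64(o)
	}
	return -float64(c-o) / float64(c) // assumed symmetric branch, not shown in the diff
}

func main() {
	var zero float64
	fmt.Println(math.IsNaN(zero / zero))   // true: the failure mode being avoided
	fmt.Println(compareRangeCounts(0, 0))  // 0
	fmt.Println(compareRangeCounts(5, 10)) // 0.5: fewer ranges scores higher
}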
diff --git a/pkg/storage/allocator_test.go b/pkg/storage/allocator_test.go
index d86473f99d51..938a05137f9f 100644
--- a/pkg/storage/allocator_test.go
+++ b/pkg/storage/allocator_test.go
@@ -1163,6 +1163,217 @@ func TestAllocatorTransferLeaseTarget(t *testing.T) {
 	}
 }
 
+func TestAllocatorRebalanceDifferentLocalitySizes(t *testing.T) {
+	defer leaktest.AfterTest(t)()
+
+	stopper, g, _, a, _ := createTestAllocator( /* deterministic */ false)
+	ctx := context.Background()
+	defer stopper.Stop(ctx)
+
+	// Set up 8 stores -- 2 in each of the first 2 localities, and 4 in the third.
+	// Because of the desire for diversity, the nodes in the small localities end
+	// up being fuller than the nodes in the large locality. In the past this has
+	// caused an over-eagerness to rebalance to nodes in the large locality, and
+	// not enough willingness to rebalance within the small localities. This test
+	// verifies that we compare fairly amongst stores that will give the range
+	// an optimal diversity score, not considering the fullness of those that
+	// will make for worse diversity.
+	stores := []*roachpb.StoreDescriptor{
+		{
+			StoreID: 1,
+			Node: roachpb.NodeDescriptor{
+				NodeID: roachpb.NodeID(1),
+				Locality: roachpb.Locality{
+					Tiers: []roachpb.Tier{{Key: "locale", Value: "1"}},
+				},
+			},
+			Capacity: testStoreCapacitySetup(50, 50),
+		},
+		{
+			StoreID: 2,
+			Node: roachpb.NodeDescriptor{
+				NodeID: roachpb.NodeID(2),
+				Locality: roachpb.Locality{
+					Tiers: []roachpb.Tier{{Key: "locale", Value: "1"}},
+				},
+			},
+			Capacity: testStoreCapacitySetup(40, 60),
+		},
+		{
+			StoreID: 3,
+			Node: roachpb.NodeDescriptor{
+				NodeID: roachpb.NodeID(3),
+				Locality: roachpb.Locality{
+					Tiers: []roachpb.Tier{{Key: "locale", Value: "2"}},
+				},
+			},
+			Capacity: testStoreCapacitySetup(50, 50),
+		},
+		{
+			StoreID: 4,
+			Node: roachpb.NodeDescriptor{
+				NodeID: roachpb.NodeID(4),
+				Locality: roachpb.Locality{
+					Tiers: []roachpb.Tier{{Key: "locale", Value: "2"}},
+				},
+			},
+			Capacity: testStoreCapacitySetup(40, 60),
+		},
+		{
+			StoreID: 5,
+			Node: roachpb.NodeDescriptor{
+				NodeID: roachpb.NodeID(5),
+				Locality: roachpb.Locality{
+					Tiers: []roachpb.Tier{{Key: "locale", Value: "3"}},
+				},
+			},
+			Capacity: testStoreCapacitySetup(90, 10),
+		},
+		{
+			StoreID: 6,
+			Node: roachpb.NodeDescriptor{
+				NodeID: roachpb.NodeID(6),
+				Locality: roachpb.Locality{
+					Tiers: []roachpb.Tier{{Key: "locale", Value: "3"}},
+				},
+			},
+			Capacity: testStoreCapacitySetup(80, 20),
+		},
+		{
+			StoreID: 7,
+			Node: roachpb.NodeDescriptor{
+				NodeID: roachpb.NodeID(7),
+				Locality: roachpb.Locality{
+					Tiers: []roachpb.Tier{{Key: "locale", Value: "3"}},
+				},
+			},
+			Capacity: testStoreCapacitySetup(80, 20),
+		},
+		{
+			StoreID: 8,
+			Node: roachpb.NodeDescriptor{
+				NodeID: roachpb.NodeID(8),
+				Locality: roachpb.Locality{
+					Tiers: []roachpb.Tier{{Key: "locale", Value: "3"}},
+				},
+			},
+			Capacity: testStoreCapacitySetup(80, 20),
+		},
+	}
+
+	sg := gossiputil.NewStoreGossiper(g)
+	sg.GossipStores(stores, t)
+
+	replicas := func(storeIDs ...roachpb.StoreID) []roachpb.ReplicaDescriptor {
+		res := make([]roachpb.ReplicaDescriptor, len(storeIDs))
+		for i, storeID := range storeIDs {
+			res[i].NodeID = roachpb.NodeID(storeID)
+			res[i].StoreID = storeID
+		}
+		return res
+	}
+
+	testCases := []struct {
+		existing []roachpb.ReplicaDescriptor
+		expected roachpb.StoreID // 0 if no rebalance is expected
+	}{
+		{replicas(1, 3, 5), 0},
+		{replicas(2, 3, 5), 1},
+		{replicas(1, 4, 5), 3},
+		{replicas(1, 3, 6), 5},
+		{replicas(1, 5, 6), 3},
+		{replicas(2, 5, 6), 3},
+		{replicas(3, 5, 6), 1},
+		{replicas(4, 5, 6), 1},
+	}
+
+	for i, tc := range testCases {
+		result, details := a.RebalanceTarget(
+			ctx,
+			nil, /* constraints */
+			nil, /* raftStatus */
+			testRangeInfo(tc.existing, firstRange),
+			storeFilterThrottled,
+			false,
+		)
+		var resultID roachpb.StoreID
+		if result != nil {
+			resultID = result.StoreID
+		}
+		if resultID != tc.expected {
+			t.Errorf("%d: RebalanceTarget(%v) expected s%d; got %v: %s", i, tc.existing, tc.expected, result, details)
+		}
+	}
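Aside: the cases above encode the rule from the test's opening comment: fullness only breaks ties amongst stores that tie for the best achievable diversity. A hypothetical, pared-down sketch of that selection rule (the flat candidate fields and pickTarget are inventions for illustration, not the allocator's actual types or scoring):

package main

import "fmt"

type candidate struct {
	storeID        int
	diversityScore float64 // higher is better
	rangeCount     int     // lower is emptier
}

// pickTarget first finds the best achievable diversity score, then compares
// fullness only within that tier. A much emptier store with worse diversity
// never wins on emptiness alone. Assumes cands is non-empty.
func pickTarget(cands []candidate) candidate {
	best := cands[0].diversityScore
	for _, c := range cands[1:] {
		if c.diversityScore > best {
			best = c.diversityScore
		}
	}
	var winner *candidate
	for i := range cands {
		c := &cands[i]
		if c.diversityScore != best {
			continue // worse diversity: fullness is irrelevant
		}
		if winner == nil || c.rangeCount < winner.rangeCount {
			winner = c
		}
	}
	return *winner
}

func main() {
	// s6 is far emptier, but it would worsen diversity; s2 wins.
	fmt.Println(pickTarget([]candidate{
		{storeID: 2, diversityScore: 1.0, rangeCount: 60},
		{storeID: 6, diversityScore: 0.5, rangeCount: 20},
	}).storeID) // 2
}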
+
+	// Add a couple of less-full nodes in a fourth locality, then run a few more tests:
+	stores = append(stores, &roachpb.StoreDescriptor{
+		StoreID: 9,
+		Node: roachpb.NodeDescriptor{
+			NodeID: roachpb.NodeID(9),
+			Locality: roachpb.Locality{
+				Tiers: []roachpb.Tier{{Key: "locale", Value: "4"}},
+			},
+		},
+		Capacity: testStoreCapacitySetup(70, 30),
+	})
+	stores = append(stores, &roachpb.StoreDescriptor{
+		StoreID: 10,
+		Node: roachpb.NodeDescriptor{
+			NodeID: roachpb.NodeID(10),
+			Locality: roachpb.Locality{
+				Tiers: []roachpb.Tier{{Key: "locale", Value: "4"}},
+			},
+		},
+		Capacity: testStoreCapacitySetup(60, 40),
+	})
+
+	sg.GossipStores(stores, t)
+
+	testCases = []struct {
+		existing []roachpb.ReplicaDescriptor
+		expected roachpb.StoreID // 0 if no rebalance is expected
+	}{
+		{replicas(1, 3, 5), 9},
+		{replicas(2, 3, 5), 9},
+		{replicas(1, 4, 5), 9},
+		{replicas(1, 3, 6), 9},
+		{replicas(1, 5, 6), 9},
+		{replicas(2, 5, 6), 9},
+		{replicas(3, 5, 6), 9},
+		{replicas(4, 5, 6), 9},
+		{replicas(5, 6, 7), 9},
+		{replicas(1, 5, 9), 0},
+		{replicas(3, 5, 9), 0},
+		{replicas(1, 3, 9), 5},
+		{replicas(1, 3, 10), 5},
+		// This last case is a bit more interesting - the difference in range count
+		// between s10 and s9 is significant enough to motivate a rebalance if they
+		// were the only two valid options, but they're both considered underfull
+		// relative to the other equally valid placement options (s3 and s4), so
+		// the system doesn't consider it helpful to rebalance between them. It'd
+		// prefer to move replicas onto both s9 and s10 from other stores.
+		{replicas(1, 5, 10), 0},
+	}
+
+	for i, tc := range testCases {
+		result, details := a.RebalanceTarget(
+			ctx,
+			nil, /* constraints */
+			nil, /* raftStatus */
+			testRangeInfo(tc.existing, firstRange),
+			storeFilterThrottled,
+			false,
+		)
+		var resultID roachpb.StoreID
+		if result != nil {
+			resultID = result.StoreID
+		}
+		if resultID != tc.expected {
+			t.Errorf("%d: RebalanceTarget(%v) expected s%d; got %v: %s", i, tc.existing, tc.expected, result, details)
+		}
+	}
+}
+
 func TestAllocatorTransferLeaseTargetMultiStore(t *testing.T) {
 	defer leaktest.AfterTest(t)()
 	stopper, g, _, a, _ := createTestAllocator( /* deterministic */ true)
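Aside: with the best() fix from the second hunk applied, the selection reads as "take the leading run of top-tier candidates from a best-first sorted list". A self-contained sketch over a pared-down candidate type (the short-list early return and the best-first sort precondition are assumptions not shown in the diff; the loop body mirrors the fixed code):

package main

import "fmt"

type candidate struct {
	necessary      bool
	diversityScore float64
	convergesScore float64
}

type candidateList []candidate

// best returns the prefix of candidates that match the front of the list on
// necessity, diversity, and convergence. The old code compared convergence
// against cl[len(cl)-1], the worst candidate, so the prefix could extend past
// the true top tier; comparing against cl[0] cuts at the first drop-off.
func (cl candidateList) best() candidateList {
	if len(cl) <= 1 {
		return cl
	}
	for i := 1; i < len(cl); i++ {
		if cl[i].necessary != cl[0].necessary ||
			cl[i].diversityScore < cl[0].diversityScore ||
			cl[i].convergesScore < cl[0].convergesScore {
			return cl[:i]
		}
	}
	return cl
}

func main() {
	cl := candidateList{
		{necessary: true, diversityScore: 1.0, convergesScore: 1},
		{necessary: true, diversityScore: 1.0, convergesScore: 1},
		{necessary: true, diversityScore: 1.0, convergesScore: 0}, // cut here
	}
	fmt.Println(len(cl.best())) // 2
}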