storage: Avoid replica thrashing when localities are different sizes
Fixes cockroachdb#20241

Release note (bug fix): avoid rebalance thrashing when localities have
very different numbers of nodes
a-robinson committed Dec 15, 2017
1 parent afa184d commit ff35505
Showing 4 changed files with 114 additions and 8 deletions.
42 changes: 39 additions & 3 deletions pkg/storage/allocator.go
@@ -523,7 +523,9 @@ func (a Allocator) RebalanceTarget(
if target == nil {
return nil, ""
}
// We could make a simulation here to verify whether we'll remove the target we'll rebalance to.

// Determine whether we'll just remove the target immediately after adding it.
// If we would, we don't want to actually do the rebalance.
for len(candidates) > 0 {
newReplica := roachpb.ReplicaDescriptor{
NodeID: target.store.Node.NodeID,
@@ -542,7 +544,7 @@ func (a Allocator) RebalanceTarget(
raftStatus, desc.Replicas, newReplica.ReplicaID)
}

removeReplica, _, err := a.simulateRemoveTarget(
removeReplica, details, err := a.simulateRemoveTarget(
ctx,
target.store.StoreID,
constraints,
@@ -553,7 +555,7 @@ func (a Allocator) RebalanceTarget(
log.Warningf(ctx, "simulating RemoveTarget failed: %s", err)
return nil, ""
}
if removeReplica.StoreID != target.store.StoreID {
if shouldRebalanceToFrom(ctx, *target, removeReplica, existingCandidates, details, options) {
break
}
// Remove the considered target from our modified RangeDescriptor and from
@@ -577,6 +579,40 @@ func (a Allocator) RebalanceTarget(
return &target.store, string(details)
}

func shouldRebalanceToFrom(
ctx context.Context,
to candidate,
from roachpb.ReplicaDescriptor,
existingCandidates candidateList,
removeDetails string,
options scorerOptions,
) bool {
if from.StoreID == to.store.StoreID {
log.VEventf(ctx, 2, "not rebalancing to s%d because we'd immediately remove it: %s",
to.store.StoreID, removeDetails)
return false
}

// It's possible that we initially decided to rebalance based on comparing
// rebalance candidates in one locality to an existing replica in another
// locality (e.g. if one locality has many more nodes than another). This can
// make for unnecessary rebalances and even thrashing, so do a more direct
// comparison here of the replicas we'll actually be adding and removing.
for _, removeCandidate := range existingCandidates {
if removeCandidate.store.StoreID == from.StoreID {
if removeCandidate.worthRebalancingTo(to, options) {
return true
}
log.VEventf(ctx, 2, "not rebalancing to %s because it isn't an improvement over "+
"what we'd remove after adding it: %s", to, removeCandidate)
return false
}
}
// If the code reaches this point, from must be a non-live store, so let the
// rebalance happen.
return true
}

func (a *Allocator) scorerOptions(disableStatsBasedRebalancing bool) scorerOptions {
return scorerOptions{
deterministic: a.storePool.deterministic,
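The guard added to RebalanceTarget above is easier to read outside the diff. The following is a minimal, self-contained sketch of the idea only, not the real allocator code: candidateSketch, simulateRemoval, and worthSwap are hypothetical stand-ins for the allocator's candidate type, simulateRemoveTarget, and candidate.worthRebalancingTo.

```go
package main

import "fmt"

// candidateSketch is a hypothetical stand-in for the allocator's candidate type.
type candidateSketch struct {
	storeID    int
	rangeCount int
}

// simulateRemoval stands in for simulateRemoveTarget: given the existing
// replicas plus the tentative target, return the replica the allocator would
// remove next. Here it naively picks the store with the most ranges.
func simulateRemoval(existing []candidateSketch, target candidateSketch) candidateSketch {
	worst := target
	for _, c := range existing {
		if c.rangeCount > worst.rangeCount {
			worst = c
		}
	}
	return worst
}

// shouldRebalance mirrors the shape of the new guard: refuse the rebalance if
// the simulated removal would evict the store we just added, and otherwise
// require the add/remove pair to be a genuine improvement (worthSwap plays the
// role of candidate.worthRebalancingTo).
func shouldRebalance(
	existing []candidateSketch, target candidateSketch,
	worthSwap func(from, to candidateSketch) bool,
) bool {
	removed := simulateRemoval(existing, target)
	if removed.storeID == target.storeID {
		return false // adding the target would just be undone
	}
	return worthSwap(removed, target)
}

func main() {
	existing := []candidateSketch{{storeID: 1, rangeCount: 48}, {storeID: 2, rangeCount: 30}}
	target := candidateSketch{storeID: 3, rangeCount: 10}
	worthSwap := func(from, to candidateSketch) bool { return from.rangeCount-to.rangeCount > 2 }
	fmt.Println(shouldRebalance(existing, target, worthSwap)) // true: s1 would be removed, not s3
}
```

The point of the two-step check is that the add and remove decisions are made against the same set of replicas, so a rebalance that would immediately be undone, or that only looked attractive because of a cross-locality comparison, is rejected up front.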
32 changes: 32 additions & 0 deletions pkg/storage/allocator_scorer.go
@@ -136,6 +136,38 @@ func (c candidate) less(o candidate) bool {
return c.rangeCount > o.rangeCount
}

// worthRebalancingTo returns true if o is enough of a better fit for some
// range than c is that it's worth rebalancing from c to o.
func (c candidate) worthRebalancingTo(o candidate, options scorerOptions) bool {
if !o.valid {
return false
}
if !c.valid {
return true
}
if c.constraintScore != o.constraintScore {
return c.constraintScore < o.constraintScore
}
if c.convergesScore != o.convergesScore {
return c.convergesScore < o.convergesScore
}
// You might intuitively think that we should require o's balanceScore to
// be considerably higher than c's balanceScore, but that will effectively
// rule out rebalancing in clusters where one locality is much larger or
// smaller than the others, since all the stores in that locality will tend
// to have either a maximal or minimal balanceScore.
if c.balanceScore.totalScore() != o.balanceScore.totalScore() {
return c.balanceScore.totalScore() < o.balanceScore.totalScore()
}
// Instead, just require a gap between their number of ranges. This isn't
// great, particularly for stats-based rebalancing, but it only breaks
// balanceScore ties and it's a workable stop-gap on the way to something
// like #20751.
avgRangeCount := float64(c.rangeCount+o.rangeCount) / 2.0
overfullThreshold := math.Max(overfullRangeThreshold(options, avgRangeCount), avgRangeCount+1.5)
return float64(c.rangeCount) > overfullThreshold
}

type candidateList []candidate

func (cl candidateList) String() string {
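To make the final range-count comparison in worthRebalancingTo concrete, here is a small, self-contained sketch with worked numbers. overfullThresholdSketch is a hypothetical stand-in for overfullRangeThreshold, whose formula is not part of this diff; a 5% margin over the mean is assumed purely for illustration.

```go
package main

import (
	"fmt"
	"math"
)

// overfullThresholdSketch is a hypothetical stand-in for overfullRangeThreshold;
// the 5% margin over the mean is an assumption made only for this example.
func overfullThresholdSketch(mean float64) float64 {
	return mean * 1.05
}

// worthByRangeCount reproduces only the final comparison in worthRebalancingTo:
// the store we would remove from must exceed both the overfull threshold and
// the average range count plus 1.5.
func worthByRangeCount(fromRanges, toRanges int32) bool {
	avg := float64(fromRanges+toRanges) / 2.0
	threshold := math.Max(overfullThresholdSketch(avg), avg+1.5)
	return float64(fromRanges) > threshold
}

func main() {
	// A 2-range gap is not enough: avg = 47, threshold = max(49.35, 48.5) = 49.35.
	fmt.Println(worthByRangeCount(48, 46)) // false
	// An 8-range gap clears the bar: avg = 44, threshold = max(46.2, 45.5) = 46.2.
	fmt.Println(worthByRangeCount(48, 40)) // true
}
```

Under those assumptions, a store carrying 48 ranges is not worth draining toward one carrying 46, but it is worth draining toward one carrying 40, which matches the stated intent: break balanceScore ties only when the range-count gap is meaningful.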
10 changes: 5 additions & 5 deletions pkg/storage/allocator_scorer_test.go
@@ -343,7 +343,7 @@ var (
testStoreEurope = roachpb.StoreID(5) // eur-a-1-5

testStores = map[roachpb.StoreID]roachpb.StoreDescriptor{
testStoreUSa15: roachpb.StoreDescriptor{
testStoreUSa15: {
StoreID: testStoreUSa15,
Attrs: roachpb.Attributes{
Attrs: []string{"a"},
@@ -356,7 +356,7 @@ var (
},
Capacity: testStoreCapacitySetup(1, 99),
},
testStoreUSa15Dupe: roachpb.StoreDescriptor{
testStoreUSa15Dupe: {
StoreID: testStoreUSa15Dupe,
Attrs: roachpb.Attributes{
Attrs: []string{"a"},
@@ -369,7 +369,7 @@ var (
},
Capacity: testStoreCapacitySetup(1, 99),
},
testStoreUSa1: roachpb.StoreDescriptor{
testStoreUSa1: {
StoreID: testStoreUSa1,
Attrs: roachpb.Attributes{
Attrs: []string{"a", "b"},
@@ -382,7 +382,7 @@ var (
},
Capacity: testStoreCapacitySetup(100, 0),
},
testStoreUSb: roachpb.StoreDescriptor{
testStoreUSb: {
StoreID: testStoreUSb,
Attrs: roachpb.Attributes{
Attrs: []string{"a", "b", "c"},
@@ -395,7 +395,7 @@ var (
},
Capacity: testStoreCapacitySetup(50, 50),
},
testStoreEurope: roachpb.StoreDescriptor{
testStoreEurope: {
StoreID: testStoreEurope,
Node: roachpb.NodeDescriptor{
NodeID: roachpb.NodeID(testStoreEurope),
38 changes: 38 additions & 0 deletions pkg/storage/allocator_test.go
@@ -921,6 +921,44 @@ func TestAllocatorRebalanceTarget(t *testing.T) {
t.Fatalf("expected no rebalance, but got target s%d; details: %s", result.StoreID, details)
}
}

// Set up a second round of testing where the other two stores in the big
// locality actually have fewer replicas, but enough that it still isn't
// worth rebalancing to them.
stores[1].Capacity.RangeCount = 46
stores[2].Capacity.RangeCount = 46
sg.GossipStores(stores, t)
for i := 0; i < 10; i++ {
result, details := a.RebalanceTarget(
context.Background(),
config.Constraints{},
status,
rangeInfo,
storeFilterThrottled,
false,
)
if result != nil {
t.Fatalf("expected no rebalance, but got target s%d; details: %s", result.StoreID, details)
}
}

// Make sure rebalancing does happen if we drop just a little further down.
stores[1].Capacity.RangeCount = 45
sg.GossipStores(stores, t)
for i := 0; i < 10; i++ {
result, details := a.RebalanceTarget(
context.Background(),
config.Constraints{},
status,
rangeInfo,
storeFilterThrottled,
false,
)
if result == nil || result.StoreID != stores[1].StoreID {
t.Fatalf("expected rebalance to s%d, but got %v; details: %s",
stores[1].StoreID, result, details)
}
}
}

func TestAllocatorRebalanceDeadNodes(t *testing.T) {
