Skip to content

Commit

Permalink
storage: updating local Store before gossip should not crash
Browse files Browse the repository at this point in the history
Updating a target store write stats immediately after rebalancing was
recently addressed in cockroachdb#18425. With that change, if `updateLocalStoreAfterRebalance`
is called before the `StorePool` had seen the `StoreDescriptor` in gossip,
it will trigger a NPE. This change fixes this by making the update a
no-op if the descriptor has yet to be seen in gossip.
  • Loading branch information
nvanbenschoten authored and a-robinson committed Dec 20, 2017
1 parent 6ef56ae commit 4893268
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 5 deletions.
10 changes: 8 additions & 2 deletions pkg/storage/store_pool.go
Original file line number Diff line number Diff line change
Expand Up @@ -327,14 +327,20 @@ func (sp *StorePool) deadReplicasGossipUpdate(_ string, content roachpb.Value) {
detail.deadReplicas = deadReplicas
}

// updateLocalStoreAfterRebalance is used to update local copy of target store
// after we make an rebalance immediately.
// updateLocalStoreAfterRebalance is used to update the local copy of the
// target store immediately after a rebalance.
func (sp *StorePool) updateLocalStoreAfterRebalance(
storeID roachpb.StoreID, repl *Replica, changeType roachpb.ReplicaChangeType,
) {
sp.detailsMu.Lock()
defer sp.detailsMu.Unlock()
detail := *sp.getStoreDetailLocked(storeID)
if detail.desc == nil {
// We don't have this store yet (this is normal when we're
// starting up and don't have full information from the gossip
// network). We can't update the local store at this time.
return
}
switch changeType {
case roachpb.ADD_REPLICA:
detail.desc.Capacity.LogicalBytes += repl.GetMVCCStats().Total()
Expand Down
45 changes: 42 additions & 3 deletions pkg/storage/store_pool_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,6 @@ import (
"testing"
"time"

"github.com/cockroachdb/cockroach/pkg/storage/engine"
"github.com/cockroachdb/cockroach/pkg/storage/engine/enginepb"

"github.com/pkg/errors"
"golang.org/x/net/context"

Expand All @@ -33,6 +30,8 @@ import (
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/rpc"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/storage/engine"
"github.com/cockroachdb/cockroach/pkg/storage/engine/enginepb"
"github.com/cockroachdb/cockroach/pkg/testutils/gossiputil"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
Expand Down Expand Up @@ -405,6 +404,46 @@ func TestStorePoolUpdateLocalStore(t *testing.T) {
}
}

// TestStorePoolUpdateLocalStoreBeforeGossip verifies that an attempt to update
// the local copy of store before that store has been gossiped will be a no-op.
func TestStorePoolUpdateLocalStoreBeforeGossip(t *testing.T) {
defer leaktest.AfterTest(t)()
manual := hlc.NewManualClock(123)
clock := hlc.NewClock(manual.UnixNano, time.Nanosecond)
stopper, _, _, sp, _ := createTestStorePool(
TestTimeUntilStoreDead, false /* deterministic */, nodeStatusDead)
defer stopper.Stop(context.TODO())

// Create store.
node := roachpb.NodeDescriptor{NodeID: roachpb.NodeID(1)}
eng := engine.NewInMem(roachpb.Attributes{}, 1<<20)
stopper.AddCloser(eng)
cfg := TestStoreConfig(clock)
cfg.Transport = NewDummyRaftTransport(cfg.Settings)
store := NewStore(cfg, eng, &node)

// Create replica.
rg := roachpb.RangeDescriptor{
RangeID: 1,
StartKey: roachpb.RKey([]byte("a")),
EndKey: roachpb.RKey([]byte("b")),
}
replica, err := NewReplica(&rg, store, roachpb.ReplicaID(0))
if err != nil {
t.Fatalf("make replica error : %s", err)
}

// Update StorePool, which should be a no-op.
storeID := roachpb.StoreID(1)
if _, ok := sp.getStoreDescriptor(storeID); ok {
t.Fatalf("StoreDescriptor not gossiped, should not be found")
}
sp.updateLocalStoreAfterRebalance(storeID, replica, roachpb.ADD_REPLICA)
if _, ok := sp.getStoreDescriptor(storeID); ok {
t.Fatalf("StoreDescriptor still not gossiped, should not be found")
}
}

func TestStorePoolGetStoreDetails(t *testing.T) {
defer leaktest.AfterTest(t)()
stopper, g, _, sp, _ := createTestStorePool(
Expand Down

0 comments on commit 4893268

Please sign in to comment.