Skip to content

Commit

Permalink
gossip: Don't check if the network should be tightened so often
Browse files Browse the repository at this point in the history
Touches cockroachdb#51838. See the thread there for more detail, but the gist is
that tightenNetwork gets called hundreds of times per second (taking a
full mutex lock each time) on large clusters, even when nothing needs
tightening.

Release note (performance improvement): Avoid wasteful contention on the
gossip mutex caused by checking if the network needs tightening hundreds
of times per second.
  • Loading branch information
a-robinson committed Sep 22, 2022
1 parent 87bd307 commit 3637fe0
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 0 deletions.
15 changes: 15 additions & 0 deletions pkg/gossip/gossip.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,10 @@ const (
// StoreTTL is time-to-live for store-related info.
StoreTTL = 2 * StoresInterval

// gossipTightenInterval is how long to wait between tightenNetwork checks if
// we didn't need to tighten the last time we checked.
gossipTightenInterval = time.Second

unknownNodeID roachpb.NodeID = 0
)

Expand Down Expand Up @@ -1462,12 +1466,23 @@ func jitteredInterval(interval time.Duration) time.Duration {
func (g *Gossip) tightenNetwork(ctx context.Context) {
g.mu.Lock()
defer g.mu.Unlock()

now := time.Now()
if now.Before(g.mu.lastTighten.Add(gossipTightenInterval)) {
// It hasn't been long since we last tightened the network, so skip it.
return
}
g.mu.lastTighten = now

if g.outgoing.hasSpace() {
distantNodeID, distantHops := g.mu.is.mostDistant(g.hasOutgoingLocked)
log.VEventf(ctx, 2, "distantHops: %d from %d", distantHops, distantNodeID)
if distantHops <= maxHops {
return
}
// If tightening is needed, then reset lastTighten to avoid restricting how
// soon we try again.
g.mu.lastTighten = time.Time{}
if nodeAddr, err := g.getNodeIDAddress(distantNodeID, true /* locked */); err != nil || nodeAddr == nil {
log.Health.Errorf(ctx, "unable to get address for n%d: %s", distantNodeID, err)
} else {
Expand Down
8 changes: 8 additions & 0 deletions pkg/gossip/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@ type server struct {
// composable. There's an open proposal to add them:
// https://github.com/golang/go/issues/16620
ready chan struct{}
// The time at which we last checked if the network should be tightened.
// Used to avoid burning CPU and mutex cycles on checking too frequently.
lastTighten time.Time
}
tighten chan struct{} // Sent on when we may want to tighten the network

Expand Down Expand Up @@ -344,6 +347,11 @@ func (s *server) gossipReceiver(
}

func (s *server) maybeTightenLocked() {
now := time.Now()
if now.Before(s.mu.lastTighten.Add(gossipTightenInterval)) {
// It hasn't been long since we last tightened the network, so skip it.
return
}
select {
case s.tighten <- struct{}{}:
default:
Expand Down

0 comments on commit 3637fe0

Please sign in to comment.