Skip to content

Commit

Permalink
upgrade: retry errors when dialing instances
Browse files: browse the repository at this point in the history
Release note: None
Epic: none
Closes cockroachdb#108860
  • Loading branch information
healthy-pod committed Sep 6, 2023
1 parent ad4e53f commit 1f6e734
Showing 1 changed file with 14 additions and 2 deletions.
16 changes: 14 additions & 2 deletions pkg/upgrade/upgradecluster/tenant_cluster.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/retry"
"github.com/cockroachdb/errors"
"github.com/cockroachdb/redact"
"google.golang.org/grpc"
)

// TenantCluster represents the set of sql nodes running in a secondary tenant.
Expand Down Expand Up @@ -228,8 +229,19 @@ func (t *TenantCluster) ForEveryNodeOrServer(
grp.GoCtx(func(ctx context.Context) error {
defer alloc.Release()

conn, err := t.Dialer.Dial(ctx, roachpb.NodeID(instance.InstanceID), rpc.DefaultClass)
if err != nil {
var conn *grpc.ClientConn
retryOpts := retry.Options{
InitialBackoff: 0,
MaxRetries: 2,
MaxBackoff: 10 * time.Millisecond,
}
// This retry was added to benefit our tests (not users) by reducing the chance of
// test flakes due to network issues.
if err := retry.WithMaxAttempts(ctx, retryOpts, retryOpts.MaxRetries+1, func() error {
var err error
conn, err = t.Dialer.Dial(ctx, roachpb.NodeID(instance.InstanceID), rpc.DefaultClass)
return err
}); err != nil {
if errors.HasType(err, (*netutil.InitialHeartbeatFailedError)(nil)) {
if errors.Is(err, rpc.VersionCompatError) {
return errors.WithHint(errors.Newf("upgrade failed due to active SQL servers with incompatible binary version(s)"),
Expand Down

0 comments on commit 1f6e734

Please sign in to comment.