Lower the log level of retryable TiKV region errors and cap excluded backoff sleep.

* internal/locate/region_request.go: the `ServerIsBusy` and `DataIsNotReady`
  retry messages are logged at Debug instead of Warn.
* internal/retry/backoff.go: BackoffWithCfgAndMaxSleep now also gives up when the
  excluded sleep has reached both the per-config limit stored in isSleepExcluded
  and b.maxSleep; the error message reports which of the two checks tripped.
* internal/retry/config.go: isSleepExcluded maps a config name to its excluded
  sleep limit (600000 for BoTiKVServerBusy, commented as 10min);
  setBackoffExcluded overrides an existing entry and is meant for tests.
* internal/retry/backoff_test.go: TestBackoffWithMaxExcludedExceed covers the new
  limit and restores the overridden package-level limit when it returns, so the
  1ms override does not leak into other tests of the package.

NOTE(review): indentation inside the hunks was reconstructed (gofmt tabs); if a
context line does not match the tree, apply with `git apply --ignore-whitespace`.
NOTE(review): the `index` blob IDs are carried over unchanged; the post-image ID
of backoff_test.go no longer corresponds to the content below.

diff --git a/internal/locate/region_request.go b/internal/locate/region_request.go
index 2a50c3a8ef..ea4a62af13 100644
--- a/internal/locate/region_request.go
+++ b/internal/locate/region_request.go
@@ -1791,7 +1791,7 @@ func (s *RegionRequestSender) onRegionError(bo *retry.Backoffer, ctx *RPCContext
 				return true, nil
 			}
 		}
-		logutil.Logger(bo.GetCtx()).Warn(
+		logutil.Logger(bo.GetCtx()).Debug(
 			"tikv reports `ServerIsBusy` retry later",
 			zap.String("reason", regionErr.GetServerIsBusy().GetReason()),
 			zap.Stringer("ctx", ctx))
@@ -1895,7 +1895,7 @@ func (s *RegionRequestSender) onRegionError(bo *retry.Backoffer, ctx *RPCContext
 	// This error is specific to stale read and the target replica is randomly selected. If the request is sent
 	// to the leader, the data must be ready, so we don't backoff here.
 	if regionErr.GetDataIsNotReady() != nil {
-		logutil.BgLogger().Warn("tikv reports `DataIsNotReady` retry later",
+		logutil.BgLogger().Debug("tikv reports `DataIsNotReady` retry later",
 			zap.Uint64("store-id", ctx.Store.storeID),
 			zap.Uint64("peer-id", regionErr.GetDataIsNotReady().GetPeerId()),
 			zap.Uint64("region-id", regionErr.GetDataIsNotReady().GetRegionId()),
diff --git a/internal/retry/backoff.go b/internal/retry/backoff.go
index bdefc79930..03ba5e38e1 100644
--- a/internal/retry/backoff.go
+++ b/internal/retry/backoff.go
@@ -143,7 +143,13 @@ func (b *Backoffer) BackoffWithCfgAndMaxSleep(cfg *Config, maxSleepMs int, err e
 	if b.noop {
 		return err
 	}
-	if b.maxSleep > 0 && (b.totalSleep-b.excludedSleep) >= b.maxSleep {
+	maxBackoffTimeExceeded := (b.totalSleep - b.excludedSleep) >= b.maxSleep
+	maxExcludedTimeExceeded := false
+	if maxLimit, ok := isSleepExcluded[cfg.name]; ok {
+		maxExcludedTimeExceeded = b.excludedSleep >= maxLimit && b.excludedSleep >= b.maxSleep
+	}
+	maxTimeExceeded := maxBackoffTimeExceeded || maxExcludedTimeExceeded
+	if b.maxSleep > 0 && maxTimeExceeded {
 		longestSleepCfg, longestSleepTime := b.longestSleepCfg()
 		errMsg := fmt.Sprintf("%s backoffer.maxSleep %dms is exceeded, errors:", cfg.String(), b.maxSleep)
 		for i, err := range b.errors {
@@ -163,7 +169,8 @@ func (b *Backoffer) BackoffWithCfgAndMaxSleep(cfg *Config, maxSleepMs int, err e
 			backoffDetail.WriteString(":")
 			backoffDetail.WriteString(strconv.Itoa(times))
 		}
-		errMsg += fmt.Sprintf("\ntotal-backoff-times: %v, backoff-detail: %v", totalTimes, backoffDetail.String())
+		errMsg += fmt.Sprintf("\ntotal-backoff-times: %v, backoff-detail: %v, maxBackoffTimeExceeded: %v, maxExcludedTimeExceeded: %v",
+			totalTimes, backoffDetail.String(), maxBackoffTimeExceeded, maxExcludedTimeExceeded)
 		returnedErr := err
 		if longestSleepCfg != nil {
 			errMsg += fmt.Sprintf("\nlongest sleep type: %s, time: %dms", longestSleepCfg.String(), longestSleepTime)
diff --git a/internal/retry/backoff_test.go b/internal/retry/backoff_test.go
index a4c8dfe429..1668c5d5d9 100644
--- a/internal/retry/backoff_test.go
+++ b/internal/retry/backoff_test.go
@@ -95,3 +95,18 @@ func TestBackoffDeepCopy(t *testing.T) {
 		assert.ErrorIs(t, err, BoMaxDataNotReady.err)
 	}
 }
+
+func TestBackoffWithMaxExcludedExceed(t *testing.T) {
+	// The limit lives in a package-level map, so put the previous value back when the test returns.
+	prevLimit := isSleepExcluded[BoTiKVServerBusy.name]
+	defer setBackoffExcluded(BoTiKVServerBusy.name, prevLimit)
+	setBackoffExcluded(BoTiKVServerBusy.name, 1)
+	b := NewBackofferWithVars(context.TODO(), 1, nil)
+	err := b.Backoff(BoTiKVServerBusy, errors.New("server is busy"))
+	assert.Nil(t, err)
+
+	// The total excluded sleep has now reached the limit set above, so this backoff must return an error.
+	err = b.Backoff(BoTiKVServerBusy, errors.New("server is busy"))
+	assert.NotNil(t, err)
+	assert.Greater(t, b.excludedSleep, b.maxSleep)
+}
diff --git a/internal/retry/config.go b/internal/retry/config.go
index 19632d9f40..16032edda3 100644
--- a/internal/retry/config.go
+++ b/internal/retry/config.go
@@ -129,11 +129,18 @@ var (
 	BoTxnLockFast = NewConfig(txnLockFastName, &metrics.BackoffHistogramLockFast, NewBackoffFnCfg(2, 3000, EqualJitter), tikverr.ErrResolveLockTimeout)
 )
 
-var isSleepExcluded = map[string]struct{}{
-	BoTiKVServerBusy.name: {},
+var isSleepExcluded = map[string]int{
+	BoTiKVServerBusy.name: 600000, // The max excluded limit is 10min.
 	// add BoTiFlashServerBusy if appropriate
 }
 
+// setBackoffExcluded is used for test only.
+func setBackoffExcluded(name string, maxVal int) {
+	if _, ok := isSleepExcluded[name]; ok {
+		isSleepExcluded[name] = maxVal
+	}
+}
+
 const (
 	// NoJitter makes the backoff sequence strict exponential.
 	NoJitter = 1 + iota