diff --git a/br/pkg/lightning/common/BUILD.bazel b/br/pkg/lightning/common/BUILD.bazel index 97cdd59e09d93..4283b55552825 100644 --- a/br/pkg/lightning/common/BUILD.bazel +++ b/br/pkg/lightning/common/BUILD.bazel @@ -136,6 +136,7 @@ go_test( "@com_github_stretchr_testify//require", "@org_golang_google_grpc//codes", "@org_golang_google_grpc//status", + "@org_golang_x_time//rate", "@org_uber_go_goleak//:goleak", "@org_uber_go_multierr//:multierr", ], diff --git a/br/pkg/lightning/common/retry.go b/br/pkg/lightning/common/retry.go index 15fb4e78cb023..8fba7eadc9a67 100644 --- a/br/pkg/lightning/common/retry.go +++ b/br/pkg/lightning/common/retry.go @@ -40,6 +40,8 @@ var retryableErrorMsgList = []string{ // this error happens on when distsql.Checksum calls TiKV // see https://github.com/pingcap/tidb/blob/2c3d4f1ae418881a95686e8b93d4237f2e76eec6/store/copr/coprocessor.go#L941 "coprocessor task terminated due to exceeding the deadline", + // fix https://github.com/pingcap/tidb/issues/51383: treat errors from rate limiter waits as retryable + "rate: wait", } func isRetryableFromErrorMessage(err error) bool { diff --git a/br/pkg/lightning/common/retry_test.go b/br/pkg/lightning/common/retry_test.go index 974493736ab16..c3bc7bd6c4ff8 100644 --- a/br/pkg/lightning/common/retry_test.go +++ b/br/pkg/lightning/common/retry_test.go @@ -21,6 +21,7 @@ import ( "net" "net/url" "testing" + "time" "github.com/go-sql-driver/mysql" "github.com/pingcap/errors" @@ -28,6 +29,7 @@ import ( drivererr "github.com/pingcap/tidb/pkg/store/driver/error" "github.com/stretchr/testify/require" "go.uber.org/multierr" + "golang.org/x/time/rate" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" ) @@ -117,4 +119,13 @@ func TestIsRetryableError(t *testing.T) { require.False(t, IsRetryableError(multierr.Combine(context.Canceled, &net.DNSError{IsTimeout: true}))) require.True(t, IsRetryableError(errors.New("other error: Coprocessor task terminated due to exceeding the deadline"))) + + // error from limiter + l := 
rate.NewLimiter(rate.Limit(1), 1) + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + defer cancel() + // context has 1 second timeout, can't wait for 10 seconds. NOTE(review): n=10 is larger than the limiter's burst of 1, so WaitN presumably fails on the burst check rather than on the context deadline; confirm which rate: Wait error this test is meant to exercise + err = l.WaitN(ctx, 10) + require.Error(t, err) + require.True(t, IsRetryableError(err)) }