Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added new error type that will not be retried by frontend #5772

Merged
merged 15 commits into from
Mar 20, 2024
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 69 additions & 5 deletions .gen/go/shared/shared.go

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion cmd/server/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ require (
github.com/startreedata/pinot-client-go v0.2.0 // latest release supports pinot v0.12.0 which is also internal version
github.com/stretchr/testify v1.8.3
github.com/uber-go/tally v3.3.15+incompatible // indirect
github.com/uber/cadence-idl v0.0.0-20240212223805-34b4519b2709
github.com/uber/cadence-idl v0.0.0-20240318101217-afd574441210
github.com/uber/ringpop-go v0.8.5 // indirect
github.com/uber/tchannel-go v1.22.2 // indirect
github.com/urfave/cli v1.22.4
Expand Down
4 changes: 2 additions & 2 deletions cmd/server/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -397,8 +397,8 @@ github.com/uber-go/tally v3.3.12+incompatible/go.mod h1:YDTIBxdXyOU/sCWilKB4bgyu
github.com/uber-go/tally v3.3.15+incompatible h1:9hLSgNBP28CjIaDmAuRTq9qV+UZY+9PcvAkXO4nNMwg=
github.com/uber-go/tally v3.3.15+incompatible/go.mod h1:YDTIBxdXyOU/sCWilKB4bgyufu1cEi0jdVnRdxvjnmU=
github.com/uber/cadence-idl v0.0.0-20211111101836-d6b70b60eb8c/go.mod h1:oyUK7GCNCRHCCyWyzifSzXpVrRYVBbAMHAzF5dXiKws=
github.com/uber/cadence-idl v0.0.0-20240212223805-34b4519b2709 h1:1u+kMB6p8P9fjK6jk3QHAl8PxLyNjO9/TMXoPOVr1O8=
github.com/uber/cadence-idl v0.0.0-20240212223805-34b4519b2709/go.mod h1:oyUK7GCNCRHCCyWyzifSzXpVrRYVBbAMHAzF5dXiKws=
github.com/uber/cadence-idl v0.0.0-20240318101217-afd574441210 h1:iuPF2QAZM90Jpx0s+2LnxLN+ElGsxB2JnXKGNLc18+w=
github.com/uber/cadence-idl v0.0.0-20240318101217-afd574441210/go.mod h1:oyUK7GCNCRHCCyWyzifSzXpVrRYVBbAMHAzF5dXiKws=
github.com/uber/jaeger-client-go v2.22.1+incompatible h1:NHcubEkVbahf9t3p75TOCR83gdUHXjRJvjoBh1yACsM=
github.com/uber/jaeger-client-go v2.22.1+incompatible/go.mod h1:WVhlPFC8FDjOFMMWRy2pZqQJSXxYSwNYOkTr/Z6d3Kk=
github.com/uber/jaeger-lib v2.2.0+incompatible h1:MxZXOiR2JuoANZ3J6DE/U0kSFv/eJ/GfSYVCjK7dyaw=
Expand Down
5 changes: 5 additions & 0 deletions common/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -264,3 +264,8 @@ const (
// DefaultHistoryMaxAutoResetPoints is the default maximum number for auto reset points
DefaultHistoryMaxAutoResetPoints = 20
)

const (
// WorkflowIDRateLimitReason is the reason set in ServiceBusyError when workflow ID rate limit is exceeded
WorkflowIDRateLimitReason = "external-workflow-id-rate-limit"
)
6 changes: 3 additions & 3 deletions common/resource/resourceImpl.go
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ func New(
frontendClient := retryable.NewFrontendClient(
frontendRawClient,
common.CreateFrontendServiceRetryPolicy(),
common.IsServiceTransientError,
serviceConfig.IsErrorRetryableFunction,
)

matchingRawClient, err := clientBean.GetMatchingClient(domainCache.GetDomainName)
Expand All @@ -233,14 +233,14 @@ func New(
matchingClient := retryable.NewMatchingClient(
matchingRawClient,
common.CreateMatchingServiceRetryPolicy(),
common.IsServiceTransientError,
serviceConfig.IsErrorRetryableFunction,
)

historyRawClient := clientBean.GetHistoryClient()
historyClient := retryable.NewHistoryClient(
historyRawClient,
common.CreateHistoryServiceRetryPolicy(),
common.IsServiceTransientError,
serviceConfig.IsErrorRetryableFunction,
)

historyArchiverBootstrapContainer := &archiver.HistoryBootstrapContainer{
Expand Down
7 changes: 6 additions & 1 deletion common/service/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@

package service

import "github.com/uber/cadence/common/dynamicconfig"
import (
"github.com/uber/cadence/common/backoff"
"github.com/uber/cadence/common/dynamicconfig"
)

type (
// Config is a subset of the service dynamic config for single service
Expand Down Expand Up @@ -50,5 +53,7 @@ type (
ValidSearchAttributes dynamicconfig.MapPropertyFn `yaml:"-" json:"-"`
// deprecated: never read from, all ES reads and writes erroneously use PersistenceMaxQPS
ESVisibilityListMaxQPS dynamicconfig.IntPropertyFnWithDomainFilter `yaml:"-" json:"-"`

IsErrorRetryableFunction backoff.IsRetryable
}
)
7 changes: 5 additions & 2 deletions common/types/mapper/proto/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,9 @@ func FromError(err error) error {
case *types.LimitExceededError:
return protobuf.NewError(yarpcerrors.CodeResourceExhausted, e.Message, protobuf.WithErrorDetails(&apiv1.LimitExceededError{}))
case *types.ServiceBusyError:
return protobuf.NewError(yarpcerrors.CodeResourceExhausted, e.Message, protobuf.WithErrorDetails(&apiv1.ServiceBusyError{}))
return protobuf.NewError(yarpcerrors.CodeResourceExhausted, e.Message, protobuf.WithErrorDetails(&apiv1.ServiceBusyError{
Reason: e.Reason,
}))
case *types.RemoteSyncMatchedError:
return protobuf.NewError(yarpcerrors.CodeUnavailable, e.Message, protobuf.WithErrorDetails(&sharedv1.RemoteSyncMatchedError{}))
case *types.StickyWorkerUnavailableError:
Expand Down Expand Up @@ -218,14 +220,15 @@ func ToError(err error) error {
}
}
case yarpcerrors.CodeResourceExhausted:
switch getErrorDetails(err).(type) {
switch details := getErrorDetails(err).(type) {
case *apiv1.LimitExceededError:
return &types.LimitExceededError{
Message: status.Message(),
}
case *apiv1.ServiceBusyError:
return &types.ServiceBusyError{
Message: status.Message(),
Reason: details.Reason,
}
}
case yarpcerrors.CodeUnavailable:
Expand Down
2 changes: 2 additions & 0 deletions common/types/mapper/thrift/shared.go
Original file line number Diff line number Diff line change
Expand Up @@ -4871,6 +4871,7 @@ func FromServiceBusyError(t *types.ServiceBusyError) *shared.ServiceBusyError {
}
return &shared.ServiceBusyError{
Message: t.Message,
Reason: &t.Reason,
}
}

Expand All @@ -4881,6 +4882,7 @@ func ToServiceBusyError(t *shared.ServiceBusyError) *types.ServiceBusyError {
}
return &types.ServiceBusyError{
Message: t.Message,
Reason: t.GetReason(),
jakobht marked this conversation as resolved.
Show resolved Hide resolved
}
}

Expand Down
1 change: 1 addition & 0 deletions common/types/shared.go
Original file line number Diff line number Diff line change
Expand Up @@ -5678,6 +5678,7 @@ func (v *SearchAttributes) GetIndexedFields() (o map[string][]byte) {
// ServiceBusyError is an internal type (TBD...)
type ServiceBusyError struct {
Message string `json:"message,required"`
Reason string `json:"reason,omitempty"`
}

// SignalExternalWorkflowExecutionDecisionAttributes is an internal type (TBD...)
Expand Down
2 changes: 2 additions & 0 deletions common/types/testdata/domain.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ const (

FailoverVersion1 = 301
FailoverVersion2 = 302

ErrorReason = "ErrorReason"
)

var (
Expand Down
1 change: 1 addition & 0 deletions common/types/testdata/error.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ var (
}
ServiceBusyError = types.ServiceBusyError{
Message: ErrorMessage,
Reason: ErrorReason,
}
ShardOwnershipLostError = types.ShardOwnershipLostError{
Message: ErrorMessage,
Expand Down
11 changes: 11 additions & 0 deletions common/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,17 @@ func ToServiceTransientError(err error) error {
return yarpcerrors.Newf(yarpcerrors.CodeUnavailable, err.Error())
}

// HistoryRetryFuncFrontendExceptions checks if an error should be retried
// in a call from frontend
func FrontendRetry(err error) bool {
var sbErr *types.ServiceBusyError
if errors.As(err, &sbErr) {
// If the service busy error is due to workflow id rate limiting, proxy it to the caller
return sbErr.Reason != WorkflowIDRateLimitReason
}
return IsServiceTransientError(err)
}

// IsServiceTransientError checks if the error is a transient error.
func IsServiceTransientError(err error) bool {

Expand Down
35 changes: 35 additions & 0 deletions common/util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,41 @@ func TestIsServiceTransientError(t *testing.T) {

}

func TestFrontendRetry(t *testing.T) {
tests := []struct {
name string
err error
want bool
}{
{
name: "ServiceBusyError due to workflow id rate limiting",
err: &types.ServiceBusyError{Reason: WorkflowIDRateLimitReason},
want: false,
},
{
name: "ServiceBusyError not due to workflow id rate limiting",
err: &types.ServiceBusyError{Reason: "some other reason"},
want: true,
},
{
name: "ServiceBusyError empty reason",
err: &types.ServiceBusyError{Reason: ""},
want: true,
},
{
name: "Non-ServiceBusyError",
err: errors.New("some random error"),
want: false,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
require.Equal(t, tt.want, FrontendRetry(tt.err))
})
}
}

func TestIsContextTimeoutError(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond)
defer cancel()
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ require (
github.com/startreedata/pinot-client-go v0.2.0 // latest release supports pinot v0.12.0 which is also internal version
github.com/stretchr/testify v1.8.3
github.com/uber-go/tally v3.3.15+incompatible
github.com/uber/cadence-idl v0.0.0-20240212223805-34b4519b2709
github.com/uber/cadence-idl v0.0.0-20240318101217-afd574441210
github.com/uber/ringpop-go v0.8.5
github.com/uber/tchannel-go v1.22.2
github.com/urfave/cli v1.22.4
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -444,8 +444,8 @@ github.com/uber-go/tally v3.3.12+incompatible/go.mod h1:YDTIBxdXyOU/sCWilKB4bgyu
github.com/uber-go/tally v3.3.15+incompatible h1:9hLSgNBP28CjIaDmAuRTq9qV+UZY+9PcvAkXO4nNMwg=
github.com/uber-go/tally v3.3.15+incompatible/go.mod h1:YDTIBxdXyOU/sCWilKB4bgyufu1cEi0jdVnRdxvjnmU=
github.com/uber/cadence-idl v0.0.0-20211111101836-d6b70b60eb8c/go.mod h1:oyUK7GCNCRHCCyWyzifSzXpVrRYVBbAMHAzF5dXiKws=
github.com/uber/cadence-idl v0.0.0-20240212223805-34b4519b2709 h1:1u+kMB6p8P9fjK6jk3QHAl8PxLyNjO9/TMXoPOVr1O8=
github.com/uber/cadence-idl v0.0.0-20240212223805-34b4519b2709/go.mod h1:oyUK7GCNCRHCCyWyzifSzXpVrRYVBbAMHAzF5dXiKws=
github.com/uber/cadence-idl v0.0.0-20240318101217-afd574441210 h1:iuPF2QAZM90Jpx0s+2LnxLN+ElGsxB2JnXKGNLc18+w=
github.com/uber/cadence-idl v0.0.0-20240318101217-afd574441210/go.mod h1:oyUK7GCNCRHCCyWyzifSzXpVrRYVBbAMHAzF5dXiKws=
github.com/uber/jaeger-client-go v2.22.1+incompatible h1:NHcubEkVbahf9t3p75TOCR83gdUHXjRJvjoBh1yACsM=
github.com/uber/jaeger-client-go v2.22.1+incompatible/go.mod h1:WVhlPFC8FDjOFMMWRy2pZqQJSXxYSwNYOkTr/Z6d3Kk=
github.com/uber/jaeger-lib v2.2.0+incompatible h1:MxZXOiR2JuoANZ3J6DE/U0kSFv/eJ/GfSYVCjK7dyaw=
Expand Down
2 changes: 1 addition & 1 deletion idls
Submodule idls updated from 34b451 to afd574
7 changes: 4 additions & 3 deletions service/frontend/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,10 @@ func NewService(
WriteDBVisibilityOpenMaxQPS: nil, // frontend service never write
WriteDBVisibilityClosedMaxQPS: nil, // frontend service never write

ESVisibilityListMaxQPS: serviceConfig.ESVisibilityListMaxQPS,
ESIndexMaxResultWindow: serviceConfig.ESIndexMaxResultWindow,
ValidSearchAttributes: serviceConfig.ValidSearchAttributes,
ESVisibilityListMaxQPS: serviceConfig.ESVisibilityListMaxQPS,
ESIndexMaxResultWindow: serviceConfig.ESIndexMaxResultWindow,
ValidSearchAttributes: serviceConfig.ValidSearchAttributes,
IsErrorRetryableFunction: common.FrontendRetry,
},
)
if err != nil {
Expand Down
7 changes: 4 additions & 3 deletions service/history/resource/resource.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,10 @@ func New(
WriteDBVisibilityOpenMaxQPS: config.VisibilityOpenMaxQPS,
WriteDBVisibilityClosedMaxQPS: config.VisibilityClosedMaxQPS,

ESVisibilityListMaxQPS: nil, // history service never read,
ESIndexMaxResultWindow: nil, // history service never read,
ValidSearchAttributes: config.ValidSearchAttributes, // history service never read, (Pinot need this to initialize pinotQueryValidator)
ESVisibilityListMaxQPS: nil, // history service never read,
ESIndexMaxResultWindow: nil, // history service never read,
ValidSearchAttributes: config.ValidSearchAttributes, // history service never read, (Pinot need this to initialize pinotQueryValidator)
IsErrorRetryableFunction: common.IsServiceTransientError,
},
)
if err != nil {
Expand Down
6 changes: 5 additions & 1 deletion service/history/templates/ratelimited.tmpl
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import (
"context"

"github.com/uber/cadence/common"
"github.com/uber/cadence/common/cache"
"github.com/uber/cadence/common/quotas"
"github.com/uber/cadence/common/types"
Expand Down Expand Up @@ -97,7 +98,10 @@ func (h *{{$decorator}}) {{$method.Declaration}} {
}

if !h.allowFunc({{$domainID}}, {{$workflowID}}) {
err = &types.ServiceBusyError{"Too many requests for the workflow ID"}
err = &types.ServiceBusyError{
Message: "Too many requests for the workflow ID",
Reason: common.WorkflowIDRateLimitReason,
}
return
}

Expand Down
21 changes: 17 additions & 4 deletions service/history/wrappers/ratelimited/handler_generated.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
"github.com/golang/mock/gomock"
"github.com/stretchr/testify/assert"

"github.com/uber/cadence/common"
"github.com/uber/cadence/common/log"
"github.com/uber/cadence/common/types"
"github.com/uber/cadence/service/history/handler"
Expand Down Expand Up @@ -138,6 +139,7 @@ func TestRatelimitedEndpoints_Table(t *testing.T) {
var sbErr *types.ServiceBusyError
assert.ErrorAs(t, err, &sbErr)
assert.ErrorContains(t, err, "Too many requests for the workflow ID")
assert.Equal(t, common.WorkflowIDRateLimitReason, sbErr.Reason)
})
}
}
7 changes: 4 additions & 3 deletions service/matching/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,10 @@ func NewService(
params,
service.Matching,
&service.Config{
PersistenceMaxQPS: serviceConfig.PersistenceMaxQPS,
PersistenceGlobalMaxQPS: serviceConfig.PersistenceGlobalMaxQPS,
ThrottledLoggerMaxRPS: serviceConfig.ThrottledLogRPS,
PersistenceMaxQPS: serviceConfig.PersistenceMaxQPS,
PersistenceGlobalMaxQPS: serviceConfig.PersistenceGlobalMaxQPS,
ThrottledLoggerMaxRPS: serviceConfig.ThrottledLogRPS,
IsErrorRetryableFunction: common.IsServiceTransientError,
// matching doesn't need visibility config as it never read or write visibility
},
)
Expand Down
7 changes: 4 additions & 3 deletions service/worker/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,10 @@ func NewService(params *resource.Params) (resource.Resource, error) {
params,
service.Worker,
&service.Config{
PersistenceMaxQPS: serviceConfig.PersistenceMaxQPS,
PersistenceGlobalMaxQPS: serviceConfig.PersistenceGlobalMaxQPS,
ThrottledLoggerMaxRPS: serviceConfig.ThrottledLogRPS,
PersistenceMaxQPS: serviceConfig.PersistenceMaxQPS,
PersistenceGlobalMaxQPS: serviceConfig.PersistenceGlobalMaxQPS,
ThrottledLoggerMaxRPS: serviceConfig.ThrottledLogRPS,
IsErrorRetryableFunction: common.IsServiceTransientError,
// worker service doesn't need visibility config as it never call visibilityManager API
},
)
Expand Down
Loading