diff --git a/service/history/api/queryworkflow/api.go b/service/history/api/queryworkflow/api.go index b9daa48a6c6..fa255f33b3b 100644 --- a/service/history/api/queryworkflow/api.go +++ b/service/history/api/queryworkflow/api.go @@ -32,6 +32,7 @@ import ( querypb "go.temporal.io/api/query/v1" "go.temporal.io/api/serviceerror" "go.temporal.io/api/workflowservice/v1" + "go.temporal.io/server/common/log/tag" "go.temporal.io/server/api/historyservice/v1" @@ -129,7 +130,7 @@ func Invoke( tag.WorkflowNamespaceID(workflowKey.NamespaceID), tag.WorkflowID(workflowKey.WorkflowID), tag.WorkflowRunID(workflowKey.RunID)) - return nil, serviceerror.NewWorkflowNotReady("Cannot query workflow due to Workflow Task in failed state.") + return nil, serviceerror.NewWorkflowNotReady("Unable to query workflow due to Workflow Task in failed state.") } // There are two ways in which queries get dispatched to workflow worker. First, queries can be dispatched on workflow tasks. diff --git a/service/history/api/updateworkflow/api.go b/service/history/api/updateworkflow/api.go index fe2ef5e78af..9ebe43e265b 100644 --- a/service/history/api/updateworkflow/api.go +++ b/service/history/api/updateworkflow/api.go @@ -42,6 +42,7 @@ import ( "go.temporal.io/server/api/matchingservice/v1" "go.temporal.io/server/common" "go.temporal.io/server/common/definition" + "go.temporal.io/server/common/log/tag" "go.temporal.io/server/common/namespace" serviceerrors "go.temporal.io/server/common/serviceerror" "go.temporal.io/server/internal/effect" @@ -52,6 +53,11 @@ import ( "go.temporal.io/server/service/history/workflow/update" ) +const ( + // Fail update fast if workflow task keeps failing (attempt >= 3). + failUpdateWorkflowTaskAttemptCount = 3 +) + func Invoke( ctx context.Context, req *historyservice.UpdateWorkflowExecutionRequest, @@ -127,6 +133,19 @@ func Invoke( return consts.ErrWorkflowExecutionNotFound } + if ms.GetExecutionInfo().WorkflowTaskAttempt >= failUpdateWorkflowTaskAttemptCount { + // If workflow task is constantly failing, the update to that workflow will also fail. + // Additionally, workflow update can't "fix" workflow state because updates (delivered with messages) + // are applied after events. + // Failing API call fast here to prevent wasting resources for an update that will fail. + shardCtx.GetLogger().Info("Fail update fast due to WorkflowTask in failed state.", + tag.WorkflowNamespace(req.Request.Namespace), + tag.WorkflowNamespaceID(wfKey.NamespaceID), + tag.WorkflowID(wfKey.WorkflowID), + tag.WorkflowRunID(wfKey.RunID)) + return serviceerror.NewWorkflowNotReady("Unable to perform workflow execution update due to Workflow Task in failed state.") + } + updateID := req.GetRequest().GetRequest().GetMeta().GetUpdateId() updateReg := weCtx.GetUpdateRegistry(ctx) var alreadyExisted bool