From 41e8cfcdcf5fc727fc7c03c341277254276fd7a0 Mon Sep 17 00:00:00 2001 From: Liam Stevens Date: Thu, 30 Jan 2025 15:48:43 +1000 Subject: [PATCH 01/11] feat: add configurable timeout --- src/aws/ecs.go | 5 ++--- src/aws/ecs_test.go | 2 +- src/plugin/config.go | 1 + src/plugin/config_test.go | 10 ++++++++++ src/plugin/task-runner.go | 2 +- 5 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/aws/ecs.go b/src/aws/ecs.go index 5386498..0e80d4b 100644 --- a/src/aws/ecs.go +++ b/src/aws/ecs.go @@ -59,11 +59,10 @@ func SubmitTask(ctx context.Context, ecsAPI EcsClientAPI, input *TaskRunnerConfi return *response.Tasks[0].TaskArn, nil } -func WaitForCompletion(ctx context.Context, waiter ecsWaiterAPI, taskArn string) (*ecs.DescribeTasksOutput, error) { +func WaitForCompletion(ctx context.Context, waiter ecsWaiterAPI, taskArn string, timeOut int) (*ecs.DescribeTasksOutput, error) { cluster := ClusterFromTaskArn(taskArn) - // TODO: This magic number will be resolved in a future piece of work, not going to refactor this right now - maxWaitDuration := 15 * time.Minute //nolint:mnd + maxWaitDuration := time.Duration(timeOut) * time.Second result, err := waiter.WaitForOutput(ctx, &ecs.DescribeTasksInput{ Cluster: aws.String(cluster), Tasks: []string{taskArn}, diff --git a/src/aws/ecs_test.go b/src/aws/ecs_test.go index 6859105..67cb109 100644 --- a/src/aws/ecs_test.go +++ b/src/aws/ecs_test.go @@ -365,7 +365,7 @@ func TestWaitForCompletion(t *testing.T) { for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { - result, err := WaitForCompletion(context.TODO(), tc.waiter, tc.input) + result, err := WaitForCompletion(context.TODO(), tc.waiter, tc.input, 15) t.Logf("result: '%v'", err) t.Logf("expected: detail: %v, reason: %v", *tc.expected.Failures[0].Detail, *tc.expected.Failures[0].Reason) diff --git a/src/plugin/config.go b/src/plugin/config.go index 1d535b8..f3bd5fe 100644 --- a/src/plugin/config.go +++ b/src/plugin/config.go @@ -7,6 +7,7 @@ import ( type Config struct { ParameterName string `required:"true" split_words:"true"` Script string `required:"true" split_words:"true"` + TimeOut int `default:"2700" split_words:"true"` } type EnvironmentConfigFetcher struct { diff --git a/src/plugin/config_test.go b/src/plugin/config_test.go index ea885f2..6ff6b22 100644 --- a/src/plugin/config_test.go +++ b/src/plugin/config_test.go @@ -25,6 +25,7 @@ func TestFailOnMissingRequiredEnvironment(t *testing.T) { disabledEnvVars: []string{ "BUILDKITE_PLUGIN_ECS_TASK_RUNNER_PARAMETER_NAME", "BUILDKITE_PLUGIN_ECS_TASK_RUNNER_SCRIPT", + "BUILDKITE_PLUGIN_ECS_TASK_RUNNER_TIMEOUT", }, enabledEnvVars: map[string]string{}, expectedErr: "required key BUILDKITE_PLUGIN_ECS_TASK_RUNNER_PARAMETER_NAME missing value", @@ -72,18 +73,27 @@ func TestFailOnMissingRequiredEnvironment(t *testing.T) { func TestFetchConfigFromEnvironment(t *testing.T) { unsetEnv(t, "BUILDKITE_PLUGIN_ECS_TASK_RUNNER_PARAMETER_NAME") unsetEnv(t, "BUILDKITE_PLUGIN_ECS_TASK_RUNNER_SCRIPT") + unsetEnv(t, "BUILDKITE_PLUGIN_ECS_TASK_RUNNER_TIME_OUT") var config plugin.Config fetcher := plugin.EnvironmentConfigFetcher{} t.Setenv("BUILDKITE_PLUGIN_ECS_TASK_RUNNER_PARAMETER_NAME", "test-parameter") t.Setenv("BUILDKITE_PLUGIN_ECS_TASK_RUNNER_SCRIPT", "hello-world") + t.Setenv("BUILDKITE_PLUGIN_ECS_TASK_RUNNER_TIME_OUT", "600") err := fetcher.Fetch(&config) require.NoError(t, err, "fetch should not error") assert.Equal(t, "test-parameter", config.ParameterName, "fetched message should match environment") assert.Equal(t, "hello-world", config.Script, "fetched message should match environment") + assert.Equal(t, 600, config.TimeOut, "fetched message should match environment") + + // test default value + unsetEnv(t, "BUILDKITE_PLUGIN_ECS_TASK_RUNNER_TIME_OUT") + err = fetcher.Fetch(&config) + require.NoError(t, err, "fetch should not error") + assert.Equal(t, 2700, config.TimeOut, "fetched message should match environment") } func unsetEnv(t *testing.T, key string) { diff --git a/src/plugin/task-runner.go b/src/plugin/task-runner.go index e9cde15..4c1ae34 100644 --- a/src/plugin/task-runner.go +++ b/src/plugin/task-runner.go @@ -57,7 +57,7 @@ func (trp TaskRunnerPlugin) Run(ctx context.Context, fetcher ConfigFetcher) erro // TODO: This is currently a magic number. If we want this to be configurable, remove the nolint directive and fix it up o.MaxDelay = 10 * time.Second //nolint:mnd }) - result, err := awsinternal.WaitForCompletion(ctx, waiterClient, taskArn) + result, err := awsinternal.WaitForCompletion(ctx, waiterClient, taskArn, config.TimeOut) if err != nil { return fmt.Errorf("failed to wait for task completion: %w\nFailure information: %v", err, result.Failures[0]) } From a53729ec7618c6adb14c5df668ed90d0a1e31177 Mon Sep 17 00:00:00 2001 From: Liam Stevens Date: Thu, 30 Jan 2025 16:01:58 +1000 Subject: [PATCH 02/11] feat: add annotation of build when wait failed --- src/plugin/task-runner.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/plugin/task-runner.go b/src/plugin/task-runner.go index 4c1ae34..79eda26 100644 --- a/src/plugin/task-runner.go +++ b/src/plugin/task-runner.go @@ -27,6 +27,7 @@ func (trp TaskRunnerPlugin) Run(ctx context.Context, fetcher ConfigFetcher) erro if err != nil { return fmt.Errorf("plugin configuration error: %w", err) } + buildKiteAgent := buildkite.Agent{} buildkite.Log("Executing task-runner plugin\n") @@ -59,6 +60,7 @@ func (trp TaskRunnerPlugin) Run(ctx context.Context, fetcher ConfigFetcher) erro }) result, err := awsinternal.WaitForCompletion(ctx, waiterClient, taskArn, config.TimeOut) if err != nil { + _ = buildKiteAgent.Annotate(ctx, fmt.Sprintf("Task did not complete successfully within timeout %v", result.Failures[0]), "error", "ecs-task-runner") return fmt.Errorf("failed to wait for task completion: %w\nFailure information: %v", err, result.Failures[0]) } // In a successful scenario for task completion, we would have a `tasks` slice with a single element From 857d5fbaa4a97009f7e9ac4ad6f9ca45b24b1581 Mon Sep 17 00:00:00 2001 From: Liam Stevens Date: Thu, 30 Jan 2025 16:31:41 +1000 Subject: [PATCH 03/11] fix: add attribute to plugin.yml --- plugin.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/plugin.yml b/plugin.yml index 910a838..7362648 100644 --- a/plugin.yml +++ b/plugin.yml @@ -9,3 +9,5 @@ configuration: type: string script: type: string + timeout: + type: integer From c6dd5452cf422f6b056ef46e38c220b86cb86642 Mon Sep 17 00:00:00 2001 From: Liam Stevens Date: Fri, 31 Jan 2025 15:07:36 +1000 Subject: [PATCH 04/11] fix: rework error handling of waiter --- src/plugin/task-runner.go | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/src/plugin/task-runner.go b/src/plugin/task-runner.go index 79eda26..afa40fe 100644 --- a/src/plugin/task-runner.go +++ b/src/plugin/task-runner.go @@ -2,6 +2,7 @@ package plugin import ( "context" + "errors" "fmt" "time" @@ -23,6 +24,7 @@ type ConfigFetcher interface { func (trp TaskRunnerPlugin) Run(ctx context.Context, fetcher ConfigFetcher) error { var config Config + timeoutError := errors.New("exceeded max wait time for TasksStopped waiter") err := fetcher.Fetch(&config) if err != nil { return fmt.Errorf("plugin configuration error: %w", err) @@ -60,9 +62,29 @@ func (trp TaskRunnerPlugin) Run(ctx context.Context, fetcher ConfigFetcher) erro }) result, err := awsinternal.WaitForCompletion(ctx, waiterClient, taskArn, config.TimeOut) if err != nil { - _ = buildKiteAgent.Annotate(ctx, fmt.Sprintf("Task did not complete successfully within timeout %v", result.Failures[0]), "error", "ecs-task-runner") - return fmt.Errorf("failed to wait for task completion: %w\nFailure information: %v", err, result.Failures[0]) + if errors.Is(err, timeoutError) { + err := buildKiteAgent.Annotate(ctx, fmt.Sprintf("Task did not complete successfully within timeout (%d seconds)", config.TimeOut), "error", "ecs-task-runner") + if err != nil { + return fmt.Errorf("failed to annotate buildkite with task timeout failure: %w", err) + } + } + bkerr := buildKiteAgent.Annotate(ctx, fmt.Sprintf("failed to wait for task completion: %v\n", err), "error", "ecs-task-runner") + if bkerr != nil { + return fmt.Errorf("failed to annotate buildkite with task wait failure: %w, annotation error: %w", err, bkerr) + } + } else if len(result.Failures) > 0 { + // There is still a scenario where the task could return failures but this isn't handled by the waiter + // This is due to the waiter only returning errors in scenarios where there are issues querying the task + // or scheduling the task. For a list of the Failures that can be returned in this case, see: + // https://docs.aws.amazon.com/AmazonECS/latest/developerguide/api_failures_messages.html + // specifically, under the `DescribeTasks` API. + err := buildKiteAgent.Annotate(ctx, fmt.Sprintf("Task did not complete successfully: %v", result.Failures[0]), "error", "ecs-task-runner") + if err != nil { + return fmt.Errorf("failed to annotate buildkite with task failure: %w", err) + } + return fmt.Errorf("task did not complete successfully: %v", result.Failures[0]) } + // In a successful scenario for task completion, we would have a `tasks` slice with a single element task := result.Tasks[0] taskLogDetails, err := awsinternal.FindLogStreamFromTask(ctx, ecsClient, task) From 5385ce64b756590c487b1f5741da247c3e94e928 Mon Sep 17 00:00:00 2001 From: Liam Stevens Date: Fri, 31 Jan 2025 15:13:58 +1000 Subject: [PATCH 05/11] fix: test descriptions --- src/plugin/config_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/plugin/config_test.go b/src/plugin/config_test.go index 6ff6b22..b7ba691 100644 --- a/src/plugin/config_test.go +++ b/src/plugin/config_test.go @@ -86,14 +86,14 @@ func TestFetchConfigFromEnvironment(t *testing.T) { require.NoError(t, err, "fetch should not error") assert.Equal(t, "test-parameter", config.ParameterName, "fetched message should match environment") - assert.Equal(t, "hello-world", config.Script, "fetched message should match environment") - assert.Equal(t, 600, config.TimeOut, "fetched message should match environment") + assert.Equal(t, "hello-world", config.Script, "fetched script should match environment") + assert.Equal(t, 600, config.TimeOut, "fetched timeout should match environment") // test default value unsetEnv(t, "BUILDKITE_PLUGIN_ECS_TASK_RUNNER_TIME_OUT") err = fetcher.Fetch(&config) require.NoError(t, err, "fetch should not error") - assert.Equal(t, 2700, config.TimeOut, "fetched message should match environment") + assert.Equal(t, 2700, config.TimeOut, "fetched timeout should match environment") } func unsetEnv(t *testing.T, key string) { From b6d57d196461ff33f7e0f32ab6a946eb811f574c Mon Sep 17 00:00:00 2001 From: Liam Stevens Date: Fri, 31 Jan 2025 15:50:37 +1000 Subject: [PATCH 06/11] fix: update README --- README.md | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 2368d7a..40c7661 100644 --- a/README.md +++ b/README.md @@ -8,14 +8,23 @@ Add the following lines to your `pipeline.yml`: steps: - plugins: - cultureamp/ecs-task-runner#v0.0.0: - message: "This is the message that will be annotated!" + parameter-name: "test-parameter" + script: "/bin/migrate" + timeout: 900 ``` ## Configuration -### `message` (Required, string) +### `parameter-name` (Required, string) +The name or ARN of the parameter in Parameter Store that contains the task definition. -The message to annotate onto the build. +### `script` (Required, string) +The name of the script to run in the task. + +### `timeout` (Optional, integer) +The timeout in seconds that the plugin will wait for the task to complete. If the task does not complete within this time, the plugin will fail. The task execution will continue to run in the background. + +Default: 2700 ## Usage This plugin is based on an existing pattern in `murmur` where database migrations are run as a task on ECS. To provide additional context for how this plugin is expected to be used, this is the expected pattern: From 470f2a87f2ee8f926c821dac9157e6ea2a72761f Mon Sep 17 00:00:00 2001 From: Liam Stevens Date: Fri, 7 Feb 2025 12:18:36 +1000 Subject: [PATCH 07/11] feat: add interfaces for task runner and basic test to piggyback off this --- src/aws/ecs.go | 4 +- src/aws/ecs_test.go | 53 +++++++++------ src/buildkite/agent.go | 6 +- src/main.go | 3 +- src/plugin/task-runner.go | 61 ++++++++++------- src/plugin/task-runner_test.go | 117 +++++++++++++++++++++++++++++++++ 6 files changed, 196 insertions(+), 48 deletions(-) create mode 100644 src/plugin/task-runner_test.go diff --git a/src/aws/ecs.go b/src/aws/ecs.go index 0e80d4b..77cb6dc 100644 --- a/src/aws/ecs.go +++ b/src/aws/ecs.go @@ -19,7 +19,7 @@ type EcsClientAPI interface { DescribeTaskDefinition(ctx context.Context, params *ecs.DescribeTaskDefinitionInput, optFns ...func(*ecs.Options)) (*ecs.DescribeTaskDefinitionOutput, error) } -type ecsWaiterAPI interface { +type EcsWaiterAPI interface { WaitForOutput(ctx context.Context, params *ecs.DescribeTasksInput, maxWaitDur time.Duration, optFns ...func(*ecs.TasksStoppedWaiterOptions)) (*ecs.DescribeTasksOutput, error) } @@ -59,7 +59,7 @@ func SubmitTask(ctx context.Context, ecsAPI EcsClientAPI, input *TaskRunnerConfi return *response.Tasks[0].TaskArn, nil } -func WaitForCompletion(ctx context.Context, waiter ecsWaiterAPI, taskArn string, timeOut int) (*ecs.DescribeTasksOutput, error) { +func WaitForCompletion(ctx context.Context, waiter EcsWaiterAPI, taskArn string, timeOut int) (*ecs.DescribeTasksOutput, error) { cluster := ClusterFromTaskArn(taskArn) maxWaitDuration := time.Duration(timeOut) * time.Second diff --git a/src/aws/ecs_test.go b/src/aws/ecs_test.go index 67cb109..7d498a1 100644 --- a/src/aws/ecs_test.go +++ b/src/aws/ecs_test.go @@ -322,16 +322,23 @@ func TestFindLogStreamFromTaskNegative(t *testing.T) { // to allow thing to finish in the background. The return value is used only for when a task fails, and we push // this to a log. func TestWaitForCompletion(t *testing.T) { - mockedWaiter := mockECSWaiter{ - mockWaitForOutput: func(context.Context, *ecs.DescribeTasksInput, time.Duration, ...func(*ecs.TasksStoppedWaiterOptions)) (*ecs.DescribeTasksOutput, error) { - return &ecs.DescribeTasksOutput{ - Failures: []types.Failure{ - { - Arn: aws.String("arn:aws:ecs:us-west-2:123456789012:task/test-cluster/07cc583696bd44e0be450bff7314ddaf"), - Detail: aws.String("task stopped"), - Reason: aws.String("computer is full of beanz"), - }, - }}, errors.New("task stopped: computer is full of beanz") + mockedWaiter := map[string]mockECSWaiter{ + "beans": { + mockWaitForOutput: func(context.Context, *ecs.DescribeTasksInput, time.Duration, ...func(*ecs.TasksStoppedWaiterOptions)) (*ecs.DescribeTasksOutput, error) { + return &ecs.DescribeTasksOutput{ + Failures: []types.Failure{ + { + Arn: aws.String("arn:aws:ecs:us-west-2:123456789012:task/test-cluster/07cc583696bd44e0be450bff7314ddaf"), + Detail: aws.String("task stopped"), + Reason: aws.String("computer is full of beanz"), + }, + }}, nil + }, + }, + "slowpoke": { + mockWaitForOutput: func(context.Context, *ecs.DescribeTasksInput, time.Duration, ...func(*ecs.TasksStoppedWaiterOptions)) (*ecs.DescribeTasksOutput, error) { + return nil, errors.New("task timed out: computer is full of beanz") + }, }, } @@ -344,13 +351,13 @@ func TestWaitForCompletion(t *testing.T) { tests := []struct { name string input string - waiter ecsWaiterAPI + waiter EcsWaiterAPI expected expectedReturn }{ { name: "given a task ARN, it should return the task details", input: "arn:aws:ecs:us-west-2:123456789012:task/test-cluster/07cc583696bd44e0be450bff7314ddaf", - waiter: mockedWaiter, + waiter: mockedWaiter["beans"], expected: expectedReturn{&ecs.DescribeTasksOutput{ Failures: []types.Failure{ { @@ -358,21 +365,27 @@ func TestWaitForCompletion(t *testing.T) { Detail: aws.String("task stopped"), Reason: aws.String("computer is full of beanz"), }, - }}, errors.New("task stopped: computer is full of beanz"), + }}, nil, }, }, + { + name: "given a task that times out, it should return an error", + input: "arn:aws:ecs:us-west-2:123456789012:task/test-cluster/07cc583696bd44e0be450bff7314ddaf", + waiter: mockedWaiter["slowpoke"], + expected: expectedReturn{nil, errors.New("task timed out: computer is full of beanz")}, + }, } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { result, err := WaitForCompletion(context.TODO(), tc.waiter, tc.input, 15) - t.Logf("result: '%v'", err) - t.Logf("expected: detail: %v, reason: %v", *tc.expected.Failures[0].Detail, *tc.expected.Failures[0].Reason) - - // The function is most-useful when the underlying task fails. i.e. no news is good news in a real-world scenario - // So, we will test the failure cases - require.Error(t, err) - assert.Equal(t, tc.expected.Failures[0], result.Failures[0]) + t.Logf("name: %s result: '%v'", tc.name, err) + // Errors are only returned when the waiter times out + if err != nil { + require.Equal(t, tc.expected.Error(), err.Error()) + } else { + require.Equal(t, tc.expected.Failures, result.Failures) + } }) } } diff --git a/src/buildkite/agent.go b/src/buildkite/agent.go index 5dc0d7d..2250efd 100644 --- a/src/buildkite/agent.go +++ b/src/buildkite/agent.go @@ -11,10 +11,14 @@ import ( osexec "golang.org/x/sys/execabs" ) +type AgentAPI interface { + Annotate(ctx context.Context, message string, style string, annotationContext string) error +} + type Agent struct { } -func (a *Agent) Annotate(ctx context.Context, message string, style string, annotationContext string) error { +func (a Agent) Annotate(ctx context.Context, message string, style string, annotationContext string) error { return execCmd(ctx, "buildkite-agent", &message, "annotate", "--style", style, "--context", annotationContext) } diff --git a/src/main.go b/src/main.go index 1131fcd..cb8328c 100644 --- a/src/main.go +++ b/src/main.go @@ -4,6 +4,7 @@ import ( "context" "os" + awsinternal "github.com/cultureamp/ecs-task-runner-buildkite-plugin/aws" "github.com/cultureamp/ecs-task-runner-buildkite-plugin/buildkite" "github.com/cultureamp/ecs-task-runner-buildkite-plugin/plugin" ) @@ -13,7 +14,7 @@ func main() { fetcher := plugin.EnvironmentConfigFetcher{} taskRunnerPlugin := plugin.TaskRunnerPlugin{} - err := taskRunnerPlugin.Run(ctx, fetcher) + err := taskRunnerPlugin.Run(ctx, fetcher, awsinternal.WaitForCompletion) if err != nil { buildkite.LogFailuref("plugin execution failed: %s\n", err.Error()) diff --git a/src/plugin/task-runner.go b/src/plugin/task-runner.go index afa40fe..fbd7122 100644 --- a/src/plugin/task-runner.go +++ b/src/plugin/task-runner.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "strings" "time" awsinternal "github.com/cultureamp/ecs-task-runner-buildkite-plugin/aws" @@ -18,13 +19,14 @@ import ( type TaskRunnerPlugin struct { } +type WaitForCompletion func(ctx context.Context, waiter awsinternal.EcsWaiterAPI, taskArn string, timeOut int) (*ecs.DescribeTasksOutput, error) type ConfigFetcher interface { Fetch(config *Config) error } -func (trp TaskRunnerPlugin) Run(ctx context.Context, fetcher ConfigFetcher) error { +func (trp TaskRunnerPlugin) Run(ctx context.Context, fetcher ConfigFetcher, waiter WaitForCompletion) error { var config Config - timeoutError := errors.New("exceeded max wait time for TasksStopped waiter") + err := fetcher.Fetch(&config) if err != nil { return fmt.Errorf("plugin configuration error: %w", err) @@ -60,29 +62,10 @@ func (trp TaskRunnerPlugin) Run(ctx context.Context, fetcher ConfigFetcher) erro // TODO: This is currently a magic number. If we want this to be configurable, remove the nolint directive and fix it up o.MaxDelay = 10 * time.Second //nolint:mnd }) - result, err := awsinternal.WaitForCompletion(ctx, waiterClient, taskArn, config.TimeOut) + result, err := waiter(ctx, waiterClient, taskArn, config.TimeOut) + err = trp.HandleResults(ctx, result, err, buildKiteAgent, config) if err != nil { - if errors.Is(err, timeoutError) { - err := buildKiteAgent.Annotate(ctx, fmt.Sprintf("Task did not complete successfully within timeout (%d seconds)", config.TimeOut), "error", "ecs-task-runner") - if err != nil { - return fmt.Errorf("failed to annotate buildkite with task timeout failure: %w", err) - } - } - bkerr := buildKiteAgent.Annotate(ctx, fmt.Sprintf("failed to wait for task completion: %v\n", err), "error", "ecs-task-runner") - if bkerr != nil { - return fmt.Errorf("failed to annotate buildkite with task wait failure: %w, annotation error: %w", err, bkerr) - } - } else if len(result.Failures) > 0 { - // There is still a scenario where the task could return failures but this isn't handled by the waiter - // This is due to the waiter only returning errors in scenarios where there are issues querying the task - // or scheduling the task. For a list of the Failures that can be returned in this case, see: - // https://docs.aws.amazon.com/AmazonECS/latest/developerguide/api_failures_messages.html - // specifically, under the `DescribeTasks` API. - err := buildKiteAgent.Annotate(ctx, fmt.Sprintf("Task did not complete successfully: %v", result.Failures[0]), "error", "ecs-task-runner") - if err != nil { - return fmt.Errorf("failed to annotate buildkite with task failure: %w", err) - } - return fmt.Errorf("task did not complete successfully: %v", result.Failures[0]) + return fmt.Errorf("failed to handle task results: %w", err) } // In a successful scenario for task completion, we would have a `tasks` slice with a single element @@ -124,3 +107,33 @@ func (trp TaskRunnerPlugin) Run(ctx context.Context, fetcher ConfigFetcher) erro buildkite.Log("done. \n") return nil } + +func (trp TaskRunnerPlugin) HandleResults(ctx context.Context, output *ecs.DescribeTasksOutput, err error, bkAgent buildkite.AgentAPI, config Config) error { + if err != nil { + // This comparison is hacky, but is the only way that I could get the wrapped errors surfaced + // from the AWS library to be properly handled. It would be better if this was done using errors.As + if strings.Contains(err.Error(), "exceeded max wait time for TasksStopped waiter") { + err := bkAgent.Annotate(ctx, fmt.Sprintf("Task did not complete successfully within timeout (%d seconds)", config.TimeOut), "error", "ecs-task-runner") + if err != nil { + return fmt.Errorf("failed to annotate buildkite with task timeout failure: %w", err) + } + return errors.New("task did not complete within the time limit") + } + bkerr := bkAgent.Annotate(ctx, fmt.Sprintf("failed to wait for task completion: %v\n", err), "error", "ecs-task-runner") + if bkerr != nil { + return fmt.Errorf("failed to annotate buildkite with task wait failure: %w, annotation error: %w", err, bkerr) + } + } else if len(output.Failures) > 0 { + // There is still a scenario where the task could return failures but this isn't handled by the waiter + // This is due to the waiter only returning errors in scenarios where there are issues querying the task + // or scheduling the task. For a list of the Failures that can be returned in this case, see: + // https://docs.aws.amazon.com/AmazonECS/latest/developerguide/api_failures_messages.html + // specifically, under the `DescribeTasks` API. + err := bkAgent.Annotate(ctx, fmt.Sprintf("Task did not complete successfully: %v", output.Failures[0]), "error", "ecs-task-runner") + if err != nil { + return fmt.Errorf("failed to annotate buildkite with task failure: %w", err) + } + return fmt.Errorf("task did not complete successfully: %v", output.Failures[0]) + } + return nil +} diff --git a/src/plugin/task-runner_test.go b/src/plugin/task-runner_test.go new file mode 100644 index 0000000..1f5b1e1 --- /dev/null +++ b/src/plugin/task-runner_test.go @@ -0,0 +1,117 @@ +package plugin_test + +import ( + "context" + "errors" + "testing" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/ecs" + "github.com/aws/aws-sdk-go-v2/service/ecs/types" + awsinternal "github.com/cultureamp/ecs-task-runner-buildkite-plugin/aws" + "github.com/cultureamp/ecs-task-runner-buildkite-plugin/plugin" + "github.com/stretchr/testify/require" +) + +type MockBuildKiteAgent struct{} + +func (m MockBuildKiteAgent) Annotate(ctx context.Context, message string, style string, annotationContext string) error { + return nil +} + +func TestRunPluginResponse(t *testing.T) { + buildKiteAgent := MockBuildKiteAgent{} + t.Setenv("BUILDKITE_PLUGIN_ECS_TASK_RUNNER_PARAMETER_NAME", "test-parameter") + t.Setenv("BUILDKITE_PLUGIN_ECS_TASK_RUNNER_SCRIPT", "hello-world") + t.Setenv("BUILDKITE_PLUGIN_ECS_TASK_RUNNER_TIME_OUT", "15") + mockFetcher := plugin.EnvironmentConfigFetcher{} + var config plugin.Config + err := mockFetcher.Fetch(&config) + require.NoError(t, err) + + mockContainers := map[string]types.Container{ + "success": { + ExitCode: aws.Int32(0), + Image: aws.String("nginx"), + Name: aws.String("gateway"), + Reason: aws.String("Gracefully Terminated"), + }, + "failed": { + ExitCode: aws.Int32(1), + Image: aws.String("nginx"), + Name: aws.String("gateway"), + Reason: aws.String("Panicked"), + }, + "running": { + Image: aws.String("nginx"), + Name: aws.String("gateway"), + Reason: aws.String("Panicked"), + }, + } + + mockResponses := map[string]plugin.WaitForCompletion{ + "success": func(ctx context.Context, waiter awsinternal.EcsWaiterAPI, taskArn string, timeOut int) (*ecs.DescribeTasksOutput, error) { + return &ecs.DescribeTasksOutput{ + Tasks: []types.Task{{ + Containers: []types.Container{ + mockContainers["success"], + }, + LastStatus: aws.String("STOPPED"), + }, + }, + }, nil + }, + "failed": func(ctx context.Context, waiter awsinternal.EcsWaiterAPI, taskArn string, timeOut int) (*ecs.DescribeTasksOutput, error) { + return &ecs.DescribeTasksOutput{ + Tasks: []types.Task{{ + Containers: []types.Container{ + mockContainers["failed"], + }, + LastStatus: aws.String("STOPPED"), + }, + }, + Failures: []types.Failure{ + { + Arn: aws.String("test-task-arn"), + Reason: aws.String("Panicked"), + Detail: aws.String("Container gateway panicked with non-zero exit code 1"), + }, + }, + }, nil + }, + "running": func(ctx context.Context, waiter awsinternal.EcsWaiterAPI, taskArn string, timeOut int) (*ecs.DescribeTasksOutput, error) { + return &ecs.DescribeTasksOutput{ + Tasks: []types.Task{{ + Containers: []types.Container{ + mockContainers["running"], + }, + LastStatus: aws.String("RUNNING"), + }, + }, + }, errors.New("exceeded max wait time for TasksStopped waiter") + }, + } + + expectedString := map[string]string{ + "success": "", + "failed": "task did not complete successfully", + "running": "task did not complete within the time limit", + } + // expectedError := map[string]error{ + // "success": nil, + // "failed": errors.New(expectedString["failed"]), + // "running": errors.New(expectedString["running"]), + // } + + for name, mockResponse := range mockResponses { + t.Run(name, func(t *testing.T) { + result, err := mockResponse(context.TODO(), nil, "test-task-arn", 15) + plugin := plugin.TaskRunnerPlugin{} + err = plugin.HandleResults(context.TODO(), result, err, buildKiteAgent, config) + if err != nil { + require.ErrorContains(t, err, expectedString[name]) + t.Logf("expected: %v, actual: %v", expectedString[name], err) + } + }) + } +} From 0f51a16fdcd8bd2a9e22a8e783b19f23f6796891 Mon Sep 17 00:00:00 2001 From: Liam Stevens Date: Fri, 7 Feb 2025 15:48:52 +1000 Subject: [PATCH 08/11] fix: linting --- src/plugin/config.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugin/config.go b/src/plugin/config.go index a4bede1..1dcc40f 100644 --- a/src/plugin/config.go +++ b/src/plugin/config.go @@ -7,7 +7,7 @@ import ( type Config struct { ParameterName string `required:"true" split_words:"true"` Command string `required:"false" split_words:"true"` - TimeOut int `default:"2700" split_words:"true"` + TimeOut int `default:"2700" split_words:"true"` } type EnvironmentConfigFetcher struct { From a59458acd013e26673893bc1e680acf105233067 Mon Sep 17 00:00:00 2001 From: Liam Stevens Date: Fri, 7 Feb 2025 15:50:16 +1000 Subject: [PATCH 09/11] fix: readme --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 40c7661..807a346 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ steps: - plugins: - cultureamp/ecs-task-runner#v0.0.0: parameter-name: "test-parameter" - script: "/bin/migrate" + command: "/bin/migrate" timeout: 900 ``` @@ -18,8 +18,8 @@ steps: ### `parameter-name` (Required, string) The name or ARN of the parameter in Parameter Store that contains the task definition. -### `script` (Required, string) -The name of the script to run in the task. +### `command` (Required, string) +The name of the command to run in the task. ### `timeout` (Optional, integer) The timeout in seconds that the plugin will wait for the task to complete. If the task does not complete within this time, the plugin will fail. The task execution will continue to run in the background. @@ -43,7 +43,7 @@ This plugin comes with some assumed infrastructure that needs to be deployed bef - An IAM role for the BK agent to start the task - A Parameter Store parameter extending the task definition by providing entrypoint overrides and networking configuration - A log group for the task -- A security group for your service (this can be the [base-infrastructure-for-services](https://github.com/cultureamp/base-infrastructure-for-services) source security group +- A security group for your service (this can be the [base-infrastructure-for-services](https://github.com/cultureamp/base-infrastructure-for-services) source security group) This can be visualised below: ![The overall flow of this plugin and AWS resources](docs/images/diagram.svg) From 4b3cfac162408c367ea6f8ae8f4926e12e06944a Mon Sep 17 00:00:00 2001 From: Liam Stevens Date: Fri, 7 Feb 2025 15:52:02 +1000 Subject: [PATCH 10/11] fix: linting --- src/plugin/config_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugin/config_test.go b/src/plugin/config_test.go index bcef9a1..e0ea314 100644 --- a/src/plugin/config_test.go +++ b/src/plugin/config_test.go @@ -24,7 +24,7 @@ func TestFailOnMissingRequiredEnvironment(t *testing.T) { name: "all required parameters are unset", disabledEnvVars: []string{ "BUILDKITE_PLUGIN_ECS_TASK_RUNNER_PARAMETER_NAME", - "BUILDKITE_PLUGIN_ECS_TASK_RUNNER_COMMAND", + "BUILDKITE_PLUGIN_ECS_TASK_RUNNER_COMMAND", "BUILDKITE_PLUGIN_ECS_TASK_RUNNER_TIMEOUT", }, enabledEnvVars: map[string]string{}, From 23951dd68afe8804227d344b1684b55995ba9891 Mon Sep 17 00:00:00 2001 From: Liam Stevens Date: Mon, 10 Feb 2025 11:18:33 +1000 Subject: [PATCH 11/11] fix: update test reasoning --- src/aws/ecs_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/aws/ecs_test.go b/src/aws/ecs_test.go index 69237b6..40eeeb1 100644 --- a/src/aws/ecs_test.go +++ b/src/aws/ecs_test.go @@ -403,7 +403,7 @@ func TestWaitForCompletion(t *testing.T) { }, "slowpoke": { mockWaitForOutput: func(context.Context, *ecs.DescribeTasksInput, time.Duration, ...func(*ecs.TasksStoppedWaiterOptions)) (*ecs.DescribeTasksOutput, error) { - return nil, errors.New("task timed out: computer is full of beanz") + return nil, errors.New("task timed out: computer still thinking") }, }, } @@ -438,7 +438,7 @@ func TestWaitForCompletion(t *testing.T) { name: "given a task that times out, it should return an error", input: "arn:aws:ecs:us-west-2:123456789012:task/test-cluster/07cc583696bd44e0be450bff7314ddaf", waiter: mockedWaiter["slowpoke"], - expected: expectedReturn{nil, errors.New("task timed out: computer is full of beanz")}, + expected: expectedReturn{nil, errors.New("task timed out: computer still thinking")}, }, }