From a5ee12b42a6b26b3824630050ff48b57b5252c4f Mon Sep 17 00:00:00 2001 From: Jonathan Budzenski Date: Mon, 1 Aug 2022 14:23:22 -0500 Subject: [PATCH 1/3] [artifacts] Improve cloud deployment error handling --- .buildkite/pipelines/artifacts.yml | 7 +++- .buildkite/scripts/steps/artifacts/cloud.sh | 37 ++++++++++++--------- 2 files changed, 27 insertions(+), 17 deletions(-) diff --git a/.buildkite/pipelines/artifacts.yml b/.buildkite/pipelines/artifacts.yml index 8e08c736694e8..8ff24c3f2b063 100644 --- a/.buildkite/pipelines/artifacts.yml +++ b/.buildkite/pipelines/artifacts.yml @@ -73,13 +73,18 @@ steps: - command: .buildkite/scripts/steps/artifacts/cloud.sh label: 'Cloud Deployment' - soft_fail: true + soft_fail: + - exit_status: 255 agents: queue: n2-2 timeout_in_minutes: 30 if: "build.env('RELEASE_BUILD') == null || build.env('RELEASE_BUILD') == '' || build.env('RELEASE_BUILD') == 'false'" retry: automatic: + # Matches buildkite forced agent shutdown (timeout_in_minutes) and ecctl create failures + # This is typically caused by trying to use an unavailable on Cloud stack version + - exit_status: 255 + limit: 0 - exit_status: '*' limit: 1 diff --git a/.buildkite/scripts/steps/artifacts/cloud.sh b/.buildkite/scripts/steps/artifacts/cloud.sh index 5bf2285ab162a..3ea9a67d08f7b 100644 --- a/.buildkite/scripts/steps/artifacts/cloud.sh +++ b/.buildkite/scripts/steps/artifacts/cloud.sh @@ -12,28 +12,32 @@ mkdir -p target download_artifact "kibana-$FULL_VERSION-linux-x86_64.tar.gz" ./target --build "${KIBANA_BUILD_ID:-$BUILDKITE_BUILD_ID}" -node scripts/build \ - --skip-initialize \ - --skip-generic-folders \ - --skip-platform-folders \ - --skip-archives \ - --docker-images \ - --skip-docker-ubi \ - --skip-docker-ubuntu \ - --skip-docker-contexts - -docker load --input target/kibana-cloud-$FULL_VERSION-docker-image.tar.gz - TAG="$FULL_VERSION-$GIT_COMMIT" -KIBANA_BASE_IMAGE="docker.elastic.co/kibana-ci/kibana-cloud:$FULL_VERSION" 
KIBANA_TEST_IMAGE="docker.elastic.co/kibana-ci/kibana-cloud:$TAG" -docker tag "$KIBANA_BASE_IMAGE" "$KIBANA_TEST_IMAGE" - echo "$KIBANA_DOCKER_PASSWORD" | docker login -u "$KIBANA_DOCKER_USERNAME" --password-stdin docker.elastic.co trap 'docker logout docker.elastic.co' EXIT -docker push "$KIBANA_TEST_IMAGE" +set +e +DISTRIBUTION_EXISTS=$(docker manifest inspect $KIBANA_TEST_IMAGE &> /dev/null; echo $?) +set -e + +if [ $DISTRIBUTION_EXISTS -eq 0 ]; then + echo "Distribution already exists, skipping build" +else + node scripts/build \ + --skip-initialize \ + --skip-generic-folders \ + --skip-platform-folders \ + --skip-archives \ + --docker-images \ + --docker-tag-qualifier="$GIT_COMMIT" \ + --docker-push \ + --skip-docker-ubi \ + --skip-docker-ubuntu \ + --skip-docker-contexts +fi + docker logout docker.elastic.co echo "--- Create deployment" @@ -62,6 +66,7 @@ function shutdown { trap "shutdown" EXIT ecctl deployment create --track --output json --file "$DEPLOYMENT_SPEC" > "$LOGS" + CLOUD_DEPLOYMENT_USERNAME=$(jq -r --slurp '.[]|select(.resources).resources[] | select(.credentials).credentials.username' "$LOGS") CLOUD_DEPLOYMENT_PASSWORD=$(jq -r --slurp '.[]|select(.resources).resources[] | select(.credentials).credentials.password' "$LOGS") CLOUD_DEPLOYMENT_ID=$(jq -r --slurp '.[0].id' "$LOGS") From 06aeeec9c1815ba1715fd230ced015fb8096926a Mon Sep 17 00:00:00 2001 From: Jonathan Budzenski Date: Thu, 1 Sep 2022 10:19:44 -0500 Subject: [PATCH 2/3] Update .buildkite/scripts/steps/artifacts/cloud.sh Co-authored-by: Spencer --- .buildkite/scripts/steps/artifacts/cloud.sh | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.buildkite/scripts/steps/artifacts/cloud.sh b/.buildkite/scripts/steps/artifacts/cloud.sh index 3ea9a67d08f7b..4d2317ce0b6c7 100644 --- a/.buildkite/scripts/steps/artifacts/cloud.sh +++ b/.buildkite/scripts/steps/artifacts/cloud.sh @@ -18,11 +18,7 @@ KIBANA_TEST_IMAGE="docker.elastic.co/kibana-ci/kibana-cloud:$TAG" echo 
"$KIBANA_DOCKER_PASSWORD" | docker login -u "$KIBANA_DOCKER_USERNAME" --password-stdin docker.elastic.co trap 'docker logout docker.elastic.co' EXIT -set +e -DISTRIBUTION_EXISTS=$(docker manifest inspect $KIBANA_TEST_IMAGE &> /dev/null; echo $?) -set -e - -if [ $DISTRIBUTION_EXISTS -eq 0 ]; then +if docker manifest inspect $KIBANA_TEST_IMAGE &> /dev/null; then echo "Distribution already exists, skipping build" else node scripts/build \ From b774d164baebab3e67652d6f93018d85689452a3 Mon Sep 17 00:00:00 2001 From: Jonathan Budzenski Date: Thu, 1 Sep 2022 10:28:44 -0500 Subject: [PATCH 3/3] update retry codes --- .buildkite/pipelines/artifacts.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.buildkite/pipelines/artifacts.yml b/.buildkite/pipelines/artifacts.yml index 8ff24c3f2b063..b6d3cc9fc9b14 100644 --- a/.buildkite/pipelines/artifacts.yml +++ b/.buildkite/pipelines/artifacts.yml @@ -81,10 +81,15 @@ steps: if: "build.env('RELEASE_BUILD') == null || build.env('RELEASE_BUILD') == '' || build.env('RELEASE_BUILD') == 'false'" retry: automatic: - # Matches buildkite forced agent shutdown (timeout_in_minutes) and ecctl create failures - # This is typically caused by trying to use an unavailable on Cloud stack version + # Timeout and graceful shutdown | ecctl deployment create failure - exit_status: 255 limit: 0 + + # Timeout and forced shutdown + - exit_status: '-1' + limit: 0 + + # Test failures - exit_status: '*' limit: 1