From 5f74d7df7f2810508db4b2e60cd485d59f10cac7 Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Thu, 7 Dec 2023 12:30:36 +0100 Subject: [PATCH] Fix optimization of PROD image building side-effect The change #35856 optimized waiting time before PROD image builds start - rather than waiting for full constraints generation, the PROD image building just used source constraints quickly generated right after building the CI image. This is fine for main because there we install airflow and packages using constraints from sources, but for release branches we use the provider constraints - in order to be able to install providers from PyPI rather than from sources. This means that we have to wait for constraints generation to complete before we start building PROD images - because we need to download the constraints generated there to use them. Unfortunately, GitHub Actions does not have conditional dependencies depending on where the workflow is run - so we have to effectively duplicate PROD build steps and skip steps in them instead. 
--- .github/actions/build-prod-images/action.yml | 7 + .github/workflows/ci.yml | 142 +++++++++++++++++- airflow/providers/installed_providers.txt | 1 + .../params/build_prod_params.py | 2 +- 4 files changed, 143 insertions(+), 9 deletions(-) diff --git a/.github/actions/build-prod-images/action.yml b/.github/actions/build-prod-images/action.yml index f038234087c4e..14a5aa8de90c6 100644 --- a/.github/actions/build-prod-images/action.yml +++ b/.github/actions/build-prod-images/action.yml @@ -66,6 +66,13 @@ runs: with: name: source-constraints path: ./docker-context-files + if: ${{ inputs.build-provider-packages == 'true' }} + - name: "Download constraints from the Generate & Verify build" + uses: actions/download-artifact@v3 + with: + name: constraints + path: ./docker-context-files + if: ${{ inputs.build-provider-packages != 'true' }} - name: "Build & Push PROD images with source providers ${{ env.IMAGE_TAG }}:${{ env.PYTHON_VERSIONS }}" shell: bash run: > diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 09d672546a626..333c1e65db716 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1589,7 +1589,7 @@ jobs: build-prod-images: timeout-minutes: 80 name: > - ${{needs.build-info.outputs.build-job-description}} PROD images + ${{needs.build-info.outputs.build-job-description}} PROD images (main) ${{needs.build-info.outputs.all-python-versions-list-as-string}} runs-on: ${{fromJSON(needs.build-info.outputs.runs-on)}} needs: [build-info, build-ci-images] @@ -1605,20 +1605,28 @@ jobs: steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - if: needs.build-info.outputs.in-workflow-build == 'true' + if: > + needs.build-info.outputs.in-workflow-build == 'true' && + needs.build-info.outputs.default-branch == 'main' - uses: actions/checkout@v4 with: ref: ${{ needs.build-info.outputs.targetCommitSha }} persist-credentials: false - if: needs.build-info.outputs.in-workflow-build 
== 'true' + if: > + needs.build-info.outputs.in-workflow-build == 'true' && + needs.build-info.outputs.default-branch == 'main' - name: "Install Breeze" uses: ./.github/actions/breeze - if: needs.build-info.outputs.in-workflow-build == 'true' + if: > + needs.build-info.outputs.in-workflow-build == 'true' && + needs.build-info.outputs.default-branch == 'main' - name: > Build PROD Images ${{needs.build-info.outputs.all-python-versions-list-as-string}}:${{env.IMAGE_TAG}} uses: ./.github/actions/build-prod-images - if: needs.build-info.outputs.in-workflow-build == 'true' + if: > + needs.build-info.outputs.in-workflow-build == 'true' && + needs.build-info.outputs.default-branch == 'main' with: build-provider-packages: ${{ needs.build-info.outputs.default-branch == 'main' }} chicken-egg-providers: ${{ needs.build-info.outputs.chicken-egg-providers }} @@ -1631,7 +1639,7 @@ jobs: build-prod-images-bullseye: timeout-minutes: 80 name: > - Build Bullseye PROD images + Build Bullseye PROD images (main) ${{needs.build-info.outputs.all-python-versions-list-as-string}} runs-on: ${{fromJSON(needs.build-info.outputs.runs-on)}} needs: [build-info, build-ci-images] @@ -1648,17 +1656,135 @@ jobs: steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" + if: > + needs.build-info.outputs.in-workflow-build == 'true' && + needs.build-info.outputs.default-branch == 'main' + - uses: actions/checkout@v3 + with: + ref: ${{ needs.build-info.outputs.targetCommitSha }} + persist-credentials: false + submodules: recursive + if: > + needs.build-info.outputs.in-workflow-build == 'true' && + needs.build-info.outputs.default-branch == 'main' + - name: "Install Breeze" + uses: ./.github/actions/breeze + if: > + needs.build-info.outputs.in-workflow-build == 'true' && + needs.build-info.outputs.default-branch == 'main' + - name: > + Build Bullseye PROD Images + ${{needs.build-info.outputs.all-python-versions-list-as-string}}:${{env.IMAGE_TAG}} 
+ uses: ./.github/actions/build-prod-images + if: > + needs.build-info.outputs.in-workflow-build == 'true' && + needs.build-info.outputs.default-branch == 'main' + with: + build-provider-packages: ${{ needs.build-info.outputs.default-branch == 'main' }} + chicken-egg-providers: ${{ needs.build-info.outputs.chicken-egg-providers }} + env: + UPGRADE_TO_NEWER_DEPENDENCIES: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }} + DOCKER_CACHE: ${{ needs.build-info.outputs.cache-directive }} + PYTHON_VERSIONS: ${{needs.build-info.outputs.all-python-versions-list-as-string}} + DEBUG_RESOURCES: ${{ needs.build-info.outputs.debug-resources }} + DEBIAN_VERSION: "bullseye" + # Do not override the "bookworm" image - just push a new bullseye image + # TODO: improve caching for that build + IMAGE_TAG: "bullseye-${{ github.event.pull_request.head.sha || github.sha }}" + + build-prod-images-release-branch: + timeout-minutes: 80 + name: > + ${{needs.build-info.outputs.build-job-description}} PROD images (v2_*_test) + ${{needs.build-info.outputs.all-python-versions-list-as-string}} + runs-on: ${{fromJSON(needs.build-info.outputs.runs-on)}} + needs: [build-info, generate-constraints] + env: + DEFAULT_BRANCH: ${{ needs.build-info.outputs.default-branch }} + DEFAULT_CONSTRAINTS_BRANCH: ${{ needs.build-info.outputs.default-constraints-branch }} + RUNS_ON: "${{needs.build-info.outputs.runs-on}}" + BACKEND: sqlite + VERSION_SUFFIX_FOR_PYPI: "dev0" + DEBUG_RESOURCES: ${{needs.build-info.outputs.debug-resources}} + # Force more parallelism for build even on public images + PARALLELISM: 6 + steps: + - name: Cleanup repo + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" + if: > + needs.build-info.outputs.in-workflow-build == 'true' && + needs.build-info.outputs.default-branch != 'main' + - uses: actions/checkout@v4 + with: + ref: ${{ needs.build-info.outputs.targetCommitSha }} + persist-credentials: false + if: > + 
needs.build-info.outputs.in-workflow-build == 'true' && + needs.build-info.outputs.default-branch != 'main' + - name: "Install Breeze" + uses: ./.github/actions/breeze + if: > + needs.build-info.outputs.in-workflow-build == 'true' && + needs.build-info.outputs.default-branch != 'main' + - name: > + Build PROD Images + ${{needs.build-info.outputs.all-python-versions-list-as-string}}:${{env.IMAGE_TAG}} + uses: ./.github/actions/build-prod-images + if: > + needs.build-info.outputs.in-workflow-build == 'true' && + needs.build-info.outputs.default-branch != 'main' + with: + build-provider-packages: ${{ needs.build-info.outputs.default-branch == 'main' }} + chicken-egg-providers: ${{ needs.build-info.outputs.chicken-egg-providers }} + env: + UPGRADE_TO_NEWER_DEPENDENCIES: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }} + DOCKER_CACHE: ${{ needs.build-info.outputs.cache-directive }} + PYTHON_VERSIONS: ${{needs.build-info.outputs.all-python-versions-list-as-string}} + DEBUG_RESOURCES: ${{ needs.build-info.outputs.debug-resources }} + + build-prod-images-bullseye-release-branch: + timeout-minutes: 80 + name: > + Build Bullseye PROD images (v2_*_test) + ${{needs.build-info.outputs.all-python-versions-list-as-string}} + runs-on: ${{fromJSON(needs.build-info.outputs.runs-on)}} + needs: [build-info, generate-constraints] + if: needs.build-info.outputs.canary-run == 'true' + env: + DEFAULT_BRANCH: ${{ needs.build-info.outputs.default-branch }} + DEFAULT_CONSTRAINTS_BRANCH: ${{ needs.build-info.outputs.default-constraints-branch }} + RUNS_ON: "${{needs.build-info.outputs.runs-on}}" + BACKEND: sqlite + VERSION_SUFFIX_FOR_PYPI: "dev0" + DEBUG_RESOURCES: ${{needs.build-info.outputs.debug-resources}} + # Force more parallelism for build even on public images + PARALLELISM: 6 + steps: + - name: Cleanup repo + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" + if: > + needs.build-info.outputs.in-workflow-build == 'true' && + 
needs.build-info.outputs.default-branch != 'main' - uses: actions/checkout@v3 with: ref: ${{ needs.build-info.outputs.targetCommitSha }} persist-credentials: false submodules: recursive + if: > + needs.build-info.outputs.in-workflow-build == 'true' && + needs.build-info.outputs.default-branch != 'main' - name: "Install Breeze" uses: ./.github/actions/breeze + if: > + needs.build-info.outputs.in-workflow-build == 'true' && + needs.build-info.outputs.default-branch != 'main' - name: > Build Bullseye PROD Images ${{needs.build-info.outputs.all-python-versions-list-as-string}}:${{env.IMAGE_TAG}} uses: ./.github/actions/build-prod-images + if: > + needs.build-info.outputs.in-workflow-build == 'true' && + needs.build-info.outputs.default-branch != 'main' with: build-provider-packages: ${{ needs.build-info.outputs.default-branch == 'main' }} chicken-egg-providers: ${{ needs.build-info.outputs.chicken-egg-providers }} @@ -1676,7 +1802,7 @@ jobs: timeout-minutes: 80 name: "Wait for PROD images" runs-on: ${{fromJSON(needs.build-info.outputs.runs-on)}} - needs: [build-info, wait-for-ci-images, build-prod-images] + needs: [build-info, wait-for-ci-images, build-prod-images, build-prod-images-release-branch] if: needs.build-info.outputs.prod-image-build == 'true' env: RUNS_ON: "${{needs.build-info.outputs.runs-on}}" @@ -1698,7 +1824,7 @@ jobs: if: needs.build-info.outputs.in-workflow-build == 'false' - name: Wait for PROD images ${{ env.PYTHON_VERSIONS }}:${{ env.IMAGE_TAG }} # We wait for the images to be available either from "build-images.yml' run as pull_request_target - # or from build-prod-images above. + # or from build-prod-images (or build-prod-images-release-branch) above. # We are utilising single job to wait for all images because this job merely waits # For the images to be available. 
run: breeze prod-image pull --wait-for-image --run-in-parallel diff --git a/airflow/providers/installed_providers.txt b/airflow/providers/installed_providers.txt index 410fc8da83e13..d416a8a47b68e 100644 --- a/airflow/providers/installed_providers.txt +++ b/airflow/providers/installed_providers.txt @@ -1,6 +1,7 @@ amazon celery cncf.kubernetes +common.io common.sql docker elasticsearch diff --git a/dev/breeze/src/airflow_breeze/params/build_prod_params.py b/dev/breeze/src/airflow_breeze/params/build_prod_params.py index ed9a6b674106f..b880f2b0434d2 100644 --- a/dev/breeze/src/airflow_breeze/params/build_prod_params.py +++ b/dev/breeze/src/airflow_breeze/params/build_prod_params.py @@ -191,7 +191,7 @@ def install_postgres_client(self) -> str: @property def docker_context_files(self) -> str: - return "docker-context-files" + return "./docker-context-files" @property def airflow_image_kubernetes(self) -> str: