From b7ba796a9699a53bfb13272198fbdd97ef402919 Mon Sep 17 00:00:00 2001
From: Maxime Beauchemin
Date: Wed, 24 Jan 2024 09:51:18 -0800
Subject: [PATCH 1/2] fix(docker): improve docker tags to be clearer and avoid conflicts

---
 .asf.yaml                                |  13 +-
 .github/workflows/docker-release.yml     |  12 +-
 .github/workflows/docker.yml             |  29 +-
 .github/workflows/pr-lint.yml            |   2 +-
 .github/workflows/superset-e2e.yml       |   4 +
 .../superset-python-integrationtest.yml  |   4 +
 .../installing-superset-from-scratch.mdx |   2 +-
 pytest.ini                               |   1 +
 scripts/build_docker.py                  | 253 ++++++++++++++++
 scripts/docker_build_push.sh             | 156 ----------
 tests/unit_tests/fixtures/bash_mock.py   |  12 -
 tests/unit_tests/scripts/docker_build.py | 277 ++++++++++++++++++
 .../scripts/docker_build_push_test.py    |  66 -----
 tests/unit_tests/utils/docker.py         | 269 +++++++++++++++++
 14 files changed, 847 insertions(+), 253 deletions(-)
 create mode 100755 scripts/build_docker.py
 delete mode 100755 scripts/docker_build_push.sh
 create mode 100644 tests/unit_tests/scripts/docker_build.py
 delete mode 100644 tests/unit_tests/scripts/docker_build_push_test.py
 create mode 100644 tests/unit_tests/utils/docker.py

diff --git a/.asf.yaml b/.asf.yaml
index ab980e0c3fb2b..b16ca8b579a46 100644
--- a/.asf.yaml
+++ b/.asf.yaml
@@ -59,8 +59,10 @@ github:
         # strict means "Require branches to be up to date before merging".
         strict: false
         # contexts are the names of checks that must pass
+        # unfortunately AFAICT for `matrix:` jobs, we have to itemize every
+        # combination here.
         contexts:
-          - check
+          - lint-check
           - cypress-matrix (1, chrome)
           - cypress-matrix (2, chrome)
           - cypress-matrix (3, chrome)
@@ -72,6 +74,15 @@ github:
           - test-postgres (3.9)
           - test-postgres (3.10)
           - test-sqlite (3.9)
+          - ephemeral-docker-build
+          - docker-build (dev, linux/amd64)
+          - docker-build (lean, linux/amd64)
+          - docker-build (py310, linux/arm64)
+          - docker-build (py310, linux/amd64)
+          - docker-build (websocket, linux/arm64)
+          - docker-build (websocket, linux/amd64)
+          - docker-build (dockerize, linux/arm64)
+          - docker-build (dockerize, linux/amd64)
       required_pull_request_reviews:
         dismiss_stale_reviews: false
diff --git a/.github/workflows/docker-release.yml b/.github/workflows/docker-release.yml
index 3e205fdeffaaf..fe355c54dc50e 100644
--- a/.github/workflows/docker-release.yml
+++ b/.github/workflows/docker-release.yml
@@ -24,8 +24,15 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        target: ["dev", "lean", "lean310", "websocket", "dockerize"]
+        build_preset: ["dev", "lean", "py310", "websocket", "dockerize"]
         platform: ["linux/amd64", "linux/arm64"]
+        exclude:
+          # disabling because slow! no python wheels for arm/py39 and
+          # QEMU is slow!
+          - build_preset: "dev"
+            platform: "linux/arm64"
+          - build_preset: "lean"
+            platform: "linux/arm64"
       fail-fast: false
     steps:
       - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )"
@@ -47,4 +54,5 @@ jobs:
           DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
         run: |
           GITHUB_RELEASE_TAG_NAME="${{ github.event.release.tag_name }}"
-          ./scripts/docker_build_push.sh "$GITHUB_RELEASE_TAG_NAME" ${{ matrix.target }} ${{ matrix.platform }}
+          pip install click
+          ./scripts/build_docker.py ${{ matrix.build_preset }} release --platform ${{ matrix.platform }}
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index c4a89c4358ba9..df92ca5639be3 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -7,20 +7,24 @@ on:
   pull_request:
     types: [synchronize, opened, reopened, ready_for_review]
 
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }}
+  cancel-in-progress: true
+
 jobs:
   docker-build:
     name: docker-build
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        target: ["dev", "lean", "lean310", "websocket", "dockerize"]
+        build_preset: ["dev", "lean", "py310", "websocket", "dockerize"]
         platform: ["linux/amd64", "linux/arm64"]
         exclude:
           # disabling because slow! no python wheels for arm/py39 and
           # QEMU is slow!
-          - target: "dev"
+          - build_preset: "dev"
             platform: "linux/arm64"
-          - target: "lean"
+          - build_preset: "lean"
             platform: "linux/arm64"
       fail-fast: false
     steps:
@@ -41,11 +45,11 @@ jobs:
           DOCKERHUB_USER: ${{ secrets.DOCKERHUB_USER }}
           DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
         run: |
-          ./scripts/docker_build_push.sh "" ${{ matrix.target }} ${{ matrix.platform }}
+          pip install click
+          ./scripts/build_docker.py ${{ matrix.build_preset }} ${{ github.event_name }} --platform ${{ matrix.platform }}
 
   ephemeral-docker-build:
-    name: docker-build
     runs-on: ubuntu-latest
     steps:
       - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )"
@@ -61,19 +65,16 @@ jobs:
 
       - name: Build ephemeral env image
         if: github.event_name == 'pull_request'
+        env:
+          DOCKERHUB_USER: ${{ secrets.DOCKERHUB_USER }}
+          DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
         run: |
+          pip install click
+          ./scripts/build_docker.py "ci" "pull_request" --platform linux/amd64
           mkdir -p ./build
           echo ${{ github.sha }} > ./build/SHA
           echo ${{ github.event.pull_request.number }} > ./build/PR-NUM
-          docker buildx build --target ci \
-            --load \
-            --cache-from=type=registry,ref=apache/superset:lean \
-            -t ${{ github.sha }} \
-            -t "pr-${{ github.event.pull_request.number }}" \
-            --platform linux/amd64 \
-            --label "build_actor=${GITHUB_ACTOR}" \
-            .
-          docker save ${{ github.sha }} | gzip > ./build/${{ github.sha }}.tar.gz
+          docker save ${{ github.sha }}-ci | gzip > ./build/${{ github.sha }}.tar.gz
 
       - name: Upload build artifacts
         if: github.event_name == 'pull_request'
diff --git a/.github/workflows/pr-lint.yml b/.github/workflows/pr-lint.yml
index 5283e138c1a0a..156a5fbb1ba7d 100644
--- a/.github/workflows/pr-lint.yml
+++ b/.github/workflows/pr-lint.yml
@@ -9,7 +9,7 @@ on:
     types: [opened, edited, reopened, synchronize]
 
 jobs:
-  check:
+  lint-check:
     runs-on: ubuntu-latest
     permissions:
       contents: read
diff --git a/.github/workflows/superset-e2e.yml b/.github/workflows/superset-e2e.yml
index 0f01a1be157f0..c0de1e1cd36fb 100644
--- a/.github/workflows/superset-e2e.yml
+++ b/.github/workflows/superset-e2e.yml
@@ -7,6 +7,10 @@ on:
   pull_request:
     types: [synchronize, opened, reopened, ready_for_review]
 
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }}
+  cancel-in-progress: true
+
 jobs:
   cypress-matrix:
     runs-on: ubuntu-20.04
diff --git a/.github/workflows/superset-python-integrationtest.yml b/.github/workflows/superset-python-integrationtest.yml
index 76187250f4c7d..e518dff7be648 100644
--- a/.github/workflows/superset-python-integrationtest.yml
+++ b/.github/workflows/superset-python-integrationtest.yml
@@ -8,6 +8,10 @@ on:
   pull_request:
     types: [synchronize, opened, reopened, ready_for_review]
 
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }}
+  cancel-in-progress: true
+
 jobs:
   test-mysql:
     runs-on: ubuntu-20.04
diff --git a/docs/docs/installation/installing-superset-from-scratch.mdx b/docs/docs/installation/installing-superset-from-scratch.mdx
index 7ad19e8cb8ad8..0dadacbc40e7e 100644
--- a/docs/docs/installation/installing-superset-from-scratch.mdx
+++ b/docs/docs/installation/installing-superset-from-scratch.mdx
@@ -67,7 +67,7 @@ brew install readline pkg-config libffi openssl mysql postgresql@14
 You should install a recent version of Python. Refer to the [setup.py](https://github.com/apache/superset/blob/master/setup.py) file for a list of Python versions officially supported by Superset. We'd recommend using a Python version manager like [pyenv](https://github.com/pyenv/pyenv) (and also [pyenv-virtualenv](https://github.com/pyenv/pyenv-virtualenv)).
 
 :::tip
-To identify the Python version used by the official docker image, see the [Dockerfile](https://github.com/apache/superset/blob/master/Dockerfile). Additional docker images published for newer versions of Python can be found in [this file](https://github.com/apache/superset/blob/master/scripts/docker_build_push.sh).
+To identify the Python version used by the official docker image, see the [Dockerfile](https://github.com/apache/superset/blob/master/Dockerfile). Additional docker images published for newer versions of Python can be found in [this file](https://github.com/apache/superset/blob/master/scripts/build_docker.py).
 :::
 
 Let's also make sure we have the latest version of `pip` and `setuptools`:
diff --git a/pytest.ini b/pytest.ini
index 3fec965e72ae6..37ee2f0dd8140 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -18,3 +18,4 @@ testpaths =
   tests
 python_files = *_test.py test_*.py *_tests.py
   *viz/utils.py
+addopts = -p no:warnings
diff --git a/scripts/build_docker.py b/scripts/build_docker.py
new file mode 100755
index 0000000000000..f2323eb7eddc8
--- /dev/null
+++ b/scripts/build_docker.py
@@ -0,0 +1,253 @@
+#!/usr/bin/env python3
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import os
+import re
+import subprocess
+from textwrap import dedent
+
+import click
+
+REPO = "apache/superset"
+CACHE_REPO = f"{REPO}-cache"
+BASE_PY_IMAGE = "3.9-slim-bookworm"
+
+
+def run_cmd(command: str) -> str:
+    process = subprocess.Popen(
+        command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
+    )
+
+    output = ""
+    if process.stdout is not None:
+        for line in iter(process.stdout.readline, ""):
+            print(line.strip())  # Print the line to stdout in real-time
+            output += line
+
+    process.wait()  # Wait for the subprocess to finish
+    return output
+
+
+def get_git_sha() -> str:
+    return run_cmd("git rev-parse HEAD").strip()
+
+
+def get_build_context_ref(build_context: str) -> str:
+    """
+    Given a context, return a ref:
+    - if context is pull_request, return the PR's id
+    - if context is push, return the branch
+    - if context is release, return the release ref
+    """
+
+    event = os.getenv("GITHUB_EVENT_NAME")
+    github_ref = os.getenv("GITHUB_REF", "")
+
+    if event == "pull_request":
+        github_head_ref = os.getenv("GITHUB_HEAD_REF", "")
+        return re.sub("[^a-zA-Z0-9]", "-", github_head_ref)[:40]
+    elif event == "release":
+        return re.sub("refs/tags/", "", github_ref)[:40]
+    elif event == "push":
+        return re.sub("[^a-zA-Z0-9]", "-", re.sub("refs/heads/", "", github_ref))[:40]
+    return ""
+
+
+def is_latest_release(release: str) -> bool:
+    output = run_cmd(f"./scripts/tag_latest_release.sh {release} --dry-run") or ""
+    return "SKIP_TAG::false" in output
+
+
+def make_docker_tag(l: list[str]) -> str:
+    return f"{REPO}:" + "-".join([o for o in l if o])
+
+
+def get_docker_tags(
+    build_preset: str,
+    build_platform: str,
+    sha: str,
+    build_context: str,
+    build_context_ref: str,
+) -> set[str]:
+    """
+    Return a set of tags given a given build context
+    """
+    tags: set[str] = set()
+    tag_chunks: list[str] = []
+
+    short_build_platform = build_platform.replace("linux/", "").replace("64", "")
+
+    is_latest = is_latest_release(build_context_ref)
+
+    if build_preset != "lean":
+        # Always add the preset_build name if different from default (lean)
+        tag_chunks += [build_preset]
+
+    if short_build_platform != "amd":
+        # Always a platform indicator if different from default (amd)
+        tag_chunks += [short_build_platform]
+
+    # Always craft a tag for the SHA
+    tags.add(make_docker_tag([sha] + tag_chunks))
+    # also a short SHA, cause it's nice
+    tags.add(make_docker_tag([sha[:7]] + tag_chunks))
+
+    if build_context == "release":
+        # add a release tag
+        tags.add(make_docker_tag([build_context_ref] + tag_chunks))
+        if is_latest:
+            # add a latest tag
+            tags.add(make_docker_tag(["latest"] + tag_chunks))
+    elif build_context == "push" and build_context_ref == "master":
+        tags.add(make_docker_tag(["master"] + tag_chunks))
+    return tags
+
+
+def get_docker_command(
+    build_preset: str,
+    build_platform: str,
+    is_authenticated: bool,
+    sha: str,
+    build_context: str,
+    build_context_ref: str,
+) -> str:
+    tag = ""
+    build_target = ""
+    py_ver = BASE_PY_IMAGE
+    docker_context = "."
+
+    if build_preset == "dev":
+        build_target = "dev"
+    elif build_preset == "lean":
+        build_target = "lean"
+    elif build_preset == "py310":
+        build_target = "lean"
+        py_ver = "3.10-slim-bookworm"
+    elif build_preset == "websocket":
+        build_target = ""
+        docker_context = "superset-websocket"
+    elif build_preset == "ci":
+        build_target = "ci"
+    elif build_preset == "dockerize":
+        build_target = ""
+        docker_context = "-f dockerize.Dockerfile ."
+    else:
+        print(f"Invalid build preset: {build_preset}")
+        exit(1)
+
+    # Try to get context reference if missing
+    if not build_context_ref:
+        build_context_ref = get_build_context_ref(build_context)
+
+    tags = get_docker_tags(
+        build_preset,
+        build_platform,
+        sha,
+        build_context,
+        build_context_ref,
+    )
+    docker_tags = ("\\\n" + 8 * " ").join([f"-t {s} " for s in tags])
+
+    docker_args = "--load" if not is_authenticated else "--push"
+    target_argument = f"--target {build_target}" if build_target else ""
+    short_build_platform = build_platform.replace("linux/", "").replace("64", "")
+    cache_ref = f"{CACHE_REPO}:{py_ver}-{short_build_platform}"
+    cache_from_arg = f"--cache-from=type=registry,ref={cache_ref}"
+    cache_to_arg = (
+        f"--cache-to=type=registry,mode=max,ref={cache_ref}" if is_authenticated else ""
+    )
+    build_arg = f"--build-arg PY_VER={py_ver}" if py_ver else ""
+    actor = os.getenv("GITHUB_ACTOR")
+
+    return dedent(
+        f"""\
+        docker buildx build \\
+        {docker_args} \\
+        {docker_tags} \\
+        {cache_from_arg} \\
+        {cache_to_arg} \\
+        {build_arg} \\
+        --platform {build_platform} \\
+        --label sha={sha} \\
+        --label target={build_target} \\
+        --label build_trigger={build_context} \\
+        --label base={py_ver} \\
+        --label build_actor={actor} \\
+        {docker_context}"""
+    )
+
+
+@click.command()
+@click.argument(
+    "build_preset",
+    type=click.Choice(["lean", "dev", "dockerize", "websocket", "py310", "ci"]),
+)
+@click.argument("build_context", type=click.Choice(["push", "pull_request", "release"]))
+@click.option(
+    "--platform",
+    type=click.Choice(["linux/arm64", "linux/amd64"]),
+    default="linux/amd64",
+)
+@click.option("--build_context_ref", help="a reference to the pr, release or branch")
+@click.option("--dry-run", is_flag=True, help="Run the command in dry-run mode.")
+def main(
+    build_preset: str,
+    build_context: str,
+    build_context_ref: str,
+    platform: str,
+    dry_run: bool,
+) -> None:
+    """
+    This script executes docker build and push commands based on given arguments.
+    """
+
+    is_authenticated = (
+        True if os.getenv("DOCKERHUB_TOKEN") and os.getenv("DOCKERHUB_USER") else False
+    )
+    build_context_ref = get_build_context_ref(build_context)
+
+    docker_build_command = get_docker_command(
+        build_preset,
+        platform,
+        is_authenticated,
+        get_git_sha(),
+        build_context,
+        get_build_context_ref(build_context),
+    )
+
+    if not dry_run:
+        print("Executing Docker Build Command:")
+        print(docker_build_command)
+        script = ""
+        if os.getenv("DOCKERHUB_USER"):
+            script = dedent(
+                f"""\
+                docker logout
+                docker login --username "{os.getenv("DOCKERHUB_USER")}" --password "{os.getenv("DOCKERHUB_TOKEN")}"
+                DOCKER_ARGS="--push"
+                """
+            )
+        script = script + docker_build_command
+        stdout = run_cmd(script)
+    else:
+        print("Dry Run - Docker Build Command:")
+        print(docker_build_command)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/docker_build_push.sh b/scripts/docker_build_push.sh
deleted file mode 100755
index 3d0271cb2b4d2..0000000000000
--- a/scripts/docker_build_push.sh
+++ /dev/null
@@ -1,156 +0,0 @@
-#!/usr/bin/env bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-set -eo pipefail
-
-GITHUB_RELEASE_TAG_NAME="$1"
-TARGET="$2"
-BUILD_PLATFORM="$3" # should be either 'linux/amd64' or 'linux/arm64'
-
-# Common variables
-SHA=$(git rev-parse HEAD)
-REPO_NAME="apache/superset"
-DOCKER_ARGS="--load" # default args, change as needed
-DOCKER_CONTEXT="."
-
-
-if [[ "${GITHUB_EVENT_NAME}" == "pull_request" ]]; then
-  REFSPEC=$(echo "${GITHUB_HEAD_REF}" | sed 's/[^a-zA-Z0-9]/-/g' | head -c 40)
-  PR_NUM=$(echo "${GITHUB_REF}" | sed 's:refs/pull/::' | sed 's:/merge::')
-  LATEST_TAG="pr-${PR_NUM}"
-elif [[ "${GITHUB_EVENT_NAME}" == "release" ]]; then
-  REFSPEC=$(echo "${GITHUB_REF}" | sed 's:refs/tags/::' | head -c 40)
-  LATEST_TAG="${REFSPEC}"
-else
-  REFSPEC=$(echo "${GITHUB_REF}" | sed 's:refs/heads/::' | sed 's/[^a-zA-Z0-9]/-/g' | head -c 40)
-  LATEST_TAG="${REFSPEC}"
-fi
-
-
-if [[ "${REFSPEC}" == "master" ]]; then
-  LATEST_TAG="master"
-fi
-
-# get the latest release tag
-if [ -n "${GITHUB_RELEASE_TAG_NAME}" ]; then
-  output=$(source ./scripts/tag_latest_release.sh "${GITHUB_RELEASE_TAG_NAME}" --dry-run) || true
-  SKIP_TAG=$(echo "${output}" | grep "SKIP_TAG" | cut -d'=' -f2)
-  if [[ "${SKIP_TAG}" == "SKIP_TAG::false" ]]; then
-    LATEST_TAG="latest"
-  fi
-fi
-
-if [[ "${TEST_ENV}" == "true" ]]; then
-  # don't run the build in test environment
-  echo "LATEST_TAG is ${LATEST_TAG}"
-  exit 0
-fi
-
-# for the dev image, it's ok to tag master as latest-dev
-# for production, we only want to tag the latest official release as latest
-if [ "${LATEST_TAG}" = "master" ]; then
-  DEV_TAG="${REPO_NAME}:latest-dev"
-else
-  DEV_TAG="${REPO_NAME}:${LATEST_TAG}-dev"
-fi
-
-BUILD_ARG="3.9-slim-bookworm"
-
-# Replace '/' with '-' in BUILD_PLATFORM
-SAFE_BUILD_PLATFORM=$(echo "${BUILD_PLATFORM}" | sed 's/\//-/g')
-MAIN_UNIQUE_TAG="${REPO_NAME}:${SHA}-${TARGET}-${SAFE_BUILD_PLATFORM}-${BUILD_ARG}"
-
-case "${TARGET}" in
-  "dev")
-    DOCKER_TAGS="-t ${MAIN_UNIQUE_TAG} -t ${REPO_NAME}:${SHA}-dev -t ${REPO_NAME}:${REFSPEC}-dev -t ${DEV_TAG}"
-    BUILD_TARGET="dev"
-    ;;
-  "lean")
-    DOCKER_TAGS="-t ${MAIN_UNIQUE_TAG} -t ${REPO_NAME}:${SHA} -t ${REPO_NAME}:${REFSPEC} -t ${REPO_NAME}:${LATEST_TAG}"
-    BUILD_TARGET="lean"
-    ;;
-  "lean310")
-    DOCKER_TAGS="-t ${MAIN_UNIQUE_TAG} -t ${REPO_NAME}:${SHA}-py310 -t ${REPO_NAME}:${REFSPEC}-py310 -t ${REPO_NAME}:${LATEST_TAG}-py310"
-    BUILD_TARGET="lean"
-    BUILD_ARG="3.10-slim-bookworm"
-    ;;
-  "websocket")
-    DOCKER_TAGS="-t ${MAIN_UNIQUE_TAG} -t ${REPO_NAME}:${SHA}-websocket -t ${REPO_NAME}:${REFSPEC}-websocket -t ${REPO_NAME}:${LATEST_TAG}-websocket"
-    BUILD_TARGET=""
-    DOCKER_CONTEXT="superset-websocket"
-    ;;
-  "dockerize")
-    DOCKER_TAGS="-t ${MAIN_UNIQUE_TAG} -t ${REPO_NAME}:dockerize"
-    BUILD_TARGET=""
-    DOCKER_CONTEXT="-f dockerize.Dockerfile ."
-    ;;
-  *)
-    echo "Invalid TARGET: ${TARGET}"
-    exit 1
-    ;;
-esac
-
-cat<
Date: Tue, 30 Jan 2024 21:54:00 -0800
Subject: [PATCH 2/2] dummy no-op checks for lingering checks

---
 .github/workflows/no-op.yml | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/.github/workflows/no-op.yml b/.github/workflows/no-op.yml
index 835063640aa96..4fecc2ba94030 100644
--- a/.github/workflows/no-op.yml
+++ b/.github/workflows/no-op.yml
@@ -52,3 +52,17 @@ jobs:
         run: |
           echo "This is a no-op step for python-lint to ensure a successful status."
           exit 0
+  check:
+    runs-on: ubuntu-latest
+    steps:
+      - name: No-op for check
+        run: |
+          echo "This is a no-op step for check to ensure a successful status."
+          exit 0
+  docker-build:
+    runs-on: ubuntu-latest
+    steps:
+      - name: No-op for docker-build
+        run: |
+          echo "This is a no-op step for docker-build to ensure a successful status."
+          exit 0
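
A quick, hedged illustration of the tagging scheme introduced by scripts/build_docker.py
(this note is not part of the patches above, and it is not the PR's actual unit test
suite, whose files under tests/unit_tests/ are only listed in the diffstat): the sketch
below stubs out is_latest_release(), which would otherwise shell out to
./scripts/tag_latest_release.sh, and checks the tags produced for a "lean" release
build on arm64. It assumes the repo root as the working directory, `click` installed,
and uses a dummy SHA and an example release ref ("3.0.0").

    # Minimal sketch, assuming scripts/build_docker.py is importable from the repo root.
    import sys
    from unittest import mock

    sys.path.insert(0, "scripts")  # make scripts/build_docker.py importable as `build_docker`
    import build_docker

    SHA = "0123456789abcdef0123456789abcdef01234567"  # dummy sha, for illustration only

    # Stub the helper that shells out to ./scripts/tag_latest_release.sh so the
    # example stays hermetic and pretends this release is the latest one.
    with mock.patch.object(build_docker, "is_latest_release", return_value=True):
        tags = build_docker.get_docker_tags(
            build_preset="lean",
            build_platform="linux/arm64",
            sha=SHA,
            build_context="release",
            build_context_ref="3.0.0",
        )

    # "lean" is the default preset (no suffix) and "amd" the default platform,
    # so only the non-default platform shows up as a tag suffix.
    assert tags == {
        f"apache/superset:{SHA}-arm",
        f"apache/superset:{SHA[:7]}-arm",
        "apache/superset:3.0.0-arm",
        "apache/superset:latest-arm",
    }
    print("\n".join(sorted(tags)))

The same command construction can be inspected end to end, without building or pushing
anything, by running the CLI in dry-run mode, e.g.
`./scripts/build_docker.py lean release --platform linux/arm64 --dry-run`, which prints
the docker buildx command instead of executing it.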