Proxy DinD calls of airbyte-ci through Tailscale to access pull through cache (#32965)
Conor authored Dec 6, 2023
1 parent f73827e commit ff96866
Showing 11 changed files with 137 additions and 24 deletions.
9 changes: 9 additions & 0 deletions .github/actions/run-dagger-pipeline/action.yml
@@ -16,6 +16,11 @@ inputs:
docker_hub_password:
description: "Dockerhub password"
required: true
docker_registry_mirror_url:
description: "Docker registry mirror URL (without the http:// or https:// scheme)"
required: false
# Do not include a scheme here; the pipeline prepends http:// when building the docker daemon config.
default: "ci-dockerhub-registry.airbyte.com"
options:
description: "Options for the subcommand"
required: false
@@ -69,6 +74,8 @@ inputs:
s3_build_cache_secret_key:
description: "Gradle S3 Build Cache AWS secret key"
required: false
tailscale_auth_key:
description: "Tailscale auth key"
airbyte_ci_binary_url:
description: "URL to airbyte-ci binary"
required: false
@@ -130,3 +137,5 @@ runs:
S3_BUILD_CACHE_ACCESS_KEY_ID: ${{ inputs.s3_build_cache_access_key_id }}
S3_BUILD_CACHE_SECRET_KEY: ${{ inputs.s3_build_cache_secret_key }}
CI: "True"
TAILSCALE_AUTH_KEY: ${{ inputs.tailscale_auth_key }}
DOCKER_REGISTRY_MIRROR_URL: ${{ inputs.docker_registry_mirror_url }}
6 changes: 6 additions & 0 deletions .github/workflows/airbyte-ci-tests.yml
@@ -75,6 +75,7 @@ jobs:
github_token: ${{ secrets.GH_PAT_MAINTENANCE_OCTAVIA }}
subcommand: "test airbyte-ci/connectors/connector_ops"
airbyte_ci_binary_url: ${{ inputs.airbyte_ci_binary_url || 'https://connectors.airbyte.com/airbyte-ci/releases/ubuntu/latest/airbyte-ci' }}
tailscale_auth_key: ${{ secrets.TAILSCALE_AUTH_KEY }}

- name: Run airbyte-ci/connectors/pipelines tests
id: run-airbyte-ci-connectors-pipelines-tests
@@ -89,6 +90,7 @@
github_token: ${{ secrets.GH_PAT_MAINTENANCE_OCTAVIA }}
subcommand: "test airbyte-ci/connectors/pipelines"
airbyte_ci_binary_url: ${{ inputs.airbyte_ci_binary_url || 'https://connectors.airbyte.com/airbyte-ci/releases/ubuntu/latest/airbyte-ci' }}
tailscale_auth_key: ${{ secrets.TAILSCALE_AUTH_KEY }}

- name: Run airbyte-ci/connectors/base_images tests
id: run-airbyte-ci-connectors-base-images-tests
@@ -103,6 +105,7 @@
github_token: ${{ secrets.GH_PAT_MAINTENANCE_OCTAVIA }}
subcommand: "test airbyte-ci/connectors/base_images"
airbyte_ci_binary_url: ${{ inputs.airbyte_ci_binary_url || 'https://connectors.airbyte.com/airbyte-ci/releases/ubuntu/latest/airbyte-ci' }}
tailscale_auth_key: ${{ secrets.TAILSCALE_AUTH_KEY }}

- name: Run test pipeline for the metadata lib
id: metadata_lib-test-pipeline
@@ -115,6 +118,8 @@
docker_hub_username: ${{ secrets.DOCKER_HUB_USERNAME }}
docker_hub_password: ${{ secrets.DOCKER_HUB_PASSWORD }}
airbyte_ci_binary_url: ${{ inputs.airbyte_ci_binary_url || 'https://connectors.airbyte.com/airbyte-ci/releases/ubuntu/latest/airbyte-ci' }}
tailscale_auth_key: ${{ secrets.TAILSCALE_AUTH_KEY }}

- name: Run test for the metadata orchestrator
id: metadata_orchestrator-test-pipeline
if: steps.changes.outputs.metadata_orchestrator_any_changed == 'true'
@@ -126,3 +131,4 @@ jobs:
docker_hub_username: ${{ secrets.DOCKER_HUB_USERNAME }}
docker_hub_password: ${{ secrets.DOCKER_HUB_PASSWORD }}
airbyte_ci_binary_url: ${{ inputs.airbyte_ci_binary_url || 'https://connectors.airbyte.com/airbyte-ci/releases/ubuntu/latest/airbyte-ci' }}
tailscale_auth_key: ${{ secrets.TAILSCALE_AUTH_KEY }}
1 change: 1 addition & 0 deletions .github/workflows/cat-tests.yml
@@ -32,3 +32,4 @@ jobs:
sentry_dsn: ${{ secrets.SENTRY_AIRBYTE_CI_DSN }}
github_token: ${{ secrets.GH_PAT_MAINTENANCE_OCTAVIA }}
subcommand: "test airbyte-integrations/bases/connector-acceptance-test --test-directory=unit_tests"
tailscale_auth_key: ${{ secrets.TAILSCALE_AUTH_KEY }}
2 changes: 2 additions & 0 deletions .github/workflows/connectors_tests.yml
@@ -71,6 +71,7 @@ jobs:
s3_build_cache_secret_key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }}
subcommand: "connectors ${{ github.event.inputs.test-connectors-options }} test"
airbyte_ci_binary_url: ${{ github.event.inputs.airbyte_ci_binary_url }}
tailscale_auth_key: ${{ secrets.TAILSCALE_AUTH_KEY }}
- name: Test connectors [PULL REQUESTS]
if: github.event_name == 'pull_request'
uses: ./.github/actions/run-dagger-pipeline
@@ -85,4 +86,5 @@ jobs:
github_token: ${{ env.PAT }}
s3_build_cache_access_key_id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }}
s3_build_cache_secret_key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }}
tailscale_auth_key: ${{ secrets.TAILSCALE_AUTH_KEY }}
subcommand: "connectors --modified test"
1 change: 1 addition & 0 deletions .github/workflows/connectors_weekly_build.yml
@@ -41,4 +41,5 @@ jobs:
gcp_gsm_credentials: ${{ secrets.GCP_GSM_CREDENTIALS }}
git_branch: ${{ steps.extract_branch.outputs.branch }}
github_token: ${{ secrets.GITHUB_TOKEN }}
tailscale_auth_key: ${{ secrets.TAILSCALE_AUTH_KEY }}
subcommand: '--show-dagger-logs connectors ${{ inputs.test-connectors-options || ''--concurrency=3 --metadata-query="(data.ab_internal.ql > 100) & (data.ab_internal.sl < 200)"'' }} test'
1 change: 1 addition & 0 deletions .github/workflows/format_check.yml
@@ -31,6 +31,7 @@ jobs:
gcs_credentials: ${{ secrets.METADATA_SERVICE_PROD_GCS_CREDENTIALS }}
sentry_dsn: ${{ secrets.SENTRY_AIRBYTE_CI_DSN }}
github_token: ${{ secrets.GH_PAT_MAINTENANCE_OCTAVIA }}
tailscale_auth_key: ${{ secrets.TAILSCALE_AUTH_KEY }}
subcommand: "format check all"

# This is helpful in the case that we change a previously committed generated file to be ignored by git.
2 changes: 2 additions & 0 deletions .github/workflows/format_fix.yml
@@ -50,6 +50,7 @@ jobs:
gcs_credentials: ${{ secrets.METADATA_SERVICE_PROD_GCS_CREDENTIALS }}
sentry_dsn: ${{ secrets.SENTRY_AIRBYTE_CI_DSN }}
github_token: ${{ secrets.GH_PAT_MAINTENANCE_OCTAVIA }}
tailscale_auth_key: ${{ secrets.TAILSCALE_AUTH_KEY }}
subcommand: "format fix all"

# This is helpful in the case that we change a previously committed generated file to be ignored by git.
@@ -74,4 +75,5 @@ jobs:
gcs_credentials: ${{ secrets.METADATA_SERVICE_PROD_GCS_CREDENTIALS }}
sentry_dsn: ${{ secrets.SENTRY_AIRBYTE_CI_DSN }}
github_token: ${{ secrets.GH_PAT_MAINTENANCE_OCTAVIA }}
tailscale_auth_key: ${{ secrets.TAILSCALE_AUTH_KEY }}
subcommand: "format check all"
@@ -24,5 +24,6 @@ jobs:
docker_hub_username: ${{ secrets.DOCKER_HUB_USERNAME }}
docker_hub_password: ${{ secrets.DOCKER_HUB_PASSWORD }}
gcp_gsm_credentials: ${{ secrets.GCP_GSM_CREDENTIALS }}
tailscale_auth_key: ${{ secrets.TAILSCALE_AUTH_KEY }}
env:
DAGSTER_CLOUD_METADATA_API_TOKEN: ${{ secrets.DAGSTER_CLOUD_METADATA_API_TOKEN }}
2 changes: 2 additions & 0 deletions .github/workflows/publish_connectors.yml
@@ -46,6 +46,7 @@ jobs:
spec_cache_gcs_credentials: ${{ secrets.SPEC_CACHE_SERVICE_ACCOUNT_KEY_PUBLISH }}
s3_build_cache_access_key_id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }}
s3_build_cache_secret_key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }}
tailscale_auth_key: ${{ secrets.TAILSCALE_AUTH_KEY }}
subcommand: "connectors --concurrency=1 --execute-timeout=3600 --metadata-changes-only publish --main-release"

- name: Publish connectors [manual]
@@ -65,6 +66,7 @@
spec_cache_gcs_credentials: ${{ secrets.SPEC_CACHE_SERVICE_ACCOUNT_KEY_PUBLISH }}
s3_build_cache_access_key_id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }}
s3_build_cache_secret_key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }}
tailscale_auth_key: ${{ secrets.TAILSCALE_AUTH_KEY }}
subcommand: "connectors ${{ github.event.inputs.connectors-options }} publish ${{ github.event.inputs.publish-options }}"
airbyte_ci_binary_url: ${{ github.event.inputs.airbyte-ci-binary-url }}

6 changes: 6 additions & 0 deletions airbyte-ci/connectors/pipelines/pipelines/consts.py
@@ -2,6 +2,7 @@
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#

import os
import platform
from enum import Enum

@@ -37,6 +38,9 @@
DOCKER_VERSION = "24.0.2"
DOCKER_DIND_IMAGE = f"docker:{DOCKER_VERSION}-dind"
DOCKER_CLI_IMAGE = f"docker:{DOCKER_VERSION}-cli"
DOCKER_REGISTRY_MIRROR_URL = os.getenv("DOCKER_REGISTRY_MIRROR_URL")
DOCKER_REGISTRY_ADDRESS = "docker.io"
DOCKER_VAR_LIB_VOLUME_NAME = "docker-cache"
GRADLE_CACHE_PATH = "/root/.gradle/caches"
GRADLE_BUILD_CACHE_PATH = f"{GRADLE_CACHE_PATH}/build-cache-1"
GRADLE_READ_ONLY_DEPENDENCY_CACHE_PATH = "/root/gradle_dependency_cache"
@@ -53,6 +57,8 @@
PIP_CACHE_PATH = "/root/.cache/pip"
POETRY_CACHE_VOLUME_NAME = "poetry_cache"
POETRY_CACHE_PATH = "/root/.cache/pypoetry"
STORAGE_DRIVER = "fuse-overlayfs"
TAILSCALE_AUTH_KEY = os.getenv("TAILSCALE_AUTH_KEY")


class CIContext(str, Enum):
@@ -8,60 +8,142 @@

from dagger import Client, Container, File, Secret
from pipelines import consts
from pipelines.airbyte_ci.connectors.context import ConnectorContext, PipelineContext
from pipelines.consts import DOCKER_HOST_NAME, DOCKER_HOST_PORT, DOCKER_TMP_VOLUME_NAME, DOCKER_VAR_LIB_VOLUME_NAME
from pipelines.airbyte_ci.connectors.context import ConnectorContext
from pipelines.consts import (
DOCKER_HOST_NAME,
DOCKER_HOST_PORT,
DOCKER_REGISTRY_ADDRESS,
DOCKER_REGISTRY_MIRROR_URL,
DOCKER_TMP_VOLUME_NAME,
DOCKER_VAR_LIB_VOLUME_NAME,
STORAGE_DRIVER,
TAILSCALE_AUTH_KEY,
)
from pipelines.helpers.utils import sh_dash_c
from pipelines.models.contexts.pipeline_context import PipelineContext


def with_global_dockerd_service(
dagger_client: Client, docker_hub_username_secret: Optional[Secret] = None, docker_hub_password_secret: Optional[Secret] = None
) -> Container:
"""Create a container with a docker daemon running.
We expose its 2375 port to use it as a docker host for docker-in-docker use cases.
def get_base_dockerd_container(dagger_client: Client) -> Container:
"""Provision a container to run a docker daemon.
It will be used as a docker host for docker-in-docker use cases.
Args:
dagger_client (Client): The dagger client used to create the container.
docker_hub_username_secret (Optional[Secret]): The DockerHub username secret.
docker_hub_password_secret (Optional[Secret]): The DockerHub password secret.
Returns:
Container: The container running dockerd as a service
Container: The container to run dockerd as a service
"""
dockerd_container = (
apk_packages_to_install = [
STORAGE_DRIVER,
# Curl is only used for debugging purposes.
"curl",
]
base_container = (
dagger_client.container()
.from_(consts.DOCKER_DIND_IMAGE)
# We set this env var because we need to use a non-default zombie reaper setting.
# The reason for this is that by default it will want to set its parent process ID to 1 when reaping.
# This won't be possible because of container-ception: dind is running inside the dagger engine.
# See https://github.com/krallin/tini#subreaping for details.
.with_env_variable("TINI_SUBREAPER", "")
# Similarly, because of container-ception, we have to use the fuse-overlayfs storage driver.
.with_exec(
sh_dash_c(
[
# Update package metadata.
"apk update",
# Install the storage driver package.
"apk add fuse-overlayfs",
# Update daemon config with storage driver.
f"apk add {' '.join(apk_packages_to_install)}",
"mkdir /etc/docker",
'(echo {\\"storage-driver\\": \\"fuse-overlayfs\\"} > /etc/docker/daemon.json)',
]
)
)
# Expose the docker host port.
.with_exposed_port(DOCKER_HOST_PORT)
# Mount the docker cache volumes.
.with_mounted_cache("/var/lib/docker", dagger_client.cache_volume(DOCKER_VAR_LIB_VOLUME_NAME))
# We cache /tmp for file sharing between client and daemon.
.with_mounted_cache("/tmp", dagger_client.cache_volume(DOCKER_TMP_VOLUME_NAME))
)
if docker_hub_username_secret and docker_hub_password_secret:
dockerd_container = (

# We cache /var/lib/docker to avoid downloading images and layers multiple times.
base_container = base_container.with_mounted_cache("/var/lib/docker", dagger_client.cache_volume(DOCKER_VAR_LIB_VOLUME_NAME))
return base_container


def get_daemon_config_json(registry_mirror_url: Optional[str] = None) -> str:
"""Get the json representation of the docker daemon config.
Args:
registry_mirror_url (Optional[str]): The registry mirror url to use.
Returns:
str: The JSON representation of the docker daemon config.
"""
daemon_config = {
"storage-driver": STORAGE_DRIVER,
}
if registry_mirror_url:
daemon_config["registry-mirrors"] = ["http://" + registry_mirror_url]
daemon_config["insecure-registries"] = [registry_mirror_url]
return json.dumps(daemon_config)
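
For illustration, calling this helper with and without a mirror yields the following daemon configs (a minimal sketch; the mirror hostname is the default from action.yml, and the output is a single json.dumps line, wrapped here for readability):

>>> get_daemon_config_json()
'{"storage-driver": "fuse-overlayfs"}'
>>> get_daemon_config_json("ci-dockerhub-registry.airbyte.com")
'{"storage-driver": "fuse-overlayfs",
  "registry-mirrors": ["http://ci-dockerhub-registry.airbyte.com"],
  "insecure-registries": ["ci-dockerhub-registry.airbyte.com"]}'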


def docker_login(
dockerd_container: Container,
docker_registry_username_secret: Optional[Secret],
docker_registry_password_secret: Optional[Secret],
) -> Container:
"""Login to a docker registry if the username and password secrets are provided.
Args:
dockerd_container (Container): The dockerd_container container to login to the registry.
docker_registry_username_secret (Optional[Secret]): The docker registry username secret.
docker_registry_password_secret (Optional[Secret]): The docker registry password secret.
docker_registry_address (Optional[str]): The docker registry address to login to. Defaults to "docker.io" (DockerHub).
Returns:
Container: The container with the docker login command executed if the username and password secrets are provided. Noop otherwise.
"""
if docker_registry_username_secret and docker_registry_password_secret:
return (
dockerd_container
# We use a cache buster here to guarantee the docker login is always executed.
.with_env_variable("CACHEBUSTER", str(uuid.uuid4()))
.with_secret_variable("DOCKER_HUB_USERNAME", docker_hub_username_secret)
.with_secret_variable("DOCKER_HUB_PASSWORD", docker_hub_password_secret)
.with_exec(sh_dash_c(["docker login -u $DOCKER_HUB_USERNAME -p $DOCKER_HUB_PASSWORD"]), skip_entrypoint=True)
.with_secret_variable("DOCKER_REGISTRY_USERNAME", docker_registry_username_secret)
.with_secret_variable("DOCKER_REGISTRY_PASSWORD", docker_registry_password_secret)
.with_exec(
sh_dash_c([f"docker login -u $DOCKER_REGISTRY_USERNAME -p $DOCKER_REGISTRY_PASSWORD {DOCKER_REGISTRY_ADDRESS}"]),
skip_entrypoint=True,
)
)
else:
return dockerd_container
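
A minimal usage sketch, assuming credentials are wrapped as secrets with the Dagger Python SDK's set_secret helper (the secret names and values below are hypothetical):

# Hypothetical example: wire DockerHub credentials into the base dockerd container.
username_secret = dagger_client.set_secret("docker_hub_username", "my-user")
password_secret = dagger_client.set_secret("docker_hub_password", "my-password")
logged_in_dockerd = docker_login(get_base_dockerd_container(dagger_client), username_secret, password_secret)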


def with_global_dockerd_service(
dagger_client: Client,
docker_hub_username_secret: Optional[Secret] = None,
docker_hub_password_secret: Optional[Secret] = None,
) -> Container:
"""Create a container with a docker daemon running.
We expose its 2375 port to use it as a docker host for docker-in-docker use cases.
It is optionally bound to a Tailscale VPN if the TAILSCALE_AUTH_KEY env var is set.
Args:
dagger_client (Client): The dagger client used to create the container.
docker_hub_username_secret (Optional[Secret]): The DockerHub username secret.
docker_hub_password_secret (Optional[Secret]): The DockerHub password secret.
Returns:
Container: The container running dockerd as a service
"""

dockerd_container = get_base_dockerd_container(dagger_client)
if TAILSCALE_AUTH_KEY is not None:
# Ping the registry mirror host to make sure it's reachable through the VPN.
# We set a cache buster here to guarantee the curl command is always executed.
dockerd_container = dockerd_container.with_env_variable("CACHEBUSTER", str(uuid.uuid4())).with_exec(
["curl", "-vvv", f"http://{DOCKER_REGISTRY_MIRROR_URL}/v2/"], skip_entrypoint=True
)
daemon_config_json = get_daemon_config_json(DOCKER_REGISTRY_MIRROR_URL)
else:
daemon_config_json = get_daemon_config_json()

dockerd_container = dockerd_container.with_new_file("/etc/docker/daemon.json", daemon_config_json)
# Docker login happens late because there's a cache buster in the docker login command.
dockerd_container = docker_login(dockerd_container, docker_hub_username_secret, docker_hub_password_secret)
return dockerd_container.with_exec(
["dockerd", "--log-level=error", f"--host=tcp://0.0.0.0:{DOCKER_HOST_PORT}", "--tls=false"], insecure_root_capabilities=True
)