diff --git a/README.md b/README.md
index f8fd6621..3f172419 100644
--- a/README.md
+++ b/README.md
@@ -122,6 +122,52 @@ You can also use this to run a bash shell or any other command in the same envir
     docker run --rm -ti puckel/docker-airflow bash
     docker run --rm -ti puckel/docker-airflow ipython
 
+# Simplified SQL database configuration using PostgreSQL
+
+If the executor type is set to anything other than *SequentialExecutor*, you'll need an SQL database.
+Here is a list of the PostgreSQL configuration variables and their default values. They're used to compute
+the `AIRFLOW__CORE__SQL_ALCHEMY_CONN` and `AIRFLOW__CELERY__RESULT_BACKEND` variables for you when needed,
+if you don't provide them explicitly:
+
+| Variable            | Default value | Role                 |
+|---------------------|---------------|----------------------|
+| `POSTGRES_HOST`     | `postgres`    | Database server host |
+| `POSTGRES_PORT`     | `5432`        | Database server port |
+| `POSTGRES_USER`     | `airflow`     | Database user        |
+| `POSTGRES_PASSWORD` | `airflow`     | Database password    |
+| `POSTGRES_DB`       | `airflow`     | Database name        |
+| `POSTGRES_EXTRAS`   | empty         | Extra parameters     |
+
+You can also use these variables to adapt your compose file to match an existing PostgreSQL instance managed elsewhere.
+
+Please refer to the Airflow documentation to understand the use of extra parameters, for example to configure
+a connection that uses TLS encryption.
+
+Here's an important thing to consider:
+
+> When specifying the connection as URI (in AIRFLOW_CONN_* variable) you should specify it following the standard syntax of DB connections,
+> where extras are passed as parameters of the URI (note that all components of the URI should be URL-encoded).
+
+Therefore you must provide the extra parameters URL-encoded, starting with a leading `?`. For example:
+
+    POSTGRES_EXTRAS="?sslmode=verify-full&sslrootcert=%2Fetc%2Fssl%2Fcerts%2Fca-certificates.crt"
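+
+For illustration, with the default values above, `script/entrypoint.sh` computes the following
+(the result backend only when *CeleryExecutor* is used):
+
+    AIRFLOW__CORE__SQL_ALCHEMY_CONN="postgresql+psycopg2://airflow:airflow@postgres:5432/airflow"
+    AIRFLOW__CELERY__RESULT_BACKEND="db+postgresql://airflow:airflow@postgres:5432/airflow"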
+
+# Simplified Celery broker configuration using Redis
+
+If the executor type is set to *CeleryExecutor*, you'll need a Celery broker. Here is a list of the Redis
+configuration variables and their default values. They're used to compute the `AIRFLOW__CELERY__BROKER_URL`
+variable for you, if you don't provide it explicitly:
+
+| Variable         | Default value | Role                           |
+|------------------|---------------|--------------------------------|
+| `REDIS_PROTO`    | `redis://`    | Protocol                       |
+| `REDIS_HOST`     | `redis`       | Redis server host              |
+| `REDIS_PORT`     | `6379`        | Redis server port              |
+| `REDIS_PASSWORD` | empty         | If Redis is password protected |
+| `REDIS_DBNUM`    | `1`           | Database number                |
+
+You can also use these variables to adapt your compose file to match an existing Redis instance managed elsewhere.
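+
+For illustration, with the defaults above and `REDIS_PASSWORD` set to the assumed example value `secret`,
+the entrypoint computes:
+
+    AIRFLOW__CELERY__BROKER_URL="redis://:secret@redis:6379/1"
+
+With no password set, this is simply `redis://redis:6379/1`.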
+
 # Wanna help?
 
 Fork, improve and PR.
diff --git a/script/entrypoint.sh b/script/entrypoint.sh
index 06532713..166f4837 100755
--- a/script/entrypoint.sh
+++ b/script/entrypoint.sh
@@ -1,49 +1,33 @@
 #!/usr/bin/env bash
 
-TRY_LOOP="20"
-
-: "${REDIS_HOST:="redis"}"
-: "${REDIS_PORT:="6379"}"
-: "${REDIS_PASSWORD:=""}"
+# User-provided configuration must always be respected.
+#
+# Therefore, this script only derives AIRFLOW__ variables from other variables
+# when the user did not provide their own configuration.
 
-: "${POSTGRES_HOST:="postgres"}"
-: "${POSTGRES_PORT:="5432"}"
-: "${POSTGRES_USER:="airflow"}"
-: "${POSTGRES_PASSWORD:="airflow"}"
-: "${POSTGRES_DB:="airflow"}"
+TRY_LOOP="20"
 
-# Defaults and back-compat
+# Global defaults and back-compat
 : "${AIRFLOW_HOME:="/usr/local/airflow"}"
 : "${AIRFLOW__CORE__FERNET_KEY:=${FERNET_KEY:=$(python -c "from cryptography.fernet import Fernet; FERNET_KEY = Fernet.generate_key().decode(); print(FERNET_KEY)")}}"
 : "${AIRFLOW__CORE__EXECUTOR:=${EXECUTOR:-Sequential}Executor}"
 
+# Load DAGs examples (default: Yes)
+if [[ -z "$AIRFLOW__CORE__LOAD_EXAMPLES" && "${LOAD_EX:=n}" == n ]]; then
+  AIRFLOW__CORE__LOAD_EXAMPLES=False
+fi
+
 export \
   AIRFLOW_HOME \
-  AIRFLOW__CELERY__BROKER_URL \
-  AIRFLOW__CELERY__RESULT_BACKEND \
   AIRFLOW__CORE__EXECUTOR \
   AIRFLOW__CORE__FERNET_KEY \
   AIRFLOW__CORE__LOAD_EXAMPLES \
-  AIRFLOW__CORE__SQL_ALCHEMY_CONN \
-
-
-# Load DAGs examples (default: Yes)
-if [[ -z "$AIRFLOW__CORE__LOAD_EXAMPLES" && "${LOAD_EX:=n}" == n ]]
-then
-  AIRFLOW__CORE__LOAD_EXAMPLES=False
-fi
 
 # Install custom python package if requirements.txt is present
 if [ -e "/requirements.txt" ]; then
     $(command -v pip) install --user -r /requirements.txt
 fi
 
-if [ -n "$REDIS_PASSWORD" ]; then
-    REDIS_PREFIX=:${REDIS_PASSWORD}@
-else
-    REDIS_PREFIX=
-fi
-
 wait_for_port() {
   local name="$1" host="$2" port="$3"
   local j=0
@@ -58,14 +42,68 @@ wait_for_port() {
   done
 }
 
+# Executors other than SequentialExecutor require an SQL database; here PostgreSQL is used
 if [ "$AIRFLOW__CORE__EXECUTOR" != "SequentialExecutor" ]; then
-  AIRFLOW__CORE__SQL_ALCHEMY_CONN="postgresql+psycopg2://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST:$POSTGRES_PORT/$POSTGRES_DB"
-  AIRFLOW__CELERY__RESULT_BACKEND="db+postgresql://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST:$POSTGRES_PORT/$POSTGRES_DB"
+  # Check if the user has provided explicit Airflow configuration concerning the database
+  if [ -z "$AIRFLOW__CORE__SQL_ALCHEMY_CONN" ]; then
+    # Default values corresponding to the default compose files
+    : "${POSTGRES_HOST:="postgres"}"
+    : "${POSTGRES_PORT:="5432"}"
+    : "${POSTGRES_USER:="airflow"}"
+    : "${POSTGRES_PASSWORD:="airflow"}"
+    : "${POSTGRES_DB:="airflow"}"
+    : "${POSTGRES_EXTRAS:=""}"
+
+    AIRFLOW__CORE__SQL_ALCHEMY_CONN="postgresql+psycopg2://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT}/${POSTGRES_DB}${POSTGRES_EXTRAS}"
+    export AIRFLOW__CORE__SQL_ALCHEMY_CONN
+
+    # With CeleryExecutor, the database is also used as the Celery result backend
+    if [ "$AIRFLOW__CORE__EXECUTOR" = "CeleryExecutor" ]; then
+      AIRFLOW__CELERY__RESULT_BACKEND="db+postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT}/${POSTGRES_DB}${POSTGRES_EXTRAS}"
+      export AIRFLOW__CELERY__RESULT_BACKEND
+    fi
+  else
+    if [[ "$AIRFLOW__CORE__EXECUTOR" == "CeleryExecutor" && -z "$AIRFLOW__CELERY__RESULT_BACKEND" ]]; then
+      >&2 printf '%s\n' "FATAL: if you set AIRFLOW__CORE__SQL_ALCHEMY_CONN manually with CeleryExecutor you must also set AIRFLOW__CELERY__RESULT_BACKEND"
+      exit 1
+    fi
+
+    # Derive useful variables from the AIRFLOW__ variables provided explicitly by the user
+    POSTGRES_ENDPOINT=$(echo -n "$AIRFLOW__CORE__SQL_ALCHEMY_CONN" | cut -d '/' -f3 | sed -e 's,.*@,,')
+    POSTGRES_HOST=$(echo -n "$POSTGRES_ENDPOINT" | cut -d ':' -f1)
+    POSTGRES_PORT=$(echo -n "$POSTGRES_ENDPOINT" | cut -d ':' -f2)
+  fi
+
   wait_for_port "Postgres" "$POSTGRES_HOST" "$POSTGRES_PORT"
 fi
 
+# CeleryExecutor requires a Celery broker; here Redis is used
 if [ "$AIRFLOW__CORE__EXECUTOR" = "CeleryExecutor" ]; then
-  AIRFLOW__CELERY__BROKER_URL="redis://$REDIS_PREFIX$REDIS_HOST:$REDIS_PORT/1"
+  # Check if the user has provided explicit Airflow configuration concerning the broker
+  if [ -z "$AIRFLOW__CELERY__BROKER_URL" ]; then
+    # Default values corresponding to the default compose files
+    : "${REDIS_PROTO:="redis://"}"
+    : "${REDIS_HOST:="redis"}"
+    : "${REDIS_PORT:="6379"}"
+    : "${REDIS_PASSWORD:=""}"
+    : "${REDIS_DBNUM:="1"}"
+
+    # Redis ignores the username part of basic auth; when password protected, only the password (token) is used
+    if [ -n "$REDIS_PASSWORD" ]; then
+      REDIS_PREFIX=":${REDIS_PASSWORD}@"
+    else
+      REDIS_PREFIX=
+    fi
+
+    AIRFLOW__CELERY__BROKER_URL="${REDIS_PROTO}${REDIS_PREFIX}${REDIS_HOST}:${REDIS_PORT}/${REDIS_DBNUM}"
+    export AIRFLOW__CELERY__BROKER_URL
+  else
+    # Derive useful variables from the AIRFLOW__ variables provided explicitly by the user
+    REDIS_ENDPOINT=$(echo -n "$AIRFLOW__CELERY__BROKER_URL" | cut -d '/' -f3 | sed -e 's,.*@,,')
+    REDIS_HOST=$(echo -n "$REDIS_ENDPOINT" | cut -d ':' -f1)
+    REDIS_PORT=$(echo -n "$REDIS_ENDPOINT" | cut -d ':' -f2)
+  fi
+
   wait_for_port "Redis" "$REDIS_HOST" "$REDIS_PORT"
 fi
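+
+# Illustration of the derivation above, assuming a user-provided broker URL
+# (the URI below is an example, not a shipped default):
+#   AIRFLOW__CELERY__BROKER_URL="redis://:token@redis.example.com:6380/2"
+#   cut -d '/' -f3    -> ":token@redis.example.com:6380"  (authority part)
+#   sed -e 's,.*@,,'  -> "redis.example.com:6380"         (credentials stripped)
+# giving REDIS_HOST="redis.example.com" and REDIS_PORT="6380" for wait_for_port.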