
switched to official airflow image #143

Merged 7 commits on Dec 22, 2020
13 changes: 7 additions & 6 deletions Dockerfile
@@ -1,4 +1,4 @@
FROM puckel/docker-airflow:1.10.9
FROM apache/airflow:1.10.14-python3.6

USER root

@@ -25,27 +25,28 @@ RUN mkdir -p /usr/local/gcloud \
&& /usr/local/gcloud/google-cloud-sdk/install.sh

USER airflow
ENV AIRFLOW_HOME=/opt/airflow
ENV AIRFLOW_USER_HOME=/home/airflow

ENV PATH /usr/local/gcloud/google-cloud-sdk/bin:$PATH

ENV PATH /usr/local/airflow/.local/bin:$PATH
ENV PATH ${AIRFLOW_USER_HOME}/.local/bin:$PATH

COPY --chown=airflow:airflow requirements.txt ./
RUN pip install --user -r requirements.txt

ARG install_dev
ARG install_dev=n
COPY --chown=airflow:airflow requirements.dev.txt ./
RUN if [ "${install_dev}" = "y" ]; then pip install --user -r requirements.dev.txt; fi

COPY --chown=airflow:airflow dags ./dags

ENV DOCKER_SCRIPTS_DIR=/usr/local/airflow/docker
ENV DOCKER_SCRIPTS_DIR=${AIRFLOW_HOME}/docker
COPY --chown=airflow:airflow docker "${DOCKER_SCRIPTS_DIR}"

ENV HELM_CHARTS_DIR=/usr/local/airflow/helm
ENV HELM_CHARTS_DIR=${AIRFLOW_HOME}/helm
COPY --chown=airflow:airflow helm ${HELM_CHARTS_DIR}
RUN cd ${HELM_CHARTS_DIR}/sciencebeam \
&& helm repo add stable https://kubernetes-charts.storage.googleapis.com \
&& helm dep update

COPY --chown=airflow:airflow sciencebeam_airflow ./sciencebeam_airflow
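A minimal local build-and-verify sketch for the updated Dockerfile (the image tags and commands below are illustrative only, not part of this PR):

```bash
# Production image: dev requirements are skipped because install_dev defaults to "n"
docker build -t sciencebeam-airflow:local .

# Dev image: pass install_dev=y to also install requirements.dev.txt
docker build --build-arg install_dev=y -t sciencebeam-airflow:local-dev .

# Check that the gcloud SDK added to PATH is visible inside the image
docker run --rm --entrypoint bash sciencebeam-airflow:local -c 'which gcloud && gcloud --version'
```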
54 changes: 31 additions & 23 deletions Makefile
@@ -169,10 +169,14 @@ build: helm-charts-update
$(DOCKER_COMPOSE) build airflow-image


build-init:
$(DOCKER_COMPOSE) build init


build-dev: helm-charts-update
# only dev compose file has "init" service defined
@if [ "$(DOCKER_COMPOSE)" = "$(DOCKER_COMPOSE_DEV)" ]; then \
$(DOCKER_COMPOSE) build init; \
$(MAKE) build-init; \
fi
$(DOCKER_COMPOSE) build airflow-dev-base-image airflow-dev

@@ -190,6 +194,10 @@ deploy-sciencebeam:
python -m sciencebeam_airflow.tools.deploy_sciencebeam $(ARGS)


airflow-initdb:
$(DOCKER_COMPOSE) run --rm airflow-webserver initdb


start: helm-charts-update
$(eval SERVICE_NAMES = $(shell docker-compose config --services | grep -v 'airflow-dev'))
$(DOCKER_COMPOSE) up --build -d --scale airflow-worker=2 $(SERVICE_NAMES)
@@ -216,47 +224,47 @@ web-shell:


web-exec:
$(DOCKER_COMPOSE) exec airflow-webserver /entrypoint.sh bash
$(DOCKER_COMPOSE) exec airflow-webserver /entrypoint bash


worker-shell:
$(DOCKER_COMPOSE) run --no-deps airflow-worker bash


worker-exec:
$(DOCKER_COMPOSE) exec airflow-worker /entrypoint.sh bash
$(DOCKER_COMPOSE) exec airflow-worker /entrypoint bash


dags-list:
$(DOCKER_COMPOSE) exec airflow-webserver /entrypoint.sh airflow list_dags
$(DOCKER_COMPOSE) exec airflow-webserver /entrypoint list_dags


dags-unpause:
$(eval DAG_IDS = $(shell \
$(DOCKER_COMPOSE) exec airflow-webserver /entrypoint.sh airflow list_dags \
$(DOCKER_COMPOSE) exec airflow-webserver /entrypoint list_dags \
| grep -P "^(sciencebeam_|grobid_)\S+" \
))
@echo DAG_IDS=$(DAG_IDS)
@for DAG_ID in $(DAG_IDS); do \
echo DAG_ID=$${DAG_ID}; \
$(DOCKER_COMPOSE) exec airflow-webserver /entrypoint.sh airflow unpause $${DAG_ID}; \
$(DOCKER_COMPOSE) exec airflow-webserver /entrypoint unpause $${DAG_ID}; \
done


tasks-list:
$(DOCKER_COMPOSE) exec airflow-webserver /entrypoint.sh airflow list_tasks $(ARGS)
$(DOCKER_COMPOSE) exec airflow-webserver /entrypoint list_tasks $(ARGS)


trigger-gs-list-buckets:
$(DOCKER_COMPOSE) exec airflow-webserver /entrypoint.sh airflow trigger_dag gs_list_buckets
$(DOCKER_COMPOSE) exec airflow-webserver /entrypoint trigger_dag gs_list_buckets


trigger-kube-list-pods:
$(DOCKER_COMPOSE) exec airflow-webserver /entrypoint.sh airflow trigger_dag kube_list_pods
$(DOCKER_COMPOSE) exec airflow-webserver /entrypoint trigger_dag kube_list_pods


trigger-helm-version:
$(DOCKER_COMPOSE) exec airflow-webserver /entrypoint.sh airflow trigger_dag helm_version
$(DOCKER_COMPOSE) exec airflow-webserver /entrypoint trigger_dag helm_version


.run_id:
@@ -319,21 +327,21 @@ trigger-helm-version:


trigger-sciencebeam-convert: .run_id .sciencebeam-convert-eval-conf
$(DOCKER_COMPOSE) exec airflow-webserver /entrypoint.sh airflow trigger_dag \
$(DOCKER_COMPOSE) exec airflow-webserver /entrypoint trigger_dag \
--run_id "$(RUN_ID)" \
--conf "$$SCIENCEBEAM_CONVERT_EVAL_CONF" \
sciencebeam_convert


trigger-sciencebeam-evaluate: .run_id .sciencebeam-convert-eval-conf
$(DOCKER_COMPOSE) exec airflow-webserver /entrypoint.sh airflow trigger_dag \
$(DOCKER_COMPOSE) exec airflow-webserver /entrypoint trigger_dag \
--run_id "$(RUN_ID)" \
--conf "$$SCIENCEBEAM_CONVERT_EVAL_CONF" \
sciencebeam_evaluate


trigger-sciencebeam-evaluation-results-to-bq: .run_id .sciencebeam-convert-eval-conf
docker-compose exec airflow-webserver /entrypoint.sh airflow trigger_dag \
docker-compose exec airflow-webserver /entrypoint trigger_dag \
--run_id "$(RUN_ID)" \
--conf "$$SCIENCEBEAM_CONVERT_EVAL_CONF" \
sciencebeam_evaluation_results_to_bq
@@ -381,35 +389,35 @@ trigger-sciencebeam-evaluation-results-to-bq: .run_id .sciencebeam-convert-eval-


trigger-grobid-train-prepare: .run_id .grobid-train-conf
docker-compose exec airflow-webserver /entrypoint.sh airflow trigger_dag \
docker-compose exec airflow-webserver /entrypoint trigger_dag \
--run_id "$(RUN_ID)" \
--conf "$$SCIENCEBEAM_TRAIN_CONF" \
grobid_train_prepare


trigger-grobid-train-evaluate: .run_id .grobid-train-conf
docker-compose exec airflow-webserver /entrypoint.sh airflow trigger_dag \
docker-compose exec airflow-webserver /entrypoint trigger_dag \
--run_id "$(RUN_ID)" \
--conf "$$SCIENCEBEAM_TRAIN_CONF" \
grobid_train_evaluate


trigger-grobid-train-model: .run_id .grobid-train-conf
docker-compose exec airflow-webserver /entrypoint.sh airflow trigger_dag \
docker-compose exec airflow-webserver /entrypoint trigger_dag \
--run_id "$(RUN_ID)" \
--conf "$$SCIENCEBEAM_TRAIN_CONF" \
grobid_train_model


trigger-grobid-build-image: .run_id .grobid-train-conf
docker-compose exec airflow-webserver /entrypoint.sh airflow trigger_dag \
docker-compose exec airflow-webserver /entrypoint trigger_dag \
--run_id "$(RUN_ID)" \
--conf "$$SCIENCEBEAM_TRAIN_CONF" \
grobid_build_image


trigger-grobid-train-evaluate-source-dataset: .run_id .grobid-train-conf
docker-compose exec airflow-webserver /entrypoint.sh airflow trigger_dag \
docker-compose exec airflow-webserver /entrypoint trigger_dag \
--run_id "$(RUN_ID)" \
--conf "$$SCIENCEBEAM_TRAIN_CONF" \
grobid_train_evaluate_source_dataset
@@ -481,35 +489,35 @@ trigger-grobid-train-evaluate-source-dataset: .run_id .grobid-train-conf


trigger-sciencebeam-autocut-convert-training-data: .run_id .sciencebeam-autocut-train-conf
docker-compose exec airflow-webserver /entrypoint.sh airflow trigger_dag \
docker-compose exec airflow-webserver /entrypoint trigger_dag \
--run_id "$(RUN_ID)" \
--conf "$$SCIENCEBEAM_AUTOCUT_TRAIN_CONF" \
sciencebeam_autocut_convert_training_data


trigger-sciencebeam-autocut-train-model: .run_id .sciencebeam-autocut-train-conf
docker-compose exec airflow-webserver /entrypoint.sh airflow trigger_dag \
docker-compose exec airflow-webserver /entrypoint trigger_dag \
--run_id "$(RUN_ID)" \
--conf "$$SCIENCEBEAM_AUTOCUT_TRAIN_CONF" \
sciencebeam_autocut_train_model


trigger-sciencebeam-autocut-build-image: .run_id .sciencebeam-autocut-train-conf
docker-compose exec airflow-webserver /entrypoint.sh airflow trigger_dag \
docker-compose exec airflow-webserver /entrypoint trigger_dag \
--run_id "$(RUN_ID)" \
--conf "$$SCIENCEBEAM_AUTOCUT_TRAIN_CONF" \
sciencebeam_autocut_build_image


trigger-sciencebeam-autocut-convert-and-evaluate: .run_id .sciencebeam-autocut-train-conf
docker-compose exec airflow-webserver /entrypoint.sh airflow trigger_dag \
docker-compose exec airflow-webserver /entrypoint trigger_dag \
--run_id "$(RUN_ID)" \
--conf "$$SCIENCEBEAM_AUTOCUT_TRAIN_CONF" \
sciencebeam_convert


trigger-sciencebeam-watch:
$(DOCKER_COMPOSE) exec airflow-webserver /entrypoint.sh airflow trigger_dag sciencebeam_watch_experiments
$(DOCKER_COMPOSE) exec airflow-webserver /entrypoint trigger_dag sciencebeam_watch_experiments


ci-build-and-test:
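The Makefile targets above drop the Puckel wrapper (`/entrypoint.sh airflow <cmd>`) in favour of the official image's `/entrypoint`, which hands its arguments to the Airflow CLI. A quick illustrative check against a running stack (service name taken from docker-compose.yml):

```bash
# Illustrative: run an Airflow CLI command through the official image's entrypoint
docker-compose exec airflow-webserver /entrypoint list_dags

# Or bypass the entrypoint entirely for an interactive shell
docker-compose exec airflow-webserver bash
```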
8 changes: 3 additions & 5 deletions README.md
@@ -7,7 +7,7 @@
> ...
> Airflow is not a data streaming solution.

We are using the [Puckel Airflow Image](https://github.com/puckel/docker-airflow).
We are using the official [Airflow Image](https://hub.docker.com/r/apache/airflow).

## Prerequisites

@@ -16,15 +16,13 @@ We are using the [Puckel Airflow Image](https://github.com/puckel/docker-airflow

## gcloud setup

`gcloud auth login`
`gcloud auth application-default login`

## Configuration

Airflow, using the [Puckel Airflow Image](https://github.com/puckel/docker-airflow), is mainly configured in the following way:
Airflow, using the official [Airflow Image](https://hub.docker.com/r/apache/airflow), is mainly configured in the following way:

* Environment variables interpreted by the [entrypoint](https://github.com/puckel/docker-airflow/blob/master/script/entrypoint.sh), e.g. `POSTGRES_HOST`
* Environment variables interpreted by [Airflow](http://airflow.apache.org/howto/set-config.html), e.g. `AIRFLOW__CORE__SQL_ALCHEMY_CONN`
* Default configuration in the [airflow.cfg](https://github.com/puckel/docker-airflow/blob/master/config/airflow.cfg) file
* Default configuration by the Airflow project in [default_airflow.cfg](https://github.com/apache/airflow/blob/master/airflow/config_templates/default_airflow.cfg)

(Since we are using Docker Compose, environment variables would be passed in via [docker-compose.yml](docker-compose.yml))
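As an illustration of the environment-variable route (not part of this PR): `FERNET_KEY` is interpolated into docker-compose.yml, while settings such as `AIRFLOW__CORE__SQL_ALCHEMY_CONN` are set directly in the compose file's `x-airflow-env` block. Assuming the `cryptography` package is available locally:

```bash
# Illustrative only: generate a Fernet key and start the webserver with it
export FERNET_KEY="$(python -c 'from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())')"
docker-compose up -d airflow-webserver
```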
30 changes: 15 additions & 15 deletions docker-compose.override.yml
@@ -14,16 +14,16 @@ services:

airflow-dev:
volumes:
- ./dags:/usr/local/airflow/dags
- ./sciencebeam_airflow:/usr/local/airflow/sciencebeam_airflow
- ./tests:/usr/local/airflow/tests
- ./dags:/opt/airflow/dags
- ./sciencebeam_airflow:/opt/airflow/sciencebeam_airflow
- ./tests:/opt/airflow/tests

airflow-webserver:
volumes:
- ./dags:/usr/local/airflow/dags
- ./sciencebeam_airflow:/usr/local/airflow/sciencebeam_airflow
- config-gcloud:/usr/local/airflow/.config/gcloud
- config-kube:/usr/local/airflow/.kube
- ./dags:/opt/airflow/dags
- ./sciencebeam_airflow:/opt/airflow/sciencebeam_airflow
- config-gcloud:/home/airflow/.config/gcloud
- config-kube:/home/airflow/.kube
depends_on:
- init
environment:
@@ -32,10 +32,10 @@

airflow-scheduler:
volumes:
- ./dags:/usr/local/airflow/dags
- ./sciencebeam_airflow:/usr/local/airflow/sciencebeam_airflow
- config-gcloud:/usr/local/airflow/.config/gcloud
- config-kube:/usr/local/airflow/.kube
- ./dags:/opt/airflow/dags
- ./sciencebeam_airflow:/opt/airflow/sciencebeam_airflow
- config-gcloud:/home/airflow/.config/gcloud
- config-kube:/home/airflow/.kube
depends_on:
- init
environment:
@@ -44,10 +44,10 @@

airflow-worker:
volumes:
- ./dags:/usr/local/airflow/dags
- ./sciencebeam_airflow:/usr/local/airflow/sciencebeam_airflow
- config-gcloud:/usr/local/airflow/.config/gcloud
- config-kube:/usr/local/airflow/.kube
- ./dags:/opt/airflow/dags
- ./sciencebeam_airflow:/opt/airflow/sciencebeam_airflow
- config-gcloud:/home/airflow/.config/gcloud
- config-kube:/home/airflow/.kube
depends_on:
- init
environment:
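With the mount points moved from /usr/local/airflow to the airflow user's home at /home/airflow, one way to sanity-check that the gcloud and kube credentials copied by the init service are visible to the services (illustrative command, assuming the dev stack is already running):

```bash
# Illustrative: confirm the credential volumes are mounted in the worker's home directory
docker-compose exec airflow-worker bash -c 'ls -la ~/.config/gcloud ~/.kube'
```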
8 changes: 6 additions & 2 deletions docker-compose.yml
@@ -4,9 +4,12 @@ x-airflow-env:
&airflow-env
- LOAD_EX=n
- FERNET_KEY=${FERNET_KEY}
- EXECUTOR=Celery
- POSTGRES_HOST=airflow-postgres
- REDIS_HOST=airflow-redis
- AIRFLOW__CORE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@airflow-postgres:5432/airflow
- AIRFLOW__CORE__EXECUTOR=CeleryExecutor
- AIRFLOW__CELERY__BROKER_URL=redis://airflow-redis:6379/1
- AIRFLOW__CELERY__RESULT_BACKEND=db+postgresql://airflow:airflow@airflow-postgres:5432/airflow
- SCIENCEBEAM_NAMESPACE=${SCIENCEBEAM_NAMESPACE}
- SCIENCEBEAM_CONFIG_DATA_PATH=${SCIENCEBEAM_CONFIG_DATA_PATH}
- SCIENCEBEAM_JUDGE_IMAGE=${SCIENCEBEAM_JUDGE_IMAGE}
@@ -21,13 +24,14 @@ services:
context: .
image: ${AIRFLOW_IMAGE_NAME}:${IMAGE_TAG}
command: /bin/sh -c exit 0
entrypoint: []

airflow-dev-base-image:
build:
context: .
dockerfile: Dockerfile
args:
install_dev: y
install_dev: "y"
image: ${AIRFLOW_IMAGE_NAME}-dev-base:${IMAGE_TAG}
command: /bin/sh -c exit 0
entrypoint: []
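The executor, broker, and result backend are now passed as `AIRFLOW__*` variables understood by Airflow itself, replacing the Puckel-specific `EXECUTOR` shortcut. The rendered configuration can be checked before starting anything (illustrative commands):

```bash
# Illustrative: render the merged compose configuration and inspect the Airflow env block
docker-compose config | grep 'AIRFLOW__'

# The dev override file is merged automatically when both files are present
docker-compose -f docker-compose.yml -f docker-compose.override.yml config --services
```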
2 changes: 1 addition & 1 deletion docker/init/init.sh
@@ -3,7 +3,7 @@
set -e

# airflow user id
USER_ID=1000
USER_ID=50000
echo 'changing ownership to $USER_ID, and...'

echo 'copying gcloud credentials...'
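The UID changes because the official image creates the airflow user as UID 50000, rather than the UID 1000 used with the Puckel image. This can be confirmed against the base image directly (illustrative command):

```bash
# Illustrative: check the airflow user's UID in the official base image
docker run --rm --entrypoint bash apache/airflow:1.10.14-python3.6 -c 'id airflow'
```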
2 changes: 1 addition & 1 deletion requirements.prereq.txt
@@ -1 +1 @@
apache-airflow[crypto,celery,ssh]==1.10.12
apache-airflow[crypto,celery,ssh]==1.10.14
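The prerequisite pin is bumped to match the Airflow version baked into the new base image; a quick consistency check (illustrative, assuming it is run from the repository root):

```bash
# Illustrative: the base image tag and the prerequisite pin should name the same Airflow version
grep '^FROM' Dockerfile
grep 'apache-airflow' requirements.prereq.txt
```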