diff --git a/projects/control-service/projects/vdk_job_builder/.gitignore b/projects/control-service/projects/vdk_job_builder/.gitignore deleted file mode 100644 index 6b243f63d7..0000000000 --- a/projects/control-service/projects/vdk_job_builder/.gitignore +++ /dev/null @@ -1,6 +0,0 @@ -.idea -vdk_job_builder/__pycache__ -vdk_job_builder/__init__.pyc -build -dist -vdk_job_builder.egg-info diff --git a/projects/control-service/projects/vdk_job_builder/README.md b/projects/control-service/projects/vdk_job_builder/README.md deleted file mode 100644 index 9db2d403a1..0000000000 --- a/projects/control-service/projects/vdk_job_builder/README.md +++ /dev/null @@ -1,60 +0,0 @@ -# Job Builder -This package provides a way to configure and build your own Data Job images. - -## Who will use this module -The module is to be used in a couple of places: -* VDK (possible local builds can be seen how in the `cli.py`) -* Deployer -* Taurus (Spawning CronJobs which will build the images by passing env vars) - -## Default variables -If no environmental variables are set and the default constructor is used to build the configuration the values -used in the Dockerfile will be taken into account as per the -[documentation](https://docs.docker.com/engine/reference/builder/#arg). - -## Examples -The package exposes three ways you can build docker image with the job: -1. Only job name with specific folder hierarchy -2. Job name along with Dockerfile location and job location -3. Configuration through environmental variables - -> NOTE: we will not be setting environmental variables so the default -> configuration for the VDK build arguments will be taken from the Dockerfile - -### Job name Dockerfile location and Job location -In `cli.py` you can see the three ways you can build an image. - -In order to try them out: - -1. Install `vdk_job_builder` module from this folder: -``` -python3.7 setup.py bdist -``` -2. Clone the data job jobs repo -3. Build the image with directories in our case from this folder: -``` -python3.7 cli.py example ./vdk_job_builder/Dockerfile -``` - -### Minimal configuration -The package can build job images with a minimal configuration: - -``` -JobConfig().with_default_config() -``` - -Which expects the module to be ran in the following hierarchy: - -``` -/job_source_folder # Build context folder - /cli.py # Run from here - /Dockerfile - / - /job_files_here -``` - -This minimal configuration will overwrite the needed properties which were set by the environmental variables. - -### Configuration through environmental variables -This is the most agile approach, a shell script can set environment variables and call the code which will -use the module to build images. diff --git a/projects/control-service/projects/vdk_job_builder/cli.py b/projects/control-service/projects/vdk_job_builder/cli.py deleted file mode 100644 index ec1abe5048..0000000000 --- a/projects/control-service/projects/vdk_job_builder/cli.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright 2021 VMware, Inc. -# SPDX-License-Identifier: Apache-2.0 -import sys - -from vdk_job_builder.vdk_job_builder import JobConfig - - -# TODO: create binary which will use this module instead -# of this script here. Possible choices -# https://pypi.org/project/click/ -# https://hugovk.github.io/top-pypi-packages/ -# -# This is only example implementation which shows -# how the package can be used -if len(sys.argv) == 2: - print("Job name only requires specific folder structure") - job_name = sys.argv.__getitem__(1) - print(f"job_name={job_name}") - j = JobConfig().with_default_config(job_name) - j.build_image() - # j.push_images() # Optional -elif len(sys.argv) == 4: - print("Job name, Dockerfile location and build context") - job_name = sys.argv.__getitem__(1) - dockerfile_path = sys.argv.__getitem__(2) - source_path = sys.argv.__getitem__(3) - print( - f"job_name={job_name}\nsource_path={source_path}\ndockerfile_path={dockerfile_path}" - ) - j = JobConfig().with_paths_override(job_name, dockerfile_path, source_path) - j.build_image() - # j.push_images() # Optional -elif len(sys.argv) == 1: - print("Configuration from environment variables and Dockerfile defaults") - j = JobConfig() - j.build_image() - # j.push_images() # Optional diff --git a/projects/control-service/projects/vdk_job_builder/k8s_vdk_job_builder/Dockerfile b/projects/control-service/projects/vdk_job_builder/k8s_vdk_job_builder/Dockerfile deleted file mode 100644 index 7cc205407a..0000000000 --- a/projects/control-service/projects/vdk_job_builder/k8s_vdk_job_builder/Dockerfile +++ /dev/null @@ -1,42 +0,0 @@ -# Executed in a k8s environment and used to trigger a build for a data job image. -# -# To build the image: -# docker build -t taurus-builder -f k8s_vdk_job_builder/Dockerfile . -# To run the image locally: -# docker run -v /var/run/docker.sock:/var/run/docker.sock -e taurus-builder:latest -FROM r.j3ss.co/img AS img -USER root - -# Setup Python and Git -## Update & Install dependencies -RUN apk add --no-cache --update \ - git \ - bash \ - python3 py3-pip - -# create symlink to img -RUN ln -s "$(which img)" "/usr/local/bin/docker" -RUN docker version - -# AWS CLI -RUN pip3 install awscli \ - && apk --purge -v del py3-pip \ - && rm -rf /var/cache/apk/* - -# Copy builder script -COPY k8s_vdk_job_builder/build_image.sh /build_image.sh -RUN chmod +x /build_image.sh - -# TODO: -# Download the latest vdk_job_builder python module from artifactory (pip repo) in build_image.sh -COPY cli.py /cli.py -COPY vdk_job_builder/ /vdk_job_builder - -ENTRYPOINT ["/build_image.sh"] - -# docker - Apache 2.0 https://docs.docker.com/engine/#licensing -# git - GPLv2 https://git-scm.com/about/free-and-open-source -# bash - GNU General Public License https://www.gnu.org/software/bash/ -# python3 - PSF, Zero-Clause BSD https://docs.python.org/3/license.html -# pip - MIT https://github.com/pypa/pip/blob/main/LICENSE.txt -# awscli - Apache 2.0 https://github.com/aws/aws-cli/blob/develop/LICENSE.txt diff --git a/projects/control-service/projects/vdk_job_builder/k8s_vdk_job_builder/Dockerfile-data-job-base b/projects/control-service/projects/vdk_job_builder/k8s_vdk_job_builder/Dockerfile-data-job-base deleted file mode 100644 index eefa9a14a5..0000000000 --- a/projects/control-service/projects/vdk_job_builder/k8s_vdk_job_builder/Dockerfile-data-job-base +++ /dev/null @@ -1,33 +0,0 @@ -# https://docs.docker.com/develop/develop-images/dockerfile_best-practices\ -# Creating data job base image with support for Oracle. - -ARG base_image=python:3.7-slim -FROM $base_image - -# Set the working directory -WORKDIR /job - - -# Install native dependencies so that requirements in requirements.txt can be installed -# some (like openssl) should be pre-installed in the base image but let's be explicit -RUN set -ex \ - && apt-get update \ - && apt-get -y install --no-install-recommends \ - build-essential openssl g++ - -# Install the native dependencies necessary for cx_Oracle python library -# See https://oracle.github.io/odpi/doc/installation.html#linux -RUN set -ex \ - && echo "Installing native dependencies related to support for cx_Oracle python library ..." \ - && mkdir -p /opt/lib/native \ - && apt-get -y install --no-install-recommends libaio1 curl unzip \ - && curl --insecure --output oracle-instantclient.zip https://download.oracle.com/otn_software/linux/instantclient/1911000/instantclient-basic-linux.x64-19.11.0.0.0dbru.zip \ - && unzip oracle-instantclient.zip -d /opt/lib/native/oracle && rm -f oracle-instantclient.zip \ - && sh -c "echo /opt/lib/native/oracle/instantclient_19_11 > /etc/ld.so.conf.d/oracle-instantclient.conf" \ - && ldconfig \ - && apt-get purge -y --auto-remove curl unzip - -# libaio1 - LGPL-2.1+ https://developer.puri.sm/licenses/Librem5/Birch/libaio1/copyright -# curl - MIT/X* modified https://curl.se/docs/copyright.html -# unzip - MIT https://github.com/vipsoft/Unzip/blob/master/LICENSE -# oracle instant client license - Oracle Technology Network Development and Distribution License Agreement https://www.oracle.com/downloads/licenses/instant-client-lic.html diff --git a/projects/control-service/projects/vdk_job_builder/k8s_vdk_job_builder/build_image.sh b/projects/control-service/projects/vdk_job_builder/k8s_vdk_job_builder/build_image.sh deleted file mode 100644 index bb00e28f5f..0000000000 --- a/projects/control-service/projects/vdk_job_builder/k8s_vdk_job_builder/build_image.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/bin/sh - -# Copyright 2021 VMware, Inc. -# SPDX-License-Identifier: Apache-2.0 - -# TODO: deprecate and switch with officially supported docker image builders - -# Fail on any error -set -ex - -# Clone job -aws_access_key_id=$1 -aws_secret_access_key=$2 -aws_region=$3 -aws_ecr_registry=$4 -git_username=$5 -git_password=$6 -git_repository=$7 -registry_type=$8 -registry_username=$9 -registry_password=$10 - - -# Defaulting to Amazon ECR in case the registry type is not set in order to not break backwards -# compatibility -# and we start to version the builder image -if [ "$registry_type" = "ecr" ] || [ "$registry_type" = "ECR" ] || [ "$registry_type" = "" ]; then - # Docker login to ECR - aws configure set aws_access_key_id $aws_access_key_id - aws configure set aws_secret_access_key $aws_secret_access_key - aws ecr get-login-password --region $aws_region | docker login --username AWS --password-stdin $aws_ecr_registry - - # https://stackoverflow.com/questions/1199613/extract-filename-and-path-from-url-in-bash-script - repository_prefix=${aws_ecr_registry#*/} - # Create docker repository if it does not exist - aws ecr describe-repositories --region $aws_region --repository-names $repository_prefix/${DATA_JOB_NAME} || - aws ecr create-repository --region $aws_region --repository-name $repository_prefix/${DATA_JOB_NAME} -elif [ "$registry_type" = "generic" ] || [ "$registry_type" = "GENERIC" ]; then - echo -n "$registry_password" | docker login $IMAGE_REGISTRY_PATH --username $registry_username --password-stdin -fi - -# Clone repo into /data-jobs dir to get job's source -git clone https://$git_username:$git_password@$git_repository /data-jobs -cd /data-jobs -git reset --hard $GIT_COMMIT || ( echo ">data-job-not-found<" && exit 1 ) -if [ ! -d ${DATA_JOB_NAME} ]; then - echo ">data-job-not-found<" - exit 1 -fi -cd .. - -python3 cli.py $DATA_JOB_NAME ./vdk_job_builder/Dockerfile /data-jobs - -docker push ${IMAGE_REGISTRY_PATH}/${DATA_JOB_NAME}:${GIT_COMMIT} diff --git a/projects/control-service/projects/vdk_job_builder/publish-vdk-job-builder.sh b/projects/control-service/projects/vdk_job_builder/publish-vdk-job-builder.sh deleted file mode 100755 index cef55df496..0000000000 --- a/projects/control-service/projects/vdk_job_builder/publish-vdk-job-builder.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash - -# Copyright 2021 VMware, Inc. -# SPDX-License-Identifier: Apache-2.0 - -SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -VERSION_TAG=$(cat "$SCRIPT_DIR/version.txt") -VDK_DOCKER_REGISTRY_URL=${VDK_DOCKER_REGISTRY_URL:-"registry.hub.docker.com/versatiledatakit"} - -function build_and_push_image() { - name="$1" - docker_file="$2" - arguments="$3" - - image_repo="$VDK_DOCKER_REGISTRY_URL/$name" - image_tag="$image_repo:$VERSION_TAG" - - docker build -t $image_tag -t $image_repo:latest -f "$SCRIPT_DIR/k8s_vdk_job_builder/$docker_file" $arguments "$SCRIPT_DIR" - docker push $image_tag - docker push $image_repo:latest -} - -build_and_push_image \ - "data-job-base-python-3.7" \ - Dockerfile-data-job-base \ - "--build-arg base_image=python:3.7-slim" - -build_and_push_image \ - "data-job-base-python-3.8" \ - Dockerfile-data-job-base \ - "--build-arg base_image=python:3.8-slim" - -build_and_push_image \ - "data-job-base-python-3.9" \ - Dockerfile-data-job-base \ - "--build-arg base_image=python:3.9-slim" - -build_and_push_image "job-builder" Dockerfile diff --git a/projects/control-service/projects/vdk_job_builder/setup.py b/projects/control-service/projects/vdk_job_builder/setup.py deleted file mode 100644 index ccfe8035ec..0000000000 --- a/projects/control-service/projects/vdk_job_builder/setup.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright 2021 VMware, Inc. -# SPDX-License-Identifier: Apache-2.0 -import setuptools - -"""Builds a package with the help of setuptools in order for this package to be imported in other projects -""" - -with open("README.md") as fh: - long_description = fh.read() - -setuptools.setup( - name="vdk-job-builder", - version="0.0.1", - # author="Example Author", - author_email="versatiledatakit@groups.vmware.com", - description="Package which builds Data Jobs with Docker", - long_description=long_description, - long_description_content_type="text/markdown", - # url="https://github.com/pypa/sampleproject", - packages=setuptools.find_packages(), - classifiers=[ - "Programming Language :: Python :: 3", - # "License :: OSI Approved :: Apache Software License", - # "Operating System :: OS Independent", - ], - python_requires=">=3.7", -) diff --git a/projects/control-service/projects/vdk_job_builder/vdk_job_builder/Dockerfile b/projects/control-service/projects/vdk_job_builder/vdk_job_builder/Dockerfile deleted file mode 100644 index 3ce3c583d8..0000000000 --- a/projects/control-service/projects/vdk_job_builder/vdk_job_builder/Dockerfile +++ /dev/null @@ -1,21 +0,0 @@ -# https://docs.docker.com/develop/develop-images/dockerfile_best-practices - -ARG base_image=python:3.9-slim - -FROM $base_image - -# Set the working directory -WORKDIR /job - -# Copy the actual job that has to be executed -ARG job_name -COPY $job_name $job_name/ - -# TODO: this would trigger for any change in job even if requirements.txt does not change -# but there's no COPY_IF_EXISTS command in docker to try copy it. -ARG requirements_file=requirements.txt -RUN if [ -f "$job_name/$requirements_file" ]; then pip3 install --disable-pip-version-check -q -r "$job_name/$requirements_file" || ( echo ">requirements_failed<" && exit 1 ) ; fi - -ARG job_githash= -ENV JOB_NAME $job_name -ENV VDK_JOB_GITHASH $job_githash diff --git a/projects/control-service/projects/vdk_job_builder/vdk_job_builder/__init__.py b/projects/control-service/projects/vdk_job_builder/vdk_job_builder/__init__.py deleted file mode 100644 index 50c007580a..0000000000 --- a/projects/control-service/projects/vdk_job_builder/vdk_job_builder/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright 2021 VMware, Inc. -# SPDX-License-Identifier: Apache-2.0 diff --git a/projects/control-service/projects/vdk_job_builder/vdk_job_builder/vdk_job_builder.py b/projects/control-service/projects/vdk_job_builder/vdk_job_builder/vdk_job_builder.py deleted file mode 100644 index 2fad03e532..0000000000 --- a/projects/control-service/projects/vdk_job_builder/vdk_job_builder/vdk_job_builder.py +++ /dev/null @@ -1,190 +0,0 @@ -# Copyright 2021 VMware, Inc. -# SPDX-License-Identifier: Apache-2.0 -import logging -import os -import re -import subprocess -import sys - -logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, format="") -log = logging.getLogger(__name__) - - -class JobConfig: - """ - A class used to build VDK configuration. Additional methods are provided to build the image with that - configuration and push it in a docker registry. THe building and pushing functionality should be abstracted in a - separate class down the line. - - VDK Dockerfile (vdk-base:latest): - https://gitlab.eng.vmware.com/product-analytics/data-pipelines/deployer/blob/master/docker/Dockerfile-base - Data Job Dockerfile(which is the same as the one in this directory): - https://gitlab.eng.vmware.com/product-analytics/data-pipelines/deployer/blob/master/docker/Dockerfile-data-job-release - - In order to use the class you can set the desired environment variables which the VDK uses which can be found in - the constructor and run the build_image method. The method itself will need Dockerfile path and DataJob folder. - """ - - def __init__(self): - """All those variables are used by VDK and are specific for the Dockerfile image which VDK uses to run a Data Job - Data Job. You can search each one of them in the VDK repo to get insight on how it is used - """ - self.job_config = {} - self.job_config["data_job_name"] = os.getenv("DATA_JOB_NAME") - self.job_config["dockerfile_path"] = os.getenv("DOCKERFILE_PATH") - self.job_config["source_path"] = os.getenv("SOURCE_PATH") - self.job_config["image_registry_path"] = os.getenv("IMAGE_REGISTRY_PATH") - self.job_config["image_name"] = os.getenv("IMAGE_NAME") - self.docker_build_command = "" - self.tags = [] - self.build_args = [] - self.job_config["image_tags"] = { - "build_tag": os.getenv("BUILD_TAG"), - "label": os.getenv("LABEL"), - "git_commit": os.getenv("GIT_COMMIT"), - "environment": os.getenv("ENVIRONMENT"), - } - self.job_config["build_arg"] = { - "job_githash": os.getenv("JOB_GITHASH"), - "base_image": os.getenv("BASE_IMAGE"), - } - - def with_default_config(self, job_name): - if self.__validate_data_job_name(job_name): - self.job_config["data_job_name"] = job_name - self.job_config["image_name"] = job_name - self.job_config["dockerfile_path"] = "Dockerfile" - self.job_config["source_path"] = "." - return self - - def with_paths_override(self, job_name, dockerfile_path, source_path): - if self.__validate_data_job_name(job_name): - self.job_config["data_job_name"] = job_name - self.job_config["image_name"] = job_name - self.job_config["build_arg"]["job_name"] = job_name - self.job_config["dockerfile_path"] = dockerfile_path - self.job_config["source_path"] = source_path - return self - - def build_image(self): - """Builds docker image with docker binary: - https://docs.docker.com/engine/reference/commandline/build/ - """ - # TODO: move the underlying implementation to use the python SDK the Docker API instead of building command - self.__create_base_command() - - self.__create_tags() - if len(self.tags) == 0: - # throw exception - return None - else: - self.__apply_tags() - self.__create_build_args() - self.__apply_build_args() - self.__apply_source_path() - self.__build_image_code() - - def __create_tags(self): - """Creates tags with the configuration for the docker build command: - https://docs.docker.com/engine/reference/commandline/tag/ - """ - tag_only_name = True - image_tags = self.job_config["image_tags"] - name = self.job_config["image_name"] - registry_path = self.job_config["image_registry_path"] - if not name: - return [] - if registry_path: - for key in image_tags: - if image_tags[key]: - tag_only_name = False - self.tags.append(f"{registry_path}/{name}:{image_tags[key]}") - if tag_only_name: - self.tags.append(f"{registry_path}/{name}") - else: - for key in image_tags: - if image_tags[key]: - tag_only_name = False - self.tags.append(f"{name}:{image_tags[key]}") - if tag_only_name: - self.tags.append(f"{name}") - return self.tags - - def __apply_tags(self): - """Applies tags to the docker build command""" - for tag in self.tags: - self.docker_build_command = ( - self.docker_build_command - + f" \ - -t {tag}" - ) - - def __create_base_command(self): - """Creates the docker build command""" - self.docker_build_command = f"docker build --no-cache" - if self.job_config["dockerfile_path"]: - self.docker_build_command = ( - self.docker_build_command - + f" \ - --file {self.job_config['dockerfile_path']}" - ) - - def __apply_build_args(self): - """Creates the docker build command""" - for build_arg in self.build_args: - if build_arg: - self.docker_build_command = ( - self.docker_build_command - + f" \ - --build-arg {build_arg}" - ) - - def __create_build_args(self): - """Creates build arguments with the configuration: - https://docs.docker.com/engine/reference/commandline/build/#set-build-time-variables---build-arg - """ - if not self.job_config["data_job_name"]: - # throw exception - return None - build_args_map = self.job_config["build_arg"] - for key in build_args_map: - if build_args_map[key]: - self.build_args.append(f"{key}={build_args_map[key]}") - - def __apply_source_path(self): - """Changes build context so you can build images from different directories: - https://docs.docker.com/engine/reference/commandline/build/#specify-a-dockerfile--f - """ - self.docker_build_command = ( - self.docker_build_command - + f" \ - {self.job_config['source_path']}" - ) - - def __validate_data_job_name(self, data_job_name): - if re.match(r"^[a-z][a-z0-9-]{0,44}$", data_job_name): - return data_job_name - else: - # throw exception - return None - - def __build_image_code(self): - """Executes the docker build command""" - build_process = subprocess.Popen( - self.docker_build_command, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - encoding="utf-8", - shell=True, - ) - for line in build_process.stdout: - log.info(line.rstrip("\n")) - self.docker_build_command = "" - exit(build_process.wait()) - - def push_images(self): - """Pushes the images which are separated by their tags: - https://docs.docker.com/engine/reference/commandline/push/ - """ - for tag in self.tags: - os.system("docker push " + tag) diff --git a/projects/control-service/projects/vdk_job_builder/version.txt b/projects/control-service/projects/vdk_job_builder/version.txt deleted file mode 100644 index 90a27f9cea..0000000000 --- a/projects/control-service/projects/vdk_job_builder/version.txt +++ /dev/null @@ -1 +0,0 @@ -1.0.5