Commit 91885fb (0 parents), showing 18 changed files with 533 additions and 0 deletions.
`.coveragerc` (coverage configuration), new file with 17 lines:

[run]
branch = True
source = cicd_sample_project

[report]
exclude_lines =
    if self.debug:
    pragma: no cover
    raise NotImplementedError
    if __name__ == .__main__.:

ignore_errors = True
omit =
    tests/*
    setup.py
    # this file is autogenerated by dbx
    cicd_sample_project/common.py
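To illustrate how the `exclude_lines` patterns behave, here is a small, hypothetical module (not part of this commit): the regex patterns above match the `if self.debug:` branch, any line carrying a `pragma: no cover` comment, bare `raise NotImplementedError` lines, and the `if __name__ == "__main__":` guard, so those lines (and the blocks they introduce) are left out of the coverage report.

```python
# hypothetical_module.py -- illustrative only, not part of this commit
class Sample:
    def __init__(self, debug: bool = False):
        self.debug = debug

    def run(self) -> int:
        if self.debug:              # excluded: matches "if self.debug:"
            print("debug details")  # the whole branch body is excluded with it
        return 42

    def not_ready(self):
        raise NotImplementedError   # excluded: matches "raise NotImplementedError"

    def platform_only(self):  # pragma: no cover -- excludes this entire function
        return "skipped by the pragma pattern"


if __name__ == "__main__":          # excluded: matches the __main__ guard pattern
    print(Sample().run())
```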
dbx project configuration (JSON, typically stored as `.dbx/project.json`), new file with 19 lines:

{
    "environments": {
        "default": {
            "profile": "dev",
            "workspace_dir": "/Shared/dbx/projects/dbx-example-project",
            "artifact_location": "dbfs:/dbx/dbx-example-project"
        },
        "dev": {
            "profile": "dev",
            "workspace_dir": "/Shared/dbx/projects/dbx-example-project",
            "artifact_location": "dbfs:/dbx/dbx-example-project"
        },
        "staging": {
            "profile": "staging",
            "workspace_dir": "/Shared/dbx/projects/dbx-example-project",
            "artifact_location": "dbfs:/dbx/dbx-example-project"
        }
    }
}
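To make the role of this file concrete, the sketch below (an illustration, not part of the commit) reads the environment map and resolves the profile, workspace directory and artifact location for a chosen environment; this is conceptually similar to the lookup `dbx` itself performs. The file path `.dbx/project.json` is an assumption based on the default dbx layout.

```python
# resolve_env.py -- illustrative sketch only; dbx handles this lookup internally
import json
import pathlib


def resolve_environment(name: str = "default",
                        config_path: str = ".dbx/project.json") -> dict:
    """Return the profile/workspace/artifact settings for one environment entry."""
    config = json.loads(pathlib.Path(config_path).read_text())
    environments = config["environments"]
    if name not in environments:
        raise KeyError(f"Unknown environment '{name}'; available: {sorted(environments)}")
    return environments[name]


if __name__ == "__main__":
    env = resolve_environment("staging")
    print(env["profile"], env["workspace_dir"], env["artifact_location"])
```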
GitHub Actions CI workflow, new file with 51 lines:

name: CI pipeline

on:
  push:
    branches:
      - '**'
    tags-ignore:
      - 'v*' # this tag type is used for release pipelines

jobs:
  ci-pipeline:

    runs-on: ubuntu-latest
    strategy:
      max-parallel: 4

    env:
      DATABRICKS_HOST: ${{ secrets.DATABRICKS_HOST }}
      DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }}

    steps:
      - uses: actions/checkout@v1

      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          python-version: 3.7.5

      - name: Install pip
        run: |
          python -m pip install --upgrade pip
      - name: Install dependencies and project in dev mode
        run: |
          pip install -r unit-requirements.txt
          pip install -e .
      - name: Run unit tests
        run: |
          echo "Launching unit tests"
          pytest tests/unit
      - name: Deploy integration test
        run: |
          dbx deploy --jobs=cicd-sample-project-sample-integration-test --files-only
      - name: Run integration test
        run: |
          dbx launch --job=cicd-sample-project-sample-integration-test --as-run-submit --trace
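The "Run unit tests" step simply invokes `pytest tests/unit`, so any test module in that folder is collected. The commit's actual test files are not visible in this view; a minimal, hypothetical example of a test that this step would pick up could look like:

```python
# tests/unit/test_sample.py -- hypothetical example of a test collected by `pytest tests/unit`
from cicd_sample_project import __version__


def test_version_is_defined():
    # The package exposes its version as a plain string, e.g. "0.0.1".
    assert isinstance(__version__, str)
    assert __version__.count(".") == 2
```

This works after `pip install -e .`, which is exactly what the preceding workflow step installs.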
GitHub Actions release workflow, new file with 54 lines:

name: Release pipeline

on:
  push:
    tags:
      - 'v*' # Push events to matching v*, i.e. v1.0, v20.15.10


jobs:
  release-pipeline:

    runs-on: ubuntu-latest
    strategy:
      max-parallel: 4
      matrix:
        python-version: [ 3.7 ]

    env:
      DATABRICKS_HOST: ${{ secrets.DATABRICKS_HOST }}
      DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }}

    steps:
      - uses: actions/checkout@v1

      - name: Set up Python
        uses: actions/setup-python@v1
        with:
          python-version: 3.7

      - name: Install pip
        run: |
          python -m pip install --upgrade pip
      - name: Install dependencies and project in dev mode
        run: |
          pip install -r unit-requirements.txt
      - name: Deploy the job
        run: |
          dbx deploy --jobs=cicd-sample-project-sample
      - name: Create Release
        id: create_release
        uses: actions/create-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # This token is provided by Actions
        with:
          tag_name: ${{ github.ref }}
          release_name: Release ${{ github.ref }}
          body: |
            Release for version ${{ github.ref }}.
          draft: false
          prerelease: false
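The release workflow fires on `v*` tags and names the GitHub release after the pushed ref, while the package version itself lives in `cicd_sample_project/__init__.py`. A small, hypothetical helper (not part of this commit) that checks a tag against the package version before pushing it might look like this:

```python
# check_release_tag.py -- hypothetical pre-release check, not part of this commit
import sys

from cicd_sample_project import __version__


def expected_tag() -> str:
    """Tag name implied by the README convention for the current package version."""
    return f"v{__version__}"


if __name__ == "__main__":
    tag = sys.argv[1] if len(sys.argv) > 1 else ""
    if tag != expected_tag():
        sys.exit(f"Tag '{tag}' does not match package version '{__version__}' "
                 f"(expected '{expected_tag()}')")
    print(f"Tag {tag} matches package version {__version__}")
```

For example, `python check_release_tag.py v0.0.1` could be run before `git push origin --tags`.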
`.gitignore`, new file with 34 lines:

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# Distribution / packaging
*.egg-info/
build
dist

# Unit test / coverage reports
.coverage
coverage.xml
junit/*
htmlcov/*

# Caches
.pytest_cache/

# VSCode
.vscode/

# Idea
.idea/
*.iml

# MacOS
.DS_Store

# Databricks eXtensions
.dbx/lock.json

# local mlflow files
mlruns/
`README.md`, new file with 100 lines:

# cicd-sample-project

This is a sample project for Databricks, generated via cookiecutter.

To work with this project, you need Python 3.x and either `pip` or `conda` for package management.

## Installing project requirements

```bash
pip install -r unit-requirements.txt
```

## Installing the project package in development mode

```bash
pip install -e .
```

## Testing

For local unit testing, please use `pytest`:
```
pytest tests/unit --cov
```

For an integration test on an interactive cluster, use the following command:
```
dbx execute --cluster-name=<name of interactive cluster> --job=cicd-sample-project-sample-integration-test
```

For a test on an automated job cluster, deploy the job files and then launch:
```
dbx deploy --jobs=cicd-sample-project-sample-integration-test --files-only
dbx launch --job=cicd-sample-project-sample-integration-test --as-run-submit --trace
```

## Interactive execution and development

1. `dbx` expects the cluster used for interactive execution to support the `%pip` and `%conda` magic [commands](https://docs.databricks.com/libraries/notebooks-python-libraries.html).
2. Please configure your job in the `conf/deployment.yml` file.
3. To execute the code interactively, provide either `--cluster-id` or `--cluster-name`:
```bash
dbx execute \
    --cluster-name="<some-cluster-name>" \
    --job=job-name
```

Multiple users can also use the same cluster for development. Libraries will be isolated per execution context.

## Preparing the deployment file

The next step is to configure your deployment objects. To make this process easy and flexible, we're using YAML for configuration.

By default, the deployment configuration is stored in `conf/deployment.yml`.

## Deployment for the Run Submit API

To deploy only the files, without overriding the job definitions, do the following:

```bash
dbx deploy --files-only
```

To launch the file-based deployment:
```
dbx launch --as-run-submit --trace
```

This type of deployment is handy for working in different branches without affecting the main job definition.

## Deployment for the Run Now API

To deploy the files and update the job definitions:

```bash
dbx deploy
```

To launch the deployed job:
```
dbx launch --job=<job-name>
```

This type of deployment is mainly meant to be used from the CI pipeline, in an automated way, during a new release.

## CI/CD pipeline settings

Please set the following secrets or environment variables for your CI provider:
- `DATABRICKS_HOST`
- `DATABRICKS_TOKEN`

## Testing and releasing via the CI pipeline

- To trigger the CI pipeline, simply push your code to the repository. If the CI provider is configured correctly, it will trigger the general testing pipeline.
- To trigger the release pipeline, get the current version from the `cicd_sample_project/__init__.py` file and tag the current code version:
```
git tag -a v<your-project-version> -m "Release tag for version <your-project-version>"
git push origin --tags
```
`cicd_sample_project/__init__.py`, new file with 1 line:

__version__ = "0.0.1"
`cicd_sample_project/common.py` (shared `Job` base class), new file with 94 lines:

from abc import ABC, abstractmethod
from argparse import ArgumentParser
from logging import Logger
from typing import Dict, Any
import yaml
import pathlib
from pyspark.sql import SparkSession
import sys


# abstract class for jobs
class Job(ABC):
    def __init__(self, spark=None, init_conf=None):
        self.spark = self._prepare_spark(spark)
        self.logger = self._prepare_logger()
        self.dbutils = self.get_dbutils()
        if init_conf:
            self.conf = init_conf
        else:
            self.conf = self._provide_config()
        self._log_conf()

    @staticmethod
    def _prepare_spark(spark) -> SparkSession:
        if not spark:
            return SparkSession.builder.getOrCreate()
        else:
            return spark

    @staticmethod
    def _get_dbutils(spark: SparkSession):
        try:
            from pyspark.dbutils import DBUtils  # noqa

            if "dbutils" not in locals():
                utils = DBUtils(spark)
                return utils
            else:
                return locals().get("dbutils")
        except ImportError:
            return None

    def get_dbutils(self):
        utils = self._get_dbutils(self.spark)

        if not utils:
            self.logger.warn("No DBUtils defined in the runtime")
        else:
            self.logger.info("DBUtils class initialized")

        return utils

    def _provide_config(self):
        self.logger.info("Reading configuration from --conf-file job option")
        conf_file = self._get_conf_file()
        if not conf_file:
            self.logger.info(
                "No conf file was provided, setting configuration to empty dict."
                "Please override configuration in subclass init method"
            )
            return {}
        else:
            self.logger.info(f"Conf file was provided, reading configuration from {conf_file}")
            return self._read_config(conf_file)

    @staticmethod
    def _get_conf_file():
        p = ArgumentParser()
        p.add_argument("--conf-file", required=False, type=str)
        namespace = p.parse_known_args(sys.argv[1:])[0]
        return namespace.conf_file

    @staticmethod
    def _read_config(conf_file) -> Dict[str, Any]:
        config = yaml.safe_load(pathlib.Path(conf_file).read_text())
        return config

    def _prepare_logger(self) -> Logger:
        log4j_logger = self.spark._jvm.org.apache.log4j  # noqa
        return log4j_logger.LogManager.getLogger(self.__class__.__name__)

    def _log_conf(self):
        # log parameters
        self.logger.info("Launching job with configuration parameters:")
        for key, item in self.conf.items():
            self.logger.info("\t Parameter: %-30s with value => %-30s" % (key, item))

    @abstractmethod
    def launch(self):
        """
        Main method of the job.
        :return:
        """
        pass
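To show how this base class is meant to be used, here is a minimal, hypothetical job that subclasses `Job`: it relies on the inherited `spark`, `logger` and `conf` attributes and implements the abstract `launch` method. The commit's concrete job and entrypoint modules are not visible in this view, so the names below (`SampleJob`, the `output_path` key, the default path) are assumptions for illustration only.

```python
# hypothetical job built on top of the Job base class above
from cicd_sample_project.common import Job


class SampleJob(Job):
    def launch(self):
        self.logger.info("Launching sample job")

        # A tiny Spark action using the session prepared by the base class.
        df = self.spark.range(0, 10)
        self.logger.info(f"Row count: {df.count()}")

        # Configuration comes either from init_conf or from the --conf-file option.
        output_path = self.conf.get("output_path", "/tmp/cicd_sample_output")
        df.write.format("parquet").mode("overwrite").save(output_path)

        self.logger.info("Sample job finished")


if __name__ == "__main__":
    SampleJob().launch()
```

Such an entrypoint is what a job name like `cicd-sample-project-sample` in the deployment configuration would ultimately point at when it is run via `dbx execute` or `dbx launch`.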
Two further files in this commit were added empty.