-
Notifications
You must be signed in to change notification settings - Fork 40
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #23 from databricks/sql-templates
Add examples based on default-sql & dbt-sql templates
- Loading branch information
Showing
28 changed files
with
621 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# DABs | ||
.databricks/ | ||
build/ | ||
dist/ | ||
__pycache__/ | ||
*.egg-info | ||
.venv/ | ||
scratch/** | ||
!scratch/README.md | ||
|
||
# dbt | ||
target/ | ||
dbt_packages/ | ||
logs/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# Typings for Pylance in Visual Studio Code | ||
# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md | ||
from databricks.sdk.runtime import * |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
{ | ||
"recommendations": [ | ||
"redhat.vscode-yaml", | ||
"innoverio.vscode-dbt-power-user", | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
{ | ||
"python.analysis.stubPath": ".vscode", | ||
"databricks.python.envFile": "${workspaceFolder}/.env", | ||
"jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\<codecell\\>|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", | ||
"jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", | ||
"python.testing.pytestArgs": [ | ||
"." | ||
], | ||
"python.testing.unittestEnabled": false, | ||
"python.testing.pytestEnabled": true, | ||
"python.analysis.extraPaths": ["src"], | ||
"files.exclude": { | ||
"**/*.egg-info": true, | ||
"**/__pycache__": true, | ||
".pytest_cache": true, | ||
}, | ||
"python.envFile": "${workspaceFolder}/.databricks/.databricks.env", | ||
"python.defaultInterpreterPath": "${workspaceFolder}/.venv/bin/python", | ||
"sqltools.connections": [ | ||
{ | ||
"connectionMethod": "VS Code Extension (beta)", | ||
"catalog": "hive_metastore", | ||
"previewLimit": 50, | ||
"driver": "Databricks", | ||
"name": "databricks", | ||
"path": "/sql/1.0/warehouses/abcdef1234567890" | ||
} | ||
], | ||
"sqltools.autoConnectTo": "", | ||
"[jinja-sql]": { | ||
"editor.defaultFormatter": "innoverio.vscode-dbt-power-user" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
# dbt_sql | ||
|
||
The 'dbt_sql' project was generated by using the dbt template for | ||
Databricks Asset Bundles. It follows the standard dbt project structure | ||
and has an additional `resources` directory to define Databricks resources such as jobs | ||
that run dbt models. | ||
|
||
* Learn more about dbt and its standard project structure here: https://docs.getdbt.com/docs/build/projects. | ||
* Learn more about Databricks Asset Bundles here: https://docs.databricks.com/en/dev-tools/bundles/index.html | ||
|
||
The remainder of this file includes instructions for local development (using dbt) | ||
and deployment to production (using Databricks Asset Bundles). | ||
|
||
## Development setup | ||
|
||
1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html | ||
|
||
2. Authenticate to your Databricks workspace, if you have not done so already: | ||
``` | ||
$ databricks configure | ||
``` | ||
3. Install dbt | ||
To install dbt, you need a recent version of Python. For the instructions below, | ||
we assume `python3` refers to the Python version you want to use. On some systems, | ||
you may need to refer to a different Python version, e.g. `python` or `/usr/bin/python`. | ||
Run these instructions from the `dbt_sql` directory. We recommend making | ||
use of a Python virtual environment and installing dbt as follows: | ||
``` | ||
$ python3 -m venv .venv | ||
$ . .venv/bin/activate | ||
$ pip install -r requirements-dev.txt | ||
``` | ||
4. Initialize your dbt profile | ||
Use `dbt init` to initialize your profile. | ||
``` | ||
$ dbt init | ||
``` | ||
Note that dbt authentication uses personal access tokens by default | ||
(see https://docs.databricks.com/dev-tools/auth/pat.html). | ||
You can use OAuth as an alternative, but this currently requires manual configuration. | ||
See https://github.com/databricks/dbt-databricks/blob/main/docs/oauth.md | ||
for general instructions, or https://community.databricks.com/t5/technical-blog/using-dbt-core-with-oauth-on-azure-databricks/ba-p/46605 | ||
for advice on setting up OAuth for Azure Databricks. | ||
To set up additional profiles, such as a 'prod' profile, | ||
see https://docs.getdbt.com/docs/core/connect-data-platform/connection-profiles. | ||
5. Activate dbt so it can be used from the terminal | ||
``` | ||
$ . .venv/bin/activate | ||
``` | ||
## Local development with dbt | ||
Use `dbt` to [run this project locally using a SQL warehouse](https://docs.databricks.com/partners/prep/dbt.html): | ||
``` | ||
$ dbt seed | ||
$ dbt run | ||
``` | ||
(Did you get an error that the dbt command could not be found? You may need | ||
to try the last step from the development setup above to re-activate | ||
your Python virtual environment!) | ||
To just evaluate a single model defined in a file called orders.sql, use: | ||
``` | ||
$ dbt run --model orders | ||
``` | ||
Use `dbt test` to run tests generated from yml files such as `models/schema.yml` | ||
and any SQL tests from `tests/` | ||
``` | ||
$ dbt test | ||
``` | ||
## Production setup | ||
Your production dbt profiles are defined in dbt_profiles/profiles.yml. | ||
These profiles define the default catalog, schema, and any other | ||
target-specific settings. Read more about dbt profiles on Databricks at | ||
https://docs.databricks.com/en/workflows/jobs/how-to/use-dbt-in-workflows.html#advanced-run-dbt-with-a-custom-profile. | ||
The target workspaces for staging and prod are defined in databricks.yml. | ||
You can manually deploy based on these configurations (see below). | ||
Or you can use CI/CD to automate deployment. See | ||
https://docs.databricks.com/dev-tools/bundles/ci-cd.html for documentation | ||
on CI/CD setup. | ||
## Manually deploying to Databricks with Databricks Asset Bundles | ||
Databricks Asset Bundles can be used to deploy to Databricks and to execute | ||
dbt commands as a job using Databricks Workflows. See | ||
https://docs.databricks.com/dev-tools/bundles/index.html to learn more. | ||
Use the Databricks CLI to deploy a development copy of this project to a workspace: | ||
``` | ||
$ databricks bundle deploy --target dev | ||
``` | ||
(Note that "dev" is the default target, so the `--target` parameter | ||
is optional here.) | ||
This deploys everything that's defined for this project. | ||
For example, the default template would deploy a job called | ||
`[dev yourname] dbt_sql_job` to your workspace. | ||
You can find that job by opening your workspace and clicking on **Workflows**. | ||
You can also deploy to your production target directly from the command-line. | ||
The warehouse, catalog, and schema for that target are configured in databricks.yml. | ||
When deploying to this target, note that the default job at resources/dbt_sql_job.yml | ||
has a schedule set that runs every day. The schedule is paused when deploying in development mode | ||
(see https://docs.databricks.com/dev-tools/bundles/deployment-modes.html). | ||
To deploy a production copy, type: | ||
``` | ||
$ databricks bundle deploy --target prod | ||
``` | ||
## IDE support | ||
Optionally, install developer tools such as the Databricks extension for Visual Studio Code from | ||
https://docs.databricks.com/dev-tools/vscode-ext.html. Third-party extensions | ||
related to dbt may further enhance your dbt development experience! |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# This file defines the structure of this project and how it is deployed | ||
# to production using Databricks Asset Bundles. | ||
# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. | ||
bundle: | ||
name: dbt_sql | ||
|
||
include: | ||
- resources/*.yml | ||
|
||
# Deployment targets. | ||
# The default schema, catalog, etc. for dbt are defined in dbt_profiles/profiles.yml | ||
targets: | ||
dev: | ||
default: true | ||
# We use 'mode: development' to indicate this is a personal development copy. | ||
# Any job schedules and triggers are paused by default. | ||
mode: development | ||
workspace: | ||
host: https://myworkspace.databricks.com | ||
|
||
prod: | ||
mode: production | ||
workspace: | ||
host: https://myworkspace.databricks.com | ||
# We always use /Users/[email protected] for all resources to make sure we only have a single copy. | ||
root_path: /Users/[email protected]/.bundle/${bundle.name}/${bundle.target} | ||
run_as: | ||
# This runs as [email protected] in production. We could also use a service principal here | ||
# using service_principal_name (see the Databricks documentation). | ||
user_name: [email protected] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
|
||
# This file defines dbt profiles for deployed dbt jobs. | ||
# Note that for local development you should create your own, local profile. | ||
# (see README.md). | ||
my_dbt_project: | ||
target: dev # default target | ||
outputs: | ||
|
||
dev: | ||
type: databricks | ||
method: http | ||
catalog: main | ||
schema: "{{ var('dev_schema') }}" | ||
|
||
http_path: /sql/1.0/warehouses/abcdef1234567890 | ||
|
||
# The workspace host / token are provided by Databricks | ||
# see databricks.yml for the host used for 'dev' | ||
host: "{{ env_var('DBT_HOST') }}" | ||
token: "{{ env_var('DBT_ACCESS_TOKEN') }}" | ||
|
||
prod: | ||
type: databricks | ||
method: http | ||
catalog: main | ||
schema: default | ||
|
||
http_path: /sql/1.0/warehouses/abcdef1234567890 | ||
|
||
# The workspace host / token are provided by Databricks | ||
# see databricks.yml for the host used for 'dev' | ||
host: "{{ env_var('DBT_HOST') }}" | ||
token: "{{ env_var('DBT_ACCESS_TOKEN') }}" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
name: 'dbt_sql' | ||
version: '1.0.0' | ||
config-version: 2 | ||
|
||
# This setting configures which "profile" dbt uses for this project. | ||
profile: 'dbt_sql' | ||
|
||
# These configurations specify where dbt should look for different types of files. | ||
# For Databricks asset bundles, we put everything in src, as you may have | ||
# non-dbt resources in your project. | ||
model-paths: ["src/models"] | ||
analysis-paths: ["src/analyses"] | ||
test-paths: ["src/tests"] | ||
seed-paths: ["src/seeds"] | ||
macro-paths: ["src/macros"] | ||
snapshot-paths: ["src/snapshots"] | ||
|
||
clean-targets: # directories to be removed by `dbt clean` | ||
- "target" | ||
- "dbt_packages" | ||
|
||
# Configuring models | ||
# Full documentation: https://docs.getdbt.com/docs/configuring-models | ||
|
||
# In this example config, we tell dbt to build all models in the example/ | ||
# directory as views by default. These settings can be overridden in the | ||
# individual model files using the `{{ config(...) }}` macro. | ||
models: | ||
dbt_sql: | ||
# Config indicated by + and applies to all files under models/example/ | ||
example: | ||
+materialized: view |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# This file defines prompts with defaults for dbt initialization. | ||
# It is used when the `dbt init` command is invoked. | ||
# | ||
fixed: | ||
type: databricks | ||
prompts: | ||
host: | ||
default: myworkspace.databricks.com | ||
token: | ||
hint: 'personal access token to use, dapiXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX' | ||
hide_input: true | ||
http_path: | ||
hint: 'HTTP path of SQL warehouse to use' | ||
default: /sql/1.0/warehouses/abcdef1234567890 | ||
catalog: | ||
hint: 'initial catalog' | ||
default: main | ||
schema: | ||
hint: 'personal schema where dbt will build objects during development, example: user_name' | ||
threads: | ||
hint: 'threads to use during development, 1 or more' | ||
type: 'int' | ||
default: 4 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
## requirements-dev.txt: dependencies for local development. | ||
|
||
dbt-databricks>=1.0.0,<2.0.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
resources: | ||
jobs: | ||
dbt_sql_job: | ||
name: dbt_sql_job | ||
|
||
schedule: | ||
# Run every day at 9:27 AM | ||
quartz_cron_expression: 21 27 9 * * ? | ||
timezone_id: UTC | ||
|
||
email_notifications: | ||
on_failure: | ||
- [email protected] | ||
|
||
tasks: | ||
- task_key: dbt | ||
|
||
dbt_task: | ||
project_directory: ../ | ||
# The default schema, catalog, etc. are defined in ../dbt_profiles/profiles.yml | ||
profiles_directory: dbt_profiles/ | ||
commands: | ||
- 'dbt deps --target=${bundle.target}' | ||
- 'dbt seed --target=${bundle.target} --vars "{ dev_schema: ${workspace.current_user.short_name} }"' | ||
- 'dbt run --target=${bundle.target} --vars "{ dev_schema: ${workspace.current_user.short_name} }"' | ||
|
||
libraries: | ||
- pypi: | ||
package: dbt-databricks>=1.0.0,<2.0.0 | ||
|
||
new_cluster: | ||
spark_version: 13.3.x-scala2.12 | ||
node_type_id: i3.xlarge | ||
data_security_mode: SINGLE_USER | ||
num_workers: 0 | ||
spark_conf: | ||
spark.master: "local[*, 4]" | ||
spark.databricks.cluster.profile: singleNode | ||
custom_tags: | ||
ResourceClass: SingleNode |
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
|
||
-- This model file defines a materialized view called 'orders_daily' | ||
-- | ||
-- Read more about materialized views at https://docs.getdbt.com/reference/resource-configs/databricks-configs#materialized-views-and-streaming-tables | ||
-- Current limitation: a "full refresh" is needed in case the definition below is changed; see https://github.com/databricks/dbt-databricks/issues/561. | ||
{{ config(materialized = 'materialized_view') }} | ||
|
||
select order_date, count(*) AS number_of_orders | ||
|
||
from {{ ref('orders_raw') }} | ||
|
||
-- During development, only process a smaller range of data | ||
{% if target.name != 'prod' %} | ||
where order_date >= '2019-08-01' and order_date < '2019-09-01' | ||
{% endif %} | ||
|
||
group by order_date |
Oops, something went wrong.