Commit: Merge branch 'main' into feature/datacoves3.3

Mayra Peña authored and committed on Feb 22, 2025
2 parents 1eec64a + 874aa78, commit 6ec4d92
Showing 74 changed files with 934 additions and 215 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/10_integrate_dbt_changes.yml
@@ -79,6 +79,10 @@ jobs:
##### Governance Checks
# This first runs dbt but creates empty tables; that is enough to run the hooks and fail fast

# We need to run the observe model so that the post-hook works
- name: Run Observe Model
run: "dbt build --fail-fast -s L1_inlets.observe"

# There is an issue with --empty and dynamic tables, so we need to exclude them
- name: Governance run of dynamic tables
run: "dbt build --fail-fast -s config.materialized:dynamic_table -s test_failures --defer --state logs"
21 changes: 21 additions & 0 deletions .github/workflows/30_deploy_changes_to_production.yml
@@ -47,6 +47,8 @@ jobs:
DATACOVES__MAIN__USER: ${{ vars.DATACOVES__MAIN__USER }}
DATACOVES__MAIN__PASSWORD: ${{ secrets.DATACOVES__MAIN__PASSWORD }}

DBT_ARTIFACTS_BUCKET: "convexa-local"

# This is used by Datacoves to drop the staging database for blue/green
# deployments. Most likely you don't want to set this; we use it for demos.
DATACOVES__DROP_DB_ON_FAIL: ${{ vars.DATACOVES__DROP_DB_ON_FAIL }}
@@ -104,6 +106,25 @@ jobs:
- name: Upload dbt artifacts
run: "dbt run-operation upload_artifacts"

- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: us-west-2

- name: Upload artifacts to S3 for Datahub
run: |
# Check and upload each file individually
for file in target/sources.json target/manifest.json target/catalog.json target/run_results.json; do
if [ -f "$file" ]; then
echo "Uploading $file to S3..."
aws s3 cp "$file" "s3://${DBT_ARTIFACTS_BUCKET}/dbt_artifacts/$(basename $file)"
else
echo "File $file does not exist, skipping..."
fi
done
- uses: fregante/setup-git-user@v2
- name: Bump dbt project and git project version
run: "../automate/dbt/bump_dbt_project.sh"
8 changes: 3 additions & 5 deletions load/dlt/loans_data.py
@@ -1,4 +1,4 @@
#!/usr/bin/env -S uv run --verbose --cache-dir /tmp/.uv_cache
#!/usr/bin/env -S uv run
# /// script
# dependencies = [
# "dlt[snowflake, parquet]==1.5.0",
@@ -28,7 +28,7 @@ def zip_coordinates():

@dlt.source
def loans_data():
return personal_loans, zip_coordinates
return [personal_loans, zip_coordinates]

if __name__ == "__main__":
datacoves_snowflake = dlt.destinations.snowflake(
@@ -46,8 +46,6 @@ def loans_data():
dataset_name="loans"
)

load_info = pipeline.run([
loans_data()
])
load_info = pipeline.run(loans_data())

print(load_info)
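For reference, a `@dlt.source` is expected to return one or more resources; returning them as a list makes that explicit, and `pipeline.run()` can take the source object directly. A minimal self-contained sketch of the same pattern, using a local DuckDB destination and hypothetical rows (the real script loads loan data into Snowflake):

```python
import dlt

@dlt.resource
def personal_loans():
    # Hypothetical stand-in rows; the real resource reads loan source data.
    yield [{"loan_id": 1, "amount": 1000}]

@dlt.resource
def zip_coordinates():
    yield [{"zip": "90210", "lat": 34.09, "lon": -118.41}]

@dlt.source
def loans_data():
    # Return a list of resources, as the updated script does.
    return [personal_loans, zip_coordinates]

if __name__ == "__main__":
    pipeline = dlt.pipeline(pipeline_name="loans", destination="duckdb", dataset_name="loans")
    # Pass the source directly, mirroring load_info = pipeline.run(loans_data())
    print(pipeline.run(loans_data()))
```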
6 changes: 3 additions & 3 deletions load/dlt/us_population.py
@@ -1,4 +1,4 @@
#!/usr/bin/env -S uv run --cache-dir /tmp/.uv_cache
#!/usr/bin/env -S uv run
# /// script
# dependencies = [
# "dlt[snowflake, parquet]==1.5.0",
@@ -30,7 +30,7 @@ def us_population_source():
)

pipeline = dlt.pipeline(
progress = "enlighten",
progress = "log",
pipeline_name = "loans",
destination = datacoves_snowflake,
pipelines_dir = pipelines_dir,
@@ -40,7 +40,7 @@ def us_population_source():
)

load_info = pipeline.run([
us_population()
us_population_source()
])

print(load_info)
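Two fixes in this file: the old `us_population()` call referenced a name that does not match the source function shown in the hunk header (`us_population_source`), so it appears it would have failed at runtime; and the progress collector switches from `enlighten` to `log`. The latter is likely because this pipeline runs non-interactively (for example from Airflow), where dlt's `log` collector writes periodic progress lines to the log instead of drawing a terminal progress bar that needs a TTY.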
1 change: 1 addition & 0 deletions observe/osmosis/.env.sample
@@ -0,0 +1 @@
OPENAI_API_KEY=abc....
22 changes: 22 additions & 0 deletions observe/osmosis/readme.md
@@ -0,0 +1,22 @@
# dbt-osmosis

## Set OpenAI key from .env file
`set -a && source /config/workspace/observe/osmosis/.env && set +a`

`set -a` auto-exports every variable assigned while sourcing, so `OPENAI_API_KEY` becomes visible to the `uvx` processes below; `set +a` turns auto-export back off.

## Run Refactor
Runs the organize step, which syncs YAML files with the database schema.
With `--synthesize`, this also uses OpenAI to add descriptions to columns.

`uvx --with=dbt-snowflake~=1.8.0 --from 'dbt-osmosis[openai]' dbt-osmosis yaml refactor --synthesize`

Run without OpenAI

`uvx --with=dbt-snowflake~=1.8.0 dbt-osmosis yaml refactor`

You can pass `--fqn` with the name of a folder in `models/` to limit the refactor

`dbt-osmosis yaml refactor --fqn L3_coves`

Separate sub-folders with a `.`

`dbt-osmosis yaml refactor --fqn L3_coves.loan_analytics`
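As a hypothetical illustration of what a refactor pass does (model and column names below are invented), assuming a `stg_personal_loans` model whose YAML is missing a column that exists in the warehouse:

```yaml
# Hypothetical result for one model; names are illustrative only.
models:
  - name: stg_personal_loans
    columns:
      - name: loan_id          # already documented, left as-is
        description: "Primary key of the loan."
      - name: interest_rate    # discovered in the database and appended by osmosis
        description: "Annual interest rate."  # drafted by OpenAI when --synthesize is used
```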
10 changes: 5 additions & 5 deletions orchestrate/dags/daily_loan_run.py
@@ -12,7 +12,7 @@ def daily_loan_run():

@task_group(group_id="extract_and_load_airbyte", tooltip="Airbyte Extract and Load")
def extract_and_load_airbyte():

@task
def sync_airbyte():
from airflow.providers.airbyte.operators.airbyte import AirbyteTriggerSyncOperator
@@ -21,14 +21,14 @@ def sync_airbyte():
connection_id="ac02ea96-58a1-4061-be67-78900bb5aaf6",
airbyte_conn_id="airbyte_connection",
).execute({})

sync_airbyte()

tg_extract_and_load_airbyte = extract_and_load_airbyte()

@task_group(group_id="extract_and_load_fivetran", tooltip="Fivetran Extract and Load")
def extract_and_load_fivetran():

@task
def trigger_fivetran():
from fivetran_provider_async.operators import FivetranOperator
@@ -52,7 +52,7 @@ def sensor_fivetran():
trigger = trigger_fivetran()
sensor = sensor_fivetran()
trigger >> sensor # Set dependency

tg_extract_and_load_fivetran = extract_and_load_fivetran()

@task_group(group_id="extract_and_load_dlt", tooltip="dlt Extract and Load")
@@ -66,7 +66,7 @@ def load_us_population():

tg_extract_and_load_dlt = extract_and_load_dlt()

@task.datacoves_dbt(connection_id="main")
@task.datacoves_dbt(connection_id="main")
def transform():
return "dbt build -s 'tag:daily_run_airbyte+ tag:daily_run_fivetran+ -t prd'"

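The hunk ends before the final dependency wiring, so the sketch below is a hypothetical reconstruction of how these pieces are typically chained: the dbt transform runs only after all three extract-and-load task groups finish.

```python
# Hypothetical wiring, not shown in this diff: fan in the three task groups
# before the dbt transform task.
[
    tg_extract_and_load_airbyte,
    tg_extract_and_load_fivetran,
    tg_extract_and_load_dlt,
] >> transform()
```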
20 changes: 20 additions & 0 deletions orchestrate/dags/default_args_dag.py
@@ -0,0 +1,20 @@
from airflow.decorators import dag
from orchestrate.utils.default_args import default_args
from operators.datacoves.dbt import DatacovesDbtOperator


@dag(
default_args=default_args,
description="Daily dbt run",
schedule="0 12 * * *",
tags=["transform"],
catchup=False,
)
def default_args_dag():
run_dbt = DatacovesDbtOperator(
task_id="run_dbt", bash_command="dbt run -s country_codes"
)


dag = default_args_dag()
11 changes: 11 additions & 0 deletions orchestrate/utils/default_args.py
@@ -0,0 +1,11 @@
# utils/default_args.py
from datetime import datetime, timedelta

default_args = {
'owner': '[email protected]',
'email_on_failure': False,
'email_on_retry': False,
'retries': 3,
'retry_delay': timedelta(minutes=5),
'start_date': datetime(2023, 12, 1),
}
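Airflow merges these values into every task of a DAG that passes `default_args=default_args`; a task-level argument wins over the shared one. A minimal sketch with a hypothetical DAG and override:

```python
from airflow.decorators import dag, task
from orchestrate.utils.default_args import default_args


@dag(default_args=default_args, schedule=None, catchup=False)
def override_example():  # hypothetical DAG, for illustration only
    @task(retries=0)  # task-level retries=0 overrides the shared retries=3
    def noop():
        pass

    noop()


dag = override_example()
```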
4 changes: 2 additions & 2 deletions secure/databases.yml
@@ -9,11 +9,11 @@
schemas:
- L1_ACCOUNT_USAGE
- L1_COUNTRY_DATA
- L1_GOOGLE_ANALYTICS_4
- L1_DBT_ARTIFACTS
- L1_COVID19_EPIDEMIOLOGICAL_DATA
- L1_GOOGLE_ANALYTICS_4
- L1_LOANS
- L1_OBSERVE
- L1_US_POPULATION

- L2_COUNTRY_DEMOGRAPHICS
- L2_COVID_OBSERVATIONS
51 changes: 25 additions & 26 deletions secure/roles.yml
@@ -16,16 +16,15 @@

- z_schema_seeds

- z_schema_l1_dbt_artifacts
- z_schema_dbt_test__audit

- z_schema_l1_account_usage
- z_schema_l1_country_data
- z_schema_l1_google_analytics_4
- z_schema_l1_dbt_artifacts
- z_schema_l1_covid19_epidemiological_data
- z_schema_l1_google_analytics_4
- z_schema_l1_loans
- z_schema_l1_observe

- z_schema_dbt_test__audit
- z_schema_l1_us_population

- z_schema_l2_country_demographics
- z_schema_l2_covid_observations
@@ -355,66 +354,66 @@
read:
- raw.snapshots

# INLETS
- z_schema_l1_account_usage:
- z_schema_resources:
privileges:
schemas:
read:
- balboa.l1_account_usage
- balboa_apps.resources

- z_schema_l1_country_data:
- z_schema_seeds:
privileges:
schemas:
read:
- balboa.l1_country_data
- balboa.seeds

- z_schema_l1_google_analytics_4:
- z_schema_dbt_test__audit:
privileges:
schemas:
read:
- balboa.l1_google_analytics_4
- balboa.dbt_test__audit

- z_schema_l1_covid19_epidemiological_data:
# INLETS
- z_schema_l1_account_usage:
privileges:
schemas:
read:
- balboa.l1_covid19_epidemiological_data
- balboa.l1_account_usage

- z_schema_l1_dbt_artifacts:
- z_schema_l1_country_data:
privileges:
schemas:
read:
- balboa.l1_dbt_artifacts
- balboa.l1_country_data

- z_schema_l1_loans:
- z_schema_l1_covid19_epidemiological_data:
privileges:
schemas:
read:
- balboa.l1_loans
- balboa.l1_covid19_epidemiological_data

- z_schema_l1_observe:
- z_schema_l1_google_analytics_4:
privileges:
schemas:
read:
- balboa.l1_observe
- balboa.l1_google_analytics_4

- z_schema_resources:
- z_schema_l1_loans:
privileges:
schemas:
read:
- balboa_apps.resources
- balboa.l1_loans

- z_schema_seeds:
- z_schema_l1_observe:
privileges:
schemas:
read:
- balboa.seeds
- balboa.l1_observe

- z_schema_dbt_test__audit:
- z_schema_l1_us_population:
privileges:
schemas:
read:
- balboa.dbt_test__audit
- balboa.l1_us_population

# BAYS
- z_schema_l2_country_demographics:
2 changes: 2 additions & 0 deletions secure/users.yml
@@ -29,6 +29,8 @@
can_login: true
member_of:
- analyst
- accountadmin
- securityadmin

- sebastian:
can_login: true
1 change: 1 addition & 0 deletions training_and_demos/uv_slack/.env.sample
@@ -0,0 +1 @@
SLACK_API_TOKEN=xyz...
31 changes: 31 additions & 0 deletions training_and_demos/uv_slack/msg_slack.py
@@ -0,0 +1,31 @@
#!/usr/bin/env -S uv run --cache-dir /tmp/.uv_cache
# /// script
# dependencies = [
# "slack_sdk==3.34.0",
# "python-dotenv"
# ]
# ///
from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError
from dotenv import load_dotenv
import os

load_dotenv()
SLACK_TOKEN = os.getenv("SLACK_API_TOKEN")

def send_hello_world():
# Initialize the Slack client with your bot token
client = WebClient(token=SLACK_TOKEN)
print("TEST")
try:
response = client.chat_postMessage(
channel='#bot-aks-notifications',
text='Hello, World! 👋'
)
print(f"Message sent successfully: {response['ts']}")

except SlackApiError as e:
print(f"Error sending message: {e.response['error']}")

if __name__ == "__main__":
send_hello_world()
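Assuming `.env.sample` is copied to `.env` with a real token, the uv shebang lets the script run directly; uv resolves the inline `slack_sdk` and `python-dotenv` dependencies on first run:

```bash
cd training_and_demos/uv_slack
cp .env.sample .env   # put a real token in .env first
chmod +x msg_slack.py
./msg_slack.py
```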
4 changes: 2 additions & 2 deletions transform/.dbt_coves/config.yml
@@ -4,8 +4,8 @@ generate:
# schemas: # List of schema names where to look for source tables
# - RAW
sources_destination: "models/L1_inlets/{{schema}}/_{{schema}}.yml" # Where sources yml files will be generated
models_destination: "models/L1_inlets/{{schema}}/{{relation}}.sql" # Where models sql files will be generated
model_props_destination: "models/L1_inlets/{{schema}}/{{relation}}.yml" # Where models yml files will be generated
models_destination: "models/L1_inlets/{{schema}}/stg_{{relation}}.sql" # Where models sql files will be generated
model_props_destination: "models/L1_inlets/{{schema}}/stg_{{relation}}.yml" # Where models yml files will be generated
update_strategy: update # Action to perform when a property file exists. Options: update, recreate, fail, ask
templates_folder: ".dbt_coves/templates" # Folder where source generation jinja templates are located.
flatten_json_fields: "no" # Action to perform when a VARIANT / JSON field is encountered
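As an illustration (schema and table names are hypothetical), generating from a source table `LOANS.PERSONAL_LOANS` would now produce files like the following, with the exact casing depending on how `{{schema}}` and `{{relation}}` are rendered:

```
models/L1_inlets/loans/_loans.yml               # sources yml (unchanged)
models/L1_inlets/loans/stg_personal_loans.sql   # model sql, now with the stg_ prefix
models/L1_inlets/loans/stg_personal_loans.yml   # model properties, same prefix
```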
2 changes: 1 addition & 1 deletion transform/.dbt_coves/templates/staging_model_props.yml
@@ -1,7 +1,7 @@
version: 2

models:
- name: {{model}}
- name: stg_{{ model | lower }}
description: ''
columns:
{%- for cols in nested.values() %}
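For reference, a hypothetical render of this template for a model named `PERSONAL_LOANS` with two invented columns shows the effect of the new `stg_` prefix and the `lower` filter:

```yaml
version: 2

models:
  - name: stg_personal_loans
    description: ''
    columns:
      - name: loan_id
        description: ''
      - name: loan_amount
        description: ''
```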