Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/mayrapena1324 #2

Merged
merged 30 commits into from
Nov 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/pull_request_build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ jobs:
run: "dbt build --fail-fast --empty"

- name: Generate Docs Combining Prod and branch catalog.json
if: ${{ steps.prod_manifest.outputs.manifest_found == 'true' && contains(github.event.pull_request.labels.*.name, 'full-refresh') != true }}
run: "dbt-coves generate docs --merge-deferred --state logs"

- name: Run governance checks
Expand Down
23 changes: 10 additions & 13 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
files: ^transform/models/

repos:

- repo: https://github.com/dbt-checkpoint/dbt-checkpoint
rev: v2.0.1
rev: v2.0.5

hooks:
- id: check-source-table-has-description
Expand All @@ -12,32 +11,30 @@ repos:
- id: check-script-ref-and-source
- id: check-model-has-description
- id: check-model-has-properties-file
- id: check-model-has-all-columns

# - id: check-model-has-all-columns
# - id: check-database-casing-consistency
always_run: true

- repo: https://github.com/sqlfluff/sqlfluff
# this is the version of sqlfluff, needs to be updated when using a new sqlfluff version (pip show sqlfluff)
rev: 2.3.2
rev: 3.1.1
hooks:
- id: sqlfluff-lint
language: python
# Need these two dependencies.
# sqlfluff-templater-dbt should match the version of sqlfluff above in rev (pip show sqlfluff-templater-dbt)
# dbt-snowflake needs to match the version in transform tab of Datacoves (pip show dbt-snowflake)
additional_dependencies:
# ["sqlfluff-templater-dbt==2.3.2", "dbt-snowflake==1.6.8", dbt-core==1.6.9]
[
"sqlfluff-templater-dbt==2.3.2",
"dbt-redshift==1.6.7",
dbt-core==1.6.9,
"sqlfluff-templater-dbt==3.1.1",
"dbt-core==1.8.7",
"dbt-snowflake==1.8.4",
]
args: [--config, "transform/.sqlfluff"]

args: [--config, transform/.sqlfluff]

- repo: https://github.com/adrienverge/yamllint.git
rev: v1.17.0
rev: v1.35.1
hooks:
- id: yamllint
args: [-c=.yamllint]
exclude: ^transform/.dbt_coves/templates

File renamed without changes.
7 changes: 6 additions & 1 deletion automate/dbt/get_artifacts.sh
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

#! /bin/bash

# Cause script to exit on error
Expand All @@ -9,7 +8,13 @@ cd $DATACOVES__DBT_HOME
mkdir -p logs

dbt run-operation get_last_artifacts

# Check if manifest.json exists; count its lines if it does, otherwise set the count to 0
if [ -e "logs/manifest.json" ]; then
LINES_IN_MANIFEST="$(grep -c '^' logs/manifest.json)"
else
LINES_IN_MANIFEST="0"
fi

if [ $LINES_IN_MANIFEST -eq 0 ]
then
Expand Down
3 changes: 1 addition & 2 deletions automate/dbt/profiles.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,11 @@ default:
type: snowflake
threads: 16
client_session_keep_alive: true

account: "{{ env_var('DATACOVES__MAIN__ACCOUNT') }}"
database: "{{ env_var('DATACOVES__MAIN__DATABASE') }}"
schema: "{{ env_var('DATACOVES__MAIN__SCHEMA') }}"
user: "{{ env_var('DATACOVES__MAIN__USER') }}"
password: "{{ env_var('DATACOVES__MAIN__PASSWORD') }}"
role: "{{ env_var('DATACOVES__MAIN__ROLE') }}"
warehouse: "{{ env_var('DATACOVES__MAIN__WAREHOUSE') }}"

Empty file modified automate/dbt/remove_test_databases.sh
100644 → 100755
Empty file.
4 changes: 4 additions & 0 deletions transform/dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ models:
datacoves_starter_project:
L1_staging:
+materialized: view
loans:
+materialized: view
country_data:
+materialized: view
L2_core:
+materialized: view
L3_marts:
Expand Down
40 changes: 29 additions & 11 deletions transform/macros/cicd/get_last_artifacts.sql
Original file line number Diff line number Diff line change
@@ -1,30 +1,48 @@
{# Macro for downloading the latest dbt artifacts (manifest.json and catalog.json) from a Snowflake stage. #}
{#
    dbt run-operation get_last_artifacts
#}
{# Once this is completed, deferral and state modifiers are available using --state logs #}

{% macro get_last_artifacts(stage = 'RAW.DBT_ARTIFACTS.ARTIFACTS') %}
    {# `stage` keeps its default so the macro can still be run or debugged without arguments #}
    {% set stage_name = stage %}

    {# We will put the manifest.json in the log directory and use it with the --state param in dbt #}
    {% set logs_dir = env_var('DATACOVES__DBT_HOME') ~ "/logs/" %}

    {# List only the .json files in the root folder (excludes archive dir) #}
    {% set list_stage_query %}
        LIST @{{ stage_name }} PATTERN = '^((?!(archive/)).)*.json$';
    {% endset %}

    {{ print("\nCurrent items in stage " ~ stage_name) }}
    {% set results = run_query(list_stage_query) %}
    {{ results.exclude('md5').print_table(max_column_width=40) }}
    {{ print("\n" ~ "="*85) }}

    {# Only attempt the downloads when the listing returned at least one file #}
    {% if results and results.rows %}

        {% set artifacts_destination = "file://" + logs_dir %}

        {# Download manifest.json and print the result of the GET #}
        {% set get_manifest_query %}
            get @{{ stage_name }}/manifest.json {{ artifacts_destination }};
        {% endset %}
        {% set download_manifest_results = run_query(get_manifest_query) %}
        {{ print("Manifest Downloaded") }}
        {{ download_manifest_results.print_table(max_column_width=40) }}

        {# Download catalog.json and print the result of the GET #}
        {% set get_catalog_query %}
            get @{{ stage_name }}/catalog.json {{ artifacts_destination }};
        {% endset %}
        {% set download_catalog_results = run_query(get_catalog_query) %}
        {{ print("Catalog Downloaded") }}
        {{ download_catalog_results.print_table(max_column_width=40) }}

    {% else %}
        {{ print("No artifacts found in stage " ~ stage_name ~ ". Skipping file download.") }}
    {% endif %}

{% endmacro %}
11 changes: 6 additions & 5 deletions transform/macros/cicd/grant_access_to_pr_database.sql
Original file line number Diff line number Diff line change
Expand Up @@ -5,28 +5,29 @@
#}

{%- macro grant_access_to_pr_database() -%}
{% set db_role_name = 'z_db_balboa_tst' %}
{% set db_role_name = 'analyst' %}
{% set db_name = target.database %}

{% set apply_db_grants_sql %}
grant usage on database {{ db_name }} to role {{db_role_name}};
{% endset %}

{% do run_query(apply_db_grants_sql) %}

{% set schemas_list %}

select schema_name
from {{ db_name }}.information_schema.schemata
where schema_name not in ('INFORMATION_SCHEMA','PUBLIC','DBT_TEST__AUDIT')
{{print(schema_list)}}
{% endset %}

{% set schemas = run_query(schemas_list) %}
{% for schema in schemas %}

{% set apply_schema_grants_sql %}
grant usage on schema {{db_name}}.{{ schema[0] }} to z_schema_{{schema[0]}};
grant select on all tables in schema {{db_name}}.{{ schema[0] }} to role z_tables_views_general;
grant select on all views in schema {{db_name}}.{{ schema[0] }} to role z_tables_views_general;
grant usage on schema {{db_name}}.{{ schema[0] }} to {{db_role_name}};
grant select on all tables in schema {{db_name}}.{{ schema[0] }} to role {{db_role_name}};
grant select on all views in schema {{db_name}}.{{ schema[0] }} to role {{db_role_name}};
{% endset %}

{% do run_query(apply_schema_grants_sql) %}
Expand Down
2 changes: 1 addition & 1 deletion transform/macros/create_database.sql
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
identifier="tables") -%}
{% if not database_exists %}
{% set create_db_sql %}
use role transformer_dbt;
use role analyst;
create database {{ target.database }};
grant ownership on database {{ target.database }} to role {{ target.role }};
use role {{ target.role }};
Expand Down
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
version: 2

models:
- name: COUNTRY_POPULATIONS
- name: stg_country_populations
description: 'Raw population information from Github Datasets repository'
columns:
- name: year
description: The year for which the population value is recorded
data_tests:
- not_null
- name: country_name
description: The name of the country
data_tests:
- not_null
- name: value
description: The population value for a particular year and country
- name: country_code
Expand Down
8 changes: 8 additions & 0 deletions transform/models/L1_staging/loans/_loans.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Declares the PERSONAL_LOANS table in the RAW database as a dbt source.
# No `schema` is set, so dbt falls back to the source name (MAYRAPENA1324).
version: 2

sources:
  - name: MAYRAPENA1324
    database: RAW
    tables:
      - name: PERSONAL_LOANS
        description: 'A personal loans source table'
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
-- Staging model over the PERSONAL_LOANS source: every column is cast to an
-- explicit type and exposed under a lower-case name, in the order listed below.
with personal_loans as (

    select *
    from {{ source('MAYRAPENA1324', 'PERSONAL_LOANS') }}

)

select
    -- Airbyte bookkeeping columns
    "_AIRBYTE_RAW_ID"::varchar as airbyte_raw_id,
    "_AIRBYTE_EXTRACTED_AT"::timestamp_tz as airbyte_extracted_at,
    "_AIRBYTE_META"::variant as airbyte_meta,

    -- Loan attributes, kept in the same column order as the source select
    "TOTAL_ACC"::float as total_acc,
    "ANNUAL_INC"::float as annual_inc,
    "EMP_LENGTH"::varchar as emp_length,
    "DESC"::varchar as desc,
    "TOTAL_PYMNT"::float as total_pymnt,
    "LAST_PYMNT_D"::varchar as last_pymnt_d,
    "ADDR_STATE"::varchar as addr_state,
    "NEXT_PYMNT_D"::varchar as next_pymnt_d,
    "EMP_TITLE"::varchar as emp_title,
    "COLLECTION_RECOVERY_FEE"::float as collection_recovery_fee,
    "MTHS_SINCE_LAST_MAJOR_DEROG"::float as mths_since_last_major_derog,
    "INQ_LAST_6MTHS"::float as inq_last_6mths,
    "SUB_GRADE"::varchar as sub_grade,
    "FUNDED_AMNT_INV"::float as funded_amnt_inv,
    "DELINQ_2YRS"::float as delinq_2yrs,
    "LOAN_ID"::varchar as loan_id,
    "FUNDED_AMNT"::float as funded_amnt,
    "VERIFICATION_STATUS"::varchar as verification_status,
    "DTI"::float as dti,
    "TOTAL_REC_PRNCP"::float as total_rec_prncp,
    "GRADE"::varchar as grade,
    "HOME_OWNERSHIP"::varchar as home_ownership,
    "ISSUE_D"::varchar as issue_d,
    "MTHS_SINCE_LAST_DELINQ"::float as mths_since_last_delinq,
    "OUT_PRNCP"::float as out_prncp,
    "PUB_REC"::float as pub_rec,
    "INT_RATE"::float as int_rate,
    "ZIP_CODE"::varchar as zip_code,
    "OPEN_ACC"::float as open_acc,
    "TERM"::varchar as term,
    "PYMNT_PLAN"::varchar as pymnt_plan,
    "URL"::varchar as url,
    "REVOL_BAL"::float as revol_bal,
    "RECOVERIES"::float as recoveries,
    "LAST_PYMNT_AMNT"::float as last_pymnt_amnt,
    "LOAN_AMNT"::float as loan_amnt,
    "PURPOSE"::varchar as purpose,
    "INITIAL_LIST_STATUS"::varchar as initial_list_status,
    "TOTAL_REC_INT"::float as total_rec_int,
    "TOTAL_PYMNT_INV"::float as total_pymnt_inv,
    "MTHS_SINCE_LAST_RECORD"::float as mths_since_last_record,
    "LAST_CREDIT_PULL_D"::varchar as last_credit_pull_d,
    "TOTAL_REC_LATE_FEE"::float as total_rec_late_fee,
    "MEMBER_ID"::float as member_id,
    "POLICY_CODE"::float as policy_code,
    "TITLE"::varchar as title,
    "LOAN_STATUS"::varchar as loan_status,
    "INSTALLMENT"::float as installment,
    "EARLIEST_CR_LINE"::varchar as earliest_cr_line,
    "REVOL_UTIL"::varchar as revol_util,
    "OUT_PRNCP_INV"::float as out_prncp_inv,
    "COLLECTIONS_12_MTHS_EX_MED"::float as collections_12_mths_ex_med
from personal_loans
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Properties for the stg_mayrapena1324_personal_loans staging model.
# Every column the model selects is listed; column descriptions are still to be written.
version: 2

models:
  - name: stg_mayrapena1324_personal_loans
    description: 'A staging model for personal loans'
    columns:
      - name: airbyte_raw_id
      - name: airbyte_extracted_at
      - name: airbyte_meta
      - name: total_acc
      - name: annual_inc
      - name: emp_length
      - name: desc
      - name: total_pymnt
      - name: last_pymnt_d
      - name: addr_state
      - name: next_pymnt_d
      - name: emp_title
      - name: collection_recovery_fee
      - name: mths_since_last_major_derog
      - name: inq_last_6mths
      - name: sub_grade
      - name: funded_amnt_inv
      - name: delinq_2yrs
      - name: loan_id
      - name: funded_amnt
      - name: verification_status
      - name: dti
      - name: total_rec_prncp
      - name: grade
      - name: home_ownership
      - name: issue_d
      - name: mths_since_last_delinq
      - name: out_prncp
      - name: pub_rec
      - name: int_rate
      - name: zip_code
      - name: open_acc
      - name: term
      - name: pymnt_plan
      - name: url
      - name: revol_bal
      - name: recoveries
      - name: last_pymnt_amnt
      - name: loan_amnt
      - name: purpose
      - name: initial_list_status
      - name: total_rec_int
      - name: total_pymnt_inv
      - name: mths_since_last_record
      - name: last_credit_pull_d
      - name: total_rec_late_fee
      - name: member_id
      - name: policy_code
      - name: title
      - name: loan_status
      - name: installment
      - name: earliest_cr_line
      - name: revol_util
      - name: out_prncp_inv
      - name: collections_12_mths_ex_med
20 changes: 20 additions & 0 deletions transform/models/L2_core/mayrapena1324_avg_by_grade.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
-- Average loan amount and number of loans per grade, counting only loans
-- whose loan_status is 'Fully Paid'.
with personal_loans as (

    select * from {{ ref('stg_mayrapena1324_personal_loans') }}

),

final as (

    select
        grade,
        avg(loan_amnt) as avg_loan_amount,
        count(*) as total_loans
    from personal_loans
    where loan_status = 'Fully Paid'
    group by grade

)

-- The sort lives in the outermost query: an ORDER BY inside a CTE applies only
-- within that CTE and does not guarantee the order of rows returned here.
select * from final
order by grade
Loading
Loading