diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 1e690c5..9c0e45d 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -33,9 +33,12 @@ jobs: dbname: materialize schema: public EOF - - name: Test + - name: Test dbt_utils run: make test-materialize working-directory: integration_tests/dbt_utils + - name: Test dbt_audit_helper + run: make test-materialize + working-directory: integration_tests/dbt_audit_helper - name: Release uses: softprops/action-gh-release@v1 if: startsWith(github.ref, 'refs/tags/') diff --git a/.gitmodules b/.gitmodules index e342c16..02d5aa6 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "dbt-utils"] path = dbt-utils url = https://github.com/dbt-labs/dbt-utils +[submodule "dbt-audit-helper"] + path = dbt-audit-helper + url = https://github.com/dbt-labs/dbt-audit-helper diff --git a/CHANGELOG.md b/CHANGELOG.md index 2ebb9cc..d942a2f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,10 @@ # materialize-dbt-utils Changelog -## 0.3.0 - 2021-03-03 +## 0.4.0 - 2022-05-05 + +* Shim `dbt-audit-helper` ([v0.5.0](https://github.com/dbt-labs/dbt-audit-helper/releases/tag/0.5.0)). + +## 0.3.0 - 2022-03-03 * Bump the minimum supported version of dbt-materialize to v1.0.3. @@ -10,7 +14,7 @@ * Correctly mark the `insert_by_period` macro as unsupported. -## 0.2.0 - 2021-02-14 +## 0.2.0 - 2022-02-14 * Fix the `last_day` macro. diff --git a/MAINTAINER.md b/MAINTAINER.md index 376a5b0..eccc260 100644 --- a/MAINTAINER.md +++ b/MAINTAINER.md @@ -63,6 +63,52 @@ git commit -am 'Update dbt-utils' 11. Verify that CI passes on the tag and publishes a GitHub release. +## Testing + +To run the suite of integration tests: + +1. Add the following profile to your list of dbt profiles (run `dbt debug` to locate + your local profiles): + + ```yaml + integration_tests: + outputs: + materialize: + type: materialize + threads: 1 + host: localhost + port: 6875 + user: materialize + password: password + dbname: materialize + schema: public + target: materialize + ``` + +1. Clone this repository and navigate to the `integration_tests` subdirectory: + + ```nofmt + git clone https://github.com/MaterializeInc/materialize-dbt-utils.git + + cd integration_tests + ``` + +1. To run the tests for `dbt-utils`: + + ```nofmt + cd dbt_utils + + make test-materialize + ``` + +1. To run the tests for `dbt-audit-helper`: + + ```noftm + cd dbt_audit_helper + + make test-materialize + ``` + [.github/workflows/main.yml]: .github/workflows/main.yml [dbt_project.yml]: dbt_project.yml [CHANGELOG.md]: CHANGELOG.md diff --git a/README.md b/README.md index 35f77a4..775ba38 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,7 @@ This [dbt](https://github.com/dtb-labs/dbt) package provides shims for using the following packages with [Materialize]: - [dbt-utils](https://github.com/dbt-labs/dbt-utils) +- [dbt-audit-helper](https://github.com/dbt-labs/dbt-audit-helper) ## Getting started with dbt + Materialize @@ -15,7 +16,7 @@ following packages with [Materialize]: ## Installation Requirements: -- [dbt-materialize](https://pypi.org/project/dbt-materialize/) v1.0.3 +- [dbt-materialize](https://pypi.org/project/dbt-materialize/) v1.0.3+ - [Materialize](https://materialize.com/docs/install/) v0.22.0+ Install this package by adding the following to the `packages.yml` file in your @@ -26,15 +27,17 @@ packages: - package: dbt-labs/dbt_utils version: 0.8.0 - package: MaterializeInc/materialize_dbt_utils - version: 0.3.0 + version: 0.4.0 ``` -Then set a `dispatch` config in your `dbt_project.yml: +Then set a `dispatch` config in your `dbt_project.yml`: ```yml dispatch: - macro_namespace: dbt_utils search_order: [materialize_dbt_utils, dbt_utils] + - macro_namespace: audit_helper + search_order: [materialize_dbt_utils, audit_helper] ``` With this project setting, dbt will first search for macro implementations in @@ -131,9 +134,22 @@ Name | Supported? | Notes ---------------------|--------------------|------ [`insert_by_period`] | :x: | Materialize natively provides incremental view maintenance. +### [`dbt-audit-helper` macros] + +Name | Supported? | Notes +------------------------------|--------------------|------ +[`compare_relations`] | :white_check_mark: | +[`compare_queries`] | :white_check_mark: | +[`compare_column_values`] | :white_check_mark: | +[`compare_relation_columns`] | :white_check_mark: | + [`accepted_range`]: https://github.com/dbt-labs/dbt-utils#accepted_range-source [`at_least_one`]: https://github.com/dbt-labs/dbt-utils#at_least_one-source [`cardinality_equality`]: https://github.com/dbt-labs/dbt-utils#cardinality_equality-source +[`compare_column_values`]: https://github.com/dbt-labs/dbt-audit-helper#compare_column_values-source +[`compare_queries`]: https://github.com/dbt-labs/dbt-audit-helper#compare_queries-source +[`compare_relation_columns`]: https://github.com/dbt-labs/dbt-audit-helper#compare_relation_columns-source +[`compare_relations`]: https://github.com/dbt-labs/dbt-audit-helper#compare_relations-source [`current_timestamp`]: https://github.com/dbt-labs/dbt-utils#current_timestamp-source [`date_spine`]: https://github.com/dbt-labs/dbt-utils#date_spine-source [`date_trunc`]: https://github.com/dbt-labs/dbt-utils#date_trunc-source diff --git a/dbt-audit-helper b/dbt-audit-helper new file mode 160000 index 0000000..d76a87f --- /dev/null +++ b/dbt-audit-helper @@ -0,0 +1 @@ +Subproject commit d76a87fde6a9786e04ffa2b9c4afd18cc9ea22fe diff --git a/integration_tests/dbt_audit_helper/Makefile b/integration_tests/dbt_audit_helper/Makefile new file mode 100644 index 0000000..1198b46 --- /dev/null +++ b/integration_tests/dbt_audit_helper/Makefile @@ -0,0 +1,7 @@ +TARGET ?= materialize + +test-materialize: + dbt deps + dbt seed --target $(TARGET) + dbt run --target $(TARGET) + dbt test --target $(TARGET) diff --git a/integration_tests/dbt_audit_helper/dbt_project.yml b/integration_tests/dbt_audit_helper/dbt_project.yml new file mode 100644 index 0000000..ae0d189 --- /dev/null +++ b/integration_tests/dbt_audit_helper/dbt_project.yml @@ -0,0 +1,9 @@ +name: materialize_dbt_utils_dbt_audit_helper_integration_tests +version: '1.0' +config-version: 2 + +profile: integration_tests + +dispatch: + - macro_namespace: audit_helper + search_order: [materialize_dbt_utils, audit_helper] diff --git a/integration_tests/dbt_audit_helper/packages.yml b/integration_tests/dbt_audit_helper/packages.yml new file mode 100644 index 0000000..cebb8fe --- /dev/null +++ b/integration_tests/dbt_audit_helper/packages.yml @@ -0,0 +1,4 @@ +packages: + - local: ../../ + - local: ../../dbt-audit-helper + - local: ../../dbt-audit-helper/integration_tests diff --git a/macros/dbt_audit_helper/compare_column_values.sql b/macros/dbt_audit_helper/compare_column_values.sql new file mode 100644 index 0000000..e3af0bd --- /dev/null +++ b/macros/dbt_audit_helper/compare_column_values.sql @@ -0,0 +1,63 @@ +{% macro materialize__compare_column_values(a_query, b_query, primary_key, column_to_compare) -%} +with a_query as ( + {{ a_query }} +), + +b_query as ( + {{ b_query }} +), + +joined as ( + select + coalesce(a_query.{{ primary_key }}, b_query.{{ primary_key }}) as {{ primary_key }}, + a_query.{{ column_to_compare }} as a_query_value, + b_query.{{ column_to_compare }} as b_query_value, + case + when a_query.{{ column_to_compare }} = b_query.{{ column_to_compare }} then '✅: perfect match' + when a_query.{{ column_to_compare }} is null and b_query.{{ column_to_compare }} is null then '✅: both are null' + when a_query.{{ primary_key }} is null then '🤷: ‍missing from a' + when b_query.{{ primary_key }} is null then '🤷: missing from b' + when a_query.{{ column_to_compare }} is null then '🤷: value is null in a only' + when b_query.{{ column_to_compare }} is null then '🤷: value is null in b only' + when a_query.{{ column_to_compare }} != b_query.{{ column_to_compare }} then '🙅: ‍values do not match' + else 'unknown' -- this should never happen + end as match_status, + case + when a_query.{{ column_to_compare }} = b_query.{{ column_to_compare }} then 0 + when a_query.{{ column_to_compare }} is null and b_query.{{ column_to_compare }} is null then 1 + when a_query.{{ primary_key }} is null then 2 + when b_query.{{ primary_key }} is null then 3 + when a_query.{{ column_to_compare }} is null then 4 + when b_query.{{ column_to_compare }} is null then 5 + when a_query.{{ column_to_compare }} != b_query.{{ column_to_compare }} then 6 + else 7 -- this should never happen + end as match_order + + from a_query + + full outer join b_query on a_query.{{ primary_key }} = b_query.{{ primary_key }} +), + +aggregated as ( + select + match_status, + match_order, + count(*) as count_records + from joined + + group by match_status, match_order +) + +select + match_status, + count_records, + -- TODO(morsapaes): Materialize doesn't support window functions yet, + -- so adding a ugly hack. Once we do, revert to the original: + -- round(100.0 * count_records / sum(count_records) over (), 2) as percent_of_total + round(100.0 * count_records / (select sum(count_records) from aggregated), 2) as percent_of_total + +from aggregated + +order by match_order + +{% endmacro %} diff --git a/macros/dbt_audit_helper/compare_queries.sql b/macros/dbt_audit_helper/compare_queries.sql new file mode 100644 index 0000000..421ddd1 --- /dev/null +++ b/macros/dbt_audit_helper/compare_queries.sql @@ -0,0 +1,88 @@ +{% macro materialize__compare_queries(a_query, b_query, primary_key=None) %} + +with a as ( + + {{ a_query }} + +), + +b as ( + + {{ b_query }} + +), + +a_intersect_b as ( + + select * from a + {{ dbt_utils.intersect() }} + select * from b + +), + +a_except_b as ( + + select * from a + {{ dbt_utils.except() }} + select * from b + +), + +b_except_a as ( + + select * from b + {{ dbt_utils.except() }} + select * from a + +), + +all_records as ( + + select + *, + true as in_a, + true as in_b + from a_intersect_b + + union all + + select + *, + true as in_a, + false as in_b + from a_except_b + + union all + + select + *, + false as in_a, + true as in_b + from b_except_a + +), + +summary_stats as ( + select + in_a, + in_b, + count(*) as count + from all_records + + group by 1, 2 +) +-- select * from all_records +-- where not (in_a and in_b) +-- order by {{ primary_key ~ ", " if primary_key is not none }} in_a desc, in_b desc + +select + *, + -- TODO(morsapaes): Materialize doesn't support window functions yet, + -- so adding a ugly hack. Once we do, revert to the original: + -- round(100.0 * count / sum(count) over (), 2) as percent_of_total + round(100.0 * count / (select sum(count) from summary_stats), 2) as percent_of_total + +from summary_stats +order by in_a desc, in_b desc + +{% endmacro %} diff --git a/macros/dbt_audit_helper/compare_relation_columns.sql b/macros/dbt_audit_helper/compare_relation_columns.sql new file mode 100644 index 0000000..0e4bcae --- /dev/null +++ b/macros/dbt_audit_helper/compare_relation_columns.sql @@ -0,0 +1,45 @@ +{% macro materialize__compare_relation_columns(a_relation, b_relation) %} + +with a_cols as ( + {{ audit_helper.get_columns_in_relation_sql(a_relation) }} +), + +b_cols as ( + {{ audit_helper.get_columns_in_relation_sql(b_relation) }} +) + +select + column_name, + a_cols.ordinal_position as a_ordinal_position, + b_cols.ordinal_position as b_ordinal_position, + a_cols.data_type as a_data_type, + b_cols.data_type as b_data_type, + coalesce(a_cols.ordinal_position = b_cols.ordinal_position, false) as has_ordinal_position_match, + coalesce(a_cols.data_type = b_cols.data_type, false) as has_data_type_match +from a_cols +full outer join b_cols using (column_name) +order by coalesce(a_cols.ordinal_position, b_cols.ordinal_position) + +{% endmacro %} + + +{% macro materialize__get_columns_in_relation_sql(relation) %} +{#- +From: https://github.com/dbt-labs/dbt/blob/23484b18b71010f701b5312f920f04529ceaa6b2/plugins/postgres/dbt/include/postgres/macros/adapters.sql#L32 +Edited to include ordinal_position +-#} + select + ordinal_position, + column_name, + data_type, + character_maximum_length, + numeric_precision, + numeric_scale + + from {{ relation.information_schema('columns') }} + where table_name = '{{ relation.identifier }}' + {% if relation.schema %} + and table_schema = '{{ relation.schema }}' + {% endif %} + order by ordinal_position +{% endmacro %} diff --git a/macros/dbt_audit_helper/compare_relations.sql b/macros/dbt_audit_helper/compare_relations.sql new file mode 100644 index 0000000..ff4e189 --- /dev/null +++ b/macros/dbt_audit_helper/compare_relations.sql @@ -0,0 +1,25 @@ +{% macro materialize__compare_relations(a_relation, b_relation, exclude_columns=[], primary_key=None) %} + +{%- set a_columns = adapter.get_columns_in_relation(a_relation) -%} + +{% set check_columns=audit_helper.pop_columns(a_columns, exclude_columns) %} + +{% set check_cols_csv = check_columns | map(attribute='quoted') | join(', ') %} + +{% set a_query %} +select + {{ check_cols_csv }} + +from {{ a_relation }} +{% endset %} + +{% set b_query %} +select + {{ check_cols_csv }} + +from {{ b_relation }} +{% endset %} + +{{ dbt_materialize_utils.materialize__compare_queries(a_query, b_query, primary_key) }} + +{% endmacro %}