From 3188309e3a44f9034411e0d3d7438c9578ff412f Mon Sep 17 00:00:00 2001 From: Elize Papineau Date: Tue, 10 May 2022 22:52:39 -0700 Subject: [PATCH 01/10] Add optional where clause to get_column_values --- macros/sql/get_column_values.sql | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/macros/sql/get_column_values.sql b/macros/sql/get_column_values.sql index f70890e2..5543bb9f 100644 --- a/macros/sql/get_column_values.sql +++ b/macros/sql/get_column_values.sql @@ -1,8 +1,8 @@ -{% macro get_column_values(table, column, order_by='count(*) desc', max_records=none, default=none) -%} - {{ return(adapter.dispatch('get_column_values', 'dbt_utils')(table, column, order_by, max_records, default)) }} +{% macro get_column_values(table, column, where=none, order_by='count(*) desc', max_records=none, default=none) -%} + {{ return(adapter.dispatch('get_column_values', 'dbt_utils')(table, column, where, order_by, max_records, default)) }} {% endmacro %} -{% macro default__get_column_values(table, column, order_by='count(*) desc', max_records=none, default=none) -%} +{% macro default__get_column_values(table, column, where, order_by='count(*) desc', max_records=none, default=none) -%} {#-- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. #} {%- if not execute -%} {% set default = [] if not default %} @@ -37,6 +37,11 @@ {{ column }} as value from {{ target_relation }} + + {% if where is not none %} + where {{ where }} + {% endif %} + group by {{ column }} order by {{ order_by }} From 42332efc51ab9a4f91c948823e9285d632126eac Mon Sep 17 00:00:00 2001 From: Elize Papineau Date: Tue, 10 May 2022 22:55:29 -0700 Subject: [PATCH 02/10] Add new argument for get_column_values to README --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 3ca3bb86..5fbd2e5e 100644 --- a/README.md +++ b/README.md @@ -553,6 +553,7 @@ This macro returns the unique values for a column in a given [relation](https:// **Args:** - `table` (required): a [Relation](https://docs.getdbt.com/reference/dbt-classes#relation) (a `ref` or `source`) that contains the list of columns you wish to select from - `column` (required): The name of the column you wish to find the column values of +- `where` (optiona, default=`none`): A where clause to filter the column values by. - `order_by` (optional, default=`'count(*) desc'`): How the results should be ordered. The default is to order by `count(*) desc`, i.e. decreasing frequency. Setting this as `'my_column'` will sort alphabetically, while `'min(created_at)'` will sort by when thevalue was first observed. - `max_records` (optional, default=`none`): The maximum number of column values you want to return - `default` (optional, default=`[]`): The results this macro should return if the relation has not yet been created (and therefore has no column values). From 96d68bee571f19946b169b85e7f0b69c7d6a0e4a Mon Sep 17 00:00:00 2001 From: Elize Papineau Date: Tue, 10 May 2022 23:00:27 -0700 Subject: [PATCH 03/10] Add an entry to CHANGELOG.md --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8375d2a8..fe644460 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ - [@graciegoheen](https://github.com/graciegoheen) (#545) - [@judahrand](https://github.com/judahrand) (#552) - [@clausherther](https://github.com/clausherther) (#555) +- [@epapineau](https://github.com/epapineau) (#582) # dbt-utils v0.8.4 ## Fixes @@ -22,6 +23,7 @@ - A macro for deduplicating data, `deduplicate()` ([#335](https://github.com/dbt-labs/dbt-utils/issues/335), [#512](https://github.com/dbt-labs/dbt-utils/pull/512)) - A cross-database implementation of `listagg()` ([#530](https://github.com/dbt-labs/dbt-utils/pull/530)) - A new macro to get the columns in a relation as a list, `get_filtered_columns_in_relation()`. This is similar to the `star()` macro, but creates a Jinja list instead of a comma-separated string. ([#516](https://github.com/dbt-labs/dbt-utils/pull/516)) +- Add an optional `where` clause parameter to `get_column_values()` to filter vaklues returned ([#511](https://github.com/dbt-labs/dbt-utils/issues/511), [#583](https://github.com/dbt-labs/dbt-utils/pull/583)) ## Fixes - `get_column_values()` once more raises an error when the model doesn't exist and there is no default provided ([#531](https://github.com/dbt-labs/dbt-utils/issues/531), [#533](https://github.com/dbt-labs/dbt-utils/pull/533)) From 0c007010bcdbef5acec5a13a6bebbb7b2cce5e49 Mon Sep 17 00:00:00 2001 From: Elize Papineau Date: Wed, 11 May 2022 14:05:30 -0700 Subject: [PATCH 04/10] Reorder args --- macros/sql/get_column_values.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/macros/sql/get_column_values.sql b/macros/sql/get_column_values.sql index 5543bb9f..d1dcf5d4 100644 --- a/macros/sql/get_column_values.sql +++ b/macros/sql/get_column_values.sql @@ -1,8 +1,8 @@ -{% macro get_column_values(table, column, where=none, order_by='count(*) desc', max_records=none, default=none) -%} - {{ return(adapter.dispatch('get_column_values', 'dbt_utils')(table, column, where, order_by, max_records, default)) }} +{% macro get_column_values(table, column, order_by='count(*) desc', max_records=none, default=none, where=none) -%} + {{ return(adapter.dispatch('get_column_values', 'dbt_utils')(table, column, order_by, max_records, default, where)) }} {% endmacro %} -{% macro default__get_column_values(table, column, where, order_by='count(*) desc', max_records=none, default=none) -%} +{% macro default__get_column_values(table, column, order_by='count(*) desc', max_records=none, default=none, where=none) -%} {#-- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. #} {%- if not execute -%} {% set default = [] if not default %} From 7fbb75210972e4b85c4dbfdc0708fc0775058351 Mon Sep 17 00:00:00 2001 From: Elize Papineau Date: Wed, 11 May 2022 15:10:38 -0700 Subject: [PATCH 05/10] Fix typo0 --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fe644460..b6fd7194 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,7 +23,7 @@ - A macro for deduplicating data, `deduplicate()` ([#335](https://github.com/dbt-labs/dbt-utils/issues/335), [#512](https://github.com/dbt-labs/dbt-utils/pull/512)) - A cross-database implementation of `listagg()` ([#530](https://github.com/dbt-labs/dbt-utils/pull/530)) - A new macro to get the columns in a relation as a list, `get_filtered_columns_in_relation()`. This is similar to the `star()` macro, but creates a Jinja list instead of a comma-separated string. ([#516](https://github.com/dbt-labs/dbt-utils/pull/516)) -- Add an optional `where` clause parameter to `get_column_values()` to filter vaklues returned ([#511](https://github.com/dbt-labs/dbt-utils/issues/511), [#583](https://github.com/dbt-labs/dbt-utils/pull/583)) +- Add an optional `where` clause parameter to `get_column_values()` to filter values returned ([#511](https://github.com/dbt-labs/dbt-utils/issues/511), [#583](https://github.com/dbt-labs/dbt-utils/pull/583)) ## Fixes - `get_column_values()` once more raises an error when the model doesn't exist and there is no default provided ([#531](https://github.com/dbt-labs/dbt-utils/issues/531), [#533](https://github.com/dbt-labs/dbt-utils/pull/533)) From 51070c10f2c7f3b4d6b9b4207612ec2df55e6daf Mon Sep 17 00:00:00 2001 From: Elize Papineau Date: Wed, 11 May 2022 15:11:28 -0700 Subject: [PATCH 06/10] Fix typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5fbd2e5e..ac238b5f 100644 --- a/README.md +++ b/README.md @@ -553,7 +553,7 @@ This macro returns the unique values for a column in a given [relation](https:// **Args:** - `table` (required): a [Relation](https://docs.getdbt.com/reference/dbt-classes#relation) (a `ref` or `source`) that contains the list of columns you wish to select from - `column` (required): The name of the column you wish to find the column values of -- `where` (optiona, default=`none`): A where clause to filter the column values by. +- `where` (optional, default=`none`): A where clause to filter the column values by. - `order_by` (optional, default=`'count(*) desc'`): How the results should be ordered. The default is to order by `count(*) desc`, i.e. decreasing frequency. Setting this as `'my_column'` will sort alphabetically, while `'min(created_at)'` will sort by when thevalue was first observed. - `max_records` (optional, default=`none`): The maximum number of column values you want to return - `default` (optional, default=`[]`): The results this macro should return if the relation has not yet been created (and therefore has no column values). From 224cbdb751f958da179e5c9ce1cba7f09ddba2be Mon Sep 17 00:00:00 2001 From: Elize Papineau Date: Wed, 11 May 2022 16:20:07 -0700 Subject: [PATCH 07/10] Add integration test for get_column_values where argument --- .../data/sql/data_get_column_values_where.csv | 12 +++++++ integration_tests/models/sql/schema.yml | 27 ++++++++++++++++ .../sql/test_get_column_values_where.sql | 31 +++++++++++++++++++ 3 files changed, 70 insertions(+) create mode 100644 integration_tests/data/sql/data_get_column_values_where.csv create mode 100644 integration_tests/models/sql/test_get_column_values_where.sql diff --git a/integration_tests/data/sql/data_get_column_values_where.csv b/integration_tests/data/sql/data_get_column_values_where.csv new file mode 100644 index 00000000..0295f559 --- /dev/null +++ b/integration_tests/data/sql/data_get_column_values_where.csv @@ -0,0 +1,12 @@ +field,condition +a,left +b,right +c,left +d,right +e,left +f,right +g,left +g,right +g,left +g,right +g,left \ No newline at end of file diff --git a/integration_tests/models/sql/schema.yml b/integration_tests/models/sql/schema.yml index c26e5db7..26d5538d 100644 --- a/integration_tests/models/sql/schema.yml +++ b/integration_tests/models/sql/schema.yml @@ -50,6 +50,33 @@ models: values: - '5' + - name: test_get_column_values_where + columns: + - name: count_a + tests: + - accepted_values: + values: + - '1' + + - name: count_c + tests: + - accepted_values: + values: + - '1' + + - name: count_e + tests: + - accepted_values: + values: + - '1' + + - name: count_g + tests: + - accepted_values: + values: + - '3' + # I'd like to add a column test here also? (https://github.com/calogica/dbt-expectations#expect_table_columns_to_not_contain_set) + - name: test_get_filtered_columns_in_relation tests: - dbt_utils.equality: diff --git a/integration_tests/models/sql/test_get_column_values_where.sql b/integration_tests/models/sql/test_get_column_values_where.sql new file mode 100644 index 00000000..8bdbce42 --- /dev/null +++ b/integration_tests/models/sql/test_get_column_values_where.sql @@ -0,0 +1,31 @@ + +{% set column_values = dbt_utils.get_column_values(ref('data_get_column_values_where'), 'field', default=[], order_by="field", where="condition='left'") %} + + +{% if target.type == 'snowflake' %} + +select + {% for val in column_values -%} + + sum(case when field = '{{ val }}' then 1 else 0 end) as count_{{ val }} + {%- if not loop.last %},{% endif -%} + + {%- endfor %} + +from {{ ref('data_get_column_values_where') }} +where condition = 'left' + +{% else %} + +select + {% for val in column_values -%} + + {{dbt_utils.safe_cast("sum(case when field = '" ~ val ~ "' then 1 else 0 end)", dbt_utils.type_string()) }} as count_{{ val }} + {%- if not loop.last %},{% endif -%} + + {%- endfor %} + +from {{ ref('data_get_column_values_where') }} +where condition = 'left' + +{% endif %} From 619dc4a69274234742961b08f7ea43b10b0dd367 Mon Sep 17 00:00:00 2001 From: Elize Papineau Date: Wed, 11 May 2022 17:08:03 -0700 Subject: [PATCH 08/10] Update where arg test to use data_get_column_values_where_expected --- .../data_get_column_values_where_expected.csv | 5 +++ integration_tests/models/sql/schema.yml | 28 ++------------- .../sql/test_get_column_values_where.sql | 35 +++---------------- 3 files changed, 13 insertions(+), 55 deletions(-) create mode 100644 integration_tests/data/sql/data_get_column_values_where_expected.csv diff --git a/integration_tests/data/sql/data_get_column_values_where_expected.csv b/integration_tests/data/sql/data_get_column_values_where_expected.csv new file mode 100644 index 00000000..e821706a --- /dev/null +++ b/integration_tests/data/sql/data_get_column_values_where_expected.csv @@ -0,0 +1,5 @@ +field +a +c +e +g \ No newline at end of file diff --git a/integration_tests/models/sql/schema.yml b/integration_tests/models/sql/schema.yml index 26d5538d..83733596 100644 --- a/integration_tests/models/sql/schema.yml +++ b/integration_tests/models/sql/schema.yml @@ -51,31 +51,9 @@ models: - '5' - name: test_get_column_values_where - columns: - - name: count_a - tests: - - accepted_values: - values: - - '1' - - - name: count_c - tests: - - accepted_values: - values: - - '1' - - - name: count_e - tests: - - accepted_values: - values: - - '1' - - - name: count_g - tests: - - accepted_values: - values: - - '3' - # I'd like to add a column test here also? (https://github.com/calogica/dbt-expectations#expect_table_columns_to_not_contain_set) + tests: + - dbt_utils.equality: + compare_model: ref('data_get_column_values_where_expected') - name: test_get_filtered_columns_in_relation tests: diff --git a/integration_tests/models/sql/test_get_column_values_where.sql b/integration_tests/models/sql/test_get_column_values_where.sql index 8bdbce42..a85a23aa 100644 --- a/integration_tests/models/sql/test_get_column_values_where.sql +++ b/integration_tests/models/sql/test_get_column_values_where.sql @@ -1,31 +1,6 @@ +{% set column_values = dbt_utils.get_column_values(ref('data_get_column_values_where'), 'field', where="condition = 'left'") %} -{% set column_values = dbt_utils.get_column_values(ref('data_get_column_values_where'), 'field', default=[], order_by="field", where="condition='left'") %} - - -{% if target.type == 'snowflake' %} - -select - {% for val in column_values -%} - - sum(case when field = '{{ val }}' then 1 else 0 end) as count_{{ val }} - {%- if not loop.last %},{% endif -%} - - {%- endfor %} - -from {{ ref('data_get_column_values_where') }} -where condition = 'left' - -{% else %} - -select - {% for val in column_values -%} - - {{dbt_utils.safe_cast("sum(case when field = '" ~ val ~ "' then 1 else 0 end)", dbt_utils.type_string()) }} as count_{{ val }} - {%- if not loop.last %},{% endif -%} - - {%- endfor %} - -from {{ ref('data_get_column_values_where') }} -where condition = 'left' - -{% endif %} +-- Create a relation using the values +{% for val in column_values -%} +select {{ dbt_utils.string_literal(val) }} as field {% if not loop.last %}union all{% endif %} +{% endfor %} \ No newline at end of file From 82a10d151d72ab1f6fa58aefea5e63f23c32a39c Mon Sep 17 00:00:00 2001 From: Elize Papineau Date: Thu, 12 May 2022 10:12:56 -0700 Subject: [PATCH 09/10] Update CHANGELOG.md Co-authored-by: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b6fd7194..79641dda 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ - [@graciegoheen](https://github.com/graciegoheen) (#545) - [@judahrand](https://github.com/judahrand) (#552) - [@clausherther](https://github.com/clausherther) (#555) -- [@epapineau](https://github.com/epapineau) (#582) +- [@epapineau](https://github.com/epapineau) (#583) # dbt-utils v0.8.4 ## Fixes From f48f17acf621bb94748778e779e50eb7db13fb09 Mon Sep 17 00:00:00 2001 From: Elize Papineau Date: Thu, 12 May 2022 10:16:30 -0700 Subject: [PATCH 10/10] Reorder changelog entry --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 79641dda..44549573 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,6 @@ # Unreleased +## New features +- Add an optional `where` clause parameter to `get_column_values()` to filter values returned ([#511](https://github.com/dbt-labs/dbt-utils/issues/511), [#583](https://github.com/dbt-labs/dbt-utils/pull/583)) ## Quality of life - Documentation about listagg macro ([#544](https://github.com/dbt-labs/dbt-utils/issues/544), [#560](https://github.com/dbt-labs/dbt-utils/pull/560)) @@ -23,7 +25,6 @@ - A macro for deduplicating data, `deduplicate()` ([#335](https://github.com/dbt-labs/dbt-utils/issues/335), [#512](https://github.com/dbt-labs/dbt-utils/pull/512)) - A cross-database implementation of `listagg()` ([#530](https://github.com/dbt-labs/dbt-utils/pull/530)) - A new macro to get the columns in a relation as a list, `get_filtered_columns_in_relation()`. This is similar to the `star()` macro, but creates a Jinja list instead of a comma-separated string. ([#516](https://github.com/dbt-labs/dbt-utils/pull/516)) -- Add an optional `where` clause parameter to `get_column_values()` to filter values returned ([#511](https://github.com/dbt-labs/dbt-utils/issues/511), [#583](https://github.com/dbt-labs/dbt-utils/pull/583)) ## Fixes - `get_column_values()` once more raises an error when the model doesn't exist and there is no default provided ([#531](https://github.com/dbt-labs/dbt-utils/issues/531), [#533](https://github.com/dbt-labs/dbt-utils/pull/533))