Skip to content

Commit

Permalink
Merge pull request #29 from kgmcquate/develop
Browse files Browse the repository at this point in the history
Updated docs and fixed dbt config merge
  • Loading branch information
kgmcquate authored Jan 6, 2024
2 parents 9cb47ad + 0781bb2 commit 553aef9
Show file tree
Hide file tree
Showing 5 changed files with 262 additions and 28 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/dbt_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ name: Test DBT package
on:
push:
  branches: [ "main", "develop" ]
  # GitHub Actions does not allow `paths` and `paths-ignore` on the same event,
  # so the exclusion is written as a `!` pattern inside `paths`. Negated
  # patterns must come after the positive patterns they refine.
  paths:
    # `**` is needed to match files at any depth below macros/.
    - 'macros/**'
    - dbt_project.yml
    - '!integration_tests/docs/**'

Expand Down
78 changes: 69 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,13 @@ The following databases are supported:
Integration tests are run for each of these databases in [Actions](https://github.com/kgmcquate/dbt-testgen/actions).
# Test types
dbt-testgen can generate these types of tests:
- [uniqueness](#uniqueness)
- [not_null](#not-null)
- [string length](#string-length)
- [range](#range)
- [accepted_values](#accepted-values)
- [recency](#recency)
dbt-testgen can generate these types of tests, using [built-in tests](https://docs.getdbt.com/reference/resource-properties/data-tests), [dbt_utils](https://github.com/dbt-labs/dbt-utils), and [dbt-expectations](https://github.com/calogica/dbt-expectations/):
- [uniqueness](https://github.com/dbt-labs/dbt-utils?tab=readme-ov-file#unique_combination_of_columns-source)
- [not_null](https://docs.getdbt.com/reference/resource-properties/data-tests#not_null)
- [string length](https://github.com/calogica/dbt-expectations/tree/main?tab=readme-ov-file#expect_column_value_lengths_to_be_between)
- [range](https://github.com/dbt-labs/dbt-utils?tab=readme-ov-file#accepted_range-source)
- [accepted_values](https://docs.getdbt.com/reference/resource-properties/data-tests#accepted_values)
- [recency](https://github.com/dbt-labs/dbt-utils?tab=readme-ov-file#recency-source)
# Macros
Expand Down Expand Up @@ -99,9 +99,51 @@ models:
```

<hr>
<br>

You can append the output to a properties file like this:
```bash
dbt compile -q --inline "{{ testgen.get_test_suggestions(ref('mymodel')) }}" >> models/schema.yml
```

<hr>
<br>

You can also merge with an existing properties YAML file:
```bash
EXISTING_YAML_BODY=`cat models/schema.yml`
dbt compile -q --inline "{{ testgen.get_test_suggestions(ref('users'), dbt_config=fromyaml(\"${EXISTING_YAML_BODY}\")) }}"
```

<hr>
<br>

Here's an example of more advanced usage:
```bash
EXISTING_YAML_BODY=$(cat <<EOF
models:
- name: stg_jaffle_shop__customers
config:
tags:
- pii
columns:
- name: customer_id
tests:
- unique
- not_null
seeds:
- name: stg_jaffle_shop__orders
config:
materialized: view
columns:
- name: order_id
tests:
- unique
- not_null
EOF
)

JINJA_TEMPLATE=$(cat <<EOF
{{ testgen.get_test_suggestions(
ref('users'),
Expand All @@ -113,23 +155,41 @@ JINJA_TEMPLATE=$(cat <<EOF
tests = ['uniqueness', 'accepted_values', 'range'],
uniqueness_composite_key_length = 2,
accepted_values_max_cardinality = 10,
range_stddevs = 1
range_stddevs = 1,
dbt_config=fromyaml("${EXISTING_YAML_BODY}")
) }}
EOF
)

dbt compile -q --inline "${JINJA_TEMPLATE}"
```
Output:
```yaml
models:
- name: stg_jaffle_shop__customers
config:
tags:
- pii
columns:
- name: customer_id
tests:
- unique
- not_null
seeds:
- name: stg_jaffle_shop__orders
columns:
- name: order_id
tests:
- unique
- not_null
- name: users
columns:
- name: user_id
tests:
- unique
- not_null
- dbt_utils.accepted_range:
min_value: -3.4017042154147514
min_value: -3.4017042154147523
max_value: 34.40170421541475
quote: true
tags:
Expand Down
146 changes: 146 additions & 0 deletions integration_tests/tests/merge_dbt_configs/merge_dbt_configs_3.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
{# First merge input: a properties YAML holding a `version` key, a `sources` section (raw_jaffle_shop) and a `models` section with two staging models, each carrying a `config` entry. #}
{% set input_yaml_1 %}
version: 2

sources:
- name: raw_jaffle_shop
description: A replica of the postgres database used to power the jaffle_shop app.
tables:
- name: customers
columns:
- name: id
description: Primary key of the table
tests:
- unique
- not_null

- name: orders
columns:
- name: id
description: Primary key of the table
tests:
- unique
- not_null

- name: user_id
description: Foreign key to customers

- name: status
tests:
- accepted_values:
values: ['placed', 'shipped', 'completed', 'return_pending', 'returned']


models:
- name: stg_jaffle_shop__customers
config:
tags: ['pii']
columns:
- name: customer_id
tests:
- unique
- not_null

- name: stg_jaffle_shop__orders
config:
materialized: view
columns:
- name: order_id
tests:
- unique
- not_null
- name: status
tests:
- accepted_values:
values: ['placed', 'shipped', 'completed', 'return_pending', 'returned']
config:
severity: warn

{% endset %}

{# Second merge input: only a `models` section, with a single model (`users`) that is absent from input_yaml_1 and has an empty `tests` list. #}
{% set input_yaml_2 %}
models:
- name: users
tests: []
columns:
- name: user_id
tests:
- accepted_values:
values:
- active
- inactive
{% endset %}

{# Parse both YAML inputs, merge them with the macro under test, then serialise the merged result back to YAML for comparison. #}
{% set parsed_config_1 = fromyaml(input_yaml_1) %}
{% set parsed_config_2 = fromyaml(input_yaml_2) %}
{% set merged_config = testgen.merge_dbt_configs(parsed_config_1, parsed_config_2) %}
{% set actual_yaml = toyaml(merged_config) %}


{# Expected merge result. Compared with the inputs: the `version` key is absent, the `config` entries that follow the two staging model names are absent, inline value lists appear in block form, and `users` is appended after the existing models without its empty `tests` list. #}
{% set expected_yaml %}
sources:
- name: raw_jaffle_shop
description: A replica of the postgres database used to power the jaffle_shop app.
tables:
- name: customers
columns:
- name: id
description: Primary key of the table
tests:
- unique
- not_null
- name: orders
columns:
- name: id
description: Primary key of the table
tests:
- unique
- not_null
- name: user_id
description: Foreign key to customers
- name: status
tests:
- accepted_values:
values:
- placed
- shipped
- completed
- return_pending
- returned
models:
- name: stg_jaffle_shop__customers
columns:
- name: customer_id
tests:
- unique
- not_null
- name: stg_jaffle_shop__orders
columns:
- name: order_id
tests:
- unique
- not_null
- name: status
tests:
- accepted_values:
values:
- placed
- shipped
- completed
- return_pending
- returned
config:
severity: warn
- name: users
columns:
- name: user_id
tests:
- accepted_values:
values:
- active
- inactive
{% endset %}

{# Compare the merged output with the expectation; both sides are trimmed so leading/trailing whitespace from the set blocks does not affect the result. #}
{{ assert_equal (actual_yaml | trim, expected_yaml | trim) }}
61 changes: 42 additions & 19 deletions macros/helpers/merge_dbt_configs.sql
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,11 @@

{# Want to preserve ordering, so don't use sets #}
{% set resource_types = [] %}
{# {{ print(dbt_config_1) }} #}
{% for resource_type in dbt_config_1.keys()|list + dbt_config_2.keys()|list %}
{% if resource_type not in resource_types %}

{% if resource_type not in resource_types and resource_type in ["models", "sources", "seeds"] %}
{# {{ print(resource_type) }} #}
{% do resource_types.append(resource_type) %}
{% endif %}
{% endfor %}
Expand All @@ -24,6 +27,7 @@

{% if resource_type not in dbt_config_1.keys() %}
{% do new_config.update({resource_type: dbt_config_2[resource_type]}) %}

{% elif resource_type not in dbt_config_2.keys() %}
{% do new_config.update({resource_type: dbt_config_1[resource_type]}) %}
{% else %}
Expand All @@ -43,18 +47,29 @@
{% do config_2_model_lookup.update({model["name"]: model}) %}
{% endfor %}

{# {{ print(model_names) }} #}

{# {{ print(config_1_model_lookup) }} #}

{# {{ print(config_2_model_lookup) }} #}

{% set new_models = [] %}
{% for model_name in model_names %}
{% set model_tests = [] %}
{% if "tests" in config_1_model_lookup[model_name].keys() %}
{% for model_test in config_1_model_lookup[model_name]["tests"] %}
{% do model_tests.append(model_test) %}
{% endfor %}
{% if model_name in config_1_model_lookup.keys() %}
{% if "tests" in config_1_model_lookup[model_name].keys() %}
{% for model_test in config_1_model_lookup[model_name]["tests"] %}
{% do model_tests.append(model_test) %}
{% endfor %}
{% endif %}
{% endif %}
{% if "tests" in config_2_model_lookup[model_name].keys() %}
{% for model_test in config_2_model_lookup[model_name]["tests"] %}
{% do model_tests.append(model_test) %}
{% endfor %}

{% if model_name in config_2_model_lookup.keys() %}
{% if "tests" in config_2_model_lookup[model_name].keys() %}
{% for model_test in config_2_model_lookup[model_name]["tests"] %}
{% do model_tests.append(model_test) %}
{% endfor %}
{% endif %}
{% endif %}

{% set model = {"name": model_name} %}
Expand All @@ -66,18 +81,22 @@
{% set col_names = [] %}

{% set config_1_col_lookup = {} %}
{% for col in config_1_model_lookup[model_name]["columns"] %}
{% do col_names.append(col["name"]) %}
{% do config_1_col_lookup.update({col["name"]: col}) %}
{% endfor %}
{% if model_name in config_1_model_lookup.keys() %}
{% for col in config_1_model_lookup[model_name]["columns"] %}
{% do col_names.append(col["name"]) %}
{% do config_1_col_lookup.update({col["name"]: col}) %}
{% endfor %}
{% endif %}

{% set config_2_col_lookup = {} %}
{% for col in config_2_model_lookup[model_name]["columns"] %}
{% if col["name"] not in col_names %}
{% do col_names.append(col["name"]) %}
{% endif %}
{% do config_2_col_lookup.update({col["name"]: col}) %}
{% endfor %}
{% if model_name in config_2_model_lookup.keys() %}
{% for col in config_2_model_lookup[model_name]["columns"] %}
{% if col["name"] not in col_names %}
{% do col_names.append(col["name"]) %}
{% endif %}
{% do config_2_col_lookup.update({col["name"]: col}) %}
{% endfor %}
{% endif %}

{% set new_columns = [] %}
{% for col_name in col_names %}
Expand Down Expand Up @@ -115,6 +134,10 @@
{% endif %}
{% endfor %}

{# {{ print(new_config) }} #}



{{ return(new_config) }}
{% endmacro %}

2 changes: 2 additions & 0 deletions macros/test_generation/get_uniqueness_test_suggestions.sql
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,8 @@

{% set new_dbt_config = {resource_type: [model]} %}

{# {{ print(dbt_config) }} #}

{% set merged_dbt_config = testgen.merge_dbt_configs(dbt_config, new_dbt_config) %}

{% do return(merged_dbt_config) %}
Expand Down

0 comments on commit 553aef9

Please sign in to comment.