Skip to content

Commit

Permalink
Merge pull request #7 from kgmcquate/main
Browse files Browse the repository at this point in the history
Added Redshift and Snowflake support
  • Loading branch information
kgmcquate authored Jan 3, 2024
2 parents f8eea52 + c082a4c commit 04f33b9
Show file tree
Hide file tree
Showing 13 changed files with 152 additions and 43 deletions.
85 changes: 73 additions & 12 deletions .github/workflows/dbt_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ jobs:
matrix:
python-version: ["3.9", "3.10", "3.11"]

env:
DBT_TARGET: duckdb

steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
Expand All @@ -27,19 +30,19 @@ jobs:
python-version: ${{ matrix.python-version }}

- name: pip install
run: pip install dbt-core duckdb dbt-duckdb
run: pip install dbt-core duckdb dbt-${DBT_TARGET}

- name: Install DBT deps
run: dbt deps --target duckdb
run: dbt deps --target ${DBT_TARGET}

- name: load test data
run: dbt seed --target duckdb
run: dbt seed --target ${DBT_TARGET}

- name: Create seed tests
run: bash tests/create_seeds_tests_yml.sh

- name: dbt test
run: dbt test --target duckdb
run: dbt test --target ${DBT_TARGET}

test-postgres:
name: Test on Postgres
Expand All @@ -53,21 +56,79 @@ jobs:
env:
POSTGRES_PASSWORD: postgres

env:
DBT_TARGET: postgres

steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}

- name: pip install
run: pip install dbt-core duckdb dbt-postgres
run: pip install dbt-core dbt-${DBT_TARGET}

- name: Install DBT deps
run: dbt deps --target postgres
run: dbt deps --target ${DBT_TARGET}

- name: load test data
run: dbt seed --target postgres
run: dbt seed --target ${DBT_TARGET}

- name: Create seed tests
run: bash tests/create_seeds_tests_yml.sh

- name: dbt test
run: dbt test --target ${DBT_TARGET}

test-snowflake:
name: Test on Snowflake
runs-on: ubuntu-latest
container:
image: python:3.11

env:
SNOWFLAKE_PASSWORD: ${{ secrets.SNOWFLAKE_PASSWORD }}
DBT_TARGET: snowflake

steps:
- uses: actions/checkout@v4

- name: pip install
run: pip install dbt-core dbt-${DBT_TARGET}

- name: Install DBT deps
run: dbt deps --target ${DBT_TARGET}

- name: load test data
run: dbt seed --target ${DBT_TARGET}

- name: Create seed tests
run: bash tests/create_seeds_tests_yml.sh

- name: dbt test
run: dbt test --target ${DBT_TARGET}

test-redshift:
name: Test on RedShift
runs-on: ubuntu-latest
container:
image: python:3.11

env:
REDSHIFT_PASSWORD: ${{ secrets.REDSHIFT_PASSWORD }}
DBT_TARGET: redshift

steps:
- uses: actions/checkout@v4

- name: pip install
run: pip install dbt-core dbt-${DBT_TARGET}

- name: Install DBT deps
run: dbt deps --target ${DBT_TARGET}

- name: load test data
run: dbt seed --target ${DBT_TARGET}

- name: Create seed tests
run: bash tests/create_seeds_tests_yml.sh

- name: dbt test
run: dbt test --target postgres
run: dbt test --target ${DBT_TARGET}
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ target/
dbt_packages/
logs/
dbt-env/
integration_tests/seeds/*.yml

# Byte-compiled / optimized / DLL files
__pycache__/
Expand Down
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@ models:
max_value: 30
```
## Supported Databases
The following databases are supported:
- Snowflake
- Postgres
- DuckDB
- Redshift
## Test types
dbt-testgen can generate these types of tests:
- [uniqueness](#uniqueness)
Expand Down
24 changes: 23 additions & 1 deletion integration_tests/profiles.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,26 @@ integration_tests:
password: postgres
port: 5432
dbname: postgres # or database instead of dbname
schema: public
schema: public
snowflake:
  # Snowflake output for the integration tests. The password is never stored
  # here; it is read from the SNOWFLAKE_PASSWORD environment variable.
  type: snowflake
  account: ircmtcn-ekb34223
  # User/password auth
  user: DBT_TESTGEN
  password: "{{ env_var('SNOWFLAKE_PASSWORD') }}"
  role: DBT_TESTGEN
  database: DBT_TESTGEN
  warehouse: DBT_TESTGEN
  schema: DBT_TESTGEN
  threads: 1
  # Canonical lowercase boolean (same parsed value as `False`).
  client_session_keep_alive: false
  query_tag: DBT_TESTGEN
outputs:
redshift:
  # Redshift output for the integration tests.
  type: redshift
  host: dbt-testgen.117819748843.us-east-1.redshift-serverless.amazonaws.com
  user: dbt_testgen
  # Read the password from the environment instead of committing it. The
  # test-redshift workflow job exports REDSHIFT_PASSWORD from repository
  # secrets. Any password that was previously committed in this file is
  # compromised and must be rotated.
  password: "{{ env_var('REDSHIFT_PASSWORD') }}"
  dbname: dbt_testgen
  schema: dbt_testgen
  port: 5439
2 changes: 1 addition & 1 deletion integration_tests/seeds/colnames_with_spaces.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
First Name,Age (Years),Current City
first name,age (years),current city
John,25,New York
Alice,30,San Francisco
Bob,22,Chicago
6 changes: 3 additions & 3 deletions integration_tests/tests/create_seeds_tests_yml.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
dbt compile -q \
--inline "{{ testgen.get_test_suggestions(ref('users'), resource_type='seeds') }}" \
dbt compile --target ${DBT_TARGET} -q \
--inline "{{ testgen.get_test_suggestions(ref('users'), resource_type='seeds', column_config={'quote': true} ) }}" \
> seeds/users_test_suggestions.yml

dbt compile -q \
dbt compile --target ${DBT_TARGET} -q \
--inline "{{ testgen.get_test_suggestions(ref('colnames_with_spaces'), resource_type='seeds', column_config={'quote': true}) }}" \
> seeds/colnames_with_spaces_test_suggestions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,23 @@
models:
- name: colnames_with_spaces
columns:
- name: First Name
- name: first name
description: Accepted values test generated by dbt-testgen
tests:
- accepted_values:
values:
- Alice
- Bob
- John
- name: Age (Years)
- name: age (years)
description: Accepted values test generated by dbt-testgen
tests:
- accepted_values:
values:
- '22'
- '25'
- '30'
- name: Current City
- name: current city
description: Accepted values test generated by dbt-testgen
tests:
- accepted_values:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
models:
- name: colnames_with_spaces
columns:
- name: Age (Years)
- name: age (years)
description: Numeric range test generated by dbt-testgen
tests:
- dbt_utils.accepted_range:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,19 @@
models:
- name: colnames_with_spaces
columns:
- name: First Name
- name: first name
description: Uniqueness test generated by dbt-testgen
tests:
- unique
- not_null
quote: true
- name: Age (Years)
- name: age (years)
description: Uniqueness test generated by dbt-testgen
tests:
- unique
- not_null
quote: true
- name: Current City
- name: current city
description: Uniqueness test generated by dbt-testgen
tests:
- unique
Expand Down
2 changes: 1 addition & 1 deletion macros/helpers/columns.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{% macro exclude_column_types(columns, exclude_types) %}
{% macro exclude_column_types(columns, exclude_types) %}
{% set filtered_columns = [] %}
{% for col in columns %}
{% if col.is_string() and "string" not in exclude_types %}
Expand Down
31 changes: 23 additions & 8 deletions macros/test_generation/get_accepted_values_test_suggestions.sql
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@
{{ return("array_agg(" ~ adapter.quote(colname) ~ "::VARCHAR)") }}
{% endmacro %}

{#
  Redshift variant of array_agg (presumably selected through dbt adapter
  dispatch -- confirm against the dispatching macro).

  Returns a SQL expression string that casts the quoted column to VARCHAR,
  concatenates all values with '|' using listagg, and splits the result back
  into an array with split_to_array.

  NOTE(review): a value that itself contains '|' would be split into several
  array elements -- confirm this cannot occur in the data being profiled.
#}
{% macro redshift__array_agg(colname) %}
{{ return("split_to_array(listagg(" ~ adapter.quote(colname) ~ "::VARCHAR, '|'), '|') ") }}
{% endmacro %}



{% macro get_accepted_values_test_suggestions(
table_relation,
sample = false,
Expand Down Expand Up @@ -64,12 +70,13 @@

{% set count_distinct_exprs = [] %}
{% for column in columns %}
{# Use capitals for colnames because of snowflake #}
{% do count_distinct_exprs.append(
"
select " ~ loop.index ~ " AS ordering,
'" ~ column.column ~ "' AS colname,
count(1) as cardinality, " ~
testgen.array_agg(column.column) ~ " AS unique_values
select " ~ loop.index ~ " AS ORDERING,
'" ~ column.column ~ "' AS COLNAME,
count(1) as CARDINALITY, " ~
testgen.array_agg(column.column) ~ " AS UNIQUE_VALUES
from (
select " ~ adapter.quote(column.column) ~ "
from " ~ table_relation ~ "
Expand All @@ -83,20 +90,26 @@
SELECT * FROM (
{{ count_distinct_exprs | join("\nUNION ALL\n") }}
) t2
WHERE cardinality <= {{ max_cardinality|string }}
ORDER BY ordering ASC
WHERE CARDINALITY <= {{ max_cardinality|string }}
ORDER BY ORDERING ASC
{% endset %}

{# {{ print(count_distinct_sql) }} #}

{% set cardinality_results = testgen.query_as_list(count_distinct_sql) %}

{# {{ print(cardinality_results) }} #}

{% set column_tests = [] %}
{% for cardinality_result in cardinality_results %}

{# {{ print(cardinality_result.COLNAME) }} #}

{% set col_config = {
"name": cardinality_result.colname,
"name": cardinality_result[1],
"description": "Accepted values test generated by dbt-testgen",
"tests": [
{"accepted_values": {"values": fromjson(cardinality_result.unique_values)|sort}}
{"accepted_values": {"values": fromjson(cardinality_result[3])|sort}}
]
}
%}
Expand All @@ -109,6 +122,8 @@

{% set new_dbt_config = {resource_type: [{"name": table_relation.identifier, "columns": column_tests}]} %}

{# {{ print(new_dbt_config) }} #}

{% set merged_dbt_config = testgen.merge_dbt_configs(dbt_config, new_dbt_config) %}

{% do return(merged_dbt_config) %}
Expand Down
12 changes: 6 additions & 6 deletions macros/test_generation/get_range_test_suggestions.sql
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,9 @@
{% set min_max_exprs = [] %}
{% for column in number_cols %}
{% do min_max_exprs.append(
"SELECT '" ~ column.column ~ "' AS colname, " ~
"MIN(" ~ adapter.quote(column.column) ~ ") as col_min, " ~
"MAX(" ~ adapter.quote(column.column) ~ ") as col_max " ~
"SELECT '" ~ column.column ~ "' AS COLNAME, " ~
"MIN(" ~ adapter.quote(column.column) ~ ") as COL_MIN, " ~
"MAX(" ~ adapter.quote(column.column) ~ ") as COL_MAX " ~
"FROM " ~ table_relation
) %}
{% endfor %}
Expand All @@ -67,13 +67,13 @@
{% set column_tests = [] %}
{% for result in results %}
{% set col_config = {
"name": result.colname,
"name": result[0],
"description": "Numeric range test generated by dbt-testgen",
"tests": [
{
"dbt_utils.accepted_range": {
"min_value": result.col_min,
"max_value": result.col_max
"min_value": result[1],
"max_value": result[2]
}
}
]
Expand Down
12 changes: 8 additions & 4 deletions macros/test_generation/get_uniqueness_test_suggestions.sql
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@
{% do column_combo_quoted.append(adapter.quote(col))%}
{% endfor %}
{% do count_distinct_exprs.append(
"SELECT " ~ loop.index ~ " AS ordering, count(1) AS cardinality
"SELECT " ~ loop.index ~ " AS ORDERING, count(1) AS CARDINALITY
from (
SELECT 1 FROM " ~ table_relation ~ "
GROUP BY " ~ column_combo_quoted|join(", ") ~ "
Expand All @@ -97,16 +97,20 @@
{% endset %}

{% set count_sql %}
{{ "SELECT count(1) AS table_count FROM " ~ table_relation }}
{{ "SELECT count(1) AS TABLE_COUNT FROM " ~ table_relation }}
{% endset%}

{% set table_count = testgen.query_as_list(count_sql)[0].table_count %}
{% set table_count = testgen.query_as_list(count_sql)[0][0] %}

{% set cardinality_results = zip(column_combinations, testgen.query_as_list(count_distinct_sql)) %}

{# {{ print(table_count) }}
{{ print(cardinality_results|list) }} #}

{% set unique_keys = [] %}
{% for cardinality_result in cardinality_results %}
{% if cardinality_result[1].cardinality == table_count %}
{% if cardinality_result[1][1] == table_count %}
{# {{ print(cardinality_result) }} #}
{% do unique_keys.append(cardinality_result[0]) %}
{% endif %}
{% endfor %}
Expand Down

0 comments on commit 04f33b9

Please sign in to comment.