Skip to content

Commit

Permalink
Merge pull request #29 from re-data/feature/new-dbt
Browse files Browse the repository at this point in the history
A couple of changes:
  - Update dbt version
  - Add more tests
  - Update Snowflake configuration: upper-case the Snowflake schema name
  - Two additional helper macros
  • Loading branch information
mateuszklimek authored Oct 13, 2021
2 parents 9fcd353 + 20d4a6a commit f310d50
Show file tree
Hide file tree
Showing 12 changed files with 156 additions and 21 deletions.
4 changes: 2 additions & 2 deletions dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ clean-targets: # directories to be removed by `dbt clean`
- "target"
- "dbt_modules"
vars:
re_data:max_columns_in_query: 12
re_data:max_columns_in_query: 10
re_data:time_window_end: '{{ run_started_at.strftime("%Y-%m-%d 00:00:00") }}'
re_data:time_window_start: '{{ (run_started_at - modules.datetime.timedelta(1)).strftime("%Y-%m-%d 00:00:00") }}'
re_data:anomaly_detection_look_back_days: 30
Expand Down Expand Up @@ -66,7 +66,7 @@ vars:
# as tables. These settings can be overridden in the individual model files
# using the `{{ config(...) }}` macro.

require-dbt-version: [">=0.20.0", "<0.21.0"]
require-dbt-version: [">=0.20.0", "<0.22.0"]

models:
re_data:
Expand Down
91 changes: 91 additions & 0 deletions integration_tests/data/expected/expected_sample_data_metrics.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
table_name,column_name,metric,value,time_window_start,time_window_end,interval_length_sec
"""postgres"".""dq_raw"".""sample_table""",value1,min,100,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",value1,max,200,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",value1,avg,127.5,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",value1,stddev,48.562674281111555,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",value1,variance,2358.3333333333335,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",value1,nulls_count,0,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",value1,nulls_percent,0,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",value1,diff,100,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",value2,min,109,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",value2,max,209,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",value2,avg,180.75,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",value2,stddev,47.97481978427155,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",value2,variance,2301.5833333333335,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",value2,nulls_count,0,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",value2,nulls_percent,0,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",value2,diff,100,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",null_value,min,,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",null_value,max,,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",null_value,avg,,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",null_value,stddev,,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",null_value,variance,,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",null_value,nulls_count,4,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",null_value,nulls_percent,100,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",null_value,diff,,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",event_type,min_length,3,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",event_type,max_length,4,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",event_type,avg_length,3.25,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",event_type,nulls_count,0,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",event_type,nulls_percent,0,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",event_type,missing_percent,0,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",event_type,missing_count,0,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",event_type,regex_test,4,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",event_type,match_regex,1,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",event_type,match_regex_percent,25,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",event_type,not_match_regex,1,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",event_type,not_match_regex_percent,25,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",event_type,distinct_values,2,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",event_type,duplicate_values,1,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",event_type,duplicate_rows,3,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",event_type,unique_rows,1,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""","",row_count,4,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""","",freshness,40765,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""","",buy_count,3,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""","",my_custom_table_metric,1000,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""","",distinct_table_rows,4,2021-05-01 00:00:00,2021-05-02 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",value1,min,100,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",value1,max,210,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",value1,avg,142.5,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",value1,stddev,49.91659710623979,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",value1,variance,2491.6666666666665,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",value1,nulls_count,0,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",value1,nulls_percent,0,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",value1,diff,110,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",value2,min,109,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",value2,max,209,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",value2,avg,180.75,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",value2,stddev,47.97481978427155,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",value2,variance,2301.5833333333335,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",value2,nulls_count,0,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",value2,nulls_percent,0,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",value2,diff,100,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",null_value,min,,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",null_value,max,,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",null_value,avg,,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",null_value,stddev,,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",null_value,variance,,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",null_value,nulls_count,4,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",null_value,nulls_percent,100,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",null_value,diff,,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",event_type,min_length,3,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",event_type,max_length,3,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",event_type,avg_length,3,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",event_type,nulls_count,0,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",event_type,nulls_percent,0,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",event_type,missing_percent,0,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",event_type,missing_count,0,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",event_type,regex_test,4,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",event_type,match_regex,0,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",event_type,match_regex_percent,0,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",event_type,not_match_regex,0,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",event_type,not_match_regex_percent,0,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",event_type,distinct_values,1,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",event_type,duplicate_values,1,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",event_type,duplicate_rows,4,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""",event_type,unique_rows,0,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""","",row_count,4,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""","",freshness,40765,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""","",buy_count,4,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""","",my_custom_table_metric,1000,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
"""postgres"".""dq_raw"".""sample_table""","",distinct_table_rows,4,2021-05-02 00:00:00,2021-05-03 00:00:00,86400
5 changes: 5 additions & 0 deletions integration_tests/dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -117,3 +117,8 @@ seeds:
expected_sample_data_z_score:
+column_types:
time_window_end: "TIMESTAMP"

expected_sample_data_metrics:
+column_types:
time_window_start: "TIMESTAMP"
time_window_end: "TIMESTAMP"
15 changes: 15 additions & 0 deletions integration_tests/models/expected/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ models:
compare_model: ref('expected_sample_data_max')
compare_columns:
- right(upper(replace(replace(table_name, '"', ''), '`', '')), 10) as table_name
- coalesce(upper(column_name), '') as column_name
- metric
- value
- time_window_start
Expand All @@ -41,6 +42,7 @@ models:
compare_model: ref('expected_sample_data_max_length')
compare_columns:
- right(upper(replace(replace(table_name, '"', ''), '`', '')), 10) as table_name
- coalesce(upper(column_name), '') as column_name
- metric
- value
- time_window_start
Expand All @@ -53,6 +55,7 @@ models:
compare_model: ref('expected_sample_data_z_score')
compare_columns:
- right(upper(replace(replace(table_name, '"', ''), '`', '')), 10) as table_name
- coalesce(upper(column_name), '') as column_name
- metric
- cast (z_score_value * 1000 as integer) as z_score_value
- time_window_end
Expand All @@ -61,3 +64,15 @@ models:
- cast (last_stddev * 1000 as integer) as last_stddev
- interval_length_sec

- name: test_re_data_metrics
tests:
- dbt_utils.equality:
compare_model: ref('expected_sample_data_metrics')
compare_columns:
- right(upper(replace(replace(table_name, '"', ''), '`', '')), 10) as table_name
- coalesce(upper(column_name), '') as column_name
- metric
- cast (value * 1000 as integer) as value
- time_window_start
- time_window_end
- interval_length_sec
6 changes: 6 additions & 0 deletions integration_tests/models/expected/test_re_data_metrics.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{# Select the re_data_metrics rows that belong to the raw sample table.
   The filter value is built by re_data.full_table_name_values from the table
   name, the "<target schema>_raw" schema and the target database. #}
{% set sample_table_filter = re_data.full_table_name_values(
    "sample_table",
    target.schema + "_raw",
    re_data.get_target_database()
) %}

select *
from {{ ref('re_data_metrics')}}
where table_name = {{ sample_table_filter }}

6 changes: 1 addition & 5 deletions integration_tests/test_dbs.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,7 @@ def test_postgres():
_test_generic('postgres')

def test_snowflake():
dbt_vars = copy.deepcopy(DBT_VARS)
schemas = dbt_vars['re_data:schemas']
schemas = [el.upper() for el in schemas]
dbt_vars['re_data:schemas'] = schemas
_test_generic('snowflake', dbt_vars)
_test_generic('snowflake')

def test_redshift():
_test_generic('redshift')
Expand Down
13 changes: 13 additions & 0 deletions macros/meta/schema_name.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@

{# Return the schema name produced by the adapter-specific implementation.
   The implementation (`<adapter>__schema_name`, falling back to
   `default__schema_name`) is looked up with `adapter.dispatch` in the
   `re_data` namespace and called with `name`. #}
{% macro schema_name(name) %}
    {{ return(adapter.dispatch('schema_name', 're_data')(name)) }}
{% endmacro %}

{# Default implementation of `schema_name`: returns `name` unchanged. #}
{% macro default__schema_name(name) %}
    {{ return(name) }}
{% endmacro %}

{# Snowflake implementation of `schema_name`: returns `name` upper-cased.
   NOTE(review): presumably needed because Snowflake reports unquoted schema
   names in upper case; confirm against the metadata queries that consume it. #}
{% macro snowflake__schema_name(name) %}
    {{ return(name.upper()) }}
{% endmacro %}
11 changes: 2 additions & 9 deletions macros/metrics/base/build_in/optional_column_metrics.sql
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,11 @@

{% macro postgres__distinct_values(context) %}
{# /* In Postgres, it's faster to count distinct values in a column by selecting them first and then counting them in a separate step */ #}
( with temp_table as (
with temp_table as (
select distinct {{ context.column_name }} from {{ context.table_name }}
where {{ in_time_window(context.time_filter) }}
)
select coalesce(count(*), 0) from temp_table
)
select coalesce(count(*), 0) from temp_table
{% endmacro %}

{% macro re_data_metric_approx_distinct_values(context) %}
Expand Down Expand Up @@ -75,37 +74,31 @@
{% endmacro %}

{% macro re_data_metric_duplicate_values(context) %}
(
with temp_table as (
select {{ context.column_name }} from {{ context.table_name }}
where {{ in_time_window(context.time_filter) }}
group by {{ context.column_name }}
having count(1) > 1
)
select coalesce(count(*), 0) from temp_table
)
{% endmacro %}

{% macro re_data_metric_duplicate_rows(context) %}
(
with temp_table as (
select {{ context.column_name }}, count(1) as row_count from {{ context.table_name }}
where {{ in_time_window(context.time_filter) }}
group by {{ context.column_name }}
having count(1) > 1
)
select coalesce(sum(row_count), 0) from temp_table
)
{% endmacro %}

{% macro re_data_metric_unique_rows(context) %}
(
with temp_table as (
select {{ context.column_name }}, count(1) as row_count from {{ context.table_name }}
where {{ in_time_window(context.time_filter) }}
group by {{ context.column_name }}
having count(1) = 1
)
select coalesce(sum(row_count), 0) from temp_table
)
{% endmacro %}
5 changes: 2 additions & 3 deletions macros/metrics/base/build_in/optional_table_metrics.sql
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
{% macro re_data_metric_distinct_table_rows(context) %}
( with temp_table AS (
with temp_table AS (
select distinct * from {{ context.table_name }}
where {{ in_time_window(context.time_filter) }}
)
select coalesce(count(*), 0) FROM temp_table
)
select coalesce(count(*), 0) FROM temp_table
{% endmacro %}
2 changes: 1 addition & 1 deletion macros/metrics/base/queries.sql
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
with temp_table_metrics as (
select
{%- for col_expr in col_exprs %}
{{ col_expr.expr }} as {{ col_expr.col_name + '___' + col_expr.metric }}
( {{ col_expr.expr }} ) as {{ col_expr.col_name + '___' + col_expr.metric }}
{%- if not loop.last %},{%- endif %}
{% endfor %}
from
Expand Down
16 changes: 16 additions & 0 deletions macros/utils/get_database.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@

{# Return the database name of the active target.
   The adapter-specific implementation (`<adapter>__get_target_database`,
   falling back to `default__get_target_database`) is looked up with
   `adapter.dispatch` in the `re_data` namespace.
   The result is passed back with `return()` so callers receive the value
   itself instead of rendered template text whose content would depend on the
   whitespace inside this macro. #}
{% macro get_target_database() %}
    {{ return(adapter.dispatch('get_target_database', 're_data')()) }}
{% endmacro %}

{# Default implementation of `get_target_database`: returns `target.dbname`. #}
{% macro default__get_target_database() %}
    {{ return(target.dbname) }}
{% endmacro %}

{# BigQuery implementation of `get_target_database`: returns `target.project`. #}
{% macro bigquery__get_target_database() %}
    {{ return(target.project) }}
{% endmacro %}

{# Snowflake implementation of `get_target_database`: returns `target.database`. #}
{% macro snowflake__get_target_database() %}
    {{ return(target.database) }}
{% endmacro %}
3 changes: 2 additions & 1 deletion models/meta/re_data_columns.sql
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
{% set schemas = var('re_data:schemas') %}
with columns_froms_select as (
{% for for_schema in schemas %}
{{ get_monitored_columns(for_schema) }}
{% set schema_name = re_data.schema_name(for_schema) %}
{{ get_monitored_columns(schema_name) }}
{%- if not loop.last %} union all {%- endif %}
{% endfor %}
)
Expand Down

0 comments on commit f310d50

Please sign in to comment.