diff --git a/CHANGELOG.md b/CHANGELOG.md index 6ceb64f..14d352a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,28 @@ * Added support for optional `min_value` and `max_value` parameters to all`*_between_*` tests. ([#70](https://github.com/calogica/dbt-expectations/pull/70)) +* Added support for `strictly` parameter to `between` tests. If set to `True`, `strictly` changes the operators `>=` and `<=` to `>` and `<`. + + For example, while + + ```yaml + dbt_expectations.expect_column_stdev_to_be_between: + min_value: 0 + ``` + + evaluates to `>= 0`, + + ```yaml + dbt_expectations.expect_column_stdev_to_be_between: + min_value: 0 + strictly: True + ``` + + evaluates to `> 0`. + ([#72](https://github.com/calogica/dbt-expectations/issues/72), [#74](https://github.com/calogica/dbt-expectations/pull/74)) + + + ## Fixes * Corrected a typo in the README ([#67](https://github.com/calogica/dbt-expectations/pull/67)) diff --git a/README.md b/README.md index c67944c..b2fe5a0 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,9 @@ # dbt-expectations -Extension package for [**dbt**](https://github.com/fishtown-analytics/dbt) inspired by the [Great Expectations package for Python](https://greatexpectations.io/). The intent is to allow dbt users to deploy GE-like tests in their data warehouse directly from dbt, vs having to add another integration with their data warehouse. + + +**dbt-expectations** is an extension package for [**dbt**](https://github.com/fishtown-analytics/dbt), inspired by the [Great Expectations package for Python](https://greatexpectations.io/). The intent is to allow dbt users to deploy GE-like tests in their data warehouse directly from dbt, vs having to add another integration with their data warehouse. 
+ ## Install diff --git a/expectations.gif b/expectations.gif new file mode 100644 index 0000000..4163d16 Binary files /dev/null and b/expectations.gif differ diff --git a/integration_tests/models/schema_tests/schema.yml b/integration_tests/models/schema_tests/schema.yml index 5e9eb77..f7516e9 100644 --- a/integration_tests/models/schema_tests/schema.yml +++ b/integration_tests/models/schema_tests/schema.yml @@ -230,6 +230,9 @@ models: - dbt_expectations.expect_column_stdev_to_be_between: min_value: 0 max_value: 2 + - dbt_expectations.expect_column_stdev_to_be_between: + min_value: 0 + strictly: true - dbt_expectations.expect_column_most_common_value_to_be_in_set: value_set: [0.5] top_n: 1 @@ -289,6 +292,7 @@ models: - dbt_expectations.expect_table_row_count_to_be_between: max_value: 10000 group_by: [group_id] + strictly: True - dbt_expectations.expect_grouped_row_values_to_have_recent_data: group_by: [group_id] timestamp_column: date_day diff --git a/macros/schema_tests/_generalized/expression_between.sql b/macros/schema_tests/_generalized/expression_between.sql index 1636e9b..e9e884d 100644 --- a/macros/schema_tests/_generalized/expression_between.sql +++ b/macros/schema_tests/_generalized/expression_between.sql @@ -3,10 +3,11 @@ min_value=None, max_value=None, group_by_columns=None, - row_condition=None + row_condition=None, + strictly=False ) %} - {{ dbt_expectations.expression_between(model, expression, min_value, max_value, group_by_columns, row_condition) }} + {{ dbt_expectations.expression_between(model, expression, min_value, max_value, group_by_columns, row_condition, strictly) }} {% endmacro %} @@ -15,7 +16,8 @@ min_value, max_value, group_by_columns, - row_condition + row_condition, + strictly ) %} {%- if min_value is none and max_value is none -%} @@ -23,10 +25,13 @@ "You have to provide either a min_value, max_value or both." 
) }} {%- endif -%} + +{%- set strict_operator = "" if strictly else "=" -%} + {% set expression_min_max %} ( 1=1 -{%- if min_value is not none %} and {{ expression }} >= {{ min_value }}{% endif %} -{%- if max_value is not none %} and {{ expression }} <= {{ max_value }}{% endif %} +{%- if min_value is not none %} and {{ expression | trim }} >{{ strict_operator }} {{ min_value }}{% endif %} +{%- if max_value is not none %} and {{ expression | trim }} <{{ strict_operator }} {{ max_value }}{% endif %} ) {% endset %} diff --git a/macros/schema_tests/_generalized/expression_is_true.sql b/macros/schema_tests/_generalized/expression_is_true.sql index 5909163..235e263 100644 --- a/macros/schema_tests/_generalized/expression_is_true.sql +++ b/macros/schema_tests/_generalized/expression_is_true.sql @@ -9,6 +9,14 @@ {% endmacro %} +{% macro truth_expression(expression) %} + {{ adapter.dispatch('truth_expression', packages = dbt_expectations._get_namespaces()) (expression) }} +{% endmacro %} + +{% macro default__truth_expression(expression) %} + {{ expression }} as expression +{% endmacro %} + {% macro expression_is_true(model, expression, test_condition="= true", @@ -26,7 +34,7 @@ with grouped_expression as ( {{ group_by_column }} as col_{{ loop.index }}, {% endfor -%} {% endif %} - {{ expression }} as expression + {{ dbt_expectations.truth_expression(expression) }} from {{ model }} {%- if row_condition %} where @@ -54,4 +62,5 @@ validation_errors as ( select count(*) from validation_errors -{% endmacro -%} + +{% endmacro -%} \ No newline at end of file diff --git a/macros/schema_tests/aggregate_functions/expect_column_max_to_be_between.sql b/macros/schema_tests/aggregate_functions/expect_column_max_to_be_between.sql index 2b0af6f..a40004c 100644 --- a/macros/schema_tests/aggregate_functions/expect_column_max_to_be_between.sql +++ b/macros/schema_tests/aggregate_functions/expect_column_max_to_be_between.sql @@ -2,7 +2,8 @@ min_value=None, max_value=None, group_by=None, - 
row_condition=None + row_condition=None, + strictly=False ) %} {% set expression %} max({{ column_name }}) @@ -12,6 +13,7 @@ max({{ column_name }}) min_value=min_value, max_value=max_value, group_by_columns=group_by, - row_condition=row_condition + row_condition=row_condition, + strictly=strictly ) }} {% endmacro %} diff --git a/macros/schema_tests/aggregate_functions/expect_column_mean_to_be_between.sql b/macros/schema_tests/aggregate_functions/expect_column_mean_to_be_between.sql index b8d9a6b..d2fec95 100644 --- a/macros/schema_tests/aggregate_functions/expect_column_mean_to_be_between.sql +++ b/macros/schema_tests/aggregate_functions/expect_column_mean_to_be_between.sql @@ -2,7 +2,8 @@ min_value=None, max_value=None, group_by=None, - row_condition=None + row_condition=None, + strictly=False ) %} {% set expression %} avg({{ column_name }}) @@ -12,6 +13,7 @@ avg({{ column_name }}) min_value=min_value, max_value=max_value, group_by_columns=group_by, - row_condition=row_condition + row_condition=row_condition, + strictly=strictly ) }} {% endmacro %} diff --git a/macros/schema_tests/aggregate_functions/expect_column_median_to_be_between.sql b/macros/schema_tests/aggregate_functions/expect_column_median_to_be_between.sql index ac175fe..3e9d9e0 100644 --- a/macros/schema_tests/aggregate_functions/expect_column_median_to_be_between.sql +++ b/macros/schema_tests/aggregate_functions/expect_column_median_to_be_between.sql @@ -2,7 +2,8 @@ min_value=None, max_value=None, group_by=None, - row_condition=None + row_condition=None, + strictly=False ) %} {% set expression %} @@ -13,6 +14,7 @@ min_value=min_value, max_value=max_value, group_by_columns=group_by, - row_condition=row_condition + row_condition=row_condition, + strictly=strictly ) }} {% endmacro %} diff --git a/macros/schema_tests/aggregate_functions/expect_column_min_to_be_between.sql b/macros/schema_tests/aggregate_functions/expect_column_min_to_be_between.sql index e857458..078bf7f 100644 --- 
a/macros/schema_tests/aggregate_functions/expect_column_min_to_be_between.sql +++ b/macros/schema_tests/aggregate_functions/expect_column_min_to_be_between.sql @@ -2,7 +2,8 @@ min_value=None, max_value=None, group_by=None, - row_condition=None + row_condition=None, + strictly=False ) %} {% set expression %} min({{ column_name }}) @@ -12,7 +13,8 @@ min({{ column_name }}) min_value=min_value, max_value=max_value, group_by_columns=group_by, - row_condition=row_condition + row_condition=row_condition, + strictly=strictly ) }} {% endmacro %} diff --git a/macros/schema_tests/aggregate_functions/expect_column_most_common_value_to_be_in_set.sql b/macros/schema_tests/aggregate_functions/expect_column_most_common_value_to_be_in_set.sql index 2bf74be..28488dd 100644 --- a/macros/schema_tests/aggregate_functions/expect_column_most_common_value_to_be_in_set.sql +++ b/macros/schema_tests/aggregate_functions/expect_column_most_common_value_to_be_in_set.sql @@ -30,7 +30,12 @@ with value_counts as ( {% if row_condition %} where {{ row_condition }} {% endif %} - group by 1 + + group by {% if quote_values -%} + {{ column_name }} + {%- else -%} + cast({{ column_name }} as {{ data_type }}) + {%- endif %} ), value_counts_ranked as ( @@ -86,4 +91,4 @@ validation_errors as ( select count(*) as validation_errors from validation_errors -{% endmacro %} +{% endmacro %} \ No newline at end of file diff --git a/macros/schema_tests/aggregate_functions/expect_column_proportion_of_unique_values_to_be_between.sql b/macros/schema_tests/aggregate_functions/expect_column_proportion_of_unique_values_to_be_between.sql index 3f79f34..8b4e40d 100644 --- a/macros/schema_tests/aggregate_functions/expect_column_proportion_of_unique_values_to_be_between.sql +++ b/macros/schema_tests/aggregate_functions/expect_column_proportion_of_unique_values_to_be_between.sql @@ -2,7 +2,8 @@ min_value=None, max_value=None, group_by=None, - row_condition=None + row_condition=None, + strictly=False ) %} {% set expression %} 
count(distinct {{ column_name }})/count({{ column_name }}) @@ -12,7 +13,8 @@ count(distinct {{ column_name }})/count({{ column_name }}) min_value=min_value, max_value=max_value, group_by_columns=group_by, - row_condition=row_condition + row_condition=row_condition, + strictly=strictly ) }} {% endmacro %} diff --git a/macros/schema_tests/aggregate_functions/expect_column_quantile_values_to_be_between.sql b/macros/schema_tests/aggregate_functions/expect_column_quantile_values_to_be_between.sql index 576a4b8..83958c5 100644 --- a/macros/schema_tests/aggregate_functions/expect_column_quantile_values_to_be_between.sql +++ b/macros/schema_tests/aggregate_functions/expect_column_quantile_values_to_be_between.sql @@ -3,7 +3,8 @@ min_value=None, max_value=None, group_by=None, - row_condition=None + row_condition=None, + strictly=False ) %} {% set expression %} @@ -14,6 +15,7 @@ min_value=min_value, max_value=max_value, group_by_columns=group_by, - row_condition=row_condition + row_condition=row_condition, + strictly=strictly ) }} {% endmacro %} diff --git a/macros/schema_tests/aggregate_functions/expect_column_stdev_to_be_between.sql b/macros/schema_tests/aggregate_functions/expect_column_stdev_to_be_between.sql index b084f50..10646a9 100644 --- a/macros/schema_tests/aggregate_functions/expect_column_stdev_to_be_between.sql +++ b/macros/schema_tests/aggregate_functions/expect_column_stdev_to_be_between.sql @@ -2,21 +2,26 @@ min_value=None, max_value=None, group_by=None, - row_condition=None + row_condition=None, + strictly=False ) -%} - {{ adapter.dispatch('test_expect_column_stdev_to_be_between', packages = dbt_expectations._get_namespaces()) (model, column_name, + {{ adapter.dispatch('test_expect_column_stdev_to_be_between', packages = dbt_expectations._get_namespaces()) ( + model, column_name, min_value, max_value, group_by, - row_condition + row_condition, + strictly ) }} {%- endmacro %} -{% macro default__test_expect_column_stdev_to_be_between(model, column_name, +{% 
macro default__test_expect_column_stdev_to_be_between( + model, column_name, min_value, max_value, group_by, - row_condition + row_condition, + strictly ) %} {% set expression %} @@ -27,6 +32,7 @@ stddev({{ column_name }}) min_value=min_value, max_value=max_value, group_by_columns=group_by, - row_condition=row_condition + row_condition=row_condition, + strictly=strictly ) }} {% endmacro %} diff --git a/macros/schema_tests/aggregate_functions/expect_column_sum_to_be_between.sql b/macros/schema_tests/aggregate_functions/expect_column_sum_to_be_between.sql index b5ad91a..9e91348 100644 --- a/macros/schema_tests/aggregate_functions/expect_column_sum_to_be_between.sql +++ b/macros/schema_tests/aggregate_functions/expect_column_sum_to_be_between.sql @@ -2,7 +2,8 @@ min_value=None, max_value=None, group_by=None, - row_condition=None + row_condition=None, + strictly=False ) %} {% set expression %} sum({{ column_name }}) @@ -12,6 +13,7 @@ sum({{ column_name }}) min_value=min_value, max_value=max_value, group_by_columns=group_by, - row_condition=row_condition + row_condition=row_condition, + strictly=strictly ) }} {% endmacro %} diff --git a/macros/schema_tests/aggregate_functions/expect_column_unique_value_count_to_be_between.sql b/macros/schema_tests/aggregate_functions/expect_column_unique_value_count_to_be_between.sql index 60058b5..87d3511 100644 --- a/macros/schema_tests/aggregate_functions/expect_column_unique_value_count_to_be_between.sql +++ b/macros/schema_tests/aggregate_functions/expect_column_unique_value_count_to_be_between.sql @@ -2,7 +2,8 @@ min_value=None, max_value=None, group_by=None, - row_condition=None + row_condition=None, + strictly=False ) %} {% set expression %} count(distinct {{ column_name }}) @@ -12,6 +13,7 @@ count(distinct {{ column_name }}) min_value=min_value, max_value=max_value, group_by_columns=group_by, - row_condition=row_condition + row_condition=row_condition, + strictly=strictly ) }} {% endmacro %} diff --git 
a/macros/schema_tests/column_values_basic/expect_column_values_to_be_between.sql b/macros/schema_tests/column_values_basic/expect_column_values_to_be_between.sql index afecbf4..71d8f7c 100644 --- a/macros/schema_tests/column_values_basic/expect_column_values_to_be_between.sql +++ b/macros/schema_tests/column_values_basic/expect_column_values_to_be_between.sql @@ -1,7 +1,8 @@ {% macro test_expect_column_values_to_be_between(model, column_name, min_value=None, max_value=None, - row_condition=None + row_condition=None, + strictly=False ) %} {% set expression %} @@ -13,7 +14,8 @@ min_value=min_value, max_value=max_value, group_by_columns=None, - row_condition=row_condition + row_condition=row_condition, + strictly=strictly ) }} diff --git a/macros/schema_tests/string_matching/expect_column_value_lengths_to_be_between.sql b/macros/schema_tests/string_matching/expect_column_value_lengths_to_be_between.sql index 4a89912..93e0426 100644 --- a/macros/schema_tests/string_matching/expect_column_value_lengths_to_be_between.sql +++ b/macros/schema_tests/string_matching/expect_column_value_lengths_to_be_between.sql @@ -1,7 +1,8 @@ {% macro test_expect_column_value_lengths_to_be_between(model, column_name, min_value=None, max_value=None, - row_condition=None + row_condition=None, + strictly=False ) %} {% set expression %} {{ dbt_utils.length(column_name) }} @@ -12,7 +13,8 @@ min_value=min_value, max_value=max_value, group_by_columns=None, - row_condition=row_condition + row_condition=row_condition, + strictly=strictly ) }} {% endmacro %} diff --git a/macros/schema_tests/table_shape/expect_grouped_row_values_to_have_recent_data.sql b/macros/schema_tests/table_shape/expect_grouped_row_values_to_have_recent_data.sql index 8580ce2..6d27832 100644 --- a/macros/schema_tests/table_shape/expect_grouped_row_values_to_have_recent_data.sql +++ b/macros/schema_tests/table_shape/expect_grouped_row_values_to_have_recent_data.sql @@ -1,4 +1,10 @@ {% macro 
test_expect_grouped_row_values_to_have_recent_data(model, group_by, timestamp_column, datepart, interval) %} + + {{ adapter.dispatch('test_expect_grouped_row_values_to_have_recent_data', packages = dbt_expectations._get_namespaces()) (model, group_by, timestamp_column, datepart, interval) }} + +{% endmacro %} + +{% macro default__test_expect_grouped_row_values_to_have_recent_data(model, group_by, timestamp_column, datepart, interval) %} with latest_grouped_timestamps as ( select diff --git a/macros/schema_tests/table_shape/expect_table_row_count_to_be_between.sql b/macros/schema_tests/table_shape/expect_table_row_count_to_be_between.sql index 17a95d2..8768e3b 100644 --- a/macros/schema_tests/table_shape/expect_table_row_count_to_be_between.sql +++ b/macros/schema_tests/table_shape/expect_table_row_count_to_be_between.sql @@ -2,7 +2,8 @@ min_value=None, max_value=None, group_by=None, - row_condition=None + row_condition=None, + strictly=False ) -%} {% set expression %} count(*) @@ -12,6 +13,7 @@ count(*) min_value=min_value, max_value=max_value, group_by_columns=group_by, - row_condition=row_condition + row_condition=row_condition, + strictly=strictly ) }} {%- endmacro -%} diff --git a/macros/schema_tests/table_shape/expect_table_row_count_to_equal.sql b/macros/schema_tests/table_shape/expect_table_row_count_to_equal.sql index 27e8634..34e82f4 100644 --- a/macros/schema_tests/table_shape/expect_table_row_count_to_equal.sql +++ b/macros/schema_tests/table_shape/expect_table_row_count_to_equal.sql @@ -3,6 +3,21 @@ group_by=None, row_condition=None ) -%} + {{ adapter.dispatch('test_expect_table_row_count_to_equal', + packages=dbt_expectations._get_namespaces()) (model, + value, + group_by, + row_condition + ) }} +{% endmacro %} + + + +{%- macro default__test_expect_table_row_count_to_equal(model, + value, + group_by, + row_condition + ) -%} {% set expression %} count(*) = {{ value }} {% endset %} diff --git a/regen_docs.sh b/regen_docs.sh new file mode 100644 index 
0000000..d2c3ba3 --- /dev/null +++ b/regen_docs.sh @@ -0,0 +1,11 @@ +git checkout -B docs-regen +cd integration_tests +dbt docs generate +mv -f target/*.json ../docs +mv -f target/*.html ../docs +git add . +git commit -am"updating docs site" +git push --set-upstream origin docs-regen +# Switch back to the previously checked-out branch: git refuses to delete the branch that is currently checked out. +git checkout - +git branch -D docs-regen