Skip to content

Commit

Permalink
Implement review
Browse files Browse the repository at this point in the history
  • Loading branch information
judahrand committed Mar 8, 2022
1 parent e85023d commit ecaa396
Show file tree
Hide file tree
Showing 8 changed files with 59 additions and 54 deletions.
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -706,15 +706,15 @@ This macro returns the sql required to build a date spine. The spine will includ
}}
```

#### dedupe ([source](macros/sql/dedupe.sql))
This macro returns the sql required to remove deduplicate rows from a model or source.
#### deduplicate ([source](macros/sql/deduplicate.sql))
This macro returns the sql required to remove duplicate rows from a model or source.

**Usage:**

```
{{ dbt_utils.dedupe(
source('my_source', 'my_table'),
"user_id, cast(timestamp as day)",
{{ dbt_utils.deduplicate(
relation=source('my_source', 'my_table'),
group_by="user_id, cast(timestamp as day)",
order_by="timestamp desc"
)
}}
Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion integration_tests/models/sql/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -146,4 +146,4 @@ models:
- name: test_deduplicate
tests:
- dbt_utils.equality:
compare_model: ref('data_dedupe_expected')
compare_model: ref('data_deduplicate_expected')
7 changes: 0 additions & 7 deletions integration_tests/models/sql/test_dedupe.sql

This file was deleted.

7 changes: 7 additions & 0 deletions integration_tests/models/sql/test_deduplicate.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{#
-- Integration-test model: runs dbt_utils.deduplicate over the
-- `data_deduplicate` relation, grouping on user_id and ordering each group
-- by version (descending), then selects every column of the result.
#}
with deduped as (

    {{
        dbt_utils.deduplicate(
            ref('data_deduplicate'),
            group_by='user_id',
            order_by='version desc'
        ) | indent
    }}

)

select *
from deduped
41 changes: 0 additions & 41 deletions macros/sql/dedupe.sql

This file was deleted.

46 changes: 46 additions & 0 deletions macros/sql/deduplicate.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
{#
-- Public entry point for deduplication.
--
-- Arguments:
--   relation: the relation to read rows from.
--   group_by: SQL expression list identifying a group of duplicates.
--   order_by: optional SQL ordering used to pick the row kept per group.
--
-- Looks up the adapter-specific `deduplicate` implementation in the
-- `dbt_utils` namespace via adapter.dispatch and returns whatever it renders.
#}
{%- macro deduplicate(relation, group_by, order_by=none) -%}
    {%- set deduplicate_impl = adapter.dispatch('deduplicate', 'dbt_utils') -%}
    {{ return(deduplicate_impl(relation, group_by, order_by=order_by)) }}
{% endmacro %}

{#
-- Default implementation of `deduplicate`.
--
-- Arguments:
--   relation: the relation to read rows from.
--   group_by: SQL expression list used as the window's PARTITION BY; rows
--             sharing these values are treated as duplicates of each other.
--   order_by: optional SQL ordering inside each partition. The first row
--             under this ordering is the one that is kept. When omitted, the
--             row kept per partition is arbitrary.
--
-- Renders a query that numbers the rows of every partition with
-- row_number() and keeps only the row numbered 1. The outer select lists
-- the relation's columns through dbt_utils.star, so the helper `rn` column
-- is not part of the output.
#}
{%- macro default__deduplicate(relation, group_by, order_by=none) -%}

    {#-
    -- Some engines reject row_number() without an ORDER BY in the window
    -- (SQL Server documents it as required). When the caller gives no
    -- ordering, order by the partition expressions themselves: they are
    -- constant within a partition, so the choice of row stays arbitrary,
    -- but the generated SQL is valid on those engines too.
    -#}
    {%- set window_order = order_by if order_by is not none else group_by -%}

    select
        {{ dbt_utils.star(relation, relation_alias='deduped') | indent }}
    from (
        select
            _inner.*,
            row_number() over (
                partition by {{ group_by }}
                order by {{ window_order }}
            ) as rn
        from {{ relation }} as _inner
    ) as deduped
    where deduped.rn = 1

{%- endmacro -%}

{#
-- BigQuery implementation of `deduplicate`.
--
-- It is more performant to deduplicate using `array_agg` with a limit
-- clause in BigQuery:
-- https://github.com/dbt-labs/dbt-utils/issues/335#issuecomment-788157572
--
-- Each group of rows (per `group_by`) is aggregated into an array capped at
-- one element; that element is exposed as the struct column `deduped`, and
-- the outer select lists the relation's columns from it via dbt_utils.star.
-- When `order_by` is given it is applied inside the aggregate and so decides
-- which row of the group is kept.
#}
{%- macro bigquery__deduplicate(relation, group_by, order_by=none) -%}

    {#- Optional ORDER BY fragment placed inside array_agg; empty when no ordering was requested. -#}
    {%- set ordering = ('order by ' ~ order_by) if order_by is not none else '' -%}

    select
        {{ dbt_utils.star(relation, relation_alias='deduped') | indent }}
    from (
        select
            array_agg(original {{ ordering }} limit 1)[offset(0)] as deduped
        from {{ relation }} as original
        group by {{ group_by }}
    )

{%- endmacro -%}

0 comments on commit ecaa396

Please sign in to comment.