From 325082814520ed3bc66f5be4abeff42c9d313485 Mon Sep 17 00:00:00 2001 From: Claire Carroll Date: Thu, 23 Apr 2020 11:59:01 -0400 Subject: [PATCH] Handle hard deletes in snapshots --- .../macros/materializations/snapshot/snapshot.sql | 6 +++--- .../macros/materializations/snapshot/strategies.sql | 11 +++++++++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/core/dbt/include/global_project/macros/materializations/snapshot/snapshot.sql b/core/dbt/include/global_project/macros/materializations/snapshot/snapshot.sql index a0166931f34..d64b36bae9b 100644 --- a/core/dbt/include/global_project/macros/materializations/snapshot/snapshot.sql +++ b/core/dbt/include/global_project/macros/materializations/snapshot/snapshot.sql @@ -110,10 +110,10 @@ select 'update' as dbt_change_type, snapshotted_data.dbt_scd_id, - source_data.dbt_valid_from as dbt_valid_to + coalesce(source_data.dbt_valid_from, {{ snapshot_get_time() }}) as dbt_valid_to - from source_data - join snapshotted_data on snapshotted_data.dbt_unique_key = source_data.dbt_unique_key + from snapshotted_data + left join source_data on snapshotted_data.dbt_unique_key = source_data.dbt_unique_key where snapshotted_data.dbt_valid_to is null and ( {{ strategy.row_changed }} diff --git a/core/dbt/include/global_project/macros/materializations/snapshot/strategies.sql b/core/dbt/include/global_project/macros/materializations/snapshot/strategies.sql index 108d3f01709..2ddfbde216d 100644 --- a/core/dbt/include/global_project/macros/materializations/snapshot/strategies.sql +++ b/core/dbt/include/global_project/macros/materializations/snapshot/strategies.sql @@ -68,7 +68,10 @@ {% set updated_at = config['updated_at'] %} {% set row_changed_expr -%} - ({{ snapshotted_rel }}.{{ updated_at }} < {{ current_rel }}.{{ updated_at }}) + ({{ snapshotted_rel }}.{{ updated_at }} < {{ current_rel }}.{{ updated_at }} + -- this indicates that the source record was hard-deleted + or {{ current_rel }}.{{ primary_key }} is null + ) {%- endset %} {% set scd_id_expr = snapshot_hash_arguments([primary_key, updated_at]) %} @@ -148,12 +151,16 @@ {%- if column_added -%} TRUE {%- else -%} - {%- for col in check_cols -%} + -- this indicates that the source record was hard-deleted + {{ current_rel }}.{{ primary_key }} is null) or + {% for col in check_cols -%} {{ snapshotted_rel }}.{{ col }} != {{ current_rel }}.{{ col }} or ({{ snapshotted_rel }}.{{ col }} is null) != ({{ current_rel }}.{{ col }} is null) {%- if not loop.last %} or {% endif -%} {%- endfor -%} + + {%- endif -%} ) {%- endset %}