From b93fa4974ebc24e8a2c0725696dec8739aecb8a6 Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Fri, 19 Apr 2024 16:56:11 +1200
Subject: [PATCH 01/59] Add macro for new hash-based comparison strategy

---
 .vscode/settings.json                         |  21 +++
 dbt_project.yml                               |   2 +
 integration_tests/dbt_project.yml             |   5 +
 .../unit_test_model_a.sql                     |   1 +
 .../unit_test_model_b.sql                     |   1 +
 .../unit_compare_queries.sql                  |   8 ++
 .../unit_reworked_compare.sql                 |   9 ++
 .../models/unit_test_wrappers/unit_tests.yml  | 124 ++++++++++++++++++
 macros/get_comparison_bounds.sql              |  22 ++++
 macros/reworked_compare.sql                   | 112 ++++++++++++++++
 10 files changed, 305 insertions(+)
 create mode 100644 .vscode/settings.json
 create mode 100644 integration_tests/models/unit_test_placeholder_models/unit_test_model_a.sql
 create mode 100644 integration_tests/models/unit_test_placeholder_models/unit_test_model_b.sql
 create mode 100644 integration_tests/models/unit_test_wrappers/unit_compare_queries.sql
 create mode 100644 integration_tests/models/unit_test_wrappers/unit_reworked_compare.sql
 create mode 100644 integration_tests/models/unit_test_wrappers/unit_tests.yml
 create mode 100644 macros/get_comparison_bounds.sql
 create mode 100644 macros/reworked_compare.sql

diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 00000000..437dcba6
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,21 @@
+{    
+    "yaml.schemas": {
+        "https://raw.githubusercontent.com/dbt-labs/dbt-jsonschema/main/schemas/latest/dbt_yml_files-latest.json": [
+            "/**/*.yml",
+            "!profiles.yml",
+            "!dbt_project.yml",
+            "!packages.yml",
+            "!selectors.yml",
+            "!profile_template.yml"
+        ],
+        "https://raw.githubusercontent.com/dbt-labs/dbt-jsonschema/main/schemas/latest/dbt_project-latest.json": [
+            "dbt_project.yml"
+        ],
+        "https://raw.githubusercontent.com/dbt-labs/dbt-jsonschema/main/schemas/latest/selectors-latest.json": [
+            "selectors.yml"
+        ],
+        "https://raw.githubusercontent.com/dbt-labs/dbt-jsonschema/main/schemas/latest/packages-latest.json": [
+            "packages.yml"
+        ]
+    }
+}
\ No newline at end of file
diff --git a/dbt_project.yml b/dbt_project.yml
index 987e03d1..e6fb7460 100644
--- a/dbt_project.yml
+++ b/dbt_project.yml
@@ -4,6 +4,8 @@ config-version: 2
 
 require-dbt-version: [">=1.2.0", "<2.0.0"]
 
+profile: joel_ska  # NOTE(review): looks like a developer-specific profile name — confirm it is meant to be committed with the package
+
 target-path: "target"
 clean-targets: ["target", "dbt_packages"]
 macro-paths: ["macros"]
diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml
index 07120e4c..13646b9e 100644
--- a/integration_tests/dbt_project.yml
+++ b/integration_tests/dbt_project.yml
@@ -17,3 +17,8 @@ clean-targets:         # directories to be removed by `dbt clean`
 
 seeds:
   +quote_columns: false
+
+vars:
+  compare_queries_summarize: true  # default passed as `summarize` by unit_compare_queries; unit tests override it per case
+  reworked_compare__columns: []  # default column list read by the reworked_compare wrapper model; unit tests override it per case
+  reworked_compare__event_time:  # empty (null) by default, so reworked_compare skips its event_time filtering unless a test sets a column name
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_placeholder_models/unit_test_model_a.sql b/integration_tests/models/unit_test_placeholder_models/unit_test_model_a.sql
new file mode 100644
index 00000000..55a6c71e
--- /dev/null
+++ b/integration_tests/models/unit_test_placeholder_models/unit_test_model_a.sql
@@ -0,0 +1 @@
+select 1 as id, 'xyz' as col1, 'tuv' as col2, 123 as col3, {{ dbt.current_timestamp() }} as created_at {# placeholder row only: the unit tests feed this model their own "given" rows; dbt.current_timestamp() is used instead of getdate() so the model builds on any adapter #}
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_placeholder_models/unit_test_model_b.sql b/integration_tests/models/unit_test_placeholder_models/unit_test_model_b.sql
new file mode 100644
index 00000000..55a6c71e
--- /dev/null
+++ b/integration_tests/models/unit_test_placeholder_models/unit_test_model_b.sql
@@ -0,0 +1 @@
+select 1 as id, 'xyz' as col1, 'tuv' as col2, 123 as col3, {{ dbt.current_timestamp() }} as created_at {# placeholder row only: the unit tests feed this model their own "given" rows; dbt.current_timestamp() is used instead of getdate() so the model builds on any adapter #}
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_wrappers/unit_compare_queries.sql b/integration_tests/models/unit_test_wrappers/unit_compare_queries.sql
new file mode 100644
index 00000000..c589ee53
--- /dev/null
+++ b/integration_tests/models/unit_test_wrappers/unit_compare_queries.sql
@@ -0,0 +1,8 @@
+{# Model wrapper around audit_helper.compare_queries so the macro can be unit tested; `summarize` is read from the compare_queries_summarize var. #}
+{{ 
+    audit_helper.compare_queries(
+        "select * from " ~ ref('unit_test_model_a'),
+        "select * from " ~ ref('unit_test_model_b'),
+        summarize = var('compare_queries_summarize')
+    ) 
+}}
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.sql b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.sql
new file mode 100644
index 00000000..157826a8
--- /dev/null
+++ b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.sql
@@ -0,0 +1,9 @@
+{{ 
+    audit_helper.reworked_compare(
+        ref('unit_test_model_a'),
+        ref('unit_test_model_b'),
+        primary_key='id',
+        columns=var('reworked_compare__columns'),
+        event_time=var('reworked_compare__event_time')
+    ) 
+}}{# Model wrapper so unit tests can exercise audit_helper.reworked_compare; columns and event_time are read from vars so each test can override them. #}
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_wrappers/unit_tests.yml b/integration_tests/models/unit_test_wrappers/unit_tests.yml
new file mode 100644
index 00000000..b067950d
--- /dev/null
+++ b/integration_tests/models/unit_test_wrappers/unit_tests.yml
@@ -0,0 +1,124 @@
+version: 2
+
+unit_tests:
+  - name: identical_records_compare_queries  # identical inputs with summarize=true: expects one summary row flagged in_a and in_b
+    given:
+      - input: ref('unit_test_model_a')
+        rows:
+          - { "id": 1, "col1": "abc", "col2": "def" }
+          - { "id": 2, "col1": "hij", "col2": "klm" }
+          - { "id": 3, "col1": "nop", "col2": "qrs" }
+      - input: ref('unit_test_model_b')
+        rows:
+          - { "id": 1, "col1": "abc", "col2": "def" }
+          - { "id": 2, "col1": "hij", "col2": "klm" }
+          - { "id": 3, "col1": "nop", "col2": "qrs" }
+        
+    expect:
+      rows:
+        - {"in_a": true, "in_b": true}
+
+    model: unit_compare_queries
+    description: The world's most basic unit test. 
+    overrides:
+      vars:
+        compare_queries_summarize: true
+
+  - name: identical_records_compare_queries_no_summarize  # identical inputs with summarize=false: expects no rows at all
+    given:
+      - input: ref('unit_test_model_a')
+        rows:
+          - { "id": 1, "col1": "abc", "col2": "def" }
+          - { "id": 2, "col1": "hij", "col2": "klm" }
+          - { "id": 3, "col1": "nop", "col2": "qrs" }
+      - input: ref('unit_test_model_b')
+        rows:
+          - { "id": 1, "col1": "abc", "col2": "def" }
+          - { "id": 2, "col1": "hij", "col2": "klm" }
+          - { "id": 3, "col1": "nop", "col2": "qrs" }
+        
+    expect:
+      rows: []
+
+    model: unit_compare_queries
+    description: The world's second most basic unit test.
+    overrides:
+      vars:
+        compare_queries_summarize: false
+
+  - name: reworked_compare_identical_tables  # identical inputs, no event_time filter: every id is 'identical' and num_in_status is 3
+    model: unit_reworked_compare
+    overrides:
+      vars:
+        reworked_compare__columns: ['id', 'col1', 'col2']
+        reworked_compare__event_time:
+    given:
+      - input: ref('unit_test_model_a')
+        rows:
+          - { "id": 1, "col1": "abc", "col2": "def" }
+          - { "id": 2, "col1": "hij", "col2": "klm" }
+          - { "id": 3, "col1": "nop", "col2": "qrs" }
+      - input: ref('unit_test_model_b')
+        rows:
+          - { "id": 1, "col1": "abc", "col2": "def" }
+          - { "id": 2, "col1": "hij", "col2": "klm" }
+          - { "id": 3, "col1": "nop", "col2": "qrs" }
+        
+    expect:
+      rows:
+        - {"status": 'identical', 'id': 1, num_in_status: 3}
+        - {"status": 'identical', 'id': 2, num_in_status: 3}
+        - {"status": 'identical', 'id': 3, num_in_status: 3}
+
+  - name: reworked_compare_identical_tables_event_time_filter  # id 1 (2024-01-01, only in a) falls before the stubbed window, so only ids 2 and 3 are compared
+    model: unit_reworked_compare
+    overrides:
+      vars:
+        reworked_compare__columns: ['id', 'col1', 'col2', 'created_at']
+        reworked_compare__event_time: 'created_at'
+      macros: # get_comparison_bounds is stubbed with fixed bounds for this test
+        audit_helper.get_comparison_bounds:
+          "min_event_time": "2024-01-02"
+          "max_event_time": "2024-01-03"
+      
+    given:
+      - input: ref('unit_test_model_a')
+        rows:
+          - { "id": 1, "col1": "abc", "col2": "def", "created_at": '2024-01-01' }
+          - { "id": 2, "col1": "hij", "col2": "klm", "created_at": '2024-01-02' }
+          - { "id": 3, "col1": "nop", "col2": "qrs", "created_at": '2024-01-03' }
+      - input: ref('unit_test_model_b')
+        rows:
+          - { "id": 2, "col1": "hij", "col2": "klm", "created_at": '2024-01-02' }
+          - { "id": 3, "col1": "nop", "col2": "qrs", "created_at": '2024-01-03' }
+        
+    expect:
+      rows:
+        - {"status": 'identical', 'id': 2, num_in_status: 2}
+        - {"status": 'identical', 'id': 3, num_in_status: 2}
+
+  - name: reworked_compare_all_statuses  # one id per status: 1 identical, 2 modified (one row from each side), 3 only in a (removed), 4 only in b (added)
+    model: unit_reworked_compare
+    overrides:
+      vars:
+        reworked_compare__columns: ['id', 'col1', 'col2']
+        reworked_compare__event_time:
+    given:
+      - input: ref('unit_test_model_a')
+        rows:
+          - { "id": 1, "col1": "abc", "col2": "def" }
+          - { "id": 2, "col1": "hij", "col2": "klm" }
+          - { "id": 3, "col1": "nop", "col2": "qrs" }
+      - input: ref('unit_test_model_b')
+        rows:
+          - { "id": 1, "col1": "abc", "col2": "def" }
+          - { "id": 2, "col1": "changed", "col2": "values" }
+          - { "id": 4, "col1": "nop", "col2": "qrs" }
+        
+    expect:
+      rows:
+        - {"status": 'identical', 'id': 1, num_in_status: 1}
+        - {"status": 'modified', 'id': 2, num_in_status: 1}
+        - {"status": 'modified', 'id': 2, num_in_status: 1}
+        - {"status": 'removed', 'id': 3, num_in_status: 1}
+        - {"status": 'added', 'id': 4, num_in_status: 1}
diff --git a/macros/get_comparison_bounds.sql b/macros/get_comparison_bounds.sql
new file mode 100644
index 00000000..07903c19
--- /dev/null
+++ b/macros/get_comparison_bounds.sql
@@ -0,0 +1,22 @@
+{% macro get_comparison_bounds(a_relation, b_relation, event_time) %}{# Returns a dict with min_event_time / max_event_time: the later of the two relations' minimum event_time and the earlier of their maximums, i.e. the range both relations cover. #}
+    {% set min_max_queries %}
+        with min_maxes as ({# one row of min/max event_time per relation #}
+            select min({{ event_time }}) as min_event_time, max({{ event_time }}) as max_event_time
+            from {{ a_relation }}
+            union all 
+            select min({{ event_time }}) as min_event_time, max({{ event_time }}) as max_event_time
+            from {{ b_relation }}
+        )
+        select max(min_event_time) as "min_event_time", min(max_event_time) as "max_event_time"
+        from min_maxes
+    {% endset %}
+
+    {% set query_response = dbt_utils.get_query_results_as_dict(min_max_queries) %}{# runs the query above; presumably a dict of column name -> list of values — see the dbt_utils docs #}
+    
+    {% set min_max_event_time_results = {} %}
+    {% for k in query_response.keys() %}
+        {% do min_max_event_time_results.update({k: query_response[k][0]}) %}
+    {% endfor %}
+    
+    {% do return(min_max_event_time_results) %}
+{% endmacro %}
\ No newline at end of file
diff --git a/macros/reworked_compare.sql b/macros/reworked_compare.sql
new file mode 100644
index 00000000..8d5c476e
--- /dev/null
+++ b/macros/reworked_compare.sql
@@ -0,0 +1,112 @@
+{% macro reworked_compare(a_relation, b_relation, primary_key=[], columns=[], event_time=None, sample_limit=20) %}
+
+    {% set joined_cols = columns | join(", ") %}
+
+    {% if event_time %}
+        {% set min_max_event_time_results = audit_helper.get_comparison_bounds(a_relation, b_relation, event_time) %}
+        {% set min_event_time = min_max_event_time_results["min_event_time"] %}
+        {% set max_event_time = min_max_event_time_results["max_event_time"] %}
+    {% endif %}
+
+    with a as (
+        select 
+            *,
+            hash({{ joined_cols }}) as dbt_compare_row_hash
+        from {{ a_relation }}
+        {% if min_event_time and max_event_time %}
+            where {{ event_time }} >= '{{ min_event_time }}'
+            and {{ event_time }} <= '{{ max_event_time }}'
+        {% endif %}
+    ),
+
+    b as (
+        select 
+            *,
+            hash({{ joined_cols }}) as dbt_compare_row_hash
+        from {{ b_relation }}
+        {% if min_event_time and max_event_time %}
+            where {{ event_time }} >= '{{ min_event_time }}'
+            and {{ event_time }} <= '{{ max_event_time }}'
+        {% endif %}
+    ),
+
+    a_intersect_b as (
+
+        select * from a
+        where a.dbt_compare_row_hash in (select b.dbt_compare_row_hash from b)
+
+    ),
+
+    a_except_b as (
+
+        select * from a
+        where a.dbt_compare_row_hash not in (select b.dbt_compare_row_hash from b)
+
+    ),
+
+    b_except_a as (
+
+        select * from b
+        where b.dbt_compare_row_hash not in (select a.dbt_compare_row_hash from a)
+
+    ),
+
+    all_records as (
+
+        select {# rows found on both sides #}
+            *,
+            true as in_a,
+            true as in_b
+        from a_intersect_b
+
+        union all
+
+        select {# rows found only in a #}
+            *,
+            true as in_a,
+            false as in_b
+        from a_except_b
+
+        union all
+
+        select {# rows found only in b #}
+            *,
+            false as in_a,
+            true as in_b
+        from b_except_a
+
+    ),
+
+
+    classified as (
+        
+        select {# labels every row of all_records with a status #}
+            *,
+            case 
+                when in_a and in_b then 'identical'
+                when {{ dbt.bool_or('in_a') }} over (partition by {{ primary_key }}) 
+                    and {{ dbt.bool_or('in_b') }} over (partition by {{ primary_key }})
+                then 'modified' {# not an exact match, but this primary key shows up on both sides #}
+                when in_a then 'removed' {# primary key present only in a #}
+                when in_b then 'added' {# primary key present only in b #}
+            end as status
+        from all_records
+        {# no ORDER BY here: ordering inside a CTE is not carried through to the result, and the final select applies its own ORDER BY #}
+
+    ),
+
+    final as (
+        select 
+            *,
+            count(distinct {{ primary_key }}) over (partition by status) as num_in_status, {# NOTE(review): COUNT(DISTINCT ...) as a window function is not accepted by every warehouse (PostgreSQL rejects it) — confirm against the adapters this must support #}
+            dense_rank() over (partition by status order by {{ primary_key }}) as sample_number {# numbers the distinct primary keys within each status; used below to cap the sample #}
+        from classified
+    )
+
+    select * from final
+    {% if sample_limit %}{# a falsy sample_limit (None or 0) skips the filter and returns every row #}
+        where sample_number <= {{ sample_limit }}
+    {% endif %}
+    order by status, sample_number
+
+{% endmacro %}
\ No newline at end of file

From d3dfa77f277c51507d83ffd5601d5a77a5c9706d Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Fri, 19 Apr 2024 20:52:02 +1200
Subject: [PATCH 02/59] split out SF-focused version of macro

---
 macros/get_comparison_bounds.sql |   4 +-
 macros/reworked_compare.sql      | 145 ++++++++++++++++++++++---------
 2 files changed, 106 insertions(+), 43 deletions(-)

diff --git a/macros/get_comparison_bounds.sql b/macros/get_comparison_bounds.sql
index 07903c19..e5f50f63 100644
--- a/macros/get_comparison_bounds.sql
+++ b/macros/get_comparison_bounds.sql
@@ -7,7 +7,7 @@
             select min({{ event_time }}) as min_event_time, max({{ event_time }}) as max_event_time
             from {{ b_relation }}
         )
-        select max(min_event_time) as "min_event_time", min(max_event_time) as "max_event_time"
+        select max(min_event_time) as min_event_time, min(max_event_time) as max_event_time
         from min_maxes
     {% endset %}
 
@@ -15,7 +15,7 @@
     
     {% set min_max_event_time_results = {} %}
     {% for k in query_response.keys() %}
-        {% do min_max_event_time_results.update({k: query_response[k][0]}) %}
+        {% do min_max_event_time_results.update({k | lower: query_response[k][0]}) %}
     {% endfor %}
     
     {% do return(min_max_event_time_results) %}
diff --git a/macros/reworked_compare.sql b/macros/reworked_compare.sql
index 8d5c476e..6f595659 100644
--- a/macros/reworked_compare.sql
+++ b/macros/reworked_compare.sql
@@ -6,50 +6,18 @@
         {% set min_max_event_time_results = audit_helper.get_comparison_bounds(a_relation, b_relation, event_time) %}
         {% set min_event_time = min_max_event_time_results["min_event_time"] %}
         {% set max_event_time = min_max_event_time_results["max_event_time"] %}
+        {% set event_time_props = {
+            "event_time": event_time,
+            "min_event_time": min_event_time,
+            "max_event_time": max_event_time
+        } %}
     {% endif %}
 
-    with a as (
-        select 
-            *,
-            hash({{ joined_cols }}) as dbt_compare_row_hash
-        from {{ a_relation }}
-        {% if min_event_time and max_event_time %}
-            where {{ event_time }} >= '{{ min_event_time }}'
-            and {{ event_time }} <= '{{ max_event_time }}'
-        {% endif %}
-    ),
-
-    b as (
-        select 
-            *,
-            hash({{ joined_cols }}) as dbt_compare_row_hash
-        from {{ b_relation }}
-        {% if min_event_time and max_event_time %}
-            where {{ event_time }} >= '{{ min_event_time }}'
-            and {{ event_time }} <= '{{ max_event_time }}'
-        {% endif %}
-    ),
-
-    a_intersect_b as (
+    with 
 
-        select * from a
-        where a.dbt_compare_row_hash in (select b.dbt_compare_row_hash from b)
-
-    ),
-
-    a_except_b as (
-
-        select * from a
-        where a.dbt_compare_row_hash not in (select b.dbt_compare_row_hash from b)
-
-    ),
-
-    b_except_a as (
-
-        select * from b
-        where b.dbt_compare_row_hash not in (select a.dbt_compare_row_hash from a)
-
-    ),
+    {{ generate_set_results(a_relation, b_relation, columns, event_time_props)}}
+    
+    ,
 
     all_records as (
 
@@ -109,4 +77,99 @@
     {% endif %}
     order by status, sample_number
 
+{% endmacro %}
+
+{% macro generate_set_results(a_relation, b_relation, columns, event_time_props=None) %}
+  {{ return(adapter.dispatch('generate_set_results', 'audit_helper')(a_relation, b_relation, columns, event_time_props)) }}
+{% endmacro %}
+
+{% macro default__generate_set_results(a_relation, b_relation, columns, event_time_props) %}
+{% set columns_joined = columns | join(", ") %}
+
+    a as (
+        select {{ columns_joined }}
+        from {{ a_relation }}
+        {% if event_time_props %}
+            where {{ event_time }} >= '{{ event_time_props["min_event_time"] }}'
+            and {{ event_time }} <= '{{ event_time_props["max_event_time"] }}'
+        {% endif %}
+    ),
+
+    b as (
+        select {{ columns_joined }}
+        from {{ b_relation }}
+        {% if event_time_props %}
+            where {{ event_time }} >= '{{ event_time_props["min_event_time"] }}'
+            and {{ event_time }} <= '{{ event_time_props["max_event_time"] }}'
+        {% endif %}
+    ),
+
+    a_intersect_b as (
+
+        select * from a {# rows present in both a and b, compared across every selected column #}
+        {{ dbt.intersect() }}
+        select * from b
+
+    ),
+
+    a_except_b as (
+
+        select * from a {# rows of a with no exact match in b #}
+        {{ dbt.except() }}
+        select * from b
+
+    ),
+
+    b_except_a as (
+
+        select * from b {# rows of b with no exact match in a #}
+        {{ dbt.except() }}
+        select * from a
+
+    )
+{% endmacro %}
+
+{% macro snowflake__generate_set_results(a_relation, b_relation, columns, event_time_props) %}
+    a as (
+        select 
+            *,
+            hash({{ joined_cols }}) as dbt_compare_row_hash
+        from {{ a_relation }}
+        {% if event_time_props %}
+            where {{ event_time }} >= '{{ event_time_props["min_event_time"] }}'
+            and {{ event_time }} <= '{{ event_time_props["max_event_time"] }}'
+        {% endif %}
+    ),
+
+    b as (
+        select 
+            *,
+            hash({{ joined_cols }}) as dbt_compare_row_hash
+        from {{ b_relation }}
+        {% if event_time_props %}
+            where {{ event_time }} >= '{{ event_time_props["min_event_time"] }}'
+            and {{ event_time }} <= '{{ event_time_props["max_event_time"] }}'
+        {% endif %}
+    ),
+
+    a_intersect_b as (
+
+        select * from a {# rows of a whose row hash also occurs in b #}
+        where a.dbt_compare_row_hash in (select b.dbt_compare_row_hash from b)
+
+    ),
+
+    a_except_b as (
+
+        select * from a {# rows of a whose row hash does not occur in b #}
+        where a.dbt_compare_row_hash not in (select b.dbt_compare_row_hash from b) {# NOTE(review): NOT IN matches nothing if the subquery yields a NULL; Snowflake documents HASH as never returning NULL, so this should be safe — confirm #}
+
+    ),
+
+    b_except_a as (
+
+        select * from b {# rows of b whose row hash does not occur in a #}
+        where b.dbt_compare_row_hash not in (select a.dbt_compare_row_hash from a)
+
+    )
 {% endmacro %}
\ No newline at end of file

From 1a6c35fbe34732173fd3080ca3c6a437673c8150 Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Tue, 23 Apr 2024 17:44:35 +1200
Subject: [PATCH 03/59] Fix change to complex object

---
 macros/reworked_compare.sql | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/macros/reworked_compare.sql b/macros/reworked_compare.sql
index 6f595659..977546fb 100644
--- a/macros/reworked_compare.sql
+++ b/macros/reworked_compare.sql
@@ -15,7 +15,7 @@
 
     with 
 
-    {{ generate_set_results(a_relation, b_relation, columns, event_time_props)}}
+    {{ audit_helper.generate_set_results(a_relation, b_relation, columns, event_time_props)}}
     
     ,
 
@@ -84,23 +84,23 @@
 {% endmacro %}
 
 {% macro default__generate_set_results(a_relation, b_relation, columns, event_time_props) %}
-{% set columns_joined = columns | join(", ") %}
+    {% set joined_cols = columns | join(", ") %}
 
     a as (
-        select {{ columns_joined }}
+        select {{ joined_cols }}
         from {{ a_relation }}
         {% if event_time_props %}
-            where {{ event_time }} >= '{{ event_time_props["min_event_time"] }}'
-            and {{ event_time }} <= '{{ event_time_props["max_event_time"] }}'
+            where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
+            and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
         {% endif %}
     ),
 
     b as (
-        select {{ columns_joined }}
+        select {{ joined_cols }}
         from {{ b_relation }}
         {% if event_time_props %}
-            where {{ event_time }} >= '{{ event_time_props["min_event_time"] }}'
-            and {{ event_time }} <= '{{ event_time_props["max_event_time"] }}'
+            where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
+            and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
         {% endif %}
     ),
 
@@ -130,14 +130,15 @@
 {% endmacro %}
 
 {% macro snowflake__generate_set_results(a_relation, b_relation, columns, event_time_props) %}
+    {% set joined_cols = columns | join(", ") %}
     a as (
         select 
             *,
             hash({{ joined_cols }}) as dbt_compare_row_hash
         from {{ a_relation }}
         {% if event_time_props %}
-            where {{ event_time }} >= '{{ event_time_props["min_event_time"] }}'
-            and {{ event_time }} <= '{{ event_time_props["max_event_time"] }}'
+            where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
+            and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
         {% endif %}
     ),
 
@@ -147,8 +148,8 @@
             hash({{ joined_cols }}) as dbt_compare_row_hash
         from {{ b_relation }}
         {% if event_time_props %}
-            where {{ event_time }} >= '{{ event_time_props["min_event_time"] }}'
-            and {{ event_time }} <= '{{ event_time_props["max_event_time"] }}'
+            where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
+            and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
         {% endif %}
     ),
 

From 4a7f1201306b596a1c574a1c66f0878c114b66b5 Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Tue, 23 Apr 2024 17:51:01 +1200
Subject: [PATCH 04/59] Fix overuse of star

---
 .../unit_test_model_b_more_cols.sql           |  1 +
 ...worked_compare_column_details_mismatch.sql |  9 +++++++
 .../models/unit_test_wrappers/unit_tests.yml  | 26 +++++++++++++++++++
 macros/reworked_compare.sql                   |  4 +--
 package-lock.yml                              |  4 +++
 5 files changed, 42 insertions(+), 2 deletions(-)
 create mode 100644 integration_tests/models/unit_test_placeholder_models/unit_test_model_b_more_cols.sql
 create mode 100644 integration_tests/models/unit_test_wrappers/unit_reworked_compare_column_details_mismatch.sql
 create mode 100644 package-lock.yml

diff --git a/integration_tests/models/unit_test_placeholder_models/unit_test_model_b_more_cols.sql b/integration_tests/models/unit_test_placeholder_models/unit_test_model_b_more_cols.sql
new file mode 100644
index 00000000..f0105eae
--- /dev/null
+++ b/integration_tests/models/unit_test_placeholder_models/unit_test_model_b_more_cols.sql
@@ -0,0 +1 @@
+select 1 as id, 'xyz' as col1, 'tuv' as col2, 123 as col3, {{ dbt.current_timestamp() }} as created_at, 'pineapple' as pizza {# placeholder row only: the unit tests feed this model their own "given" rows; dbt.current_timestamp() is used instead of getdate() so the model builds on any adapter #}
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare_column_details_mismatch.sql b/integration_tests/models/unit_test_wrappers/unit_reworked_compare_column_details_mismatch.sql
new file mode 100644
index 00000000..e1752942
--- /dev/null
+++ b/integration_tests/models/unit_test_wrappers/unit_reworked_compare_column_details_mismatch.sql
@@ -0,0 +1,9 @@
+{{ 
+    audit_helper.reworked_compare(
+        ref('unit_test_model_a'),
+        ref('unit_test_model_b_more_cols'),
+        primary_key='id',
+        columns=var('reworked_compare__columns'),
+        event_time=var('reworked_compare__event_time')
+    ) 
+}}{# Model wrapper that compares unit_test_model_a with unit_test_model_b_more_cols (which has an extra column) through audit_helper.reworked_compare; columns and event_time are read from vars so each test can override them. #}
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_wrappers/unit_tests.yml b/integration_tests/models/unit_test_wrappers/unit_tests.yml
index b067950d..d5013c72 100644
--- a/integration_tests/models/unit_test_wrappers/unit_tests.yml
+++ b/integration_tests/models/unit_test_wrappers/unit_tests.yml
@@ -122,3 +122,29 @@ unit_tests:
         - {"status": 'modified', 'id': 2, num_in_status: 1}
         - {"status": 'removed', 'id': 3, num_in_status: 1}
         - {"status": 'added', 'id': 4, num_in_status: 1}
+
+  - name: reworked_compare_all_statuses_different_columns  # same status coverage as reworked_compare_all_statuses, but only id and col1 are compared and b comes from the model with an extra column
+    model: unit_reworked_compare_column_details_mismatch
+    overrides:
+      vars:
+        reworked_compare__columns: ['id', 'col1']
+        reworked_compare__event_time:
+    given:
+      - input: ref('unit_test_model_a')
+        rows:
+          - { "id": 1, "col1": "abc", "col2": "def" }
+          - { "id": 2, "col1": "hij", "col2": "klm" }
+          - { "id": 3, "col1": "nop", "col2": "qrs" }
+      - input: ref('unit_test_model_b_more_cols')
+        rows:
+          - { "id": 1, "col1": "abc" }
+          - { "id": 2, "col1": "changed" }
+          - { "id": 4, "col1": "nop" }
+        
+    expect:
+      rows:
+        - {"status": 'identical', 'id': 1, num_in_status: 1}
+        - {"status": 'modified', 'id': 2, num_in_status: 1}
+        - {"status": 'modified', 'id': 2, num_in_status: 1}
+        - {"status": 'removed', 'id': 3, num_in_status: 1}
+        - {"status": 'added', 'id': 4, num_in_status: 1}
diff --git a/macros/reworked_compare.sql b/macros/reworked_compare.sql
index 977546fb..a0cb217a 100644
--- a/macros/reworked_compare.sql
+++ b/macros/reworked_compare.sql
@@ -133,7 +133,7 @@
     {% set joined_cols = columns | join(", ") %}
     a as (
         select 
-            *,
+            {{ joined_cols }},
             hash({{ joined_cols }}) as dbt_compare_row_hash
         from {{ a_relation }}
         {% if event_time_props %}
@@ -144,7 +144,7 @@
 
     b as (
         select 
-            *,
+            {{ joined_cols }},
             hash({{ joined_cols }}) as dbt_compare_row_hash
         from {{ b_relation }}
         {% if event_time_props %}
diff --git a/package-lock.yml b/package-lock.yml
new file mode 100644
index 00000000..32c6ccc0
--- /dev/null
+++ b/package-lock.yml
@@ -0,0 +1,4 @@
+packages:
+  - package: dbt-labs/dbt_utils
+    version: 1.1.1
+sha1_hash: 106400343ad0c92a7417f5156d0d6c3893bb2429

From 87afbe92e7e85a16fed1ea4a14746e8daef1aa8e Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Thu, 25 Apr 2024 16:41:30 +1200
Subject: [PATCH 05/59] switch from compare rels to compare queries

---
 .../unit_reworked_compare.sql                 |  4 ++--
 ...worked_compare_column_details_mismatch.sql |  4 ++--
 macros/reworked_compare.sql                   | 22 +++++++++----------
 3 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.sql b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.sql
index 157826a8..b2947a17 100644
--- a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.sql
+++ b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.sql
@@ -1,7 +1,7 @@
 {{ 
     audit_helper.reworked_compare(
-        ref('unit_test_model_a'),
-        ref('unit_test_model_b'),
+        "select * from " ~ ref('unit_test_model_a'),
+        "select * from " ~ ref('unit_test_model_b'),
         primary_key='id',
         columns=var('reworked_compare__columns'),
         event_time=var('reworked_compare__event_time')
diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare_column_details_mismatch.sql b/integration_tests/models/unit_test_wrappers/unit_reworked_compare_column_details_mismatch.sql
index e1752942..0e4a17f1 100644
--- a/integration_tests/models/unit_test_wrappers/unit_reworked_compare_column_details_mismatch.sql
+++ b/integration_tests/models/unit_test_wrappers/unit_reworked_compare_column_details_mismatch.sql
@@ -1,7 +1,7 @@
 {{ 
     audit_helper.reworked_compare(
-        ref('unit_test_model_a'),
-        ref('unit_test_model_b_more_cols'),
+        "select * from " ~ ref('unit_test_model_a'),
+        "select * from " ~ ref('unit_test_model_b_more_cols'),
         primary_key='id',
         columns=var('reworked_compare__columns'),
         event_time=var('reworked_compare__event_time')
diff --git a/macros/reworked_compare.sql b/macros/reworked_compare.sql
index a0cb217a..febdd4d4 100644
--- a/macros/reworked_compare.sql
+++ b/macros/reworked_compare.sql
@@ -1,9 +1,9 @@
-{% macro reworked_compare(a_relation, b_relation, primary_key=[], columns=[], event_time=None, sample_limit=20) %}
+{% macro reworked_compare(a_query, b_query, primary_key=[], columns=[], event_time=None, sample_limit=20) %}
 
     {% set joined_cols = columns | join(", ") %}
 
     {% if event_time %}
-        {% set min_max_event_time_results = audit_helper.get_comparison_bounds(a_relation, b_relation, event_time) %}
+        {% set min_max_event_time_results = audit_helper.get_comparison_bounds(a_query, b_query, event_time) %}
         {% set min_event_time = min_max_event_time_results["min_event_time"] %}
         {% set max_event_time = min_max_event_time_results["max_event_time"] %}
         {% set event_time_props = {
@@ -15,7 +15,7 @@
 
     with 
 
-    {{ audit_helper.generate_set_results(a_relation, b_relation, columns, event_time_props)}}
+    {{ audit_helper.generate_set_results(a_query, b_query, columns, event_time_props)}}
     
     ,
 
@@ -79,16 +79,16 @@
 
 {% endmacro %}
 
-{% macro generate_set_results(a_relation, b_relation, columns, event_time_props=None) %}
-  {{ return(adapter.dispatch('generate_set_results', 'audit_helper')(a_relation, b_relation, columns, event_time_props)) }}
+{% macro generate_set_results(a_query, b_query, columns, event_time_props=None) %}
+  {{ return(adapter.dispatch('generate_set_results', 'audit_helper')(a_query, b_query, columns, event_time_props)) }}
 {% endmacro %}
 
-{% macro default__generate_set_results(a_relation, b_relation, columns, event_time_props) %}
+{% macro default__generate_set_results(a_query, b_query, columns, event_time_props) %}
     {% set joined_cols = columns | join(", ") %}
 
     a as (
         select {{ joined_cols }}
-        from {{ a_relation }}
+        from {{ a_query }}
         {% if event_time_props %}
             where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
             and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
@@ -97,7 +97,7 @@
 
     b as (
         select {{ joined_cols }}
-        from {{ b_relation }}
+        from {{ b_query }}
         {% if event_time_props %}
             where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
             and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
@@ -129,13 +129,13 @@
     )
 {% endmacro %}
 
-{% macro snowflake__generate_set_results(a_relation, b_relation, columns, event_time_props) %}
+{% macro snowflake__generate_set_results(a_query, b_query, columns, event_time_props) %}
     {% set joined_cols = columns | join(", ") %}
     a as (
         select 
             {{ joined_cols }},
             hash({{ joined_cols }}) as dbt_compare_row_hash
-        from {{ a_relation }}
+        from {{ a_query }}
         {% if event_time_props %}
             where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
             and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
@@ -146,7 +146,7 @@
         select 
             {{ joined_cols }},
             hash({{ joined_cols }}) as dbt_compare_row_hash
-        from {{ b_relation }}
+        from {{ b_query }}
         {% if event_time_props %}
             where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
             and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'

From e754ab74a3be7d2c11bd1267c0ea3898182587ed Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Thu, 25 Apr 2024 16:48:27 +1200
Subject: [PATCH 06/59] provide wrapping parens

---
 macros/reworked_compare.sql | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/macros/reworked_compare.sql b/macros/reworked_compare.sql
index febdd4d4..7e075598 100644
--- a/macros/reworked_compare.sql
+++ b/macros/reworked_compare.sql
@@ -88,7 +88,7 @@
 
     a as (
         select {{ joined_cols }}
-        from {{ a_query }}
+        from ( {{-  a_query  -}} )
         {% if event_time_props %}
             where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
             and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
@@ -97,7 +97,7 @@
 
     b as (
         select {{ joined_cols }}
-        from {{ b_query }}
+        from ( {{-  b_query  -}} )
         {% if event_time_props %}
             where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
             and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
@@ -135,7 +135,7 @@
         select 
             {{ joined_cols }},
             hash({{ joined_cols }}) as dbt_compare_row_hash
-        from {{ a_query }}
+        from ( {{-  a_query  -}} )
         {% if event_time_props %}
             where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
             and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
@@ -146,7 +146,7 @@
         select 
             {{ joined_cols }},
             hash({{ joined_cols }}) as dbt_compare_row_hash
-        from {{ b_query }}
+        from ( {{-  b_query  -}} )
         {% if event_time_props %}
             where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
             and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'

From e6be75cf86225fba9f9050fc64770d0398d8fe9c Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Thu, 25 Apr 2024 17:23:41 +1200
Subject: [PATCH 07/59] switch to array of columns for PK

---
 macros/reworked_compare.sql | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/macros/reworked_compare.sql b/macros/reworked_compare.sql
index 7e075598..eb87a5e7 100644
--- a/macros/reworked_compare.sql
+++ b/macros/reworked_compare.sql
@@ -1,6 +1,7 @@
-{% macro reworked_compare(a_query, b_query, primary_key=[], columns=[], event_time=None, sample_limit=20) %}
-
+{% macro reworked_compare(a_query, b_query, primary_key_columns=[], columns=[], event_time=None, sample_limit=20) %}
+    
     {% set joined_cols = columns | join(", ") %}
+    {% set primary_key = primary_key_columns | join(", ") }
 
     {% if event_time %}
         {% set min_max_event_time_results = audit_helper.get_comparison_bounds(a_query, b_query, event_time) %}
@@ -59,7 +60,7 @@
                 when in_b then 'added'
             end as status
         from all_records
-        order by {{ primary_key ~ ", " if primary_key is not none }} in_a desc, in_b desc
+        order by {{ primary_key }}, in_a desc, in_b desc
 
     ),
 

From 60fe426357d82ed2e21beb0d4d90bbd2427466d6 Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Thu, 25 Apr 2024 17:24:03 +1200
Subject: [PATCH 08/59] split unit tests into own files, change unit tests to
 array pk

---
 .../unit_compare_queries.yml                  | 47 +++++++++++
 .../unit_reworked_compare.sql                 |  2 +-
 ...it_tests.yml => unit_reworked_compare.yml} | 83 ++-----------------
 ...worked_compare_column_details_mismatch.sql |  2 +-
 ...worked_compare_column_details_mismatch.yml | 26 ++++++
 5 files changed, 81 insertions(+), 79 deletions(-)
 create mode 100644 integration_tests/models/unit_test_wrappers/unit_compare_queries.yml
 rename integration_tests/models/unit_test_wrappers/{unit_tests.yml => unit_reworked_compare.yml} (55%)
 create mode 100644 integration_tests/models/unit_test_wrappers/unit_reworked_compare_column_details_mismatch.yml

diff --git a/integration_tests/models/unit_test_wrappers/unit_compare_queries.yml b/integration_tests/models/unit_test_wrappers/unit_compare_queries.yml
new file mode 100644
index 00000000..0308e509
--- /dev/null
+++ b/integration_tests/models/unit_test_wrappers/unit_compare_queries.yml
@@ -0,0 +1,47 @@
+unit_tests:
+  - name: identical_records_compare_queries
+    description: "The world's most basic unit test."
+    model: unit_compare_queries
+    # Summarised output is requested through the var override below.
+    overrides:
+      vars:
+        compare_queries_summarize: true
+    # Both mocked inputs hold the same three rows.
+    given:
+      - input: ref('unit_test_model_a')
+        rows:
+          - {id: 1, col1: 'abc', col2: 'def'}
+          - {id: 2, col1: 'hij', col2: 'klm'}
+          - {id: 3, col1: 'nop', col2: 'qrs'}
+      - input: ref('unit_test_model_b')
+        rows:
+          - {id: 1, col1: 'abc', col2: 'def'}
+          - {id: 2, col1: 'hij', col2: 'klm'}
+          - {id: 3, col1: 'nop', col2: 'qrs'}
+    # Expected result: a single row flagged as present on both sides.
+    expect:
+      rows:
+        - {in_a: true, in_b: true}
+
+  - name: identical_records_compare_queries_no_summarize
+    description: "The world's second most basic unit test."
+    model: unit_compare_queries
+    # Summarisation is switched off through the var override below.
+    overrides:
+      vars:
+        compare_queries_summarize: false
+    # Both mocked inputs hold the same three rows.
+    given:
+      - input: ref('unit_test_model_a')
+        rows:
+          - {id: 1, col1: 'abc', col2: 'def'}
+          - {id: 2, col1: 'hij', col2: 'klm'}
+          - {id: 3, col1: 'nop', col2: 'qrs'}
+      - input: ref('unit_test_model_b')
+        rows:
+          - {id: 1, col1: 'abc', col2: 'def'}
+          - {id: 2, col1: 'hij', col2: 'klm'}
+          - {id: 3, col1: 'nop', col2: 'qrs'}
+    # Expected result: no rows at all.
+    expect:
+      rows: []
diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.sql b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.sql
index b2947a17..fdcebaf7 100644
--- a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.sql
+++ b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.sql
@@ -2,7 +2,7 @@
     audit_helper.reworked_compare(
         "select * from " ~ ref('unit_test_model_a'),
         "select * from " ~ ref('unit_test_model_b'),
-        primary_key='id',
+        primary_key=['id'],
         columns=var('reworked_compare__columns'),
         event_time=var('reworked_compare__event_time')
     ) 
diff --git a/integration_tests/models/unit_test_wrappers/unit_tests.yml b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
similarity index 55%
rename from integration_tests/models/unit_test_wrappers/unit_tests.yml
rename to integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
index d5013c72..49d3b394 100644
--- a/integration_tests/models/unit_test_wrappers/unit_tests.yml
+++ b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
@@ -1,57 +1,7 @@
-version: 2
-
 unit_tests:
-  - name: identical_records_compare_queries
-    given:
-      - input: ref('unit_test_model_a')
-        rows:
-          - { "id": 1, "col1": "abc", "col2": "def" }
-          - { "id": 2, "col1": "hij", "col2": "klm" }
-          - { "id": 3, "col1": "nop", "col2": "qrs" }
-      - input: ref('unit_test_model_b')
-        rows:
-          - { "id": 1, "col1": "abc", "col2": "def" }
-          - { "id": 2, "col1": "hij", "col2": "klm" }
-          - { "id": 3, "col1": "nop", "col2": "qrs" }
-        
-    expect:
-      rows:
-        - {"in_a": true, "in_b": true}
-
-    model: unit_compare_queries
-    description: The world's most basic unit test. 
-    overrides:
-      vars:
-        compare_queries_summarize: true
-
-  - name: identical_records_compare_queries_no_summarize
-    given:
-      - input: ref('unit_test_model_a')
-        rows:
-          - { "id": 1, "col1": "abc", "col2": "def" }
-          - { "id": 2, "col1": "hij", "col2": "klm" }
-          - { "id": 3, "col1": "nop", "col2": "qrs" }
-      - input: ref('unit_test_model_b')
-        rows:
-          - { "id": 1, "col1": "abc", "col2": "def" }
-          - { "id": 2, "col1": "hij", "col2": "klm" }
-          - { "id": 3, "col1": "nop", "col2": "qrs" }
-        
-    expect:
-      rows: []
-
-    model: unit_compare_queries
-    description: The world's second most basic unit test.
-    overrides:
-      vars:
-        compare_queries_summarize: false
-
   - name: reworked_compare_identical_tables
     model: unit_reworked_compare
-    overrides:
-      vars:
-        reworked_compare__columns: ['id', 'col1', 'col2']
-        reworked_compare__event_time:
+    
     given:
       - input: ref('unit_test_model_a')
         rows:
@@ -70,6 +20,11 @@ unit_tests:
         - {"status": 'identical', 'id': 2, num_in_status: 3}
         - {"status": 'identical', 'id': 3, num_in_status: 3}
 
+    overrides:
+      vars:
+        reworked_compare__columns: ['id', 'col1', 'col2']
+        reworked_compare__event_time:
+
   - name: reworked_compare_identical_tables_event_time_filter
     model: unit_reworked_compare
     overrides:
@@ -122,29 +77,3 @@ unit_tests:
         - {"status": 'modified', 'id': 2, num_in_status: 1}
         - {"status": 'removed', 'id': 3, num_in_status: 1}
         - {"status": 'added', 'id': 4, num_in_status: 1}
-
-  - name: reworked_compare_all_statuses_different_columns
-    model: unit_reworked_compare_column_details_mismatch
-    overrides:
-      vars:
-        reworked_compare__columns: ['id', 'col1']
-        reworked_compare__event_time:
-    given:
-      - input: ref('unit_test_model_a')
-        rows:
-          - { "id": 1, "col1": "abc", "col2": "def" }
-          - { "id": 2, "col1": "hij", "col2": "klm" }
-          - { "id": 3, "col1": "nop", "col2": "qrs" }
-      - input: ref('unit_test_model_b_more_cols')
-        rows:
-          - { "id": 1, "col1": "abc" }
-          - { "id": 2, "col1": "changed" }
-          - { "id": 4, "col1": "nop" }
-        
-    expect:
-      rows:
-        - {"status": 'identical', 'id': 1, num_in_status: 1}
-        - {"status": 'modified', 'id': 2, num_in_status: 1}
-        - {"status": 'modified', 'id': 2, num_in_status: 1}
-        - {"status": 'removed', 'id': 3, num_in_status: 1}
-        - {"status": 'added', 'id': 4, num_in_status: 1}
diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare_column_details_mismatch.sql b/integration_tests/models/unit_test_wrappers/unit_reworked_compare_column_details_mismatch.sql
index 0e4a17f1..2762c083 100644
--- a/integration_tests/models/unit_test_wrappers/unit_reworked_compare_column_details_mismatch.sql
+++ b/integration_tests/models/unit_test_wrappers/unit_reworked_compare_column_details_mismatch.sql
@@ -2,7 +2,7 @@
     audit_helper.reworked_compare(
         "select * from " ~ ref('unit_test_model_a'),
         "select * from " ~ ref('unit_test_model_b_more_cols'),
-        primary_key='id',
+        primary_key=['id'],
         columns=var('reworked_compare__columns'),
         event_time=var('reworked_compare__event_time')
     ) 
diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare_column_details_mismatch.yml b/integration_tests/models/unit_test_wrappers/unit_reworked_compare_column_details_mismatch.yml
new file mode 100644
index 00000000..923d2584
--- /dev/null
+++ b/integration_tests/models/unit_test_wrappers/unit_reworked_compare_column_details_mismatch.yml
@@ -0,0 +1,26 @@
+unit_tests:
+  - name: reworked_compare_all_statuses_different_columns
+    model: unit_reworked_compare_column_details_mismatch
+    # Only id and col1 are compared (see vars below); the mocked model_b rows carry no col2.
+    given:
+      - input: ref('unit_test_model_a')
+        rows:
+          - {id: 1, col1: 'abc', col2: 'def'}
+          - {id: 2, col1: 'hij', col2: 'klm'}
+          - {id: 3, col1: 'nop', col2: 'qrs'}
+      - input: ref('unit_test_model_b_more_cols')
+        rows:
+          - {id: 1, col1: 'abc'}
+          - {id: 2, col1: 'changed'}
+          - {id: 4, col1: 'nop'}
+    expect:
+      rows:
+        - {status: 'identical', id: 1, num_in_status: 1}
+        - {status: 'modified', id: 2, num_in_status: 1}
+        - {status: 'modified', id: 2, num_in_status: 1}
+        - {status: 'removed', id: 3, num_in_status: 1}
+        - {status: 'added', id: 4, num_in_status: 1}
+    overrides:
+      vars:
+        reworked_compare__columns: ['id', 'col1']
+        reworked_compare__event_time: null

From 886728dae1e3fd5d59aff33b66257706f3d1c914 Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Thu, 25 Apr 2024 18:41:50 +1200
Subject: [PATCH 09/59] tidy up get_comp_bounds

---
 macros/get_comparison_bounds.sql |  6 +++---
 macros/reworked_compare.sql      | 11 ++---------
 2 files changed, 5 insertions(+), 12 deletions(-)

diff --git a/macros/get_comparison_bounds.sql b/macros/get_comparison_bounds.sql
index e5f50f63..85f8fcdc 100644
--- a/macros/get_comparison_bounds.sql
+++ b/macros/get_comparison_bounds.sql
@@ -13,10 +13,10 @@
 
     {% set query_response = dbt_utils.get_query_results_as_dict(min_max_queries) %}
     
-    {% set min_max_event_time_results = {} %}
+    {% set event_time_props = {"event_time": event_time} %}
     {% for k in query_response.keys() %}
-        {% do min_max_event_time_results.update({k | lower: query_response[k][0]}) %}
+        {% do event_time_props.update({k | lower: query_response[k][0]}) %}
     {% endfor %}
     
-    {% do return(min_max_event_time_results) %}
+    {% do return(event_time_props) %}
 {% endmacro %}
\ No newline at end of file
diff --git a/macros/reworked_compare.sql b/macros/reworked_compare.sql
index eb87a5e7..3378dbf0 100644
--- a/macros/reworked_compare.sql
+++ b/macros/reworked_compare.sql
@@ -1,17 +1,10 @@
 {% macro reworked_compare(a_query, b_query, primary_key_columns=[], columns=[], event_time=None, sample_limit=20) %}
     
     {% set joined_cols = columns | join(", ") %}
-    {% set primary_key = primary_key_columns | join(", ") }
+    {% set primary_key = primary_key_columns | join(", ") %}
 
     {% if event_time %}
-        {% set min_max_event_time_results = audit_helper.get_comparison_bounds(a_query, b_query, event_time) %}
-        {% set min_event_time = min_max_event_time_results["min_event_time"] %}
-        {% set max_event_time = min_max_event_time_results["max_event_time"] %}
-        {% set event_time_props = {
-            "event_time": event_time,
-            "min_event_time": min_event_time,
-            "max_event_time": max_event_time
-        } %}
+        {% set event_time_props = audit_helper.get_comparison_bounds(a_query, b_query, event_time) %}
     {% endif %}
 
     with 

From b53db5822e51082c077ff9726d278c2402e6383e Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Thu, 25 Apr 2024 18:42:10 +1200
Subject: [PATCH 10/59] fix arg rename

---
 .../models/unit_test_wrappers/unit_reworked_compare.sql         | 2 +-
 .../unit_reworked_compare_column_details_mismatch.sql           | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.sql b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.sql
index fdcebaf7..38960022 100644
--- a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.sql
+++ b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.sql
@@ -2,7 +2,7 @@
     audit_helper.reworked_compare(
         "select * from " ~ ref('unit_test_model_a'),
         "select * from " ~ ref('unit_test_model_b'),
-        primary_key=['id'],
+        primary_key_columns=['id'],
         columns=var('reworked_compare__columns'),
         event_time=var('reworked_compare__event_time')
     ) 
diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare_column_details_mismatch.sql b/integration_tests/models/unit_test_wrappers/unit_reworked_compare_column_details_mismatch.sql
index 2762c083..d8ed546a 100644
--- a/integration_tests/models/unit_test_wrappers/unit_reworked_compare_column_details_mismatch.sql
+++ b/integration_tests/models/unit_test_wrappers/unit_reworked_compare_column_details_mismatch.sql
@@ -2,7 +2,7 @@
     audit_helper.reworked_compare(
         "select * from " ~ ref('unit_test_model_a'),
         "select * from " ~ ref('unit_test_model_b_more_cols'),
-        primary_key=['id'],
+        primary_key_columns=['id'],
         columns=var('reworked_compare__columns'),
         event_time=var('reworked_compare__event_time')
     ) 

From 0d766d67ccf36f8de99882d09d52336378aed49e Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Thu, 25 Apr 2024 18:42:26 +1200
Subject: [PATCH 11/59] add quick_are_queries_identical and unit tests

---
 .../unit_quick_are_queries_identical.sql      |  8 +++
 .../unit_quick_are_queries_identical.yml      | 72 +++++++++++++++++++
 macros/quick_are_queries_identical.sql        | 42 +++++++++++
 3 files changed, 122 insertions(+)
 create mode 100644 integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.sql
 create mode 100644 integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.yml
 create mode 100644 macros/quick_are_queries_identical.sql

diff --git a/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.sql b/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.sql
new file mode 100644
index 00000000..72fd9e72
--- /dev/null
+++ b/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.sql
@@ -0,0 +1,8 @@
+{#- Wrapper model that lets dbt unit tests exercise audit_helper.quick_are_queries_identical.
+    Column list and event-time column come from vars; defaults keep the model compilable when a var is not set. -#}
+{{ audit_helper.quick_are_queries_identical(
+        "select * from " ~ ref('unit_test_model_a'),
+        "select * from " ~ ref('unit_test_model_b'),
+        columns=var('quick_are_queries_identical_cols', []),
+        event_time=var('quick_are_queries_identical_event_time', none)
+) }}
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.yml b/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.yml
new file mode 100644
index 00000000..3a43e843
--- /dev/null
+++ b/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.yml
@@ -0,0 +1,72 @@
+unit_tests:
+  - name: quick_are_queries_identical_identical_tables
+    model: unit_quick_are_queries_identical
+    # Runs against the wrapper model that calls the macro; both inputs below hold the same rows.
+    given:
+      - input: ref('unit_test_model_a')
+        rows:
+          - { "id": 1, "col1": "abc", "col2": "def" }
+          - { "id": 2, "col1": "hij", "col2": "klm" }
+          - { "id": 3, "col1": "nop", "col2": "qrs" }
+      - input: ref('unit_test_model_b')
+        rows:
+          - { "id": 1, "col1": "abc", "col2": "def" }
+          - { "id": 2, "col1": "hij", "col2": "klm" }
+          - { "id": 3, "col1": "nop", "col2": "qrs" }
+
+    expect:
+      rows:
+        - {"are_tables_identical": true}
+
+    overrides:
+      vars:
+        quick_are_queries_identical_cols: ['id', 'col1', 'col2']
+        quick_are_queries_identical_event_time:
+
+  - name: quick_are_queries_identical_identical_tables_event_time_filter
+    model: unit_quick_are_queries_identical  # wrapper model that calls the macro
+    overrides:
+      vars:
+        quick_are_queries_identical_cols: ['id', 'col1', 'col2', 'created_at']
+        quick_are_queries_identical_event_time: 'created_at'
+      macros: 
+        audit_helper.get_comparison_bounds:  # mocked return value; must carry every key the macro reads
+          "event_time": "created_at"
+          "min_event_time": "2024-01-02"
+          "max_event_time": "2024-01-03"
+    given:
+      - input: ref('unit_test_model_a')
+        rows:
+          - { "id": 1, "col1": "abc", "col2": "def", "created_at": '2024-01-01' }
+          - { "id": 2, "col1": "hij", "col2": "klm", "created_at": '2024-01-02' }
+          - { "id": 3, "col1": "nop", "col2": "qrs", "created_at": '2024-01-03' }
+      - input: ref('unit_test_model_b')
+        rows:
+          - { "id": 2, "col1": "hij", "col2": "klm", "created_at": '2024-01-02' }
+          - { "id": 3, "col1": "nop", "col2": "qrs", "created_at": '2024-01-03' }
+    # id 1 (2024-01-01) lies before the mocked lower bound, so the filtered inputs match.
+    expect:
+      rows:
+        - {"are_tables_identical": true}
+
+  - name: quick_are_queries_identical_differences
+    model: unit_quick_are_queries_identical
+    overrides:
+      vars:
+        quick_are_queries_identical_cols: ['id', 'col1', 'col2']
+        quick_are_queries_identical_event_time:
+    given:
+      - input: ref('unit_test_model_a')
+        rows:
+          - { "id": 1, "col1": "abc", "col2": "def" }
+          - { "id": 2, "col1": "hij", "col2": "klm" }
+          - { "id": 3, "col1": "nop", "col2": "qrs" }
+      - input: ref('unit_test_model_b')
+        rows:
+          - { "id": 1, "col1": "abc", "col2": "def" }
+          - { "id": 2, "col1": "changed", "col2": "values" }
+          - { "id": 4, "col1": "nop", "col2": "qrs" }
+    # model_b changes id 2 and has id 4 instead of id 3, so a mismatch must be reported.
+    expect:
+      rows:
+        - {"are_tables_identical": false}
\ No newline at end of file
diff --git a/macros/quick_are_queries_identical.sql b/macros/quick_are_queries_identical.sql
new file mode 100644
index 00000000..218c78bf
--- /dev/null
+++ b/macros/quick_are_queries_identical.sql
@@ -0,0 +1,42 @@
+{% macro quick_are_queries_identical(query_a, query_b, columns=[], event_time=None) %}
+    {% do return(adapter.dispatch('quick_are_queries_identical', 'audit_helper')(query_a, query_b, columns, event_time)) %} {# entry point: forwards every argument to the adapter-dispatched implementation #}
+{% endmacro %}
+
+{% macro default__quick_are_queries_identical(query_a, query_b, columns, event_time) %}
+    {# Emits one row with boolean `are_tables_identical`: true when hash_agg over `columns` gives the
+       same value for query_a and query_b. When `event_time` is set, both sides are filtered to the
+       bounds returned by audit_helper.get_comparison_bounds (inclusive on both ends). #}
+    {% set joined_cols = columns | join(", ") %}
+    {% if event_time %}
+        {% set event_time_props = audit_helper.get_comparison_bounds(query_a, query_b, event_time) %}
+    {% endif %}
+    select count(hash_result) = 1 as are_tables_identical
+    from (
+        select hash_agg({{ joined_cols }}) as hash_result
+        from ({{ query_a }})
+        {% if event_time_props %}
+            where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
+            and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
+        {% endif %}
+        {# `union` rather than `union all` on purpose: two equal hashes collapse into a single row #}
+        union
+        select hash_agg({{ joined_cols }}) as hash_result
+        from ({{ query_b }})
+        {% if event_time_props %}
+            where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
+            and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
+        {% endif %}
+    ) as hashes
+{% endmacro %}
+
+{% macro is_quick_are_queries_identical_supported() %}
+    {% do return(adapter.dispatch('is_quick_are_queries_identical_supported', 'audit_helper')()) %} {# capability check, resolved per adapter via dispatch #}
+{% endmacro %}
+
+{% macro default__is_quick_are_queries_identical_supported() %}
+    {% do return(False) %} {# fallback for adapters without their own override: reported as unsupported #}
+{% endmacro %}
+
+{% macro snowflake__is_quick_are_queries_identical_supported() %}
+    {% do return(True) %} {# Snowflake is reported as supported; presumably because the comparison relies on hash_agg - confirm #}
+{% endmacro %}
\ No newline at end of file

From c8ccf596004f7b9438f5ff65f9524097cb812a38 Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Mon, 6 May 2024 16:30:34 +1200
Subject: [PATCH 12/59] Move data tests into own directory

---
 .../compare_all_columns_concat_pk_with_summary.sql                | 0
 .../compare_all_columns_concat_pk_without_summary.sql             | 0
 .../models/{ => data_tests}/compare_all_columns_where_clause.sql  | 0
 .../models/{ => data_tests}/compare_all_columns_with_summary.sql  | 0
 .../compare_all_columns_with_summary_and_exclude.sql              | 0
 .../{ => data_tests}/compare_all_columns_without_summary.sql      | 0
 integration_tests/models/{ => data_tests}/compare_queries.sql     | 0
 .../compare_queries_concat_pk_without_summary.sql                 | 0
 .../models/{ => data_tests}/compare_queries_with_summary.sql      | 0
 .../models/{ => data_tests}/compare_queries_without_summary.sql   | 0
 .../models/{ => data_tests}/compare_relation_columns.sql          | 0
 .../compare_relations_concat_pk_without_summary.sql               | 0
 .../models/{ => data_tests}/compare_relations_with_exclude.sql    | 0
 .../models/{ => data_tests}/compare_relations_with_summary.sql    | 0
 .../models/{ => data_tests}/compare_relations_without_exclude.sql | 0
 .../models/{ => data_tests}/compare_relations_without_summary.sql | 0
 integration_tests/models/{ => data_tests}/compare_row_counts.sql  | 0
 .../models/{ => data_tests}/compare_which_columns_differ.sql      | 0
 .../compare_which_columns_differ_exclude_cols.sql                 | 0
 integration_tests/models/{ => data_tests}/schema.yml              | 0
 20 files changed, 0 insertions(+), 0 deletions(-)
 rename integration_tests/models/{ => data_tests}/compare_all_columns_concat_pk_with_summary.sql (100%)
 rename integration_tests/models/{ => data_tests}/compare_all_columns_concat_pk_without_summary.sql (100%)
 rename integration_tests/models/{ => data_tests}/compare_all_columns_where_clause.sql (100%)
 rename integration_tests/models/{ => data_tests}/compare_all_columns_with_summary.sql (100%)
 rename integration_tests/models/{ => data_tests}/compare_all_columns_with_summary_and_exclude.sql (100%)
 rename integration_tests/models/{ => data_tests}/compare_all_columns_without_summary.sql (100%)
 rename integration_tests/models/{ => data_tests}/compare_queries.sql (100%)
 rename integration_tests/models/{ => data_tests}/compare_queries_concat_pk_without_summary.sql (100%)
 rename integration_tests/models/{ => data_tests}/compare_queries_with_summary.sql (100%)
 rename integration_tests/models/{ => data_tests}/compare_queries_without_summary.sql (100%)
 rename integration_tests/models/{ => data_tests}/compare_relation_columns.sql (100%)
 rename integration_tests/models/{ => data_tests}/compare_relations_concat_pk_without_summary.sql (100%)
 rename integration_tests/models/{ => data_tests}/compare_relations_with_exclude.sql (100%)
 rename integration_tests/models/{ => data_tests}/compare_relations_with_summary.sql (100%)
 rename integration_tests/models/{ => data_tests}/compare_relations_without_exclude.sql (100%)
 rename integration_tests/models/{ => data_tests}/compare_relations_without_summary.sql (100%)
 rename integration_tests/models/{ => data_tests}/compare_row_counts.sql (100%)
 rename integration_tests/models/{ => data_tests}/compare_which_columns_differ.sql (100%)
 rename integration_tests/models/{ => data_tests}/compare_which_columns_differ_exclude_cols.sql (100%)
 rename integration_tests/models/{ => data_tests}/schema.yml (100%)

diff --git a/integration_tests/models/compare_all_columns_concat_pk_with_summary.sql b/integration_tests/models/data_tests/compare_all_columns_concat_pk_with_summary.sql
similarity index 100%
rename from integration_tests/models/compare_all_columns_concat_pk_with_summary.sql
rename to integration_tests/models/data_tests/compare_all_columns_concat_pk_with_summary.sql
diff --git a/integration_tests/models/compare_all_columns_concat_pk_without_summary.sql b/integration_tests/models/data_tests/compare_all_columns_concat_pk_without_summary.sql
similarity index 100%
rename from integration_tests/models/compare_all_columns_concat_pk_without_summary.sql
rename to integration_tests/models/data_tests/compare_all_columns_concat_pk_without_summary.sql
diff --git a/integration_tests/models/compare_all_columns_where_clause.sql b/integration_tests/models/data_tests/compare_all_columns_where_clause.sql
similarity index 100%
rename from integration_tests/models/compare_all_columns_where_clause.sql
rename to integration_tests/models/data_tests/compare_all_columns_where_clause.sql
diff --git a/integration_tests/models/compare_all_columns_with_summary.sql b/integration_tests/models/data_tests/compare_all_columns_with_summary.sql
similarity index 100%
rename from integration_tests/models/compare_all_columns_with_summary.sql
rename to integration_tests/models/data_tests/compare_all_columns_with_summary.sql
diff --git a/integration_tests/models/compare_all_columns_with_summary_and_exclude.sql b/integration_tests/models/data_tests/compare_all_columns_with_summary_and_exclude.sql
similarity index 100%
rename from integration_tests/models/compare_all_columns_with_summary_and_exclude.sql
rename to integration_tests/models/data_tests/compare_all_columns_with_summary_and_exclude.sql
diff --git a/integration_tests/models/compare_all_columns_without_summary.sql b/integration_tests/models/data_tests/compare_all_columns_without_summary.sql
similarity index 100%
rename from integration_tests/models/compare_all_columns_without_summary.sql
rename to integration_tests/models/data_tests/compare_all_columns_without_summary.sql
diff --git a/integration_tests/models/compare_queries.sql b/integration_tests/models/data_tests/compare_queries.sql
similarity index 100%
rename from integration_tests/models/compare_queries.sql
rename to integration_tests/models/data_tests/compare_queries.sql
diff --git a/integration_tests/models/compare_queries_concat_pk_without_summary.sql b/integration_tests/models/data_tests/compare_queries_concat_pk_without_summary.sql
similarity index 100%
rename from integration_tests/models/compare_queries_concat_pk_without_summary.sql
rename to integration_tests/models/data_tests/compare_queries_concat_pk_without_summary.sql
diff --git a/integration_tests/models/compare_queries_with_summary.sql b/integration_tests/models/data_tests/compare_queries_with_summary.sql
similarity index 100%
rename from integration_tests/models/compare_queries_with_summary.sql
rename to integration_tests/models/data_tests/compare_queries_with_summary.sql
diff --git a/integration_tests/models/compare_queries_without_summary.sql b/integration_tests/models/data_tests/compare_queries_without_summary.sql
similarity index 100%
rename from integration_tests/models/compare_queries_without_summary.sql
rename to integration_tests/models/data_tests/compare_queries_without_summary.sql
diff --git a/integration_tests/models/compare_relation_columns.sql b/integration_tests/models/data_tests/compare_relation_columns.sql
similarity index 100%
rename from integration_tests/models/compare_relation_columns.sql
rename to integration_tests/models/data_tests/compare_relation_columns.sql
diff --git a/integration_tests/models/compare_relations_concat_pk_without_summary.sql b/integration_tests/models/data_tests/compare_relations_concat_pk_without_summary.sql
similarity index 100%
rename from integration_tests/models/compare_relations_concat_pk_without_summary.sql
rename to integration_tests/models/data_tests/compare_relations_concat_pk_without_summary.sql
diff --git a/integration_tests/models/compare_relations_with_exclude.sql b/integration_tests/models/data_tests/compare_relations_with_exclude.sql
similarity index 100%
rename from integration_tests/models/compare_relations_with_exclude.sql
rename to integration_tests/models/data_tests/compare_relations_with_exclude.sql
diff --git a/integration_tests/models/compare_relations_with_summary.sql b/integration_tests/models/data_tests/compare_relations_with_summary.sql
similarity index 100%
rename from integration_tests/models/compare_relations_with_summary.sql
rename to integration_tests/models/data_tests/compare_relations_with_summary.sql
diff --git a/integration_tests/models/compare_relations_without_exclude.sql b/integration_tests/models/data_tests/compare_relations_without_exclude.sql
similarity index 100%
rename from integration_tests/models/compare_relations_without_exclude.sql
rename to integration_tests/models/data_tests/compare_relations_without_exclude.sql
diff --git a/integration_tests/models/compare_relations_without_summary.sql b/integration_tests/models/data_tests/compare_relations_without_summary.sql
similarity index 100%
rename from integration_tests/models/compare_relations_without_summary.sql
rename to integration_tests/models/data_tests/compare_relations_without_summary.sql
diff --git a/integration_tests/models/compare_row_counts.sql b/integration_tests/models/data_tests/compare_row_counts.sql
similarity index 100%
rename from integration_tests/models/compare_row_counts.sql
rename to integration_tests/models/data_tests/compare_row_counts.sql
diff --git a/integration_tests/models/compare_which_columns_differ.sql b/integration_tests/models/data_tests/compare_which_columns_differ.sql
similarity index 100%
rename from integration_tests/models/compare_which_columns_differ.sql
rename to integration_tests/models/data_tests/compare_which_columns_differ.sql
diff --git a/integration_tests/models/compare_which_columns_differ_exclude_cols.sql b/integration_tests/models/data_tests/compare_which_columns_differ_exclude_cols.sql
similarity index 100%
rename from integration_tests/models/compare_which_columns_differ_exclude_cols.sql
rename to integration_tests/models/data_tests/compare_which_columns_differ_exclude_cols.sql
diff --git a/integration_tests/models/schema.yml b/integration_tests/models/data_tests/schema.yml
similarity index 100%
rename from integration_tests/models/schema.yml
rename to integration_tests/models/data_tests/schema.yml

From 58751e6495118193fbb1386e8b6a97992aa41775 Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Mon, 6 May 2024 16:34:07 +1200
Subject: [PATCH 13/59] Add test for multiple PKs

---
 integration_tests/dbt_project.yml             |  4 ++-
 .../unit_test_model_a.sql                     |  2 +-
 .../unit_test_model_b.sql                     |  2 +-
 .../unit_test_model_b_more_cols.sql           |  2 +-
 .../unit_reworked_compare.sql                 |  2 +-
 .../unit_reworked_compare.yml                 | 27 +++++++++++++++++++
 6 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml
index 13646b9e..f23704fa 100644
--- a/integration_tests/dbt_project.yml
+++ b/integration_tests/dbt_project.yml
@@ -20,5 +20,7 @@ seeds:
 
 vars:
   compare_queries_summarize: true
+  reworked_compare__primary_key_columns: []
   reworked_compare__columns: []
-  reworked_compare__event_time:
\ No newline at end of file
+  reworked_compare__event_time:
+  quick_are_queries_identical_cols: []
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_placeholder_models/unit_test_model_a.sql b/integration_tests/models/unit_test_placeholder_models/unit_test_model_a.sql
index 55a6c71e..183f26ca 100644
--- a/integration_tests/models/unit_test_placeholder_models/unit_test_model_a.sql
+++ b/integration_tests/models/unit_test_placeholder_models/unit_test_model_a.sql
@@ -1 +1 @@
-select 1 as id, 'xyz' as col1, 'tuv' as col2, 123 as col3, getdate() as created_at
\ No newline at end of file
+select 1 as id, 2 as id_2, 'xyz' as col1, 'tuv' as col2, 123 as col3, getdate() as created_at
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_placeholder_models/unit_test_model_b.sql b/integration_tests/models/unit_test_placeholder_models/unit_test_model_b.sql
index 55a6c71e..183f26ca 100644
--- a/integration_tests/models/unit_test_placeholder_models/unit_test_model_b.sql
+++ b/integration_tests/models/unit_test_placeholder_models/unit_test_model_b.sql
@@ -1 +1 @@
-select 1 as id, 'xyz' as col1, 'tuv' as col2, 123 as col3, getdate() as created_at
\ No newline at end of file
+select 1 as id, 2 as id_2, 'xyz' as col1, 'tuv' as col2, 123 as col3, getdate() as created_at
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_placeholder_models/unit_test_model_b_more_cols.sql b/integration_tests/models/unit_test_placeholder_models/unit_test_model_b_more_cols.sql
index f0105eae..11476a96 100644
--- a/integration_tests/models/unit_test_placeholder_models/unit_test_model_b_more_cols.sql
+++ b/integration_tests/models/unit_test_placeholder_models/unit_test_model_b_more_cols.sql
@@ -1 +1 @@
-select 1 as id, 'xyz' as col1, 'tuv' as col2, 123 as col3, getdate() as created_at, 'pineapple' as pizza
\ No newline at end of file
+select 1 as id, 2 as id_2, 'xyz' as col1, 'tuv' as col2, 123 as col3, getdate() as created_at, 'pineapple' as pizza
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.sql b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.sql
index 38960022..37473546 100644
--- a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.sql
+++ b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.sql
@@ -2,7 +2,7 @@
     audit_helper.reworked_compare(
         "select * from " ~ ref('unit_test_model_a'),
         "select * from " ~ ref('unit_test_model_b'),
-        primary_key_columns=['id'],
+        primary_key_columns=var('reworked_compare__primary_key_columns'),
         columns=var('reworked_compare__columns'),
         event_time=var('reworked_compare__event_time')
     ) 
diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
index 49d3b394..1b3561a2 100644
--- a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
+++ b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
@@ -24,6 +24,7 @@ unit_tests:
       vars:
         reworked_compare__columns: ['id', 'col1', 'col2']
         reworked_compare__event_time:
+        reworked_compare__primary_key_columns: ['id']
 
   - name: reworked_compare_identical_tables_event_time_filter
     model: unit_reworked_compare
@@ -31,6 +32,7 @@ unit_tests:
       vars:
         reworked_compare__columns: ['id', 'col1', 'col2', 'created_at']
         reworked_compare__event_time: 'created_at'
+        reworked_compare__primary_key_columns: ['id']
       macros: 
         audit_helper.get_comparison_bounds:
           "min_event_time": "2024-01-02"
@@ -58,6 +60,7 @@ unit_tests:
       vars:
         reworked_compare__columns: ['id', 'col1', 'col2']
         reworked_compare__event_time:
+        reworked_compare__primary_key_columns: ['id']
     given:
       - input: ref('unit_test_model_a')
         rows:
@@ -77,3 +80,27 @@ unit_tests:
         - {"status": 'modified', 'id': 2, num_in_status: 1}
         - {"status": 'removed', 'id': 3, num_in_status: 1}
         - {"status": 'added', 'id': 4, num_in_status: 1}
+
+  - name: reworked_compare_all_statuses_multiple_pk_cols
+    model: unit_reworked_compare
+    overrides:
+      vars:
+        reworked_compare__columns: ['id', 'id_2', 'col1', 'col2']
+        reworked_compare__event_time:
+        reworked_compare__primary_key_columns: ['id', 'id_2']
+    given:
+      - input: ref('unit_test_model_a')
+        rows:
+          - { "id": 12, "id_2": 3, "col1": "abc", "col2": "def" }
+          - { "id": 1, "id_2": 23, "col1": "hij", "col2": "klm" }
+          - { "id": 3, "id_2": 4, "col1": "nop", "col2": "qrs" }
+      - input: ref('unit_test_model_b')
+        rows:
+          - { "id": 12, "id_2": 3, "col1": "abc", "col2": "def" }
+          - { "id": 1, "id_2": 23, "col1": "hij", "col2": "klm" }
+          - { "id": 3, "id_2": 4, "col1": "nop", "col2": "qrs" }        
+    expect:
+      rows:
+        - {"status": 'identical', 'id': 12, "id_2": 3, "num_in_status": 3}
+        - {"status": 'identical', 'id': 1, "id_2": 23, "num_in_status": 3}
+        - {"status": 'identical', 'id': 3, "id_2": 4, "num_in_status": 3}
\ No newline at end of file

From 022b91b0dc2346b5716f498daddaea6a2e98e3c0 Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Mon, 6 May 2024 16:44:14 +1200
Subject: [PATCH 14/59] fix incorrect unit test configs

---
 .../unit_test_wrappers/unit_quick_are_queries_identical.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.yml b/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.yml
index 3a43e843..7690a3fb 100644
--- a/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.yml
+++ b/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.yml
@@ -1,6 +1,6 @@
 unit_tests:
   - name: quick_are_queries_identical_identical_tables
-    model: quick_are_queries_identical
+    model: unit_quick_are_queries_identical
     
     given:
       - input: ref('unit_test_model_a')
@@ -24,7 +24,7 @@ unit_tests:
         quick_are_queries_identical_event_time:
 
   - name: quick_are_queries_identical_identical_tables_event_time_filter
-    model: quick_are_queries_identical
+    model: unit_quick_are_queries_identical
     overrides:
       vars:
         quick_are_queries_identical_cols: ['id', 'col1', 'col2', 'created_at']
@@ -50,7 +50,7 @@ unit_tests:
         - {"are_tables_identical": true}
 
   - name: quick_are_queries_identical_differences
-    model: unit_reworked_compare
+    model: unit_quick_are_queries_identical
     overrides:
       vars:
         quick_are_queries_identical_cols: ['id', 'col1', 'col2']

From bef6e1838231054f4a1f57abbbb5a996bab498ae Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Mon, 6 May 2024 16:59:50 +1200
Subject: [PATCH 15/59] make data types for id and id_2 big enough nums

---
 .../models/unit_test_placeholder_models/unit_test_model_a.sql   | 2 +-
 .../models/unit_test_placeholder_models/unit_test_model_b.sql   | 2 +-
 .../unit_test_model_b_more_cols.sql                             | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/integration_tests/models/unit_test_placeholder_models/unit_test_model_a.sql b/integration_tests/models/unit_test_placeholder_models/unit_test_model_a.sql
index 183f26ca..3c729df2 100644
--- a/integration_tests/models/unit_test_placeholder_models/unit_test_model_a.sql
+++ b/integration_tests/models/unit_test_placeholder_models/unit_test_model_a.sql
@@ -1 +1 @@
-select 1 as id, 2 as id_2, 'xyz' as col1, 'tuv' as col2, 123 as col3, getdate() as created_at
\ No newline at end of file
+select 12 as id, 22 as id_2, 'xyz' as col1, 'tuv' as col2, 123 as col3, getdate() as created_at
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_placeholder_models/unit_test_model_b.sql b/integration_tests/models/unit_test_placeholder_models/unit_test_model_b.sql
index 183f26ca..3c729df2 100644
--- a/integration_tests/models/unit_test_placeholder_models/unit_test_model_b.sql
+++ b/integration_tests/models/unit_test_placeholder_models/unit_test_model_b.sql
@@ -1 +1 @@
-select 1 as id, 2 as id_2, 'xyz' as col1, 'tuv' as col2, 123 as col3, getdate() as created_at
\ No newline at end of file
+select 12 as id, 22 as id_2, 'xyz' as col1, 'tuv' as col2, 123 as col3, getdate() as created_at
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_placeholder_models/unit_test_model_b_more_cols.sql b/integration_tests/models/unit_test_placeholder_models/unit_test_model_b_more_cols.sql
index 11476a96..b9e425b7 100644
--- a/integration_tests/models/unit_test_placeholder_models/unit_test_model_b_more_cols.sql
+++ b/integration_tests/models/unit_test_placeholder_models/unit_test_model_b_more_cols.sql
@@ -1 +1 @@
-select 1 as id, 2 as id_2, 'xyz' as col1, 'tuv' as col2, 123 as col3, getdate() as created_at, 'pineapple' as pizza
\ No newline at end of file
+select 12 as id, 22 as id_2, 'xyz' as col1, 'tuv' as col2, 123 as col3, getdate() as created_at, 'pineapple' as pizza
\ No newline at end of file

From 0f1e09ece6e4146da012d6cefe1a07febefa9b75 Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Mon, 6 May 2024 17:18:21 +1200
Subject: [PATCH 16/59] Mock event_time response

---
 .../unit_test_wrappers/unit_quick_are_queries_identical.yml      | 1 +
 .../models/unit_test_wrappers/unit_reworked_compare.yml          | 1 +
 2 files changed, 2 insertions(+)

diff --git a/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.yml b/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.yml
index 7690a3fb..32f458d5 100644
--- a/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.yml
+++ b/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.yml
@@ -33,6 +33,7 @@ unit_tests:
         audit_helper.get_comparison_bounds:
           "min_event_time": "2024-01-02"
           "max_event_time": "2024-01-03"
+          "event_time": 'created_at'
       
     given:
       - input: ref('unit_test_model_a')
diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
index 1b3561a2..91cc7f4a 100644
--- a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
+++ b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
@@ -37,6 +37,7 @@ unit_tests:
         audit_helper.get_comparison_bounds:
           "min_event_time": "2024-01-02"
           "max_event_time": "2024-01-03"
+          "event_time": 'created_at'
       
     given:
       - input: ref('unit_test_model_a')

From 33e4c507585abfe4f27227d4de4ad6c1ef5959fe Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Mon, 6 May 2024 17:30:24 +1200
Subject: [PATCH 17/59] fix hardcoded value in quick_are_qs_identical

---
 integration_tests/dbt_project.yml      | 3 ++-
 macros/quick_are_queries_identical.sql | 6 +++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml
index f23704fa..39664887 100644
--- a/integration_tests/dbt_project.yml
+++ b/integration_tests/dbt_project.yml
@@ -23,4 +23,5 @@ vars:
   reworked_compare__primary_key_columns: []
   reworked_compare__columns: []
   reworked_compare__event_time:
-  quick_are_queries_identical_cols: []
\ No newline at end of file
+  quick_are_queries_identical_cols: []
+  quick_are_queries_identical_event_time:
\ No newline at end of file
diff --git a/macros/quick_are_queries_identical.sql b/macros/quick_are_queries_identical.sql
index 218c78bf..466b4e6f 100644
--- a/macros/quick_are_queries_identical.sql
+++ b/macros/quick_are_queries_identical.sql
@@ -10,7 +10,7 @@
 
     select count(hash_result) = 1 as are_tables_identical
     from (
-        select hash_agg(joined_cols) as hash_result
+        select hash_agg({{ joined_cols }}) as hash_result
         from ({{ query_a }})
         {% if event_time_props %}
             where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
@@ -19,8 +19,8 @@
 
         union 
         
-        select hash_agg(joined_cols) as hash_result
-        from analytics_dev.dbt_jlabes.fct_dbt_invocations
+        select hash_agg({{ joined_cols }}) as hash_result
+        from ({{ query_b }})
         {% if event_time_props %}
             where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
             and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'

From 0df1b6f3ad06bfbe550b1a684ed29ccb8b9fbaf3 Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Tue, 7 May 2024 15:20:18 +1200
Subject: [PATCH 18/59] Add unit tests for null handling (still broken)

---
 .../unit_quick_are_queries_identical.yml      | 26 +++++-
 .../unit_reworked_compare.yml                 | 88 ++++++++++++++++++-
 macros/reworked_compare.sql                   | 42 +++++----
 3 files changed, 135 insertions(+), 21 deletions(-)

diff --git a/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.yml b/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.yml
index 32f458d5..0d953506 100644
--- a/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.yml
+++ b/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.yml
@@ -70,4 +70,28 @@ unit_tests:
         
     expect:
       rows:
-        - {"are_tables_identical": false}
\ No newline at end of file
+        - {"are_tables_identical": false}
+
+  - name: quick_are_queries_identical_identical_tables_with_null_pks
+    model: unit_quick_are_queries_identical
+    
+    given:
+      - input: ref('unit_test_model_a')
+        rows:
+          - { "id":, "col1": "abc", "col2": "def" }
+          - { "id":, "col1": "hij", "col2": "klm" }
+          - { "id": 3, "col1": "nop", "col2": "qrs" }
+      - input: ref('unit_test_model_b')
+        rows:
+          - { "id":, "col1": "abc", "col2": "def" }
+          - { "id":, "col1": "hij", "col2": "klm" }
+          - { "id": 3, "col1": "nop", "col2": "qrs" }
+        
+    expect:
+      rows:
+        - {"are_tables_identical": true}
+
+    overrides:
+      vars:
+        quick_are_queries_identical_cols: ['id', 'col1', 'col2']
+        quick_are_queries_identical_event_time:
diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
index 91cc7f4a..a7b111ee 100644
--- a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
+++ b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
@@ -82,7 +82,7 @@ unit_tests:
         - {"status": 'removed', 'id': 3, num_in_status: 1}
         - {"status": 'added', 'id': 4, num_in_status: 1}
 
-  - name: reworked_compare_all_statuses_multiple_pk_cols
+  - name: reworked_compare_identical_tables_multiple_pk_cols
     model: unit_reworked_compare
     overrides:
       vars:
@@ -104,4 +104,88 @@ unit_tests:
       rows:
         - {"status": 'identical', 'id': 12, "id_2": 3, "num_in_status": 3}
         - {"status": 'identical', 'id': 1, "id_2": 23, "num_in_status": 3}
-        - {"status": 'identical', 'id': 3, "id_2": 4, "num_in_status": 3}
\ No newline at end of file
+        - {"status": 'identical', 'id': 3, "id_2": 4, "num_in_status": 3}
+
+  - name: reworked_compare_identical_tables_single_null_pk
+    model: unit_reworked_compare
+    
+    given:
+      - input: ref('unit_test_model_a')
+        rows:
+          - { "id": , "col1": "abc", "col2": "def" }
+          - { "id": 2, "col1": "hij", "col2": "klm" }
+          - { "id": 3, "col1": "nop", "col2": "qrs" }
+      - input: ref('unit_test_model_b')
+        rows:
+          - { "id": , "col1": "abc", "col2": "def" }
+          - { "id": 2, "col1": "hij", "col2": "klm" }
+          - { "id": 3, "col1": "nop", "col2": "qrs" }
+        
+    expect:
+      rows:
+        - {"status": 'identical', 'id': , num_in_status: 3}
+        - {"status": 'identical', 'id': 2, num_in_status: 3}
+        - {"status": 'identical', 'id': 3, num_in_status: 3}
+
+    overrides:
+      vars:
+        reworked_compare__columns: ['id', 'col1', 'col2']
+        reworked_compare__event_time:
+        reworked_compare__primary_key_columns: ['id']
+
+  - name: reworked_compare_identical_tables_multiple_null_pk
+    model: unit_reworked_compare
+    
+    given:
+      - input: ref('unit_test_model_a')
+        rows:
+          - { "id": , "col1": "abc", "col2": "def" }
+          - { "id": , "col1": "hij", "col2": "klm" }
+          - { "id": 3, "col1": "nop", "col2": "qrs" }
+      - input: ref('unit_test_model_b')
+        rows:
+          - { "id": , "col1": "abc", "col2": "def" }
+          - { "id": , "col1": "hij", "col2": "klm" }
+          - { "id": 3, "col1": "nop", "col2": "qrs" }
+        
+    expect:
+      rows:
+        - {"status": 'identical', 'id': 3, num_in_status: 3}
+        - {"status": 'identical', 'id': , num_in_status: 3}
+        - {"status": 'identical', 'id': , num_in_status: 3}
+
+    overrides:
+      vars:
+        reworked_compare__columns: ['id', 'col1', 'col2']
+        reworked_compare__event_time:
+        reworked_compare__primary_key_columns: ['id']
+
+  - name: reworked_compare_identical_tables_multiple_null_pk_with_duplicate_rows
+    description: The two rows with a null ID are identical. They should both be returned as individual rows instead of being combined
+    model: unit_reworked_compare
+    
+    given:
+      - input: ref('unit_test_model_a')
+        rows:
+          - { "id": , "col1": "abc", "col2": "def" }
+          - { "id": , "col1": "abc", "col2": "def" }
+          - { "id": 3, "col1": "nop", "col2": "qrs" }
+      - input: ref('unit_test_model_b')
+        rows:
+          - { "id": , "col1": "abc", "col2": "def" }
+          - { "id": , "col1": "abc", "col2": "def" }
+          - { "id": , "col1": "abc", "col2": "def" }
+          - { "id": 3, "col1": "nop", "col2": "qrs" }
+        
+    expect:
+      rows:
+        - {"status": 'identical', 'id': 3, num_in_status: 3}
+        - {"status": 'identical', 'id': , num_in_status: 3}
+        - {"status": 'identical', 'id': , num_in_status: 3}
+        - {"status": 'added', 'id': , num_in_status: 1}
+
+    overrides:
+      vars:
+        reworked_compare__columns: ['id', 'col1', 'col2']
+        reworked_compare__event_time:
+        reworked_compare__primary_key_columns: ['id']
\ No newline at end of file
diff --git a/macros/reworked_compare.sql b/macros/reworked_compare.sql
index 3378dbf0..792697a6 100644
--- a/macros/reworked_compare.sql
+++ b/macros/reworked_compare.sql
@@ -9,7 +9,7 @@
 
     with 
 
-    {{ audit_helper.generate_set_results(a_query, b_query, columns, event_time_props)}}
+    {{ audit_helper.generate_set_results(a_query, b_query, primary_key, columns, event_time_props)}}
     
     ,
 
@@ -46,8 +46,8 @@
             *,
             case 
                 when in_a and in_b then 'identical'
-                when {{ dbt.bool_or('in_a') }} over (partition by {{ primary_key }}) 
-                    and {{ dbt.bool_or('in_b') }} over (partition by {{ primary_key }})
+                when {{ dbt.bool_or('in_a') }} over (partition by {{ primary_key }}, dbt_audit_pk_row_num) 
+                    and {{ dbt.bool_or('in_b') }} over (partition by {{ primary_key }}, dbt_audit_pk_row_num)
                 then 'modified'
                 when in_a then 'removed'
                 when in_b then 'added'
@@ -60,8 +60,8 @@
     final as (
         select 
             *,
-            count(distinct {{ primary_key }}) over (partition by status) as num_in_status,
-            dense_rank() over (partition by status order by {{ primary_key }}) as sample_number
+            count(distinct {{ primary_key }}, dbt_audit_pk_row_num) over (partition by status) as num_in_status,
+            dense_rank() over (partition by status order by {{ primary_key }}, dbt_audit_pk_row_num) as sample_number
         from classified
     )
 
@@ -73,15 +73,17 @@
 
 {% endmacro %}
 
-{% macro generate_set_results(a_query, b_query, columns, event_time_props=None) %}
-  {{ return(adapter.dispatch('generate_set_results', 'audit_helper')(a_query, b_query, columns, event_time_props)) }}
+{% macro generate_set_results(a_query, b_query, primary_key, columns, event_time_props=None) %}
+  {{ return(adapter.dispatch('generate_set_results', 'audit_helper')(a_query, b_query, primary_key, columns, event_time_props)) }}
 {% endmacro %}
 
-{% macro default__generate_set_results(a_query, b_query, columns, event_time_props) %}
+{% macro default__generate_set_results(a_query, b_query, primary_key, columns, event_time_props) %}
     {% set joined_cols = columns | join(", ") %}
 
     a as (
-        select {{ joined_cols }}
+        select 
+            {{ joined_cols }}, 
+            row_number() over (partition by {{ primary_key }} order by {{ primary_key}} ) as dbt_audit_pk_row_num
         from ( {{-  a_query  -}} )
         {% if event_time_props %}
             where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
@@ -90,7 +92,9 @@
     ),
 
     b as (
-        select {{ joined_cols }}
+        select 
+            {{ joined_cols }}, 
+            row_number() over (partition by {{ primary_key }} order by {{ primary_key}} ) as dbt_audit_pk_row_num
         from ( {{-  b_query  -}} )
         {% if event_time_props %}
             where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
@@ -123,12 +127,13 @@
     )
 {% endmacro %}
 
-{% macro snowflake__generate_set_results(a_query, b_query, columns, event_time_props) %}
+{% macro snowflake__generate_set_results(a_query, b_query, primary_key, columns, event_time_props) %}
     {% set joined_cols = columns | join(", ") %}
     a as (
         select 
-            {{ joined_cols }},
-            hash({{ joined_cols }}) as dbt_compare_row_hash
+            {{ joined_cols }}, 
+            row_number() over (partition by {{ primary_key }} order by {{ primary_key}} ) as dbt_audit_pk_row_num,
+            hash({{ joined_cols }}, dbt_audit_pk_row_num) as dbt_audit_row_hash
         from ( {{-  a_query  -}} )
         {% if event_time_props %}
             where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
@@ -138,8 +143,9 @@
 
     b as (
         select 
-            {{ joined_cols }},
-            hash({{ joined_cols }}) as dbt_compare_row_hash
+            {{ joined_cols }}, 
+            row_number() over (partition by {{ primary_key }} order by {{ primary_key}} ) as dbt_audit_pk_row_num,
+            hash({{ joined_cols }}, dbt_audit_pk_row_num) as dbt_audit_row_hash
         from ( {{-  b_query  -}} )
         {% if event_time_props %}
             where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
@@ -150,21 +156,21 @@
     a_intersect_b as (
 
         select * from a
-        where a.dbt_compare_row_hash in (select b.dbt_compare_row_hash from b)
+        where a.dbt_audit_row_hash in (select b.dbt_audit_row_hash from b)
 
     ),
 
     a_except_b as (
 
         select * from a
-        where a.dbt_compare_row_hash not in (select b.dbt_compare_row_hash from b)
+        where a.dbt_audit_row_hash not in (select b.dbt_audit_row_hash from b)
 
     ),
 
     b_except_a as (
 
         select * from b
-        where b.dbt_compare_row_hash not in (select a.dbt_compare_row_hash from a)
+        where b.dbt_audit_row_hash not in (select a.dbt_audit_row_hash from a)
 
     )
 {% endmacro %}
\ No newline at end of file

From 9a75fc966552931bb6b2d3de87b17d0f92e95d32 Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Tue, 7 May 2024 16:33:28 +1200
Subject: [PATCH 19/59] Rename columns to be more unique

---
 .../unit_reworked_compare.yml                 | 54 ++++++++++---------
 ...worked_compare_column_details_mismatch.yml | 10 ++--
 macros/reworked_compare.sql                   | 13 ++---
 3 files changed, 41 insertions(+), 36 deletions(-)

diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
index a7b111ee..8cebfbcc 100644
--- a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
+++ b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
@@ -16,9 +16,9 @@ unit_tests:
         
     expect:
       rows:
-        - {"status": 'identical', 'id': 1, num_in_status: 3}
-        - {"status": 'identical', 'id': 2, num_in_status: 3}
-        - {"status": 'identical', 'id': 3, num_in_status: 3}
+        - {"dbt_audit_row_status": 'identical', 'id': 1, dbt_audit_num_rows_in_status: 3}
+        - {"dbt_audit_row_status": 'identical', 'id': 2, dbt_audit_num_rows_in_status: 3}
+        - {"dbt_audit_row_status": 'identical', 'id': 3, dbt_audit_num_rows_in_status: 3}
 
     overrides:
       vars:
@@ -52,9 +52,9 @@ unit_tests:
         
     expect:
       rows:
-        - {"status": 'identical', 'id': 2, num_in_status: 2}
-        - {"status": 'identical', 'id': 3, num_in_status: 2}
-
+        - {"dbt_audit_row_status": 'identical', 'id': 2, dbt_audit_num_rows_in_status: 2}
+        - {"dbt_audit_row_status": 'identical', 'id': 3, dbt_audit_num_rows_in_status: 2}
+    
   - name: reworked_compare_all_statuses
     model: unit_reworked_compare
     overrides:
@@ -76,11 +76,11 @@ unit_tests:
         
     expect:
       rows:
-        - {"status": 'identical', 'id': 1, num_in_status: 1}
-        - {"status": 'modified', 'id': 2, num_in_status: 1}
-        - {"status": 'modified', 'id': 2, num_in_status: 1}
-        - {"status": 'removed', 'id': 3, num_in_status: 1}
-        - {"status": 'added', 'id': 4, num_in_status: 1}
+        - {"dbt_audit_row_status": 'identical', 'id': 1, dbt_audit_num_rows_in_status: 1}
+        - {"dbt_audit_row_status": 'modified', 'id': 2, dbt_audit_num_rows_in_status: 1}
+        - {"dbt_audit_row_status": 'modified', 'id': 2, dbt_audit_num_rows_in_status: 1}
+        - {"dbt_audit_row_status": 'removed', 'id': 3, dbt_audit_num_rows_in_status: 1}
+        - {"dbt_audit_row_status": 'added', 'id': 4, dbt_audit_num_rows_in_status: 1}
 
   - name: reworked_compare_identical_tables_multiple_pk_cols
     model: unit_reworked_compare
@@ -102,9 +102,9 @@ unit_tests:
           - { "id": 3, "id_2": 4, "col1": "nop", "col2": "qrs" }        
     expect:
       rows:
-        - {"status": 'identical', 'id': 12, "id_2": 3, "num_in_status": 3}
-        - {"status": 'identical', 'id': 1, "id_2": 23, "num_in_status": 3}
-        - {"status": 'identical', 'id': 3, "id_2": 4, "num_in_status": 3}
+        - {"dbt_audit_row_status": 'identical', 'id': 12, "id_2": 3, "dbt_audit_num_rows_in_status": 3}
+        - {"dbt_audit_row_status": 'identical', 'id': 1, "id_2": 23, "dbt_audit_num_rows_in_status": 3}
+        - {"dbt_audit_row_status": 'identical', 'id': 3, "id_2": 4, "dbt_audit_num_rows_in_status": 3}
 
   - name: reworked_compare_identical_tables_single_null_pk
     model: unit_reworked_compare
@@ -123,9 +123,9 @@ unit_tests:
         
     expect:
       rows:
-        - {"status": 'identical', 'id': , num_in_status: 3}
-        - {"status": 'identical', 'id': 2, num_in_status: 3}
-        - {"status": 'identical', 'id': 3, num_in_status: 3}
+        - {"dbt_audit_row_status": 'identical', 'id': , dbt_audit_num_rows_in_status: 3}
+        - {"dbt_audit_row_status": 'identical', 'id': 2, dbt_audit_num_rows_in_status: 3}
+        - {"dbt_audit_row_status": 'identical', 'id': 3, dbt_audit_num_rows_in_status: 3}
 
     overrides:
       vars:
@@ -150,9 +150,9 @@ unit_tests:
         
     expect:
       rows:
-        - {"status": 'identical', 'id': 3, num_in_status: 3}
-        - {"status": 'identical', 'id': , num_in_status: 3}
-        - {"status": 'identical', 'id': , num_in_status: 3}
+        - {"dbt_audit_row_status": 'identical', 'id': 3, dbt_audit_num_rows_in_status: 3}
+        - {"dbt_audit_row_status": 'identical', 'id': , dbt_audit_num_rows_in_status: 3}
+        - {"dbt_audit_row_status": 'identical', 'id': , dbt_audit_num_rows_in_status: 3}
 
     overrides:
       vars:
@@ -179,13 +179,17 @@ unit_tests:
         
     expect:
       rows:
-        - {"status": 'identical', 'id': 3, num_in_status: 3}
-        - {"status": 'identical', 'id': , num_in_status: 3}
-        - {"status": 'identical', 'id': , num_in_status: 3}
-        - {"status": 'added', 'id': , num_in_status: 1}
+        - {"dbt_audit_row_status": 'identical', 'id': 3, dbt_audit_num_rows_in_status: 3}
+        - {"dbt_audit_row_status": 'identical', 'id': , dbt_audit_num_rows_in_status: 3}
+        - {"dbt_audit_row_status": 'identical', 'id': , dbt_audit_num_rows_in_status: 3}
+        - {"dbt_audit_row_status": 'added', 'id': , dbt_audit_num_rows_in_status: 1}
 
     overrides:
       vars:
         reworked_compare__columns: ['id', 'col1', 'col2']
         reworked_compare__event_time:
-        reworked_compare__primary_key_columns: ['id']
\ No newline at end of file
+        reworked_compare__primary_key_columns: ['id']
+    
+    config:
+      tags: [bq_only]
+        
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare_column_details_mismatch.yml b/integration_tests/models/unit_test_wrappers/unit_reworked_compare_column_details_mismatch.yml
index 923d2584..f134aa24 100644
--- a/integration_tests/models/unit_test_wrappers/unit_reworked_compare_column_details_mismatch.yml
+++ b/integration_tests/models/unit_test_wrappers/unit_reworked_compare_column_details_mismatch.yml
@@ -19,8 +19,8 @@ unit_tests:
         
     expect:
       rows:
-        - {"status": 'identical', 'id': 1, num_in_status: 1}
-        - {"status": 'modified', 'id': 2, num_in_status: 1}
-        - {"status": 'modified', 'id': 2, num_in_status: 1}
-        - {"status": 'removed', 'id': 3, num_in_status: 1}
-        - {"status": 'added', 'id': 4, num_in_status: 1}
+        - {"dbt_audit_row_status": 'identical', 'id': 1, dbt_audit_num_rows_in_status: 1}
+        - {"dbt_audit_row_status": 'modified', 'id': 2, dbt_audit_num_rows_in_status: 1}
+        - {"dbt_audit_row_status": 'modified', 'id': 2, dbt_audit_num_rows_in_status: 1}
+        - {"dbt_audit_row_status": 'removed', 'id': 3, dbt_audit_num_rows_in_status: 1}
+        - {"dbt_audit_row_status": 'added', 'id': 4, dbt_audit_num_rows_in_status: 1}
diff --git a/macros/reworked_compare.sql b/macros/reworked_compare.sql
index 792697a6..09479ce3 100644
--- a/macros/reworked_compare.sql
+++ b/macros/reworked_compare.sql
@@ -51,7 +51,7 @@
                 then 'modified'
                 when in_a then 'removed'
                 when in_b then 'added'
-            end as status
+            end as dbt_audit_row_status
         from all_records
         order by {{ primary_key }}, in_a desc, in_b desc
 
@@ -60,16 +60,16 @@
     final as (
         select 
             *,
-            count(distinct {{ primary_key }}, dbt_audit_pk_row_num) over (partition by status) as num_in_status,
-            dense_rank() over (partition by status order by {{ primary_key }}, dbt_audit_pk_row_num) as sample_number
+            count(distinct {{ primary_key }}, dbt_audit_pk_row_num) over (partition by dbt_audit_row_status) as dbt_audit_num_rows_in_status,
+            dense_rank() over (partition by dbt_audit_row_status order by {{ primary_key }}, dbt_audit_pk_row_num) as dbt_audit_sample_number
         from classified
     )
 
     select * from final
     {% if sample_limit %}
-        where sample_number <= {{ sample_limit }}
+        where dbt_audit_sample_number <= {{ sample_limit }}
     {% endif %}
-    order by status, sample_number
+    order by dbt_audit_row_status, dbt_audit_sample_number
 
 {% endmacro %}
 
@@ -83,7 +83,8 @@
     a as (
         select 
             {{ joined_cols }}, 
-            row_number() over (partition by {{ primary_key }} order by {{ primary_key}} ) as dbt_audit_pk_row_num
+            row_number() over (partition by {{ primary_key }} order by {{ primary_key}} ) as dbt_audit_pk_row_num,
+            audit_helper.generate_surrogate_key(primary_keys + )
         from ( {{-  a_query  -}} )
         {% if event_time_props %}
             where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'

From 815760075460827c507a10b5d962d5b80f548f1b Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Wed, 8 May 2024 13:21:21 +1200
Subject: [PATCH 20/59] Steal surrogate key macro from utils

---
 macros/utils/generate_null_safe_sk.sql       | 25 ++++++++++++++++++++
 macros/{ => utils}/get_comparison_bounds.sql |  0
 2 files changed, 25 insertions(+)
 create mode 100644 macros/utils/generate_null_safe_sk.sql
 rename macros/{ => utils}/get_comparison_bounds.sql (100%)

diff --git a/macros/utils/generate_null_safe_sk.sql b/macros/utils/generate_null_safe_sk.sql
new file mode 100644
index 00000000..4078c334
--- /dev/null
+++ b/macros/utils/generate_null_safe_sk.sql
@@ -0,0 +1,25 @@
+{# Taken from https://github.com/dbt-labs/dbt-utils/blob/main/macros/sql/generate_surrogate_key.sql but without the option to treat nulls as empty strings #}
+{# Entry point: renders a hashed surrogate-key SQL expression for the columns in field_list, via the adapter-dispatched implementation in the audit_helper namespace. #}
+{%- macro generate_null_safe_surrogate_key(field_list) -%}
+    {{ return(adapter.dispatch('generate_null_safe_surrogate_key', 'audit_helper')(field_list)) }}
+{% endmacro %}
+{# Default implementation: emits a SQL expression that hashes the string form of every entry in field_list, joined with '-' literals. #}
+{%- macro default__generate_null_safe_surrogate_key(field_list) -%}
+
+{%- set fields = [] -%}
+
+{%- for field in field_list -%}
+    {#- Cast each field to the adapter's string type and replace nulls with a sentinel literal, so a null is not hashed the same as an empty string. -#}
+    {%- do fields.append(
+        "coalesce(cast(" ~ field ~ " as " ~ dbt.type_string() ~ "), '_dbt_audit_helper_surrogate_key_null_')"
+    ) -%}
+    {#- Add a '-' literal between fields (but not after the last one) to delimit adjacent values. -#}
+    {%- if not loop.last %}
+        {%- do fields.append("'-'") -%}
+    {%- endif -%}
+
+{%- endfor -%}
+{#- The macro's rendered output is the hash of all collected pieces concatenated in order. -#}
+{{ dbt.hash(dbt.concat(fields)) }}
+
+{%- endmacro -%}
\ No newline at end of file
diff --git a/macros/get_comparison_bounds.sql b/macros/utils/get_comparison_bounds.sql
similarity index 100%
rename from macros/get_comparison_bounds.sql
rename to macros/utils/get_comparison_bounds.sql

From 0e78f25af84d549cd204e17c53bba30bb6f421f2 Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Tue, 14 May 2024 16:00:49 +1200
Subject: [PATCH 21/59] Use generated surrogate key across the board in place
 of PK

---
 .../unit_reworked_compare.yml                 | 12 ++----
 macros/reworked_compare.sql                   | 39 +++++++++++--------
 2 files changed, 26 insertions(+), 25 deletions(-)

diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
index 8cebfbcc..fc821eff 100644
--- a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
+++ b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
@@ -150,9 +150,9 @@ unit_tests:
         
     expect:
       rows:
-        - {"dbt_audit_row_status": 'identical', 'id': 3, dbt_audit_num_rows_in_status: 3}
         - {"dbt_audit_row_status": 'identical', 'id': , dbt_audit_num_rows_in_status: 3}
         - {"dbt_audit_row_status": 'identical', 'id': , dbt_audit_num_rows_in_status: 3}
+        - {"dbt_audit_row_status": 'identical', 'id': 3, dbt_audit_num_rows_in_status: 3}
 
     overrides:
       vars:
@@ -179,17 +179,13 @@ unit_tests:
         
     expect:
       rows:
-        - {"dbt_audit_row_status": 'identical', 'id': 3, dbt_audit_num_rows_in_status: 3}
+        - {"dbt_audit_row_status": 'added', 'id': , dbt_audit_num_rows_in_status: 1}
         - {"dbt_audit_row_status": 'identical', 'id': , dbt_audit_num_rows_in_status: 3}
         - {"dbt_audit_row_status": 'identical', 'id': , dbt_audit_num_rows_in_status: 3}
-        - {"dbt_audit_row_status": 'added', 'id': , dbt_audit_num_rows_in_status: 1}
+        - {"dbt_audit_row_status": 'identical', 'id': 3, dbt_audit_num_rows_in_status: 3}
 
     overrides:
       vars:
         reworked_compare__columns: ['id', 'col1', 'col2']
         reworked_compare__event_time:
-        reworked_compare__primary_key_columns: ['id']
-    
-    config:
-      tags: [bq_only]
-        
\ No newline at end of file
+        reworked_compare__primary_key_columns: ['id']
\ No newline at end of file
diff --git a/macros/reworked_compare.sql b/macros/reworked_compare.sql
index 09479ce3..d3b1fb8e 100644
--- a/macros/reworked_compare.sql
+++ b/macros/reworked_compare.sql
@@ -1,15 +1,17 @@
 {% macro reworked_compare(a_query, b_query, primary_key_columns=[], columns=[], event_time=None, sample_limit=20) %}
     
     {% set joined_cols = columns | join(", ") %}
-    {% set primary_key = primary_key_columns | join(", ") %}
 
     {% if event_time %}
         {% set event_time_props = audit_helper.get_comparison_bounds(a_query, b_query, event_time) %}
     {% endif %}
 
     with 
-
-    {{ audit_helper.generate_set_results(a_query, b_query, primary_key, columns, event_time_props)}}
+    {#-
+        Set generation is dispatched because it's possible to get performance optimisations 
+        on some platforms, while keeping the post-processing standardised
+    -#}
+    {{ audit_helper.generate_set_results(a_query, b_query, primary_key_columns, columns, event_time_props)}}
     
     ,
 
@@ -46,22 +48,22 @@
             *,
             case 
                 when in_a and in_b then 'identical'
-                when {{ dbt.bool_or('in_a') }} over (partition by {{ primary_key }}, dbt_audit_pk_row_num) 
-                    and {{ dbt.bool_or('in_b') }} over (partition by {{ primary_key }}, dbt_audit_pk_row_num)
+                when {{ dbt.bool_or('in_a') }} over (partition by dbt_audit_surrogate_key, dbt_audit_pk_row_num) 
+                    and {{ dbt.bool_or('in_b') }} over (partition by dbt_audit_surrogate_key, dbt_audit_pk_row_num)
                 then 'modified'
                 when in_a then 'removed'
                 when in_b then 'added'
             end as dbt_audit_row_status
         from all_records
-        order by {{ primary_key }}, in_a desc, in_b desc
+        order by dbt_audit_surrogate_key, in_a desc, in_b desc
 
     ),
 
     final as (
         select 
             *,
-            count(distinct {{ primary_key }}, dbt_audit_pk_row_num) over (partition by dbt_audit_row_status) as dbt_audit_num_rows_in_status,
-            dense_rank() over (partition by dbt_audit_row_status order by {{ primary_key }}, dbt_audit_pk_row_num) as dbt_audit_sample_number
+            count(distinct dbt_audit_surrogate_key, dbt_audit_pk_row_num) over (partition by dbt_audit_row_status) as dbt_audit_num_rows_in_status,
+            dense_rank() over (partition by dbt_audit_row_status order by dbt_audit_surrogate_key, dbt_audit_pk_row_num) as dbt_audit_sample_number
         from classified
     )
 
@@ -73,18 +75,18 @@
 
 {% endmacro %}
 
-{% macro generate_set_results(a_query, b_query, primary_key, columns, event_time_props=None) %}
-  {{ return(adapter.dispatch('generate_set_results', 'audit_helper')(a_query, b_query, primary_key, columns, event_time_props)) }}
+{% macro generate_set_results(a_query, b_query, primary_key_columns, columns, event_time_props=None) %}
+  {{ return(adapter.dispatch('generate_set_results', 'audit_helper')(a_query, b_query, primary_key_columns, columns, event_time_props)) }}
 {% endmacro %}
 
-{% macro default__generate_set_results(a_query, b_query, primary_key, columns, event_time_props) %}
+{% macro default__generate_set_results(a_query, b_query, primary_key_columns, columns, event_time_props) %}
     {% set joined_cols = columns | join(", ") %}
 
     a as (
         select 
             {{ joined_cols }}, 
-            row_number() over (partition by {{ primary_key }} order by {{ primary_key}} ) as dbt_audit_pk_row_num,
-            audit_helper.generate_surrogate_key(primary_keys + )
+            {{ audit_helper.generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key,
+            row_number() over (partition by dbt_audit_surrogate_key order by dbt_audit_surrogate_key ) as dbt_audit_pk_row_num
         from ( {{-  a_query  -}} )
         {% if event_time_props %}
             where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
@@ -95,7 +97,8 @@
     b as (
         select 
             {{ joined_cols }}, 
-            row_number() over (partition by {{ primary_key }} order by {{ primary_key}} ) as dbt_audit_pk_row_num
+            {{ audit_helper.generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key,
+            row_number() over (partition by dbt_audit_surrogate_key order by dbt_audit_surrogate_key ) as dbt_audit_pk_row_num
         from ( {{-  b_query  -}} )
         {% if event_time_props %}
             where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
@@ -128,12 +131,13 @@
     )
 {% endmacro %}
 
-{% macro snowflake__generate_set_results(a_query, b_query, primary_key, columns, event_time_props) %}
+{% macro snowflake__generate_set_results(a_query, b_query, primary_key_columns, columns, event_time_props) %}
     {% set joined_cols = columns | join(", ") %}
     a as (
         select 
             {{ joined_cols }}, 
-            row_number() over (partition by {{ primary_key }} order by {{ primary_key}} ) as dbt_audit_pk_row_num,
+            {{ audit_helper.generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key,
+            row_number() over (partition by dbt_audit_surrogate_key order by dbt_audit_surrogate_key ) as dbt_audit_pk_row_num,
             hash({{ joined_cols }}, dbt_audit_pk_row_num) as dbt_audit_row_hash
         from ( {{-  a_query  -}} )
         {% if event_time_props %}
@@ -145,7 +149,8 @@
     b as (
         select 
             {{ joined_cols }}, 
-            row_number() over (partition by {{ primary_key }} order by {{ primary_key}} ) as dbt_audit_pk_row_num,
+            {{ audit_helper.generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key,
+            row_number() over (partition by dbt_audit_surrogate_key order by dbt_audit_surrogate_key ) as dbt_audit_pk_row_num,
             hash({{ joined_cols }}, dbt_audit_pk_row_num) as dbt_audit_row_hash
         from ( {{-  b_query  -}} )
         {% if event_time_props %}

From f59b4110e6a03a4f88a21c43e863222810fd500d Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Tue, 14 May 2024 16:02:21 +1200
Subject: [PATCH 22/59] rm my profile reference

---
 dbt_project.yml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/dbt_project.yml b/dbt_project.yml
index e6fb7460..987e03d1 100644
--- a/dbt_project.yml
+++ b/dbt_project.yml
@@ -4,8 +4,6 @@ config-version: 2
 
 require-dbt-version: [">=1.2.0", "<2.0.0"]
 
-profile: joel_ska
-
 target-path: "target"
 clean-targets: ["target", "dbt_packages"]
 macro-paths: ["macros"]

From ab7d8b9d853c05c7126b7527e822e713c60e9239 Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Tue, 14 May 2024 16:06:06 +1200
Subject: [PATCH 23/59] Update quick_are_queries_identical.sql

---
 macros/quick_are_queries_identical.sql | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/macros/quick_are_queries_identical.sql b/macros/quick_are_queries_identical.sql
index 466b4e6f..fecf2d83 100644
--- a/macros/quick_are_queries_identical.sql
+++ b/macros/quick_are_queries_identical.sql
@@ -8,7 +8,7 @@
         {% set event_time_props = audit_helper.get_comparison_bounds(a_query, b_query, event_time) %}
     {% endif %}
 
-    select count(hash_result) = 1 as are_tables_identical
+    select count(distinct hash_result) = 1 as are_tables_identical
     from (
         select hash_agg({{ joined_cols }}) as hash_result
         from ({{ query_a }})
@@ -17,7 +17,7 @@
             and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
         {% endif %}
 
-        union 
+        union all
         
         select hash_agg({{ joined_cols }}) as hash_result
         from ({{ query_b }})

From 120ac18df8e0550fc5c6e548efda1387a575133f Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Tue, 14 May 2024 16:31:31 +1200
Subject: [PATCH 24/59] Add diagram explaining comparison bounds

---
 macros/utils/get_comparison_bounds.sql | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/macros/utils/get_comparison_bounds.sql b/macros/utils/get_comparison_bounds.sql
index 85f8fcdc..4f224f5f 100644
--- a/macros/utils/get_comparison_bounds.sql
+++ b/macros/utils/get_comparison_bounds.sql
@@ -1,3 +1,23 @@
+/*
+The idea here is that if the event_time is set, we will only compare records enclosed in both models.
+This improves performance and allows us to compare apples to apples, instead of detecting millions/billions
+of "deletions" identified due to prod having all data while CI only has a few days' worth.
+
+In the diagram below, the cross-hatched section is the comparison bounds. You can think of it as:
+                                                         
+         greatest(model_a.min_value, model_b.min_value)  
+            least(model_a.max_value, model_b.max_value)  
+                                                         
+                 ┌────────────────────────────┐          
+  a min_value    │                a max_value │        
+    └──► ┌───────┼────────────────────┐ ◄───┘ │        
+         │       │┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼│       │        
+model_a  │       │┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼│       │ model_b
+         │       │┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼│       │        
+         └───────┼────────────────────┘       │        
+            ┌──► └────────────────────────────┘ ◄────┐ 
+           b min_value                      b max_value 
+*/
 {% macro get_comparison_bounds(a_relation, b_relation, event_time) %}
     {% set min_max_queries %}
         with min_maxes as (

From c275056b5cc0720283232cc499527763c746ae7f Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Tue, 14 May 2024 16:39:42 +1200
Subject: [PATCH 25/59] Add comments explaining warehouse-specific
 optimisations

---
 macros/quick_are_queries_identical.sql | 9 +++++++++
 macros/reworked_compare.sql            | 1 +
 2 files changed, 10 insertions(+)

diff --git a/macros/quick_are_queries_identical.sql b/macros/quick_are_queries_identical.sql
index fecf2d83..1f57c3d7 100644
--- a/macros/quick_are_queries_identical.sql
+++ b/macros/quick_are_queries_identical.sql
@@ -1,3 +1,12 @@
+/*
+As described by the Infinite Lambda team here: https://infinitelambda.com/data-validation-refactoring-snowflake/
+
+Some platforms let you take a hash of the whole table, which can be very very fast compared to comparing each row. 
+
+If you run this and it returns false, you still have to run the more in-depth queries to find out what specific changes there are, 
+but it's a good way to quickly verify identical results if that's what you're expecting. 
+*/
+
 {% macro quick_are_queries_identical(query_a, query_b, columns=[], event_time=None) %}
     {{ return (adapter.dispatch('quick_are_queries_identical', 'audit_helper')(query_a, query_b, columns, event_time)) }}
 {% endmacro %}
diff --git a/macros/reworked_compare.sql b/macros/reworked_compare.sql
index d3b1fb8e..8b178cb6 100644
--- a/macros/reworked_compare.sql
+++ b/macros/reworked_compare.sql
@@ -10,6 +10,7 @@
     {#-
         Set generation is dispatched because it's possible to get performance optimisations 
         on some platforms, while keeping the post-processing standardised
+        See https://infinitelambda.com/data-validation-refactoring-snowflake/ for an example and background
     -#}
     {{ audit_helper.generate_set_results(a_query, b_query, primary_key_columns, columns, event_time_props)}}
     

From 311fbdcd6ef256344540ba35bf61ccc7dd386083 Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Tue, 14 May 2024 16:48:30 +1200
Subject: [PATCH 26/59] cross-db support

---
 .../models/unit_test_placeholder_models/unit_test_model_a.sql   | 2 +-
 .../models/unit_test_placeholder_models/unit_test_model_b.sql   | 2 +-
 .../unit_test_model_b_more_cols.sql                             | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/integration_tests/models/unit_test_placeholder_models/unit_test_model_a.sql b/integration_tests/models/unit_test_placeholder_models/unit_test_model_a.sql
index 3c729df2..a4bc3985 100644
--- a/integration_tests/models/unit_test_placeholder_models/unit_test_model_a.sql
+++ b/integration_tests/models/unit_test_placeholder_models/unit_test_model_a.sql
@@ -1 +1 @@
-select 12 as id, 22 as id_2, 'xyz' as col1, 'tuv' as col2, 123 as col3, getdate() as created_at
\ No newline at end of file
+select 12 as id, 22 as id_2, 'xyz' as col1, 'tuv' as col2, 123 as col3, {{ dbt.current_timestamp() }} as created_at
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_placeholder_models/unit_test_model_b.sql b/integration_tests/models/unit_test_placeholder_models/unit_test_model_b.sql
index 3c729df2..a4bc3985 100644
--- a/integration_tests/models/unit_test_placeholder_models/unit_test_model_b.sql
+++ b/integration_tests/models/unit_test_placeholder_models/unit_test_model_b.sql
@@ -1 +1 @@
-select 12 as id, 22 as id_2, 'xyz' as col1, 'tuv' as col2, 123 as col3, getdate() as created_at
\ No newline at end of file
+select 12 as id, 22 as id_2, 'xyz' as col1, 'tuv' as col2, 123 as col3, {{ dbt.current_timestamp() }} as created_at
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_placeholder_models/unit_test_model_b_more_cols.sql b/integration_tests/models/unit_test_placeholder_models/unit_test_model_b_more_cols.sql
index b9e425b7..816accc5 100644
--- a/integration_tests/models/unit_test_placeholder_models/unit_test_model_b_more_cols.sql
+++ b/integration_tests/models/unit_test_placeholder_models/unit_test_model_b_more_cols.sql
@@ -1 +1 @@
-select 12 as id, 22 as id_2, 'xyz' as col1, 'tuv' as col2, 123 as col3, getdate() as created_at, 'pineapple' as pizza
\ No newline at end of file
+select 12 as id, 22 as id_2, 'xyz' as col1, 'tuv' as col2, 123 as col3, {{ dbt.current_timestamp() }} as created_at, 'pineapple' as pizza
\ No newline at end of file

From ac635218123c9422421b6eae1c2df82e62d3f467 Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Tue, 14 May 2024 16:54:06 +1200
Subject: [PATCH 27/59] subq

---
 macros/quick_are_queries_identical.sql | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/macros/quick_are_queries_identical.sql b/macros/quick_are_queries_identical.sql
index 1f57c3d7..add26638 100644
--- a/macros/quick_are_queries_identical.sql
+++ b/macros/quick_are_queries_identical.sql
@@ -20,7 +20,7 @@ but it's a good way to quickly verify identical results if that's what you're ex
     select count(distinct hash_result) = 1 as are_tables_identical
     from (
         select hash_agg({{ joined_cols }}) as hash_result
-        from ({{ query_a }})
+        from ({{ query_a }}) query_a_subq
         {% if event_time_props %}
             where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
             and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
@@ -29,7 +29,7 @@ but it's a good way to quickly verify identical results if that's what you're ex
         union all
         
         select hash_agg({{ joined_cols }}) as hash_result
-        from ({{ query_b }})
+        from ({{ query_b }}) query_b_subq
         {% if event_time_props %}
             where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
             and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'

From ffae04fbd3581cdda4187df40641cd195feac47d Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Tue, 14 May 2024 17:05:01 +1200
Subject: [PATCH 28/59] no postgres or redshift for a sec

---
 .circleci/config.yml | 56 ++++++++++++++++++++++----------------------
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 1701f721..79daed08 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -38,34 +38,34 @@ jobs:
             mkdir -p ~/.dbt
             cp integration_tests/ci/sample.profiles.yml ~/.dbt/profiles.yml
 
-      - run:
-          name: "Run Tests - Postgres"
-          environment:
-            POSTGRES_TEST_HOST: localhost
-            POSTGRES_TEST_USER: root
-            POSTGRES_TEST_PASS: ''
-            POSTGRES_TEST_PORT: 5432
-            POSTGRES_TEST_DBNAME: circle_test
-          command: |
-            . dbt_venv/bin/activate
-            cd integration_tests
-            dbt deps --target postgres
-            dbt seed --target postgres --full-refresh
-            dbt compile --target postgres
-            dbt run --target postgres
-            dbt test --target postgres
-
-      - run:
-          name: "Run Tests - Redshift"
-          command: |
-            . dbt_venv/bin/activate
-            echo `pwd`
-            cd integration_tests
-            dbt deps --target redshift
-            dbt seed --target redshift --full-refresh
-            dbt compile --target redshift
-            dbt run --target redshift
-            dbt test --target redshift
+      # - run:
+      #     name: "Run Tests - Postgres"
+      #     environment:
+      #       POSTGRES_TEST_HOST: localhost
+      #       POSTGRES_TEST_USER: root
+      #       POSTGRES_TEST_PASS: ''
+      #       POSTGRES_TEST_PORT: 5432
+      #       POSTGRES_TEST_DBNAME: circle_test
+      #     command: |
+      #       . dbt_venv/bin/activate
+      #       cd integration_tests
+      #       dbt deps --target postgres
+      #       dbt seed --target postgres --full-refresh
+      #       dbt compile --target postgres
+      #       dbt run --target postgres
+      #       dbt test --target postgres
+
+      # - run:
+      #     name: "Run Tests - Redshift"
+      #     command: |
+      #       . dbt_venv/bin/activate
+      #       echo `pwd`
+      #       cd integration_tests
+      #       dbt deps --target redshift
+      #       dbt seed --target redshift --full-refresh
+      #       dbt compile --target redshift
+      #       dbt run --target redshift
+      #       dbt test --target redshift
 
       - run:
           name: "Run Tests - Snowflake"

From 3ff4f7d4a1665a84b50614733f2fa11644c7d0cb Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Tue, 14 May 2024 17:18:55 +1200
Subject: [PATCH 29/59] add default var values for compare wrappers

---
 integration_tests/dbt_project.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml
index 39664887..66c943e4 100644
--- a/integration_tests/dbt_project.yml
+++ b/integration_tests/dbt_project.yml
@@ -20,8 +20,8 @@ seeds:
 
 vars:
   compare_queries_summarize: true
-  reworked_compare__primary_key_columns: []
-  reworked_compare__columns: []
+  reworked_compare__primary_key_columns: ['col1']
+  reworked_compare__columns: ['col1']
   reworked_compare__event_time:
-  quick_are_queries_identical_cols: []
+  quick_are_queries_identical_cols: [col1]
   quick_are_queries_identical_event_time:
\ No newline at end of file

From 8865b4a900d9df08a70428ef86da9df2284f78f7 Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Tue, 14 May 2024 17:35:21 +1200
Subject: [PATCH 30/59] avoid lateral alias reference for BQ

---
 macros/reworked_compare.sql | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/macros/reworked_compare.sql b/macros/reworked_compare.sql
index 8b178cb6..c56dad0f 100644
--- a/macros/reworked_compare.sql
+++ b/macros/reworked_compare.sql
@@ -83,11 +83,10 @@
 {% macro default__generate_set_results(a_query, b_query, primary_key_columns, columns, event_time_props) %}
     {% set joined_cols = columns | join(", ") %}
 
-    a as (
+    a_base as (
         select 
             {{ joined_cols }}, 
-            {{ audit_helper.generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key,
-            row_number() over (partition by dbt_audit_surrogate_key order by dbt_audit_surrogate_key ) as dbt_audit_pk_row_num
+            {{ audit_helper.generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key
         from ( {{-  a_query  -}} )
         {% if event_time_props %}
             where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
@@ -95,11 +94,10 @@
         {% endif %}
     ),
 
-    b as (
+    b_base as (
         select 
             {{ joined_cols }}, 
-            {{ audit_helper.generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key,
-            row_number() over (partition by dbt_audit_surrogate_key order by dbt_audit_surrogate_key ) as dbt_audit_pk_row_num
+            {{ audit_helper.generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key
         from ( {{-  b_query  -}} )
         {% if event_time_props %}
             where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
@@ -107,6 +105,20 @@
         {% endif %}
     ),
 
+    a as (
+        select 
+            *, 
+            row_number() over (partition by dbt_audit_surrogate_key order by dbt_audit_surrogate_key) as dbt_audit_pk_row_num
+        from a_base
+    ),
+
+    b as (
+        select 
+            *, 
+            row_number() over (partition by dbt_audit_surrogate_key order by dbt_audit_surrogate_key) as dbt_audit_pk_row_num
+        from b_base
+    ),
+
     a_intersect_b as (
 
         select * from a

From 683c8827314d632193e3761fa93d581c736ad2ff Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Tue, 14 May 2024 20:00:33 +1200
Subject: [PATCH 31/59] BQ doesn't support count(arg1, arg2)

---
 macros/reworked_compare.sql | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/macros/reworked_compare.sql b/macros/reworked_compare.sql
index c56dad0f..bcf95523 100644
--- a/macros/reworked_compare.sql
+++ b/macros/reworked_compare.sql
@@ -63,7 +63,7 @@
     final as (
         select 
             *,
-            count(distinct dbt_audit_surrogate_key, dbt_audit_pk_row_num) over (partition by dbt_audit_row_status) as dbt_audit_num_rows_in_status,
+            count(distinct {{ dbt.concat(["dbt_audit_surrogate_key", "dbt_audit_pk_row_num"]) }}) over (partition by dbt_audit_row_status) as dbt_audit_num_rows_in_status,
             dense_rank() over (partition by dbt_audit_row_status order by dbt_audit_surrogate_key, dbt_audit_pk_row_num) as dbt_audit_sample_number
         from classified
     )

From 93e2b251788b80e398b5a908064e4e4f85800126 Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Tue, 14 May 2024 20:10:24 +1200
Subject: [PATCH 32/59] re-enable redshift

---
 .circleci/config.yml                     | 22 +++++++++++-----------
 integration_tests/ci/sample.profiles.yml |  6 +++---
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 79daed08..cb3d5aae 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -55,17 +55,17 @@ jobs:
       #       dbt run --target postgres
       #       dbt test --target postgres
 
-      # - run:
-      #     name: "Run Tests - Redshift"
-      #     command: |
-      #       . dbt_venv/bin/activate
-      #       echo `pwd`
-      #       cd integration_tests
-      #       dbt deps --target redshift
-      #       dbt seed --target redshift --full-refresh
-      #       dbt compile --target redshift
-      #       dbt run --target redshift
-      #       dbt test --target redshift
+      - run:
+          name: "Run Tests - Redshift"
+          command: |
+            . dbt_venv/bin/activate
+            echo `pwd`
+            cd integration_tests
+            dbt deps --target redshift
+            dbt seed --target redshift --full-refresh
+            dbt compile --target redshift
+            dbt run --target redshift
+            dbt test --target redshift
 
       - run:
           name: "Run Tests - Snowflake"
diff --git a/integration_tests/ci/sample.profiles.yml b/integration_tests/ci/sample.profiles.yml
index 843d659e..167e8a8a 100644
--- a/integration_tests/ci/sample.profiles.yml
+++ b/integration_tests/ci/sample.profiles.yml
@@ -27,7 +27,7 @@ integration_tests:
       dbname: "{{ env_var('REDSHIFT_TEST_DBNAME') }}"
       port: "{{ env_var('REDSHIFT_TEST_PORT') | as_number }}"
       schema: audit_helper_integration_tests_redshift
-      threads: 1
+      threads: 8
 
     bigquery:
       type: bigquery
@@ -35,7 +35,7 @@ integration_tests:
       keyfile: "{{ env_var('BIGQUERY_SERVICE_KEY_PATH') }}"
       project: "{{ env_var('BIGQUERY_TEST_DATABASE') }}"
       schema: audit_helper_integration_tests_bigquery
-      threads: 1
+      threads: 8
 
     snowflake:
       type: snowflake
@@ -46,4 +46,4 @@ integration_tests:
       database: "{{ env_var('SNOWFLAKE_TEST_DATABASE') }}"
       warehouse: "{{ env_var('SNOWFLAKE_TEST_WAREHOUSE') }}"
       schema: audit_helper_integration_tests_snowflake
-      threads: 1
+      threads: 8

From 981768dd396e285de61331790e162173226e7942 Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Tue, 14 May 2024 20:17:11 +1200
Subject: [PATCH 33/59] Alias subq for redshift

---
 macros/reworked_compare.sql | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/macros/reworked_compare.sql b/macros/reworked_compare.sql
index bcf95523..42e8d854 100644
--- a/macros/reworked_compare.sql
+++ b/macros/reworked_compare.sql
@@ -87,7 +87,7 @@
         select 
             {{ joined_cols }}, 
             {{ audit_helper.generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key
-        from ( {{-  a_query  -}} )
+        from ( {{-  a_query  -}} ) a_base_subq
         {% if event_time_props %}
             where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
             and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
@@ -98,7 +98,7 @@
         select 
             {{ joined_cols }}, 
             {{ audit_helper.generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key
-        from ( {{-  b_query  -}} )
+        from ( {{-  b_query  -}} ) b_base_subq
         {% if event_time_props %}
             where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
             and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'

From 7e3e1714de9f32029b686dce09462eb4568f7d08 Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Fri, 17 May 2024 11:03:10 +1200
Subject: [PATCH 34/59] remove extra comma

---
 macros/reworked_compare.sql | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/macros/reworked_compare.sql b/macros/reworked_compare.sql
index 42e8d854..c9eaa261 100644
--- a/macros/reworked_compare.sql
+++ b/macros/reworked_compare.sql
@@ -21,7 +21,7 @@
         select
             *,
             true as in_a,
-            true as in_b,
+            true as in_b
         from a_intersect_b
 
         union all

From df95fcae469a998b8098d3429daa3a6ed99d4e28 Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Fri, 17 May 2024 11:50:52 +1200
Subject: [PATCH 35/59] add row status of nonunique_pk

---
 .../unit_reworked_compare.yml                   | 17 +++++++++--------
 macros/reworked_compare.sql                     |  1 +
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
index fc821eff..dd69783b 100644
--- a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
+++ b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
@@ -108,6 +108,7 @@ unit_tests:
 
   - name: reworked_compare_identical_tables_single_null_pk
     model: unit_reworked_compare
+    description: "`nonunique_pk` status checks whether a PK is unique. It's intended to avoid arbitrary comparisons, not protect against null records (that's what constraints or tests are for)."
     
     given:
       - input: ref('unit_test_model_a')
@@ -150,9 +151,9 @@ unit_tests:
         
     expect:
       rows:
-        - {"dbt_audit_row_status": 'identical', 'id': , dbt_audit_num_rows_in_status: 3}
-        - {"dbt_audit_row_status": 'identical', 'id': , dbt_audit_num_rows_in_status: 3}
-        - {"dbt_audit_row_status": 'identical', 'id': 3, dbt_audit_num_rows_in_status: 3}
+        - {"dbt_audit_row_status": 'identical', 'id': 3, dbt_audit_num_rows_in_status: 1}
+        - {"dbt_audit_row_status": 'nonunique_pk', 'id': , dbt_audit_num_rows_in_status: 2}
+        - {"dbt_audit_row_status": 'nonunique_pk', 'id': , dbt_audit_num_rows_in_status: 2}
 
     overrides:
       vars:
@@ -161,7 +162,7 @@ unit_tests:
         reworked_compare__primary_key_columns: ['id']
 
   - name: reworked_compare_identical_tables_multiple_null_pk_with_duplicate_rows
-    description: The two rows with a null ID are identical. They should both be returned as individual rows instead of being combined
+    description: All rows with a null ID are identical. They should both be returned as individual rows instead of being combined
     model: unit_reworked_compare
     
     given:
@@ -179,10 +180,10 @@ unit_tests:
         
     expect:
       rows:
-        - {"dbt_audit_row_status": 'added', 'id': , dbt_audit_num_rows_in_status: 1}
-        - {"dbt_audit_row_status": 'identical', 'id': , dbt_audit_num_rows_in_status: 3}
-        - {"dbt_audit_row_status": 'identical', 'id': , dbt_audit_num_rows_in_status: 3}
-        - {"dbt_audit_row_status": 'identical', 'id': 3, dbt_audit_num_rows_in_status: 3}
+        - {"dbt_audit_row_status": 'identical', 'id': 3, dbt_audit_num_rows_in_status: 1}
+        - {"dbt_audit_row_status": 'nonunique_pk', 'id': , dbt_audit_num_rows_in_status: 3}
+        - {"dbt_audit_row_status": 'nonunique_pk', 'id': , dbt_audit_num_rows_in_status: 3}
+        - {"dbt_audit_row_status": 'nonunique_pk', 'id': , dbt_audit_num_rows_in_status: 3}
 
     overrides:
       vars:
diff --git a/macros/reworked_compare.sql b/macros/reworked_compare.sql
index c9eaa261..9a058c0b 100644
--- a/macros/reworked_compare.sql
+++ b/macros/reworked_compare.sql
@@ -48,6 +48,7 @@
         select 
             *,
             case 
+                when max(dbt_audit_pk_row_num) over (partition by dbt_audit_surrogate_key) > 1 then 'nonunique_pk'
                 when in_a and in_b then 'identical'
                 when {{ dbt.bool_or('in_a') }} over (partition by dbt_audit_surrogate_key, dbt_audit_pk_row_num) 
                     and {{ dbt.bool_or('in_b') }} over (partition by dbt_audit_surrogate_key, dbt_audit_pk_row_num)

From 9523db8675d112896abad65f613cab51962e45c3 Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Fri, 17 May 2024 12:55:22 +1200
Subject: [PATCH 36/59] remove redundant test and wrapper model

---
 .../unit_test_model_b_more_cols.sql           |  1 -
 .../unit_reworked_compare.yml                 | 40 ++++++++++++++++---
 ...worked_compare_column_details_mismatch.sql |  9 -----
 ...worked_compare_column_details_mismatch.yml | 26 ------------
 4 files changed, 35 insertions(+), 41 deletions(-)
 delete mode 100644 integration_tests/models/unit_test_placeholder_models/unit_test_model_b_more_cols.sql
 delete mode 100644 integration_tests/models/unit_test_wrappers/unit_reworked_compare_column_details_mismatch.sql
 delete mode 100644 integration_tests/models/unit_test_wrappers/unit_reworked_compare_column_details_mismatch.yml

diff --git a/integration_tests/models/unit_test_placeholder_models/unit_test_model_b_more_cols.sql b/integration_tests/models/unit_test_placeholder_models/unit_test_model_b_more_cols.sql
deleted file mode 100644
index 816accc5..00000000
--- a/integration_tests/models/unit_test_placeholder_models/unit_test_model_b_more_cols.sql
+++ /dev/null
@@ -1 +0,0 @@
-select 12 as id, 22 as id_2, 'xyz' as col1, 'tuv' as col2, 123 as col3, {{ dbt.current_timestamp() }} as created_at, 'pineapple' as pizza
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
index dd69783b..68b632ec 100644
--- a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
+++ b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
@@ -17,8 +17,8 @@ unit_tests:
     expect:
       rows:
         - {"dbt_audit_row_status": 'identical', 'id': 1, dbt_audit_num_rows_in_status: 3}
-        - {"dbt_audit_row_status": 'identical', 'id': 2, dbt_audit_num_rows_in_status: 3}
         - {"dbt_audit_row_status": 'identical', 'id': 3, dbt_audit_num_rows_in_status: 3}
+        - {"dbt_audit_row_status": 'identical', 'id': 2, dbt_audit_num_rows_in_status: 3}
 
     overrides:
       vars:
@@ -76,8 +76,8 @@ unit_tests:
         
     expect:
       rows:
-        - {"dbt_audit_row_status": 'identical', 'id': 1, dbt_audit_num_rows_in_status: 1}
         - {"dbt_audit_row_status": 'modified', 'id': 2, dbt_audit_num_rows_in_status: 1}
+        - {"dbt_audit_row_status": 'identical', 'id': 1, dbt_audit_num_rows_in_status: 1}
         - {"dbt_audit_row_status": 'modified', 'id': 2, dbt_audit_num_rows_in_status: 1}
         - {"dbt_audit_row_status": 'removed', 'id': 3, dbt_audit_num_rows_in_status: 1}
         - {"dbt_audit_row_status": 'added', 'id': 4, dbt_audit_num_rows_in_status: 1}
@@ -162,7 +162,7 @@ unit_tests:
         reworked_compare__primary_key_columns: ['id']
 
   - name: reworked_compare_identical_tables_multiple_null_pk_with_duplicate_rows
-    description: All rows with a null ID are identical. They should both be returned as individual rows instead of being combined
+    description: All rows with a null ID are identical. They should be returned as individual rows instead of being combined
     model: unit_reworked_compare
     
     given:
@@ -180,13 +180,43 @@ unit_tests:
         
     expect:
       rows:
-        - {"dbt_audit_row_status": 'identical', 'id': 3, dbt_audit_num_rows_in_status: 1}
         - {"dbt_audit_row_status": 'nonunique_pk', 'id': , dbt_audit_num_rows_in_status: 3}
         - {"dbt_audit_row_status": 'nonunique_pk', 'id': , dbt_audit_num_rows_in_status: 3}
         - {"dbt_audit_row_status": 'nonunique_pk', 'id': , dbt_audit_num_rows_in_status: 3}
+        - {"dbt_audit_row_status": 'identical', 'id': 3, dbt_audit_num_rows_in_status: 1}
 
     overrides:
       vars:
         reworked_compare__columns: ['id', 'col1', 'col2']
         reworked_compare__event_time:
-        reworked_compare__primary_key_columns: ['id']
\ No newline at end of file
+        reworked_compare__primary_key_columns: ['id']
+
+  - name: reworked_compare_struct
+    model: my_model
+
+  - name: reworked_compare_all_statuses_different_column_set
+    model: unit_reworked_compare
+    overrides:
+      vars:
+        reworked_compare__primary_key_columns: ['id']
+        reworked_compare__columns: ['id', 'col1']
+        reworked_compare__event_time:
+    given:
+      - input: ref('unit_test_model_a')
+        rows:
+          - { "id": 1, "col1": "abc", "col2": "def" }
+          - { "id": 2, "col1": "hij", "col2": "klm" }
+          - { "id": 3, "col1": "nop", "col2": "qrs" }
+      - input: ref('unit_test_model_b')
+        rows:
+          - { "id": 1, "col1": "abc" }
+          - { "id": 2, "col1": "ddd" }
+          - { "id": 4, "col1": "nop" }
+        
+    expect:
+      rows:
+        - {"dbt_audit_row_status": 'added', 'id': 4, dbt_audit_num_rows_in_status: 1}
+        - {"dbt_audit_row_status": 'identical', 'id': 1, dbt_audit_num_rows_in_status: 1}
+        - {"dbt_audit_row_status": 'modified', 'id': 2, dbt_audit_num_rows_in_status: 1}
+        - {"dbt_audit_row_status": 'modified', 'id': 2, dbt_audit_num_rows_in_status: 1}
+        - {"dbt_audit_row_status": 'removed', 'id': 3, dbt_audit_num_rows_in_status: 1}
diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare_column_details_mismatch.sql b/integration_tests/models/unit_test_wrappers/unit_reworked_compare_column_details_mismatch.sql
deleted file mode 100644
index d8ed546a..00000000
--- a/integration_tests/models/unit_test_wrappers/unit_reworked_compare_column_details_mismatch.sql
+++ /dev/null
@@ -1,9 +0,0 @@
-{{ 
-    audit_helper.reworked_compare(
-        "select * from " ~ ref('unit_test_model_a'),
-        "select * from " ~ ref('unit_test_model_b_more_cols'),
-        primary_key_columns=['id'],
-        columns=var('reworked_compare__columns'),
-        event_time=var('reworked_compare__event_time')
-    ) 
-}}
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare_column_details_mismatch.yml b/integration_tests/models/unit_test_wrappers/unit_reworked_compare_column_details_mismatch.yml
deleted file mode 100644
index f134aa24..00000000
--- a/integration_tests/models/unit_test_wrappers/unit_reworked_compare_column_details_mismatch.yml
+++ /dev/null
@@ -1,26 +0,0 @@
-unit_tests:
-  - name: reworked_compare_all_statuses_different_columns
-    model: unit_reworked_compare_column_details_mismatch
-    overrides:
-      vars:
-        reworked_compare__columns: ['id', 'col1']
-        reworked_compare__event_time:
-    given:
-      - input: ref('unit_test_model_a')
-        rows:
-          - { "id": 1, "col1": "abc", "col2": "def" }
-          - { "id": 2, "col1": "hij", "col2": "klm" }
-          - { "id": 3, "col1": "nop", "col2": "qrs" }
-      - input: ref('unit_test_model_b_more_cols')
-        rows:
-          - { "id": 1, "col1": "abc" }
-          - { "id": 2, "col1": "changed" }
-          - { "id": 4, "col1": "nop" }
-        
-    expect:
-      rows:
-        - {"dbt_audit_row_status": 'identical', 'id': 1, dbt_audit_num_rows_in_status: 1}
-        - {"dbt_audit_row_status": 'modified', 'id': 2, dbt_audit_num_rows_in_status: 1}
-        - {"dbt_audit_row_status": 'modified', 'id': 2, dbt_audit_num_rows_in_status: 1}
-        - {"dbt_audit_row_status": 'removed', 'id': 3, dbt_audit_num_rows_in_status: 1}
-        - {"dbt_audit_row_status": 'added', 'id': 4, dbt_audit_num_rows_in_status: 1}

From a506d72a199da92586d8a6ee141a1bc1f8422dd5 Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Sat, 18 May 2024 05:55:44 +1200
Subject: [PATCH 37/59] Create json-y tests for snowflake

---
 .../unit_test_struct_model_a.sql              |   1 +
 .../unit_test_struct_model_b.sql              |   1 +
 .../unit_reworked_compare.yml                 |   3 -
 .../unit_reworked_compare_struct.sql          |   9 ++
 .../unit_reworked_compare_struct.yml          | 130 ++++++++++++++++++
 5 files changed, 141 insertions(+), 3 deletions(-)
 create mode 100644 integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_a.sql
 create mode 100644 integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_b.sql
 create mode 100644 integration_tests/models/unit_test_wrappers/unit_reworked_compare_struct.sql
 create mode 100644 integration_tests/models/unit_test_wrappers/unit_reworked_compare_struct.yml

diff --git a/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_a.sql b/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_a.sql
new file mode 100644
index 00000000..220f0767
--- /dev/null
+++ b/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_a.sql
@@ -0,0 +1 @@
+select 1 as id, 'John Doe' as col1, object_construct('street', '123 Main St', 'city', 'Anytown', 'state', 'CA') as col2
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_b.sql b/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_b.sql
new file mode 100644
index 00000000..220f0767
--- /dev/null
+++ b/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_b.sql
@@ -0,0 +1 @@
+select 1 as id, 'John Doe' as col1, object_construct('street', '123 Main St', 'city', 'Anytown', 'state', 'CA') as col2
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
index 68b632ec..a82a9b7e 100644
--- a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
+++ b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
@@ -191,9 +191,6 @@ unit_tests:
         reworked_compare__event_time:
         reworked_compare__primary_key_columns: ['id']
 
-  - name: reworked_compare_struct
-    model: my_model
-
   - name: reworked_compare_all_statuses_different_column_set
     model: unit_reworked_compare
     overrides:
diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare_struct.sql b/integration_tests/models/unit_test_wrappers/unit_reworked_compare_struct.sql
new file mode 100644
index 00000000..7aab2177
--- /dev/null
+++ b/integration_tests/models/unit_test_wrappers/unit_reworked_compare_struct.sql
@@ -0,0 +1,9 @@
+{{ 
+    audit_helper.reworked_compare(
+        "select * from " ~ ref('unit_test_struct_model_a'),
+        "select * from " ~ ref('unit_test_struct_model_b'),
+        primary_key_columns=var('reworked_compare__primary_key_columns'),
+        columns=var('reworked_compare__columns'),
+        event_time=var('reworked_compare__event_time')
+    ) 
+}}
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare_struct.yml b/integration_tests/models/unit_test_wrappers/unit_reworked_compare_struct.yml
new file mode 100644
index 00000000..309eb6e8
--- /dev/null
+++ b/integration_tests/models/unit_test_wrappers/unit_reworked_compare_struct.yml
@@ -0,0 +1,130 @@
+unit_tests:
+  - name: reworked_compare_struct
+    model: unit_reworked_compare_struct
+    given:
+      - input: ref('unit_test_struct_model_a')
+        format: sql
+        rows: |
+          select 1 as id, 'John Doe' as col1, object_construct('street', '123 Main St', 'city', 'Anytown', 'state', 'CA') as col2
+      - input: ref('unit_test_struct_model_b')
+        format: sql
+        rows: |
+          select 1 as id, 'John Doe' as col1, object_construct('street', '123 Main St', 'city', 'Anytown', 'state', 'CA') as col2
+    expect:
+      rows:
+        - {"id": 1, "dbt_audit_row_status": "identical", "dbt_audit_num_rows_in_status": 1}
+    overrides:
+      vars:
+        reworked_compare__columns: ['id', 'col1', 'col2']
+        reworked_compare__event_time:
+        reworked_compare__primary_key_columns: ['id']
+    config:
+      tags: "{{ 'works_on_my_machine' if (target.type in ['bq', 'redshift']) else 'skip' }}"
+
+  - name: unit_reworked_compare_struct_identical_values_different_order
+    model: unit_reworked_compare_struct
+    description: Snowflake sorts objects' keys alphabetically, so sort order is ignored.
+    given:
+      - input: ref('unit_test_struct_model_a')
+        format: sql
+        rows: |
+          select 1 as id, 'John Doe' as col1, object_construct('street', '123 Main St', 'city', 'Anytown', 'state', 'CA') as col2
+      - input: ref('unit_test_struct_model_b')
+        format: sql
+        rows: |
+          select 1 as id, 'John Doe' as col1, object_construct('state', 'CA', 'street', '123 Main St', 'city', 'Anytown') as col2
+    expect:
+      rows:
+        - {"id": 1, "dbt_audit_row_status": "identical", "dbt_audit_num_rows_in_status": 1}
+    overrides:
+      vars:
+        reworked_compare__columns: ['id', 'col1', 'col2']
+        reworked_compare__event_time:
+        reworked_compare__primary_key_columns: ['id']
+
+  - name: unit_reworked_compare_struct_removed_key
+    model: unit_reworked_compare_struct
+    given:
+      - input: ref('unit_test_struct_model_a')
+        format: sql
+        rows: |
+          select 1 as id, 'John Doe' as col1, object_construct('street', '123 Main St', 'city', 'Anytown', 'state', 'CA') as col2
+      - input: ref('unit_test_struct_model_b')
+        format: sql
+        rows: |
+          select 1 as id, 'John Doe' as col1, object_construct('street', '123 Main St', 'state', 'CA') as col2
+    expect:
+      rows:
+        - {"id": 1, "dbt_audit_row_status": "modified", "dbt_audit_num_rows_in_status": 1}
+        - {"id": 1, "dbt_audit_row_status": "modified", "dbt_audit_num_rows_in_status": 1}
+    overrides:
+      vars:
+        reworked_compare__columns: ['id', 'col1', 'col2']
+        reworked_compare__event_time:
+        reworked_compare__primary_key_columns: ['id']
+
+  - name: reworked_compare_complex_struct
+    model: unit_reworked_compare_struct
+    given:
+      - input: ref('unit_test_struct_model_a')
+        format: sql
+        rows: |
+          select 1 as id, 'John Doe' as col1, object_construct('emails', array_construct('john.doe@example.com', 'john.d@example.com'), 'phones', array_construct(object_construct('type', 'home', 'number', '123-456-7890'), object_construct('type', 'work', 'number', '987-654-3210'))) as col2
+      - input: ref('unit_test_struct_model_b')
+        format: sql
+        rows: |
+          select 1 as id, 'John Doe' as col1, object_construct('emails', array_construct('john.doe@example.com', 'john.d@example.com'), 'phones', array_construct(object_construct('type', 'home', 'number', '123-456-7890'), object_construct('type', 'work', 'number', '987-654-3210'))) as col2
+    expect:
+      rows:
+        - {"id": 1, "dbt_audit_row_status": "identical", "dbt_audit_num_rows_in_status": 1}
+    overrides:
+      vars:
+        reworked_compare__columns: ['id', 'col1', 'col2']
+        reworked_compare__event_time:
+        reworked_compare__primary_key_columns: ['id']
+
+
+  - name: reworked_compare_complex_struct_different_values
+    model: unit_reworked_compare_struct
+    given:
+      - input: ref('unit_test_struct_model_a')
+        format: sql
+        rows: |
+          select 1 as id, 'John Doe' as col1, object_construct('emails', array_construct('john.doe@example.com', 'john.d@example.com'), 'phones', array_construct(object_construct('type', 'home', 'number', '123-456-7890'), object_construct('type', 'work', 'number', '987-654-3210'))) as col2
+      - input: ref('unit_test_struct_model_b')
+        format: sql
+        rows: |
+          select 1 as id, 'John Doe' as col1, object_construct('emails', array_construct('john.smith@example.com', 'john.d@example.com'), 'phones', array_construct(object_construct('type', 'home', 'number', '123-456-7890'), object_construct('type', 'work', 'number', '987-654-3210'))) as col2
+    expect:
+      rows:
+        - {"id": 1, "dbt_audit_row_status": "modified", "dbt_audit_num_rows_in_status": 1}
+        - {"id": 1, "dbt_audit_row_status": "modified", "dbt_audit_num_rows_in_status": 1}
+    overrides:
+      vars:
+        reworked_compare__columns: ['id', 'col1', 'col2']
+        reworked_compare__event_time:
+        reworked_compare__primary_key_columns: ['id']
+
+  - name: unit_reworked_compare_complex_struct_identical_values_different_order
+    model: unit_reworked_compare_struct
+    description: Snowflake sorts objects' keys alphabetically, but respects the order items are added to arrays so differences are detected.
+    given:
+      - input: ref('unit_test_struct_model_a')
+        format: sql
+        rows: |
+          select 1 as id, 'John Doe' as col1, object_construct('emails', array_construct('john.doe@example.com', 'john.d@example.com'), 'phones', array_construct(object_construct('type', 'home', 'number', '123-456-7890'), object_construct('type', 'work', 'number', '987-654-3210'))) as col2
+      - input: ref('unit_test_struct_model_b')
+        format: sql
+        rows: |
+          select 1 as id, 'John Doe' as col1, object_construct('emails', array_construct('john.doe@example.com', 'john.d@example.com'), 'phones', array_construct(object_construct('type', 'work', 'number', '987-654-3210'), object_construct('type', 'home', 'number', '123-456-7890'))) as col2
+    expect:
+      rows:
+        - {"id": 1, "dbt_audit_row_status": "modified", "dbt_audit_num_rows_in_status": 1}
+        - {"id": 1, "dbt_audit_row_status": "modified", "dbt_audit_num_rows_in_status": 1}
+    overrides:
+      vars:
+        reworked_compare__columns: ['id', 'col1', 'col2']
+        reworked_compare__event_time:
+        reworked_compare__primary_key_columns: ['id']
+
+

From a7542a8986e58b185c166e54e7c9b2585d2f7b13 Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Sat, 18 May 2024 12:14:32 +1200
Subject: [PATCH 38/59] Add workaround for redshift to support count num rows
 in status

---
 .../unit_reworked_compare.yml                   |  4 ++--
 macros/reworked_compare.sql                     |  2 +-
 macros/utils/_count_num_rows_in_status.sql      | 17 +++++++++++++++++
 3 files changed, 20 insertions(+), 3 deletions(-)
 create mode 100644 macros/utils/_count_num_rows_in_status.sql

diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
index a82a9b7e..3346b9d9 100644
--- a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
+++ b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
@@ -76,9 +76,9 @@ unit_tests:
         
     expect:
       rows:
-        - {"dbt_audit_row_status": 'modified', 'id': 2, dbt_audit_num_rows_in_status: 1}
         - {"dbt_audit_row_status": 'identical', 'id': 1, dbt_audit_num_rows_in_status: 1}
         - {"dbt_audit_row_status": 'modified', 'id': 2, dbt_audit_num_rows_in_status: 1}
+        - {"dbt_audit_row_status": 'modified', 'id': 2, dbt_audit_num_rows_in_status: 1}
         - {"dbt_audit_row_status": 'removed', 'id': 3, dbt_audit_num_rows_in_status: 1}
         - {"dbt_audit_row_status": 'added', 'id': 4, dbt_audit_num_rows_in_status: 1}
 
@@ -180,10 +180,10 @@ unit_tests:
         
     expect:
       rows:
+        - {"dbt_audit_row_status": 'identical', 'id': 3, dbt_audit_num_rows_in_status: 1}
         - {"dbt_audit_row_status": 'nonunique_pk', 'id': , dbt_audit_num_rows_in_status: 3}
         - {"dbt_audit_row_status": 'nonunique_pk', 'id': , dbt_audit_num_rows_in_status: 3}
         - {"dbt_audit_row_status": 'nonunique_pk', 'id': , dbt_audit_num_rows_in_status: 3}
-        - {"dbt_audit_row_status": 'identical', 'id': 3, dbt_audit_num_rows_in_status: 1}
 
     overrides:
       vars:
diff --git a/macros/reworked_compare.sql b/macros/reworked_compare.sql
index 9a058c0b..69537d92 100644
--- a/macros/reworked_compare.sql
+++ b/macros/reworked_compare.sql
@@ -64,7 +64,7 @@
     final as (
         select 
             *,
-            count(distinct {{ dbt.concat(["dbt_audit_surrogate_key", "dbt_audit_pk_row_num"]) }}) over (partition by dbt_audit_row_status) as dbt_audit_num_rows_in_status,
+            {{ audit_helper._count_num_rows_in_status() }} as dbt_audit_num_rows_in_status,
             dense_rank() over (partition by dbt_audit_row_status order by dbt_audit_surrogate_key, dbt_audit_pk_row_num) as dbt_audit_sample_number
         from classified
     )
diff --git a/macros/utils/_count_num_rows_in_status.sql b/macros/utils/_count_num_rows_in_status.sql
new file mode 100644
index 00000000..c7d14e08
--- /dev/null
+++ b/macros/utils/_count_num_rows_in_status.sql
@@ -0,0 +1,17 @@
+{% macro _count_num_rows_in_status() %}
+    {{ return(adapter.dispatch('_count_num_rows_in_status', 'audit_helper')()) }}
+{% endmacro %}
+
+{%- macro default___count_num_rows_in_status() -%}
+    count(distinct dbt_audit_surrogate_key, dbt_audit_pk_row_num) over (partition by dbt_audit_row_status)
+{% endmacro %}
+
+{%- macro bigquery___count_num_rows_in_status() -%}
+    count(distinct {{ dbt.concat(["dbt_audit_surrogate_key", "dbt_audit_pk_row_num"]) }}) over (partition by dbt_audit_row_status)
+{% endmacro %}
+
+{%- macro redshift___count_num_rows_in_status() -%}
+    {#- Redshift doesn't support count(distinct) inside of window functions :( -#}
+    {#- modified rows are the only ones that return two rows per PK/row num pairing, so just need to be halved -#}
+    (count(*) over (partition by dbt_audit_row_status)) / case when dbt_audit_row_status = 'modified' then 2 else 1 end
+{% endmacro %}
\ No newline at end of file

From eb2cfcdd2e3993456c44d865d4c4926331217700 Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Sat, 18 May 2024 12:25:31 +1200
Subject: [PATCH 39/59] skip incompatible tests

---
 .circleci/config.yml                                          | 4 ++--
 .../unit_test_placeholder_models/unit_test_struct_model_a.sql | 2 ++
 .../unit_test_placeholder_models/unit_test_struct_model_b.sql | 2 ++
 .../unit_test_wrappers/unit_quick_are_queries_identical.sql   | 2 ++
 4 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index cb3d5aae..cffe4dd4 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -64,8 +64,8 @@ jobs:
             dbt deps --target redshift
             dbt seed --target redshift --full-refresh
             dbt compile --target redshift
-            dbt run --target redshift
-            dbt test --target redshift
+            dbt run --target redshift --exclude tag:skip+
+            dbt test --target redshift --exclude tag:skip+
 
       - run:
           name: "Run Tests - Snowflake"
diff --git a/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_a.sql b/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_a.sql
index 220f0767..8ae63364 100644
--- a/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_a.sql
+++ b/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_a.sql
@@ -1 +1,3 @@
+{{ config(tags=['skip' if target.type in ('redshift') else 'runnable']) }}
+
 select 1 as id, 'John Doe' as col1, object_construct('street', '123 Main St', 'city', 'Anytown', 'state', 'CA') as col2
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_b.sql b/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_b.sql
index 220f0767..8ae63364 100644
--- a/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_b.sql
+++ b/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_b.sql
@@ -1 +1,3 @@
+{{ config(tags=['skip' if target.type in ('redshift') else 'runnable']) }}
+
 select 1 as id, 'John Doe' as col1, object_construct('street', '123 Main St', 'city', 'Anytown', 'state', 'CA') as col2
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.sql b/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.sql
index 72fd9e72..85a9b618 100644
--- a/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.sql
+++ b/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.sql
@@ -1,3 +1,5 @@
+{{ config(tags=['skip' if target.type in ('redshift') else 'runnable']) }}
+
 {{ 
     audit_helper.quick_are_queries_identical(
         "select * from " ~ ref('unit_test_model_a'),

From 10392b0201f4536681f707ada9db388e206fb3eb Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Sat, 18 May 2024 15:59:13 +1200
Subject: [PATCH 40/59] Fix redshift lack of bool_or support in window funcs

---
 macros/reworked_compare.sql                 | 139 +-------------------
 macros/utils/_classify_audit_row_status.sql |  28 ++++
 macros/utils/_generate_set_results.sql      | 123 +++++++++++++++++
 3 files changed, 154 insertions(+), 136 deletions(-)
 create mode 100644 macros/utils/_classify_audit_row_status.sql
 create mode 100644 macros/utils/_generate_set_results.sql

diff --git a/macros/reworked_compare.sql b/macros/reworked_compare.sql
index 69537d92..e7838833 100644
--- a/macros/reworked_compare.sql
+++ b/macros/reworked_compare.sql
@@ -7,12 +7,8 @@
     {% endif %}
 
     with 
-    {#-
-        Set generation is dispatched because it's possible to get performance optimisations 
-        on some platforms, while keeping the post-processing standardised
-        See https://infinitelambda.com/data-validation-refactoring-snowflake/ for an example and background
-    -#}
-    {{ audit_helper.generate_set_results(a_query, b_query, primary_key_columns, columns, event_time_props)}}
+
+    {{ audit_helper._generate_set_results(a_query, b_query, primary_key_columns, columns, event_time_props)}}
     
     ,
 
@@ -44,21 +40,10 @@
 
 
     classified as (
-        
         select 
             *,
-            case 
-                when max(dbt_audit_pk_row_num) over (partition by dbt_audit_surrogate_key) > 1 then 'nonunique_pk'
-                when in_a and in_b then 'identical'
-                when {{ dbt.bool_or('in_a') }} over (partition by dbt_audit_surrogate_key, dbt_audit_pk_row_num) 
-                    and {{ dbt.bool_or('in_b') }} over (partition by dbt_audit_surrogate_key, dbt_audit_pk_row_num)
-                then 'modified'
-                when in_a then 'removed'
-                when in_b then 'added'
-            end as dbt_audit_row_status
+            {{ audit_helper._classify_audit_row_status() }} as dbt_audit_row_status
         from all_records
-        order by dbt_audit_surrogate_key, in_a desc, in_b desc
-
     ),
 
     final as (
@@ -75,122 +60,4 @@
     {% endif %}
     order by dbt_audit_row_status, dbt_audit_sample_number
 
-{% endmacro %}
-
-{% macro generate_set_results(a_query, b_query, primary_key_columns, columns, event_time_props=None) %}
-  {{ return(adapter.dispatch('generate_set_results', 'audit_helper')(a_query, b_query, primary_key_columns, columns, event_time_props)) }}
-{% endmacro %}
-
-{% macro default__generate_set_results(a_query, b_query, primary_key_columns, columns, event_time_props) %}
-    {% set joined_cols = columns | join(", ") %}
-
-    a_base as (
-        select 
-            {{ joined_cols }}, 
-            {{ audit_helper.generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key
-        from ( {{-  a_query  -}} ) a_base_subq
-        {% if event_time_props %}
-            where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
-            and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
-        {% endif %}
-    ),
-
-    b_base as (
-        select 
-            {{ joined_cols }}, 
-            {{ audit_helper.generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key
-        from ( {{-  b_query  -}} ) b_base_subq
-        {% if event_time_props %}
-            where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
-            and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
-        {% endif %}
-    ),
-
-    a as (
-        select 
-            *, 
-            row_number() over (partition by dbt_audit_surrogate_key order by dbt_audit_surrogate_key) as dbt_audit_pk_row_num
-        from a_base
-    ),
-
-    b as (
-        select 
-            *, 
-            row_number() over (partition by dbt_audit_surrogate_key order by dbt_audit_surrogate_key) as dbt_audit_pk_row_num
-        from b_base
-    ),
-
-    a_intersect_b as (
-
-        select * from a
-        {{ dbt.intersect() }}
-        select * from b
-
-    ),
-
-    a_except_b as (
-
-        select * from a
-        {{ dbt.except() }}
-        select * from b
-
-    ),
-
-    b_except_a as (
-
-        select * from b
-        {{ dbt.except() }}
-        select * from a
-
-    )
-{% endmacro %}
-
-{% macro snowflake__generate_set_results(a_query, b_query, primary_key_columns, columns, event_time_props) %}
-    {% set joined_cols = columns | join(", ") %}
-    a as (
-        select 
-            {{ joined_cols }}, 
-            {{ audit_helper.generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key,
-            row_number() over (partition by dbt_audit_surrogate_key order by dbt_audit_surrogate_key ) as dbt_audit_pk_row_num,
-            hash({{ joined_cols }}, dbt_audit_pk_row_num) as dbt_audit_row_hash
-        from ( {{-  a_query  -}} )
-        {% if event_time_props %}
-            where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
-            and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
-        {% endif %}
-    ),
-
-    b as (
-        select 
-            {{ joined_cols }}, 
-            {{ audit_helper.generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key,
-            row_number() over (partition by dbt_audit_surrogate_key order by dbt_audit_surrogate_key ) as dbt_audit_pk_row_num,
-            hash({{ joined_cols }}, dbt_audit_pk_row_num) as dbt_audit_row_hash
-        from ( {{-  b_query  -}} )
-        {% if event_time_props %}
-            where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
-            and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
-        {% endif %}
-    ),
-
-    a_intersect_b as (
-
-        select * from a
-        where a.dbt_audit_row_hash in (select b.dbt_audit_row_hash from b)
-
-    ),
-
-    a_except_b as (
-
-        select * from a
-        where a.dbt_audit_row_hash not in (select b.dbt_audit_row_hash from b)
-
-    ),
-
-    b_except_a as (
-
-        select * from b
-        where b.dbt_audit_row_hash not in (select a.dbt_audit_row_hash from a)
-
-    )
 {% endmacro %}
\ No newline at end of file
diff --git a/macros/utils/_classify_audit_row_status.sql b/macros/utils/_classify_audit_row_status.sql
new file mode 100644
index 00000000..e28e3f4e
--- /dev/null
+++ b/macros/utils/_classify_audit_row_status.sql
@@ -0,0 +1,28 @@
+{% macro _classify_audit_row_status() %}
+    {{ return(adapter.dispatch('_classify_audit_row_status', 'audit_helper')()) }}
+{% endmacro %}
+
+{%- macro default___classify_audit_row_status() -%}
+    case 
+        when max(dbt_audit_pk_row_num) over (partition by dbt_audit_surrogate_key) > 1 then 'nonunique_pk'
+        when in_a and in_b then 'identical'
+        when {{ dbt.bool_or('in_a') }} over (partition by dbt_audit_surrogate_key, dbt_audit_pk_row_num) 
+            and {{ dbt.bool_or('in_b') }} over (partition by dbt_audit_surrogate_key, dbt_audit_pk_row_num)
+            then 'modified'
+        when in_a then 'removed'
+        when in_b then 'added'
+    end
+{% endmacro %}
+
+
+{%- macro redshift___classify_audit_row_status() -%}
+    {#- Redshift doesn't support bitwise operations (e.g. bool_or) inside of a window function :( -#}
+    case 
+        when max(dbt_audit_pk_row_num) over (partition by dbt_audit_surrogate_key) > 1 then 'nonunique_pk'
+        when in_a and in_b then 'identical'
+        when max(case when in_a then 1 else 0 end) over (partition by dbt_audit_surrogate_key, dbt_audit_pk_row_num) = 1
+            and max(case when in_b then 1 else 0 end) over (partition by dbt_audit_surrogate_key, dbt_audit_pk_row_num) = 1
+            then 'modified'
+        when in_a then 'removed'
+        when in_b then 'added'
+    end{% endmacro %}
\ No newline at end of file
diff --git a/macros/utils/_generate_set_results.sql b/macros/utils/_generate_set_results.sql
new file mode 100644
index 00000000..848ff9e2
--- /dev/null
+++ b/macros/utils/_generate_set_results.sql
@@ -0,0 +1,123 @@
+{#-
+    Set generation is dispatched because it's possible to get performance optimisations 
+    on some platforms, while keeping the post-processing standardised
+    See https://infinitelambda.com/data-validation-refactoring-snowflake/ for an example and background
+-#}
+
+{% macro _generate_set_results(a_query, b_query, primary_key_columns, columns, event_time_props=None) %}
+  {{ return(adapter.dispatch('_generate_set_results', 'audit_helper')(a_query, b_query, primary_key_columns, columns, event_time_props)) }}
+{% endmacro %}
+
+{% macro default___generate_set_results(a_query, b_query, primary_key_columns, columns, event_time_props) %}
+    {% set joined_cols = columns | join(", ") %}
+
+    a_base as (
+        select 
+            {{ joined_cols }}, 
+            {{ audit_helper.generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key
+        from ( {{-  a_query  -}} ) a_base_subq
+        {% if event_time_props %}
+            where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
+            and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
+        {% endif %}
+    ),
+
+    b_base as (
+        select 
+            {{ joined_cols }}, 
+            {{ audit_helper.generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key
+        from ( {{-  b_query  -}} ) b_base_subq
+        {% if event_time_props %}
+            where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
+            and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
+        {% endif %}
+    ),
+
+    a as (
+        select 
+            *, 
+            row_number() over (partition by dbt_audit_surrogate_key order by dbt_audit_surrogate_key) as dbt_audit_pk_row_num
+        from a_base
+    ),
+
+    b as (
+        select 
+            *, 
+            row_number() over (partition by dbt_audit_surrogate_key order by dbt_audit_surrogate_key) as dbt_audit_pk_row_num
+        from b_base
+    ),
+
+    a_intersect_b as (
+
+        select * from a
+        {{ dbt.intersect() }}
+        select * from b
+
+    ),
+
+    a_except_b as (
+
+        select * from a
+        {{ dbt.except() }}
+        select * from b
+
+    ),
+
+    b_except_a as (
+
+        select * from b
+        {{ dbt.except() }}
+        select * from a
+
+    )
+{% endmacro %}
+
+{% macro snowflake___generate_set_results(a_query, b_query, primary_key_columns, columns, event_time_props) %}
+    {% set joined_cols = columns | join(", ") %}
+    a as (
+        select 
+            {{ joined_cols }}, 
+            {{ audit_helper.generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key,
+            row_number() over (partition by dbt_audit_surrogate_key order by dbt_audit_surrogate_key ) as dbt_audit_pk_row_num,
+            hash({{ joined_cols }}, dbt_audit_pk_row_num) as dbt_audit_row_hash
+        from ( {{-  a_query  -}} )
+        {% if event_time_props %}
+            where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
+            and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
+        {% endif %}
+    ),
+
+    b as (
+        select 
+            {{ joined_cols }}, 
+            {{ audit_helper.generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key,
+            row_number() over (partition by dbt_audit_surrogate_key order by dbt_audit_surrogate_key ) as dbt_audit_pk_row_num,
+            hash({{ joined_cols }}, dbt_audit_pk_row_num) as dbt_audit_row_hash
+        from ( {{-  b_query  -}} )
+        {% if event_time_props %}
+            where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
+            and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
+        {% endif %}
+    ),
+
+    a_intersect_b as (
+
+        select * from a
+        where a.dbt_audit_row_hash in (select b.dbt_audit_row_hash from b)
+
+    ),
+
+    a_except_b as (
+
+        select * from a
+        where a.dbt_audit_row_hash not in (select b.dbt_audit_row_hash from b)
+
+    ),
+
+    b_except_a as (
+
+        select * from b
+        where b.dbt_audit_row_hash not in (select a.dbt_audit_row_hash from a)
+
+    )
+{% endmacro %}
\ No newline at end of file

From 8c9690cdbdf8eed7e64f78965e1d51ad0607c4ee Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Sat, 18 May 2024 15:59:23 +1200
Subject: [PATCH 41/59] Add tag:skip exclusions to the remaining CI targets

---
 .circleci/config.yml | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index cffe4dd4..200699ff 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -52,8 +52,8 @@ jobs:
       #       dbt deps --target postgres
       #       dbt seed --target postgres --full-refresh
       #       dbt compile --target postgres
-      #       dbt run --target postgres
-      #       dbt test --target postgres
+      #       dbt run --target postgres --exclude tag:skip+
+      #       dbt test --target postgres --exclude tag:skip+
 
       - run:
           name: "Run Tests - Redshift"
@@ -76,8 +76,8 @@ jobs:
             dbt deps --target snowflake
             dbt seed --target snowflake --full-refresh
             dbt compile --target snowflake
-            dbt run --target snowflake
-            dbt test --target snowflake
+            dbt run --target snowflake --exclude tag:skip+
+            dbt test --target snowflake --exclude tag:skip+
 
       - run:
           name: "Run Tests - BigQuery"
@@ -91,8 +91,8 @@ jobs:
             dbt deps --target bigquery
             dbt seed --target bigquery --full-refresh
             dbt compile --target bigquery
-            dbt run --target bigquery --full-refresh
-            dbt test --target bigquery
+            dbt run --target bigquery --full-refresh --exclude tag:skip+
+            dbt test --target bigquery --exclude tag:skip+
 
 
       - save_cache:

From 1cf18874b4ef703a83db14e8ebf88edbb2d02508 Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Sat, 18 May 2024 16:32:34 +1200
Subject: [PATCH 42/59] fix incorrect skip tag application

---
 .../unit_test_placeholder_models/unit_test_struct_model_a.sql   | 2 +-
 .../unit_test_placeholder_models/unit_test_struct_model_b.sql   | 2 +-
 .../unit_test_wrappers/unit_quick_are_queries_identical.sql     | 2 +-
 .../models/unit_test_wrappers/unit_reworked_compare_struct.yml  | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_a.sql b/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_a.sql
index 8ae63364..3d7e9999 100644
--- a/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_a.sql
+++ b/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_a.sql
@@ -1,3 +1,3 @@
-{{ config(tags=['skip' if target.type in ('redshift') else 'runnable']) }}
+{{ config(tags=['skip' if (target.type in ['redshift']) else 'runnable']) }}
 
 select 1 as id, 'John Doe' as col1, object_construct('street', '123 Main St', 'city', 'Anytown', 'state', 'CA') as col2
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_b.sql b/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_b.sql
index 8ae63364..3d7e9999 100644
--- a/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_b.sql
+++ b/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_b.sql
@@ -1,3 +1,3 @@
-{{ config(tags=['skip' if target.type in ('redshift') else 'runnable']) }}
+{{ config(tags=['skip' if (target.type in ['redshift']) else 'runnable']) }}
 
 select 1 as id, 'John Doe' as col1, object_construct('street', '123 Main St', 'city', 'Anytown', 'state', 'CA') as col2
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.sql b/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.sql
index 85a9b618..c66828f5 100644
--- a/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.sql
+++ b/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.sql
@@ -1,4 +1,4 @@
-{{ config(tags=['skip' if target.type in ('redshift') else 'runnable']) }}
+{{ config(tags=['skip' if (target.type in ['redshift']) else 'runnable']) }}
 
 {{ 
     audit_helper.quick_are_queries_identical(
diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare_struct.yml b/integration_tests/models/unit_test_wrappers/unit_reworked_compare_struct.yml
index 309eb6e8..3139a570 100644
--- a/integration_tests/models/unit_test_wrappers/unit_reworked_compare_struct.yml
+++ b/integration_tests/models/unit_test_wrappers/unit_reworked_compare_struct.yml
@@ -19,7 +19,7 @@ unit_tests:
         reworked_compare__event_time:
         reworked_compare__primary_key_columns: ['id']
     config:
-      tags: "{{ 'works_on_my_machine' if (target.type in ['bq', 'redshift']) else 'skip' }}"
+      tags: "{{ 'skip' if (target.type in ['redshift']) else 'runnable' }}"
 
   - name: unit_reworked_compare_struct_identical_values_different_order
     model: unit_reworked_compare_struct

From 319a967c227a68d1f4602124867d6d869819a7be Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Sat, 18 May 2024 16:32:46 +1200
Subject: [PATCH 43/59] Move user configs to project.yml from profiles

---
 integration_tests/ci/sample.profiles.yml | 4 ----
 integration_tests/dbt_project.yml        | 8 ++++++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/integration_tests/ci/sample.profiles.yml b/integration_tests/ci/sample.profiles.yml
index 167e8a8a..66eac960 100644
--- a/integration_tests/ci/sample.profiles.yml
+++ b/integration_tests/ci/sample.profiles.yml
@@ -2,10 +2,6 @@
 # HEY! This file is used in the dbt-audit-helper integrations tests with CircleCI.
 # You should __NEVER__ check credentials into version control. Thanks for reading :)
 
-config:
-    send_anonymous_usage_stats: False
-    use_colors: True
-
 integration_tests:
   target: postgres
   outputs:
diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml
index 66c943e4..ef906729 100644
--- a/integration_tests/dbt_project.yml
+++ b/integration_tests/dbt_project.yml
@@ -23,5 +23,9 @@ vars:
   reworked_compare__primary_key_columns: ['col1']
   reworked_compare__columns: ['col1']
   reworked_compare__event_time:
-  quick_are_queries_identical_cols: [col1]
-  quick_are_queries_identical_event_time:
\ No newline at end of file
+  quick_are_queries_identical_cols: ['col1']
+  quick_are_queries_identical_event_time:
+
+flags:
+  send_anonymous_usage_stats: False
+  use_colors: True
\ No newline at end of file

From 698aa997d9f0240bcb95db313ee9377b0dd13f8a Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Sat, 18 May 2024 17:15:54 +1200
Subject: [PATCH 44/59] Temporarily disable unpassable redshift tests

---
 .../models/unit_test_wrappers/unit_reworked_compare.yml     | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
index 3346b9d9..27cc867d 100644
--- a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
+++ b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
@@ -133,6 +133,8 @@ unit_tests:
         reworked_compare__columns: ['id', 'col1', 'col2']
         reworked_compare__event_time:
         reworked_compare__primary_key_columns: ['id']
+    config:
+      tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-redshift/issues/821
 
   - name: reworked_compare_identical_tables_multiple_null_pk
     model: unit_reworked_compare
@@ -160,6 +162,8 @@ unit_tests:
         reworked_compare__columns: ['id', 'col1', 'col2']
         reworked_compare__event_time:
         reworked_compare__primary_key_columns: ['id']
+    config:
+      tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-redshift/issues/821
 
   - name: reworked_compare_identical_tables_multiple_null_pk_with_duplicate_rows
     description: All rows with a null ID are identical. They should be returned as individual rows instead of being combined
@@ -190,6 +194,8 @@ unit_tests:
         reworked_compare__columns: ['id', 'col1', 'col2']
         reworked_compare__event_time:
         reworked_compare__primary_key_columns: ['id']
+    config:
+      tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-redshift/issues/821
 
   - name: reworked_compare_all_statuses_different_column_set
     model: unit_reworked_compare

From a255d43ff6053c5f80e791316728de36ed4df257 Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Sat, 18 May 2024 17:18:29 +1200
Subject: [PATCH 45/59] add temp skip to circle's config.yml

---
 .circleci/config.yml | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 200699ff..00367f6b 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -52,8 +52,8 @@ jobs:
       #       dbt deps --target postgres
       #       dbt seed --target postgres --full-refresh
       #       dbt compile --target postgres
-      #       dbt run --target postgres --exclude tag:skip+
-      #       dbt test --target postgres --exclude tag:skip+
+      #       dbt run --target postgres --exclude tag:skip+ temporary_skip+
+      #       dbt test --target postgres --exclude tag:skip+ temporary_skip+
 
       - run:
           name: "Run Tests - Redshift"
@@ -64,8 +64,8 @@ jobs:
             dbt deps --target redshift
             dbt seed --target redshift --full-refresh
             dbt compile --target redshift
-            dbt run --target redshift --exclude tag:skip+
-            dbt test --target redshift --exclude tag:skip+
+            dbt run --target redshift --exclude tag:skip+ temporary_skip+
+            dbt test --target redshift --exclude tag:skip+ temporary_skip+
 
       - run:
           name: "Run Tests - Snowflake"
@@ -76,8 +76,8 @@ jobs:
             dbt deps --target snowflake
             dbt seed --target snowflake --full-refresh
             dbt compile --target snowflake
-            dbt run --target snowflake --exclude tag:skip+
-            dbt test --target snowflake --exclude tag:skip+
+            dbt run --target snowflake --exclude tag:skip+ temporary_skip+
+            dbt test --target snowflake --exclude tag:skip+ temporary_skip+
 
       - run:
           name: "Run Tests - BigQuery"
@@ -91,8 +91,8 @@ jobs:
             dbt deps --target bigquery
             dbt seed --target bigquery --full-refresh
             dbt compile --target bigquery
-            dbt run --target bigquery --full-refresh --exclude tag:skip+
-            dbt test --target bigquery --exclude tag:skip+
+            dbt run --target bigquery --full-refresh --exclude tag:skip+ temporary_skip+
+            dbt test --target bigquery --exclude tag:skip+ temporary_skip+
 
 
       - save_cache:

From a9a47c131bc419446dfa2e22ed8629894373156e Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Sat, 18 May 2024 17:22:38 +1200
Subject: [PATCH 46/59] Add forgotten tag: method to temporary_skip exclusions

---
 .circleci/config.yml | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 00367f6b..4aec9aa7 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -52,8 +52,8 @@ jobs:
       #       dbt deps --target postgres
       #       dbt seed --target postgres --full-refresh
       #       dbt compile --target postgres
-      #       dbt run --target postgres --exclude tag:skip+ temporary_skip+
-      #       dbt test --target postgres --exclude tag:skip+ temporary_skip+
+      #       dbt run --target postgres --exclude tag:skip+ tag:temporary_skip+
+      #       dbt test --target postgres --exclude tag:skip+ tag:temporary_skip+
 
       - run:
           name: "Run Tests - Redshift"
@@ -64,8 +64,8 @@ jobs:
             dbt deps --target redshift
             dbt seed --target redshift --full-refresh
             dbt compile --target redshift
-            dbt run --target redshift --exclude tag:skip+ temporary_skip+
-            dbt test --target redshift --exclude tag:skip+ temporary_skip+
+            dbt run --target redshift --exclude tag:skip+ tag:temporary_skip+
+            dbt test --target redshift --exclude tag:skip+ tag:temporary_skip+
 
       - run:
           name: "Run Tests - Snowflake"
@@ -76,8 +76,8 @@ jobs:
             dbt deps --target snowflake
             dbt seed --target snowflake --full-refresh
             dbt compile --target snowflake
-            dbt run --target snowflake --exclude tag:skip+ temporary_skip+
-            dbt test --target snowflake --exclude tag:skip+ temporary_skip+
+            dbt run --target snowflake --exclude tag:skip+ tag:temporary_skip+
+            dbt test --target snowflake --exclude tag:skip+ tag:temporary_skip+
 
       - run:
           name: "Run Tests - BigQuery"
@@ -91,8 +91,8 @@ jobs:
             dbt deps --target bigquery
             dbt seed --target bigquery --full-refresh
             dbt compile --target bigquery
-            dbt run --target bigquery --full-refresh --exclude tag:skip+ temporary_skip+
-            dbt test --target bigquery --exclude tag:skip+ temporary_skip+
+            dbt run --target bigquery --full-refresh --exclude tag:skip+ tag:temporary_skip+
+            dbt test --target bigquery --exclude tag:skip+ tag:temporary_skip+
 
 
       - save_cache:

From ec2d1421ccf04f1d7e4cdebd6449018c43b1d3ec Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Wed, 22 May 2024 13:46:08 +1200
Subject: [PATCH 47/59] Temporarily skip
 reworked_compare_all_statuses_different_column_set

---
 .gitignore                                                     | 3 ++-
 .../models/unit_test_wrappers/unit_reworked_compare.yml        | 2 ++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index a33e3f41..0606e5c3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 target/
 dbt_packages/
 logs/
-logfile
\ No newline at end of file
+logfile
+.DS_Store
diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
index 27cc867d..cd20a672 100644
--- a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
+++ b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
@@ -223,3 +223,5 @@ unit_tests:
         - {"dbt_audit_row_status": 'modified', 'id': 2, dbt_audit_num_rows_in_status: 1}
         - {"dbt_audit_row_status": 'modified', 'id': 2, dbt_audit_num_rows_in_status: 1}
         - {"dbt_audit_row_status": 'removed', 'id': 3, dbt_audit_num_rows_in_status: 1}
+    config:
+      tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-core/issues/10167

From fe91fd1b1b8fe3343aeb73d268977b138686229e Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Wed, 22 May 2024 16:37:44 +1200
Subject: [PATCH 48/59] Skip another test on Redshift

---
 .../models/unit_test_wrappers/unit_reworked_compare.yml         | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
index cd20a672..61c7ee37 100644
--- a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
+++ b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
@@ -81,6 +81,8 @@ unit_tests:
         - {"dbt_audit_row_status": 'modified', 'id': 2, dbt_audit_num_rows_in_status: 1}
         - {"dbt_audit_row_status": 'removed', 'id': 3, dbt_audit_num_rows_in_status: 1}
         - {"dbt_audit_row_status": 'added', 'id': 4, dbt_audit_num_rows_in_status: 1}
+    config:
+      tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-core/issues/10167
 
   - name: reworked_compare_identical_tables_multiple_pk_cols
     model: unit_reworked_compare

From 77f6a50a19cde71a5d123c5e4f33bf0ca10be56b Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Wed, 22 May 2024 17:10:26 +1200
Subject: [PATCH 49/59] Disable unsupported tests on BigQuery

---
 .../unit_test_placeholder_models/unit_test_struct_model_a.sql   | 2 +-
 .../unit_test_placeholder_models/unit_test_struct_model_b.sql   | 2 +-
 .../unit_test_wrappers/unit_quick_are_queries_identical.sql     | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_a.sql b/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_a.sql
index 3d7e9999..918912e9 100644
--- a/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_a.sql
+++ b/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_a.sql
@@ -1,3 +1,3 @@
-{{ config(tags=['skip' if (target.type in ['redshift']) else 'runnable']) }}
+{{ config(tags=['skip' if (target.type in ['redshift', 'bigquery']) else 'runnable']) }}
 
 select 1 as id, 'John Doe' as col1, object_construct('street', '123 Main St', 'city', 'Anytown', 'state', 'CA') as col2
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_b.sql b/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_b.sql
index 3d7e9999..918912e9 100644
--- a/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_b.sql
+++ b/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_b.sql
@@ -1,3 +1,3 @@
-{{ config(tags=['skip' if (target.type in ['redshift']) else 'runnable']) }}
+{{ config(tags=['skip' if (target.type in ['redshift', 'bigquery']) else 'runnable']) }}
 
 select 1 as id, 'John Doe' as col1, object_construct('street', '123 Main St', 'city', 'Anytown', 'state', 'CA') as col2
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.sql b/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.sql
index c66828f5..d272dc50 100644
--- a/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.sql
+++ b/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.sql
@@ -1,4 +1,4 @@
-{{ config(tags=['skip' if (target.type in ['redshift']) else 'runnable']) }}
+{{ config(tags=['skip' if (target.type in ['redshift', 'bigquery']) else 'runnable']) }}
 
 {{ 
     audit_helper.quick_are_queries_identical(

From df7300118d66f724fb2813adf22730691ac48e3a Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Mon, 27 May 2024 14:24:28 +1200
Subject: [PATCH 50/59] Try re-enabling Postgres tests in CI

---
 .circleci/config.yml | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 4aec9aa7..03718dbc 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -38,22 +38,22 @@ jobs:
             mkdir -p ~/.dbt
             cp integration_tests/ci/sample.profiles.yml ~/.dbt/profiles.yml
 
-      # - run:
-      #     name: "Run Tests - Postgres"
-      #     environment:
-      #       POSTGRES_TEST_HOST: localhost
-      #       POSTGRES_TEST_USER: root
-      #       POSTGRES_TEST_PASS: ''
-      #       POSTGRES_TEST_PORT: 5432
-      #       POSTGRES_TEST_DBNAME: circle_test
-      #     command: |
-      #       . dbt_venv/bin/activate
-      #       cd integration_tests
-      #       dbt deps --target postgres
-      #       dbt seed --target postgres --full-refresh
-      #       dbt compile --target postgres
-      #       dbt run --target postgres --exclude tag:skip+ tag:temporary_skip+
-      #       dbt test --target postgres --exclude tag:skip+ tag:temporary_skip+
+      - run:
+          name: "Run Tests - Postgres"
+          environment:
+            POSTGRES_TEST_HOST: localhost
+            POSTGRES_TEST_USER: root
+            POSTGRES_TEST_PASS: ''
+            POSTGRES_TEST_PORT: 5432
+            POSTGRES_TEST_DBNAME: circle_test
+          command: |
+            . dbt_venv/bin/activate
+            cd integration_tests
+            dbt deps --target postgres
+            dbt seed --target postgres --full-refresh
+            dbt compile --target postgres
+            dbt run --target postgres --exclude tag:skip+ tag:temporary_skip+
+            dbt test --target postgres --exclude tag:skip+ tag:temporary_skip+
 
       - run:
           name: "Run Tests - Redshift"

From 12e307dc8f59f94a9e3e0f61013601fa5c05b848 Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Mon, 27 May 2024 14:44:41 +1200
Subject: [PATCH 51/59] Fixes for postgres

---
 .../unit_test_struct_model_a.sql                       |  2 +-
 .../unit_test_struct_model_b.sql                       |  2 +-
 .../unit_quick_are_queries_identical.sql               |  4 ++--
 macros/utils/_count_num_rows_in_status.sql             | 10 +++++++++-
 4 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_a.sql b/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_a.sql
index 918912e9..55f280a9 100644
--- a/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_a.sql
+++ b/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_a.sql
@@ -1,3 +1,3 @@
-{{ config(tags=['skip' if (target.type in ['redshift', 'bigquery']) else 'runnable']) }}
+{{ config(tags=['skip' if (target.type in ['redshift', 'bigquery', 'postgres']) else 'runnable']) }}
 
 select 1 as id, 'John Doe' as col1, object_construct('street', '123 Main St', 'city', 'Anytown', 'state', 'CA') as col2
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_b.sql b/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_b.sql
index 918912e9..55f280a9 100644
--- a/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_b.sql
+++ b/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_b.sql
@@ -1,3 +1,3 @@
-{{ config(tags=['skip' if (target.type in ['redshift', 'bigquery']) else 'runnable']) }}
+{{ config(tags=['skip' if (target.type in ['redshift', 'bigquery', 'postgres']) else 'runnable']) }}
 
 select 1 as id, 'John Doe' as col1, object_construct('street', '123 Main St', 'city', 'Anytown', 'state', 'CA') as col2
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.sql b/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.sql
index d272dc50..1fc11811 100644
--- a/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.sql
+++ b/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.sql
@@ -1,4 +1,4 @@
-{{ config(tags=['skip' if (target.type in ['redshift', 'bigquery']) else 'runnable']) }}
+{{ config(tags=['skip' if (target.type in ['redshift', 'bigquery', 'postgres']) else 'runnable']) }}
 
 {{ 
     audit_helper.quick_are_queries_identical(
@@ -7,4 +7,4 @@
         columns=var('quick_are_queries_identical_cols'),
         event_time=var('quick_are_queries_identical_event_time')
     ) 
-}}
\ No newline at end of file
+}}  
\ No newline at end of file
diff --git a/macros/utils/_count_num_rows_in_status.sql b/macros/utils/_count_num_rows_in_status.sql
index c7d14e08..1f557316 100644
--- a/macros/utils/_count_num_rows_in_status.sql
+++ b/macros/utils/_count_num_rows_in_status.sql
@@ -7,11 +7,19 @@
 {% endmacro %}
 
 {%- macro bigquery___count_num_rows_in_status() -%}
-    count(distinct {{ dbt.concat(["dbt_audit_surrogate_key", "dbt_audit_pk_row_num"]) }}) over (partition by dbt_audit_row_status)
+    {{ _count_num_rows_in_status_concat() }}
+{% endmacro %}
+
+{%- macro postgres___count_num_rows_in_status() -%}
+    {{ _count_num_rows_in_status_concat() }}
 {% endmacro %}
 
 {%- macro redshift___count_num_rows_in_status() -%}
     {#- Redshift doesn't support count(distinct) inside of window functions :( -#}
     {#- modified rows are the only ones that return two rows per PK/row num pairing, so just need to be halved -#}
     (count(*) over (partition by dbt_audit_row_status)) / case when dbt_audit_row_status = 'modified' then 2 else 1 end
+{% endmacro %}
+
+{% macro _count_num_rows_in_status_concat() %}
+    count(distinct {{ dbt.concat(["dbt_audit_surrogate_key", "dbt_audit_pk_row_num"]) }}) over (partition by dbt_audit_row_status)
 {% endmacro %}
\ No newline at end of file

From f2171681ce55f85e6fc1dc6189016ade020d55fe Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Mon, 27 May 2024 14:48:55 +1200
Subject: [PATCH 52/59] namespace macros

---
 macros/utils/_count_num_rows_in_status.sql | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/macros/utils/_count_num_rows_in_status.sql b/macros/utils/_count_num_rows_in_status.sql
index 1f557316..574832c6 100644
--- a/macros/utils/_count_num_rows_in_status.sql
+++ b/macros/utils/_count_num_rows_in_status.sql
@@ -7,11 +7,11 @@
 {% endmacro %}
 
 {%- macro bigquery___count_num_rows_in_status() -%}
-    {{ _count_num_rows_in_status_concat() }}
+    {{ audit_helper._count_num_rows_in_status_concat() }}
 {% endmacro %}
 
 {%- macro postgres___count_num_rows_in_status() -%}
-    {{ _count_num_rows_in_status_concat() }}
+    {{ audit_helper._count_num_rows_in_status_concat() }}
 {% endmacro %}
 
 {%- macro redshift___count_num_rows_in_status() -%}

From 88f2be84545576cd6a1360f1f76d5fc79da31290 Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Mon, 27 May 2024 14:52:49 +1200
Subject: [PATCH 53/59] It's a postgres problem, not a redshift problem

---
 macros/utils/_count_num_rows_in_status.sql | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/macros/utils/_count_num_rows_in_status.sql b/macros/utils/_count_num_rows_in_status.sql
index 574832c6..c3781815 100644
--- a/macros/utils/_count_num_rows_in_status.sql
+++ b/macros/utils/_count_num_rows_in_status.sql
@@ -7,19 +7,11 @@
 {% endmacro %}
 
 {%- macro bigquery___count_num_rows_in_status() -%}
-    {{ audit_helper._count_num_rows_in_status_concat() }}
+    count(distinct {{ dbt.concat(["dbt_audit_surrogate_key", "dbt_audit_pk_row_num"]) }}) over (partition by dbt_audit_row_status)
 {% endmacro %}
 
 {%- macro postgres___count_num_rows_in_status() -%}
-    {{ audit_helper._count_num_rows_in_status_concat() }}
-{% endmacro %}
-
-{%- macro redshift___count_num_rows_in_status() -%}
-    {#- Redshift doesn't support count(distinct) inside of window functions :( -#}
+    {#- Postgres/Redshift doesn't support count(distinct) inside of window functions :( -#}
     {#- modified rows are the only ones that return two rows per PK/row num pairing, so just need to be halved -#}
     (count(*) over (partition by dbt_audit_row_status)) / case when dbt_audit_row_status = 'modified' then 2 else 1 end
 {% endmacro %}
-
-{% macro _count_num_rows_in_status_concat() %}
-    count(distinct {{ dbt.concat(["dbt_audit_surrogate_key", "dbt_audit_pk_row_num"]) }}) over (partition by dbt_audit_row_status)
-{% endmacro %}
\ No newline at end of file

From ad6e9d84d0fe633f15cb257af2a2bf3a6f8310cf Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Mon, 27 May 2024 15:00:20 +1200
Subject: [PATCH 54/59] Handle postgres 63 char limit

---
 .../models/unit_test_wrappers/unit_reworked_compare.yml         | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
index 61c7ee37..fd2c0d02 100644
--- a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
+++ b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
@@ -167,7 +167,7 @@ unit_tests:
     config:
       tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-redshift/issues/821
 
-  - name: reworked_compare_identical_tables_multiple_null_pk_with_duplicate_rows
+  - name: reworked_compare_identical_tables_multi_null_pk_dupe_rows
     description: All rows with a null ID are identical. They should be returned as individual rows instead of being combined
     model: unit_reworked_compare
     

From 669bb69858c3aeff148978030c50bcecb11f7329 Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Mon, 27 May 2024 15:05:09 +1200
Subject: [PATCH 55/59] Add databricks

---
 .circleci/config.yml                     | 14 +++++++++++++-
 integration_tests/ci/sample.profiles.yml |  8 ++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 03718dbc..cde7c9e7 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -33,7 +33,7 @@ jobs:
             . dbt_venv/bin/activate
 
             python -m pip install --upgrade pip setuptools
-            python -m pip install --pre dbt-core dbt-postgres dbt-redshift dbt-snowflake dbt-bigquery
+            python -m pip install --pre dbt-core dbt-postgres dbt-redshift dbt-snowflake dbt-bigquery dbt-databricks
 
             mkdir -p ~/.dbt
             cp integration_tests/ci/sample.profiles.yml ~/.dbt/profiles.yml
@@ -94,6 +94,17 @@ jobs:
             dbt run --target bigquery --full-refresh --exclude tag:skip+ tag:temporary_skip+
             dbt test --target bigquery --exclude tag:skip+ tag:temporary_skip+
 
+      - run:
+          name: "Run Tests - Databricks"
+          command: |
+            . dbt_venv/bin/activate
+            echo `pwd`
+            cd integration_tests
+            dbt deps --target databricks
+            dbt seed --target databricks --full-refresh
+            dbt compile --target databricks
+            dbt run --target databricks --exclude tag:skip+ tag:temporary_skip+
+            dbt test --target databricks --exclude tag:skip+ tag:temporary_skip+
 
       - save_cache:
           key: deps1-{{ .Branch }}
@@ -115,3 +126,4 @@ workflows:
             - profile-redshift
             - profile-snowflake
             - profile-bigquery
+            - profile-databricks
diff --git a/integration_tests/ci/sample.profiles.yml b/integration_tests/ci/sample.profiles.yml
index 66eac960..ea8effc1 100644
--- a/integration_tests/ci/sample.profiles.yml
+++ b/integration_tests/ci/sample.profiles.yml
@@ -43,3 +43,11 @@ integration_tests:
       warehouse: "{{ env_var('SNOWFLAKE_TEST_WAREHOUSE') }}"
       schema: audit_helper_integration_tests_snowflake
       threads: 8
+
+    databricks:
+      type: databricks
+      schema: audit_helper_integration_tests_databricks  # same audit_helper_integration_tests_<target> pattern as the snowflake target
+      host: "{{ env_var('DATABRICKS_TEST_HOST') }}"
+      http_path: "{{ env_var('DATABRICKS_TEST_HTTP_PATH') }}"
+      token: "{{ env_var('DATABRICKS_TEST_ACCESS_TOKEN') }}"
+      threads: 10

From 0c192a949efa9e88648c14967c1a99827377f242 Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Mon, 27 May 2024 15:15:14 +1200
Subject: [PATCH 56/59] Rename tests to data_tests

---
 .../models/data_tests/schema.yml              | 38 +++++++++----------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/integration_tests/models/data_tests/schema.yml b/integration_tests/models/data_tests/schema.yml
index 4bea9838..fbe74ff7 100644
--- a/integration_tests/models/data_tests/schema.yml
+++ b/integration_tests/models/data_tests/schema.yml
@@ -2,96 +2,96 @@ version: 2
 
 models:
   - name: compare_queries
-    tests:
+    data_tests:
       - dbt_utils.equality:
           compare_model: ref('expected_results__compare_relations_without_exclude')
 
   - name: compare_queries_concat_pk_without_summary
-    tests:
+    data_tests:
       - dbt_utils.equality:
           compare_model: ref('expected_results__compare_without_summary')
           
   - name: compare_queries_with_summary
-    tests:
+    data_tests:
       - dbt_utils.equality:
           compare_model: ref('expected_results__compare_with_summary')
 
   - name: compare_queries_without_summary
-    tests:
+    data_tests:
       - dbt_utils.equality:
           compare_model: ref('expected_results__compare_without_summary')
           
   - name: compare_relations_with_summary
-    tests:
+    data_tests:
       - dbt_utils.equality:
           compare_model: ref('expected_results__compare_with_summary')
 
   - name: compare_relations_without_summary
-    tests:
+    data_tests:
       - dbt_utils.equality:
           compare_model: ref('expected_results__compare_without_summary')
 
   - name: compare_relations_with_exclude
-    tests:
+    data_tests:
       - dbt_utils.equality:
           compare_model: ref('expected_results__compare_relations_with_exclude')
 
   - name: compare_relations_without_exclude
-    tests:
+    data_tests:
       - dbt_utils.equality:
           compare_model: ref('expected_results__compare_relations_without_exclude')
 
   - name: compare_all_columns_with_summary
-    tests:
+    data_tests:
       - dbt_utils.equality:
           compare_model: ref('expected_results__compare_all_columns_with_summary')
 
   - name: compare_all_columns_without_summary
-    tests:
+    data_tests:
       - dbt_utils.equality:
           compare_model: ref('expected_results__compare_all_columns_without_summary')
 
   - name: compare_all_columns_concat_pk_with_summary
-    tests:
+    data_tests:
       - dbt_utils.equality:
           compare_model: ref('expected_results__compare_all_columns_concat_pk_with_summary')
 
   - name: compare_all_columns_concat_pk_without_summary
-    tests:
+    data_tests:
       - dbt_utils.equality:
           compare_model: ref('expected_results__compare_all_columns_concat_pk_without_summary')
 
   - name: compare_all_columns_with_summary_and_exclude
-    tests:
+    data_tests:
       - dbt_utils.equality:
           compare_model: ref('expected_results__compare_all_columns_with_summary_and_exclude')
   
   - name: compare_all_columns_where_clause
-    tests:
+    data_tests:
       - dbt_utils.equality:
           compare_model: ref('expected_results__compare_all_columns_where_clause')
 
   - name: compare_relation_columns
-    tests:
+    data_tests:
       - dbt_utils.equality:
           compare_model: ref('expected_results__compare_relation_columns')
 
   - name: compare_relations_concat_pk_without_summary
-    tests:
+    data_tests:
       - dbt_utils.equality:
           compare_model: ref('expected_results__compare_without_summary')
 
   - name: compare_which_columns_differ
-    tests:
+    data_tests:
       - dbt_utils.equality:
           compare_model: ref('expected_results__compare_which_columns_differ')
 
   - name: compare_which_columns_differ_exclude_cols
-    tests:
+    data_tests:
       - dbt_utils.equality:
           compare_model: ref('expected_results__compare_which_columns_differ_exclude_cols')
           
   - name: compare_row_counts
-    tests:
+    data_tests:
       - dbt_utils.equality:
           compare_model: ref('expected_results__compare_row_counts')

From 317e4d706530879390d02a2d2b911e0f10e7363b Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Mon, 27 May 2024 15:44:38 +1200
Subject: [PATCH 57/59] Found a better workaround for missing count distinct
 window

---
 macros/utils/_count_num_rows_in_status.sql | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/macros/utils/_count_num_rows_in_status.sql b/macros/utils/_count_num_rows_in_status.sql
index c3781815..82730a2c 100644
--- a/macros/utils/_count_num_rows_in_status.sql
+++ b/macros/utils/_count_num_rows_in_status.sql
@@ -11,7 +11,18 @@
 {% endmacro %}
 
 {%- macro postgres___count_num_rows_in_status() -%}
-    {#- Postgres/Redshift doesn't support count(distinct) inside of window functions :( -#}
-    {#- modified rows are the only ones that return two rows per PK/row num pairing, so just need to be halved -#}
-    (count(*) over (partition by dbt_audit_row_status)) / case when dbt_audit_row_status = 'modified' then 2 else 1 end
+    _count_num_rows_in_status_without_distinct_window_func()
 {% endmacro %}
+
+{%- macro databricks___count_num_rows_in_status() -%}
+    _count_num_rows_in_status_without_distinct_window_func()
+{% endmacro %}
+
+{% macro _count_num_rows_in_status_without_distinct_window_func() %}
+    {#- Counts distinct (dbt_audit_surrogate_key, dbt_audit_pk_row_num) pairs per dbt_audit_row_status on platforms without count(distinct) in window functions -#}
+    {#- Ascending dense_rank + descending dense_rank - 1 equals the number of distinct values in the partition; relies on no nulls (original note: we've already handled that) #}
+    {# https://stackoverflow.com/a/22347502 -#}
+    dense_rank() over (partition by dbt_audit_row_status order by dbt_audit_surrogate_key, dbt_audit_pk_row_num)
+    + dense_rank() over (partition by dbt_audit_row_status order by dbt_audit_surrogate_key desc, dbt_audit_pk_row_num desc)
+    - 1
+{% endmacro %}
\ No newline at end of file

From 0d1a1de45b151bd5d2c3472430e0ec151bacf36d Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Mon, 27 May 2024 15:47:51 +1200
Subject: [PATCH 58/59] actually call the macro

---
 macros/utils/_count_num_rows_in_status.sql | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/macros/utils/_count_num_rows_in_status.sql b/macros/utils/_count_num_rows_in_status.sql
index 82730a2c..fa81c591 100644
--- a/macros/utils/_count_num_rows_in_status.sql
+++ b/macros/utils/_count_num_rows_in_status.sql
@@ -11,11 +11,11 @@
 {% endmacro %}
 
 {%- macro postgres___count_num_rows_in_status() -%}
-    _count_num_rows_in_status_without_distinct_window_func()
+    {{ audit_helper._count_num_rows_in_status_without_distinct_window_func() }}
 {% endmacro %}
 
 {%- macro databricks___count_num_rows_in_status() -%}
-    _count_num_rows_in_status_without_distinct_window_func()
+    {{ audit_helper._count_num_rows_in_status_without_distinct_window_func() }}
 {% endmacro %}
 
 {% macro _count_num_rows_in_status_without_distinct_window_func() %}

From 559f8d51895a7af6e84f9c9002c469bda5f0a61c Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Mon, 27 May 2024 15:59:31 +1200
Subject: [PATCH 59/59] disable syntax-failing tests on dbx

---
 .../unit_test_placeholder_models/unit_test_struct_model_a.sql   | 2 +-
 .../unit_test_placeholder_models/unit_test_struct_model_b.sql   | 2 +-
 .../unit_test_wrappers/unit_quick_are_queries_identical.sql     | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_a.sql b/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_a.sql
index 55f280a9..24d584e8 100644
--- a/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_a.sql
+++ b/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_a.sql
@@ -1,3 +1,3 @@
-{{ config(tags=['skip' if (target.type in ['redshift', 'bigquery', 'postgres']) else 'runnable']) }}
+{{ config(tags=['skip' if (target.type in ['redshift', 'bigquery', 'postgres', 'databricks']) else 'runnable']) }}
 
 select 1 as id, 'John Doe' as col1, object_construct('street', '123 Main St', 'city', 'Anytown', 'state', 'CA') as col2
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_b.sql b/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_b.sql
index 55f280a9..24d584e8 100644
--- a/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_b.sql
+++ b/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_b.sql
@@ -1,3 +1,3 @@
-{{ config(tags=['skip' if (target.type in ['redshift', 'bigquery', 'postgres']) else 'runnable']) }}
+{{ config(tags=['skip' if (target.type in ['redshift', 'bigquery', 'postgres', 'databricks']) else 'runnable']) }}
 
 select 1 as id, 'John Doe' as col1, object_construct('street', '123 Main St', 'city', 'Anytown', 'state', 'CA') as col2
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.sql b/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.sql
index 1fc11811..e969b1e2 100644
--- a/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.sql
+++ b/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.sql
@@ -1,4 +1,4 @@
-{{ config(tags=['skip' if (target.type in ['redshift', 'bigquery', 'postgres']) else 'runnable']) }}
+{{ config(tags=['skip' if (target.type in ['redshift', 'bigquery', 'postgres', 'databricks']) else 'runnable']) }}
 
 {{ 
     audit_helper.quick_are_queries_identical(