From 9da3c5103349937535662113988d87b6bbedbee5 Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Mon, 27 May 2024 16:17:41 +1200
Subject: [PATCH 01/13] Add new macros for diff calculation, and unit tests
 (#99)

* Add macro for new hash-based comparison strategy

* split out SF-focused version of macro

* Fix change to complex object

* Fix overuse of star

* switch from compare rels to compare queries

* provide wrapping parens

* switch to array of columns for PK

* split unit tests into own files, change unit tests to array pk

* tidy up get_comp_bounds

* fix arg rename

* add quick_are_queries_identical and unit tests

* Move data tests into own directory

* Add test for multiple PKs

* fix incorrect unit test configs

* make data types for id and id_2 big enough nums

* Mock event_time response

* fix hardcoded value in quick_are_qs_identical

* Add unit tests for null handling (still broken)

* Rename columns to be more unique

* Steal surrogate key macro from utils

* Use generated surrogate key across the board in place of PK

* rm my profile reference

* Update quick_are_queries_identical.sql

* Add diagram explaining comparison bounds

* Add comments explaining warehouse-specific optimisations

* cross-db support

* subq

* no postgres or redshift for a sec

* add default var values for compare wrappers

* avoid lateral alias reference for BQ

* BQ doesn't support count(arg1, arg2)

* re-enable redshift

* Alias subq for redshift

* remove extra comma

* add row status of nonunique_pk

* remove redundant test and wrapper model

* Create json-y tests for snowflake

* Add workaround for redshift to support count num rows in status

* skip incompatible tests

* Fix redshift lack of bool_or support in window funcs

* add skip exclusions for everything else

* fix incorrect skip tag application

* Move user configs to project.yml from profiles

* Temporarily disable unpassable redshift tests

* add temp skip to circle's config.yml

* forgot tag: method

* Temporarily skip reworked_compare_all_statuses_different_column_set

* Skip another test redshift

* disable unsupported tests BQ

* postgres too?

* Fixes for postgres

* namespace macros

* It's a postgres problem, not a redshift problem

* Handle postgres 63 char limit

* Add databricks

* Rename tests to data_tests

* Found a better workaround for missing count distinct window

* actually call the macro

* disable syntax-failing tests on dbx
---
 .circleci/config.yml                          |  30 ++-
 .gitignore                                    |   3 +-
 .vscode/settings.json                         |  21 ++
 integration_tests/ci/sample.profiles.yml      |  18 +-
 integration_tests/dbt_project.yml             |  12 +
 ...are_all_columns_concat_pk_with_summary.sql |   0
 ..._all_columns_concat_pk_without_summary.sql |   0
 .../compare_all_columns_where_clause.sql      |   0
 .../compare_all_columns_with_summary.sql      |   0
 ...e_all_columns_with_summary_and_exclude.sql |   0
 .../compare_all_columns_without_summary.sql   |   0
 .../{ => data_tests}/compare_queries.sql      |   0
 ...pare_queries_concat_pk_without_summary.sql |   0
 .../compare_queries_with_summary.sql          |   0
 .../compare_queries_without_summary.sql       |   0
 .../compare_relation_columns.sql              |   0
 ...re_relations_concat_pk_without_summary.sql |   0
 .../compare_relations_with_exclude.sql        |   0
 .../compare_relations_with_summary.sql        |   0
 .../compare_relations_without_exclude.sql     |   0
 .../compare_relations_without_summary.sql     |   0
 .../{ => data_tests}/compare_row_counts.sql   |   0
 .../compare_which_columns_differ.sql          |   0
 ...pare_which_columns_differ_exclude_cols.sql |   0
 .../models/{ => data_tests}/schema.yml        |  38 +--
 .../unit_test_model_a.sql                     |   1 +
 .../unit_test_model_b.sql                     |   1 +
 .../unit_test_struct_model_a.sql              |   3 +
 .../unit_test_struct_model_b.sql              |   3 +
 .../unit_compare_queries.sql                  |   8 +
 .../unit_compare_queries.yml                  |  47 ++++
 .../unit_quick_are_queries_identical.sql      |  10 +
 .../unit_quick_are_queries_identical.yml      |  97 ++++++++
 .../unit_reworked_compare.sql                 |   9 +
 .../unit_reworked_compare.yml                 | 229 ++++++++++++++++++
 .../unit_reworked_compare_struct.sql          |   9 +
 .../unit_reworked_compare_struct.yml          | 130 ++++++++++
 macros/quick_are_queries_identical.sql        |  51 ++++
 macros/reworked_compare.sql                   |  63 +++++
 macros/utils/_classify_audit_row_status.sql   |  28 +++
 macros/utils/_count_num_rows_in_status.sql    |  28 +++
 macros/utils/_generate_set_results.sql        | 123 ++++++++++
 macros/utils/generate_null_safe_sk.sql        |  25 ++
 macros/utils/get_comparison_bounds.sql        |  42 ++++
 package-lock.yml                              |   4 +
 45 files changed, 997 insertions(+), 36 deletions(-)
 create mode 100644 .vscode/settings.json
 rename integration_tests/models/{ => data_tests}/compare_all_columns_concat_pk_with_summary.sql (100%)
 rename integration_tests/models/{ => data_tests}/compare_all_columns_concat_pk_without_summary.sql (100%)
 rename integration_tests/models/{ => data_tests}/compare_all_columns_where_clause.sql (100%)
 rename integration_tests/models/{ => data_tests}/compare_all_columns_with_summary.sql (100%)
 rename integration_tests/models/{ => data_tests}/compare_all_columns_with_summary_and_exclude.sql (100%)
 rename integration_tests/models/{ => data_tests}/compare_all_columns_without_summary.sql (100%)
 rename integration_tests/models/{ => data_tests}/compare_queries.sql (100%)
 rename integration_tests/models/{ => data_tests}/compare_queries_concat_pk_without_summary.sql (100%)
 rename integration_tests/models/{ => data_tests}/compare_queries_with_summary.sql (100%)
 rename integration_tests/models/{ => data_tests}/compare_queries_without_summary.sql (100%)
 rename integration_tests/models/{ => data_tests}/compare_relation_columns.sql (100%)
 rename integration_tests/models/{ => data_tests}/compare_relations_concat_pk_without_summary.sql (100%)
 rename integration_tests/models/{ => data_tests}/compare_relations_with_exclude.sql (100%)
 rename integration_tests/models/{ => data_tests}/compare_relations_with_summary.sql (100%)
 rename integration_tests/models/{ => data_tests}/compare_relations_without_exclude.sql (100%)
 rename integration_tests/models/{ => data_tests}/compare_relations_without_summary.sql (100%)
 rename integration_tests/models/{ => data_tests}/compare_row_counts.sql (100%)
 rename integration_tests/models/{ => data_tests}/compare_which_columns_differ.sql (100%)
 rename integration_tests/models/{ => data_tests}/compare_which_columns_differ_exclude_cols.sql (100%)
 rename integration_tests/models/{ => data_tests}/schema.yml (90%)
 create mode 100644 integration_tests/models/unit_test_placeholder_models/unit_test_model_a.sql
 create mode 100644 integration_tests/models/unit_test_placeholder_models/unit_test_model_b.sql
 create mode 100644 integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_a.sql
 create mode 100644 integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_b.sql
 create mode 100644 integration_tests/models/unit_test_wrappers/unit_compare_queries.sql
 create mode 100644 integration_tests/models/unit_test_wrappers/unit_compare_queries.yml
 create mode 100644 integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.sql
 create mode 100644 integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.yml
 create mode 100644 integration_tests/models/unit_test_wrappers/unit_reworked_compare.sql
 create mode 100644 integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
 create mode 100644 integration_tests/models/unit_test_wrappers/unit_reworked_compare_struct.sql
 create mode 100644 integration_tests/models/unit_test_wrappers/unit_reworked_compare_struct.yml
 create mode 100644 macros/quick_are_queries_identical.sql
 create mode 100644 macros/reworked_compare.sql
 create mode 100644 macros/utils/_classify_audit_row_status.sql
 create mode 100644 macros/utils/_count_num_rows_in_status.sql
 create mode 100644 macros/utils/_generate_set_results.sql
 create mode 100644 macros/utils/generate_null_safe_sk.sql
 create mode 100644 macros/utils/get_comparison_bounds.sql
 create mode 100644 package-lock.yml

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 1701f721..cde7c9e7 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -33,7 +33,7 @@ jobs:
             . dbt_venv/bin/activate
 
             python -m pip install --upgrade pip setuptools
-            python -m pip install --pre dbt-core dbt-postgres dbt-redshift dbt-snowflake dbt-bigquery
+            python -m pip install --pre dbt-core dbt-postgres dbt-redshift dbt-snowflake dbt-bigquery dbt-databricks
 
             mkdir -p ~/.dbt
             cp integration_tests/ci/sample.profiles.yml ~/.dbt/profiles.yml
@@ -52,8 +52,8 @@ jobs:
             dbt deps --target postgres
             dbt seed --target postgres --full-refresh
             dbt compile --target postgres
-            dbt run --target postgres
-            dbt test --target postgres
+            dbt run --target postgres --exclude tag:skip+ tag:temporary_skip+
+            dbt test --target postgres --exclude tag:skip+ tag:temporary_skip+
 
       - run:
           name: "Run Tests - Redshift"
@@ -64,8 +64,8 @@ jobs:
             dbt deps --target redshift
             dbt seed --target redshift --full-refresh
             dbt compile --target redshift
-            dbt run --target redshift
-            dbt test --target redshift
+            dbt run --target redshift --exclude tag:skip+ tag:temporary_skip+
+            dbt test --target redshift --exclude tag:skip+ tag:temporary_skip+
 
       - run:
           name: "Run Tests - Snowflake"
@@ -76,8 +76,8 @@ jobs:
             dbt deps --target snowflake
             dbt seed --target snowflake --full-refresh
             dbt compile --target snowflake
-            dbt run --target snowflake
-            dbt test --target snowflake
+            dbt run --target snowflake --exclude tag:skip+ tag:temporary_skip+
+            dbt test --target snowflake --exclude tag:skip+ tag:temporary_skip+
 
       - run:
           name: "Run Tests - BigQuery"
@@ -91,9 +91,20 @@ jobs:
             dbt deps --target bigquery
             dbt seed --target bigquery --full-refresh
             dbt compile --target bigquery
-            dbt run --target bigquery --full-refresh
-            dbt test --target bigquery
+            dbt run --target bigquery --full-refresh --exclude tag:skip+ tag:temporary_skip+
+            dbt test --target bigquery --exclude tag:skip+ tag:temporary_skip+
 
+      - run:
+          name: "Run Tests - Databricks"
+          command: |
+            . dbt_venv/bin/activate
+            echo `pwd`
+            cd integration_tests
+            dbt deps --target databricks
+            dbt seed --target databricks --full-refresh
+            dbt compile --target databricks
+            dbt run --target databricks --exclude tag:skip+ tag:temporary_skip+
+            dbt test --target databricks --exclude tag:skip+ tag:temporary_skip+
 
       - save_cache:
           key: deps1-{{ .Branch }}
@@ -115,3 +126,4 @@ workflows:
             - profile-redshift
             - profile-snowflake
             - profile-bigquery
+            - profile-databricks
diff --git a/.gitignore b/.gitignore
index a33e3f41..0606e5c3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 target/
 dbt_packages/
 logs/
-logfile
\ No newline at end of file
+logfile
+.DS_Store
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 00000000..437dcba6
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,21 @@
+{
+    "yaml.schemas": {
+        "https://raw.githubusercontent.com/dbt-labs/dbt-jsonschema/main/schemas/latest/dbt_yml_files-latest.json": [
+            "/**/*.yml",
+            "!profiles.yml",
+            "!dbt_project.yml",
+            "!packages.yml",
+            "!selectors.yml",
+            "!profile_template.yml"
+        ],
+        "https://raw.githubusercontent.com/dbt-labs/dbt-jsonschema/main/schemas/latest/dbt_project-latest.json": [
+            "dbt_project.yml"
+        ],
+        "https://raw.githubusercontent.com/dbt-labs/dbt-jsonschema/main/schemas/latest/selectors-latest.json": [
+            "selectors.yml"
+        ],
+        "https://raw.githubusercontent.com/dbt-labs/dbt-jsonschema/main/schemas/latest/packages-latest.json": [
+            "packages.yml"
+        ]
+    }
+}
\ No newline at end of file
diff --git a/integration_tests/ci/sample.profiles.yml b/integration_tests/ci/sample.profiles.yml
index 843d659e..ea8effc1 100644
--- a/integration_tests/ci/sample.profiles.yml
+++ b/integration_tests/ci/sample.profiles.yml
@@ -2,10 +2,6 @@
 # HEY! This file is used in the dbt-audit-helper integrations tests with CircleCI.
 # You should __NEVER__ check credentials into version control. Thanks for reading :)
 
-config:
-    send_anonymous_usage_stats: False
-    use_colors: True
-
 integration_tests:
   target: postgres
   outputs:
@@ -27,7 +23,7 @@ integration_tests:
       dbname: "{{ env_var('REDSHIFT_TEST_DBNAME') }}"
       port: "{{ env_var('REDSHIFT_TEST_PORT') | as_number }}"
       schema: audit_helper_integration_tests_redshift
-      threads: 1
+      threads: 8
 
     bigquery:
       type: bigquery
@@ -35,7 +31,7 @@ integration_tests:
       keyfile: "{{ env_var('BIGQUERY_SERVICE_KEY_PATH') }}"
       project: "{{ env_var('BIGQUERY_TEST_DATABASE') }}"
       schema: audit_helper_integration_tests_bigquery
-      threads: 1
+      threads: 8
 
     snowflake:
       type: snowflake
@@ -46,4 +42,12 @@ integration_tests:
       database: "{{ env_var('SNOWFLAKE_TEST_DATABASE') }}"
       warehouse: "{{ env_var('SNOWFLAKE_TEST_WAREHOUSE') }}"
       schema: audit_helper_integration_tests_snowflake
-      threads: 1
+      threads: 8
+
+    databricks:
+      type: databricks
+      schema: audit_helper_integration_tests_databricks
+      host: "{{ env_var('DATABRICKS_TEST_HOST') }}"
+      http_path: "{{ env_var('DATABRICKS_TEST_HTTP_PATH') }}"
+      token: "{{ env_var('DATABRICKS_TEST_ACCESS_TOKEN') }}"
+      threads: 10
diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml
index 07120e4c..ef906729 100644
--- a/integration_tests/dbt_project.yml
+++ b/integration_tests/dbt_project.yml
@@ -17,3 +17,15 @@ clean-targets:         # directories to be removed by `dbt clean`
 
 seeds:
   +quote_columns: false
+
+vars:
+  compare_queries_summarize: true
+  reworked_compare__primary_key_columns: ['col1']
+  reworked_compare__columns: ['col1']
+  reworked_compare__event_time:
+  quick_are_queries_identical_cols: ['col1']
+  quick_are_queries_identical_event_time:
+
+flags:
+  send_anonymous_usage_stats: False
+  use_colors: True
\ No newline at end of file
diff --git a/integration_tests/models/compare_all_columns_concat_pk_with_summary.sql b/integration_tests/models/data_tests/compare_all_columns_concat_pk_with_summary.sql
similarity index 100%
rename from integration_tests/models/compare_all_columns_concat_pk_with_summary.sql
rename to integration_tests/models/data_tests/compare_all_columns_concat_pk_with_summary.sql
diff --git a/integration_tests/models/compare_all_columns_concat_pk_without_summary.sql b/integration_tests/models/data_tests/compare_all_columns_concat_pk_without_summary.sql
similarity index 100%
rename from integration_tests/models/compare_all_columns_concat_pk_without_summary.sql
rename to integration_tests/models/data_tests/compare_all_columns_concat_pk_without_summary.sql
diff --git a/integration_tests/models/compare_all_columns_where_clause.sql b/integration_tests/models/data_tests/compare_all_columns_where_clause.sql
similarity index 100%
rename from integration_tests/models/compare_all_columns_where_clause.sql
rename to integration_tests/models/data_tests/compare_all_columns_where_clause.sql
diff --git a/integration_tests/models/compare_all_columns_with_summary.sql b/integration_tests/models/data_tests/compare_all_columns_with_summary.sql
similarity index 100%
rename from integration_tests/models/compare_all_columns_with_summary.sql
rename to integration_tests/models/data_tests/compare_all_columns_with_summary.sql
diff --git a/integration_tests/models/compare_all_columns_with_summary_and_exclude.sql b/integration_tests/models/data_tests/compare_all_columns_with_summary_and_exclude.sql
similarity index 100%
rename from integration_tests/models/compare_all_columns_with_summary_and_exclude.sql
rename to integration_tests/models/data_tests/compare_all_columns_with_summary_and_exclude.sql
diff --git a/integration_tests/models/compare_all_columns_without_summary.sql b/integration_tests/models/data_tests/compare_all_columns_without_summary.sql
similarity index 100%
rename from integration_tests/models/compare_all_columns_without_summary.sql
rename to integration_tests/models/data_tests/compare_all_columns_without_summary.sql
diff --git a/integration_tests/models/compare_queries.sql b/integration_tests/models/data_tests/compare_queries.sql
similarity index 100%
rename from integration_tests/models/compare_queries.sql
rename to integration_tests/models/data_tests/compare_queries.sql
diff --git a/integration_tests/models/compare_queries_concat_pk_without_summary.sql b/integration_tests/models/data_tests/compare_queries_concat_pk_without_summary.sql
similarity index 100%
rename from integration_tests/models/compare_queries_concat_pk_without_summary.sql
rename to integration_tests/models/data_tests/compare_queries_concat_pk_without_summary.sql
diff --git a/integration_tests/models/compare_queries_with_summary.sql b/integration_tests/models/data_tests/compare_queries_with_summary.sql
similarity index 100%
rename from integration_tests/models/compare_queries_with_summary.sql
rename to integration_tests/models/data_tests/compare_queries_with_summary.sql
diff --git a/integration_tests/models/compare_queries_without_summary.sql b/integration_tests/models/data_tests/compare_queries_without_summary.sql
similarity index 100%
rename from integration_tests/models/compare_queries_without_summary.sql
rename to integration_tests/models/data_tests/compare_queries_without_summary.sql
diff --git a/integration_tests/models/compare_relation_columns.sql b/integration_tests/models/data_tests/compare_relation_columns.sql
similarity index 100%
rename from integration_tests/models/compare_relation_columns.sql
rename to integration_tests/models/data_tests/compare_relation_columns.sql
diff --git a/integration_tests/models/compare_relations_concat_pk_without_summary.sql b/integration_tests/models/data_tests/compare_relations_concat_pk_without_summary.sql
similarity index 100%
rename from integration_tests/models/compare_relations_concat_pk_without_summary.sql
rename to integration_tests/models/data_tests/compare_relations_concat_pk_without_summary.sql
diff --git a/integration_tests/models/compare_relations_with_exclude.sql b/integration_tests/models/data_tests/compare_relations_with_exclude.sql
similarity index 100%
rename from integration_tests/models/compare_relations_with_exclude.sql
rename to integration_tests/models/data_tests/compare_relations_with_exclude.sql
diff --git a/integration_tests/models/compare_relations_with_summary.sql b/integration_tests/models/data_tests/compare_relations_with_summary.sql
similarity index 100%
rename from integration_tests/models/compare_relations_with_summary.sql
rename to integration_tests/models/data_tests/compare_relations_with_summary.sql
diff --git a/integration_tests/models/compare_relations_without_exclude.sql b/integration_tests/models/data_tests/compare_relations_without_exclude.sql
similarity index 100%
rename from integration_tests/models/compare_relations_without_exclude.sql
rename to integration_tests/models/data_tests/compare_relations_without_exclude.sql
diff --git a/integration_tests/models/compare_relations_without_summary.sql b/integration_tests/models/data_tests/compare_relations_without_summary.sql
similarity index 100%
rename from integration_tests/models/compare_relations_without_summary.sql
rename to integration_tests/models/data_tests/compare_relations_without_summary.sql
diff --git a/integration_tests/models/compare_row_counts.sql b/integration_tests/models/data_tests/compare_row_counts.sql
similarity index 100%
rename from integration_tests/models/compare_row_counts.sql
rename to integration_tests/models/data_tests/compare_row_counts.sql
diff --git a/integration_tests/models/compare_which_columns_differ.sql b/integration_tests/models/data_tests/compare_which_columns_differ.sql
similarity index 100%
rename from integration_tests/models/compare_which_columns_differ.sql
rename to integration_tests/models/data_tests/compare_which_columns_differ.sql
diff --git a/integration_tests/models/compare_which_columns_differ_exclude_cols.sql b/integration_tests/models/data_tests/compare_which_columns_differ_exclude_cols.sql
similarity index 100%
rename from integration_tests/models/compare_which_columns_differ_exclude_cols.sql
rename to integration_tests/models/data_tests/compare_which_columns_differ_exclude_cols.sql
diff --git a/integration_tests/models/schema.yml b/integration_tests/models/data_tests/schema.yml
similarity index 90%
rename from integration_tests/models/schema.yml
rename to integration_tests/models/data_tests/schema.yml
index 4bea9838..fbe74ff7 100644
--- a/integration_tests/models/schema.yml
+++ b/integration_tests/models/data_tests/schema.yml
@@ -2,96 +2,96 @@ version: 2
 
 models:
   - name: compare_queries
-    tests:
+    data_tests:
       - dbt_utils.equality:
           compare_model: ref('expected_results__compare_relations_without_exclude')
 
   - name: compare_queries_concat_pk_without_summary
-    tests:
+    data_tests:
       - dbt_utils.equality:
           compare_model: ref('expected_results__compare_without_summary')
           
   - name: compare_queries_with_summary
-    tests:
+    data_tests:
       - dbt_utils.equality:
           compare_model: ref('expected_results__compare_with_summary')
 
   - name: compare_queries_without_summary
-    tests:
+    data_tests:
       - dbt_utils.equality:
           compare_model: ref('expected_results__compare_without_summary')
           
   - name: compare_relations_with_summary
-    tests:
+    data_tests:
       - dbt_utils.equality:
           compare_model: ref('expected_results__compare_with_summary')
 
   - name: compare_relations_without_summary
-    tests:
+    data_tests:
       - dbt_utils.equality:
           compare_model: ref('expected_results__compare_without_summary')
 
   - name: compare_relations_with_exclude
-    tests:
+    data_tests:
       - dbt_utils.equality:
           compare_model: ref('expected_results__compare_relations_with_exclude')
 
   - name: compare_relations_without_exclude
-    tests:
+    data_tests:
       - dbt_utils.equality:
           compare_model: ref('expected_results__compare_relations_without_exclude')
 
   - name: compare_all_columns_with_summary
-    tests:
+    data_tests:
       - dbt_utils.equality:
           compare_model: ref('expected_results__compare_all_columns_with_summary')
 
   - name: compare_all_columns_without_summary
-    tests:
+    data_tests:
       - dbt_utils.equality:
           compare_model: ref('expected_results__compare_all_columns_without_summary')
 
   - name: compare_all_columns_concat_pk_with_summary
-    tests:
+    data_tests:
       - dbt_utils.equality:
           compare_model: ref('expected_results__compare_all_columns_concat_pk_with_summary')
 
   - name: compare_all_columns_concat_pk_without_summary
-    tests:
+    data_tests:
       - dbt_utils.equality:
           compare_model: ref('expected_results__compare_all_columns_concat_pk_without_summary')
 
   - name: compare_all_columns_with_summary_and_exclude
-    tests:
+    data_tests:
       - dbt_utils.equality:
           compare_model: ref('expected_results__compare_all_columns_with_summary_and_exclude')
   
   - name: compare_all_columns_where_clause
-    tests:
+    data_tests:
       - dbt_utils.equality:
           compare_model: ref('expected_results__compare_all_columns_where_clause')
 
   - name: compare_relation_columns
-    tests:
+    data_tests:
       - dbt_utils.equality:
           compare_model: ref('expected_results__compare_relation_columns')
 
   - name: compare_relations_concat_pk_without_summary
-    tests:
+    data_tests:
       - dbt_utils.equality:
           compare_model: ref('expected_results__compare_without_summary')
 
   - name: compare_which_columns_differ
-    tests:
+    data_tests:
       - dbt_utils.equality:
           compare_model: ref('expected_results__compare_which_columns_differ')
 
   - name: compare_which_columns_differ_exclude_cols
-    tests:
+    data_tests:
       - dbt_utils.equality:
           compare_model: ref('expected_results__compare_which_columns_differ_exclude_cols')
           
   - name: compare_row_counts
-    tests:
+    data_tests:
       - dbt_utils.equality:
           compare_model: ref('expected_results__compare_row_counts')
diff --git a/integration_tests/models/unit_test_placeholder_models/unit_test_model_a.sql b/integration_tests/models/unit_test_placeholder_models/unit_test_model_a.sql
new file mode 100644
index 00000000..a4bc3985
--- /dev/null
+++ b/integration_tests/models/unit_test_placeholder_models/unit_test_model_a.sql
@@ -0,0 +1 @@
+select 12 as id, 22 as id_2, 'xyz' as col1, 'tuv' as col2, 123 as col3, {{ dbt.current_timestamp() }} as created_at
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_placeholder_models/unit_test_model_b.sql b/integration_tests/models/unit_test_placeholder_models/unit_test_model_b.sql
new file mode 100644
index 00000000..a4bc3985
--- /dev/null
+++ b/integration_tests/models/unit_test_placeholder_models/unit_test_model_b.sql
@@ -0,0 +1 @@
+select 12 as id, 22 as id_2, 'xyz' as col1, 'tuv' as col2, 123 as col3, {{ dbt.current_timestamp() }} as created_at
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_a.sql b/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_a.sql
new file mode 100644
index 00000000..24d584e8
--- /dev/null
+++ b/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_a.sql
@@ -0,0 +1,3 @@
+{{ config(tags=['skip' if (target.type in ['redshift', 'bigquery', 'postgres', 'databricks']) else 'runnable']) }}
+
+select 1 as id, 'John Doe' as col1, object_construct('street', '123 Main St', 'city', 'Anytown', 'state', 'CA') as col2
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_b.sql b/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_b.sql
new file mode 100644
index 00000000..24d584e8
--- /dev/null
+++ b/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_b.sql
@@ -0,0 +1,3 @@
+{{ config(tags=['skip' if (target.type in ['redshift', 'bigquery', 'postgres', 'databricks']) else 'runnable']) }}
+
+select 1 as id, 'John Doe' as col1, object_construct('street', '123 Main St', 'city', 'Anytown', 'state', 'CA') as col2
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_wrappers/unit_compare_queries.sql b/integration_tests/models/unit_test_wrappers/unit_compare_queries.sql
new file mode 100644
index 00000000..c589ee53
--- /dev/null
+++ b/integration_tests/models/unit_test_wrappers/unit_compare_queries.sql
@@ -0,0 +1,8 @@
+
+{{ 
+    audit_helper.compare_queries(
+        "select * from " ~ ref('unit_test_model_a'),
+        "select * from " ~ ref('unit_test_model_b'),
+        summarize = var('compare_queries_summarize')
+    ) 
+}}
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_wrappers/unit_compare_queries.yml b/integration_tests/models/unit_test_wrappers/unit_compare_queries.yml
new file mode 100644
index 00000000..0308e509
--- /dev/null
+++ b/integration_tests/models/unit_test_wrappers/unit_compare_queries.yml
@@ -0,0 +1,47 @@
+unit_tests:
+  - name: identical_records_compare_queries
+    model: unit_compare_queries
+    description: The world's most basic unit test. 
+
+    given:
+      - input: ref('unit_test_model_a')
+        rows:
+          - { "id": 1, "col1": "abc", "col2": "def" }
+          - { "id": 2, "col1": "hij", "col2": "klm" }
+          - { "id": 3, "col1": "nop", "col2": "qrs" }
+      - input: ref('unit_test_model_b')
+        rows:
+          - { "id": 1, "col1": "abc", "col2": "def" }
+          - { "id": 2, "col1": "hij", "col2": "klm" }
+          - { "id": 3, "col1": "nop", "col2": "qrs" }
+    
+    expect:
+      rows:
+        - {"in_a": true, "in_b": true}
+
+    overrides:
+      vars:
+        compare_queries_summarize: true
+
+  - name: identical_records_compare_queries_no_summarize
+    model: unit_compare_queries
+    description: The world's second most basic unit test.
+
+    given:
+      - input: ref('unit_test_model_a')
+        rows:
+          - { "id": 1, "col1": "abc", "col2": "def" }
+          - { "id": 2, "col1": "hij", "col2": "klm" }
+          - { "id": 3, "col1": "nop", "col2": "qrs" }
+      - input: ref('unit_test_model_b')
+        rows:
+          - { "id": 1, "col1": "abc", "col2": "def" }
+          - { "id": 2, "col1": "hij", "col2": "klm" }
+          - { "id": 3, "col1": "nop", "col2": "qrs" }
+        
+    expect:
+      rows: []
+
+    overrides:
+      vars:
+        compare_queries_summarize: false
diff --git a/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.sql b/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.sql
new file mode 100644
index 00000000..e969b1e2
--- /dev/null
+++ b/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.sql
@@ -0,0 +1,10 @@
+{{ config(tags=['skip' if (target.type in ['redshift', 'bigquery', 'postgres', 'databricks']) else 'runnable']) }}
+
+{{ 
+    audit_helper.quick_are_queries_identical(
+        "select * from " ~ ref('unit_test_model_a'),
+        "select * from " ~ ref('unit_test_model_b'),
+        columns=var('quick_are_queries_identical_cols'),
+        event_time=var('quick_are_queries_identical_event_time')
+    ) 
+}}  
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.yml b/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.yml
new file mode 100644
index 00000000..0d953506
--- /dev/null
+++ b/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.yml
@@ -0,0 +1,97 @@
+unit_tests:
+  - name: quick_are_queries_identical_identical_tables
+    model: unit_quick_are_queries_identical
+    # Same three rows on both sides and no event_time filter: expect a single true.
+    overrides:
+      vars:
+        quick_are_queries_identical_cols: ["id", "col1", "col2"]
+        quick_are_queries_identical_event_time: null
+
+    given:
+      - input: ref('unit_test_model_a')
+        rows:
+          - {"id": 1, "col1": "abc", "col2": "def"}
+          - {"id": 2, "col1": "hij", "col2": "klm"}
+          - {"id": 3, "col1": "nop", "col2": "qrs"}
+      - input: ref('unit_test_model_b')
+        rows:
+          - {"id": 1, "col1": "abc", "col2": "def"}
+          - {"id": 2, "col1": "hij", "col2": "klm"}
+          - {"id": 3, "col1": "nop", "col2": "qrs"}
+
+    expect:
+      rows:
+        - {"are_tables_identical": true}
+
+  - name: quick_are_queries_identical_identical_tables_event_time_filter
+    model: unit_quick_are_queries_identical
+    overrides:
+      vars:
+        quick_are_queries_identical_cols: ["id", "col1", "col2", "created_at"]
+        quick_are_queries_identical_event_time: "created_at"
+      macros:
+        # Mocked bounds leave out the 2024-01-01 row, which only model a contains.
+        audit_helper.get_comparison_bounds:
+          "min_event_time": "2024-01-02"
+          "max_event_time": "2024-01-03"
+          "event_time": "created_at"
+    given:
+      - input: ref('unit_test_model_a')
+        rows:
+          - {"id": 1, "col1": "abc", "col2": "def", "created_at": "2024-01-01"}
+          - {"id": 2, "col1": "hij", "col2": "klm", "created_at": "2024-01-02"}
+          - {"id": 3, "col1": "nop", "col2": "qrs", "created_at": "2024-01-03"}
+      - input: ref('unit_test_model_b')
+        rows:
+          - {"id": 2, "col1": "hij", "col2": "klm", "created_at": "2024-01-02"}
+          - {"id": 3, "col1": "nop", "col2": "qrs", "created_at": "2024-01-03"}
+
+    expect:
+      rows:
+        - {"are_tables_identical": true}
+
+  - name: quick_are_queries_identical_differences
+    model: unit_quick_are_queries_identical
+    overrides:
+      vars:
+        quick_are_queries_identical_cols: ["id", "col1", "col2"]
+        quick_are_queries_identical_event_time: null
+    given:
+      - input: ref('unit_test_model_a')
+        rows:
+          - {"id": 1, "col1": "abc", "col2": "def"}
+          - {"id": 2, "col1": "hij", "col2": "klm"}
+          - {"id": 3, "col1": "nop", "col2": "qrs"}
+      - input: ref('unit_test_model_b')
+        rows:
+          - {"id": 1, "col1": "abc", "col2": "def"}
+          - {"id": 2, "col1": "changed", "col2": "values"}
+          - {"id": 4, "col1": "nop", "col2": "qrs"}
+    # id 2 has different values and id 3 is replaced by id 4, so expect false.
+    expect:
+      rows:
+        - {"are_tables_identical": false}
+
+  - name: quick_are_queries_identical_identical_tables_with_null_pks
+    model: unit_quick_are_queries_identical
+    # Two rows per side have a null id; the inputs are otherwise the same, so expect true.
+    overrides:
+      vars:
+        quick_are_queries_identical_cols: ["id", "col1", "col2"]
+        quick_are_queries_identical_event_time: null
+
+    given:
+      - input: ref('unit_test_model_a')
+        rows:
+          - {"id": null, "col1": "abc", "col2": "def"}
+          - {"id": null, "col1": "hij", "col2": "klm"}
+          - {"id": 3, "col1": "nop", "col2": "qrs"}
+      - input: ref('unit_test_model_b')
+        rows:
+          - {"id": null, "col1": "abc", "col2": "def"}
+          - {"id": null, "col1": "hij", "col2": "klm"}
+          - {"id": 3, "col1": "nop", "col2": "qrs"}
+
+    expect:
+      rows:
+        - {"are_tables_identical": true}
diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.sql b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.sql
new file mode 100644
index 00000000..37473546
--- /dev/null
+++ b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.sql
@@ -0,0 +1,9 @@
+{#- Thin wrapper model: lets unit tests call reworked_compare with arguments supplied through vars. -#}
+{%- set model_a_query = "select * from " ~ ref('unit_test_model_a') -%}
+{%- set model_b_query = "select * from " ~ ref('unit_test_model_b') -%}
+{{ audit_helper.reworked_compare(
+    model_a_query,
+    model_b_query,
+    primary_key_columns=var('reworked_compare__primary_key_columns'),
+    columns=var('reworked_compare__columns'),
+    event_time=var('reworked_compare__event_time')) }}
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
new file mode 100644
index 00000000..fd2c0d02
--- /dev/null
+++ b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
@@ -0,0 +1,229 @@
+unit_tests:
+  - name: reworked_compare_identical_tables
+    model: unit_reworked_compare
+    # Both inputs hold the same three rows, so every id is reported as identical.
+    overrides:
+      vars:
+        reworked_compare__columns: ["id", "col1", "col2"]
+        reworked_compare__event_time: null
+        reworked_compare__primary_key_columns: ["id"]
+
+    given:
+      - input: ref('unit_test_model_a')
+        rows:
+          - {"id": 1, "col1": "abc", "col2": "def"}
+          - {"id": 2, "col1": "hij", "col2": "klm"}
+          - {"id": 3, "col1": "nop", "col2": "qrs"}
+      - input: ref('unit_test_model_b')
+        rows:
+          - {"id": 1, "col1": "abc", "col2": "def"}
+          - {"id": 2, "col1": "hij", "col2": "klm"}
+          - {"id": 3, "col1": "nop", "col2": "qrs"}
+
+    expect:
+      rows:
+        - {"dbt_audit_row_status": "identical", "id": 1, "dbt_audit_num_rows_in_status": 3}
+        - {"dbt_audit_row_status": "identical", "id": 3, "dbt_audit_num_rows_in_status": 3}
+        - {"dbt_audit_row_status": "identical", "id": 2, "dbt_audit_num_rows_in_status": 3}
+
+  - name: reworked_compare_identical_tables_event_time_filter
+    model: unit_reworked_compare
+    overrides:
+      vars:
+        reworked_compare__columns: ["id", "col1", "col2", "created_at"]
+        reworked_compare__event_time: "created_at"
+        reworked_compare__primary_key_columns: ["id"]
+      macros:
+        # Mocked bounds leave out the 2024-01-01 row, which only model a contains.
+        audit_helper.get_comparison_bounds:
+          "min_event_time": "2024-01-02"
+          "max_event_time": "2024-01-03"
+          "event_time": "created_at"
+    given:
+      - input: ref('unit_test_model_a')
+        rows:
+          - {"id": 1, "col1": "abc", "col2": "def", "created_at": "2024-01-01"}
+          - {"id": 2, "col1": "hij", "col2": "klm", "created_at": "2024-01-02"}
+          - {"id": 3, "col1": "nop", "col2": "qrs", "created_at": "2024-01-03"}
+      - input: ref('unit_test_model_b')
+        rows:
+          - {"id": 2, "col1": "hij", "col2": "klm", "created_at": "2024-01-02"}
+          - {"id": 3, "col1": "nop", "col2": "qrs", "created_at": "2024-01-03"}
+
+    expect:
+      rows:
+        - {"dbt_audit_row_status": "identical", "id": 2, "dbt_audit_num_rows_in_status": 2}
+        - {"dbt_audit_row_status": "identical", "id": 3, "dbt_audit_num_rows_in_status": 2}
+    
+  - name: reworked_compare_all_statuses
+    model: unit_reworked_compare
+    overrides:
+      vars:
+        reworked_compare__columns: ["id", "col1", "col2"]
+        reworked_compare__event_time: null
+        reworked_compare__primary_key_columns: ["id"]
+    given:
+      - input: ref('unit_test_model_a')
+        rows:
+          - {"id": 1, "col1": "abc", "col2": "def"}
+          - {"id": 2, "col1": "hij", "col2": "klm"}
+          - {"id": 3, "col1": "nop", "col2": "qrs"}
+      - input: ref('unit_test_model_b')
+        rows:
+          - {"id": 1, "col1": "abc", "col2": "def"}
+          - {"id": 2, "col1": "changed", "col2": "values"}
+          - {"id": 4, "col1": "nop", "col2": "qrs"}
+    # A changed row is expected once per side, hence the two 'modified' rows for id 2.
+    expect:
+      rows:
+        - {"dbt_audit_row_status": "identical", "id": 1, "dbt_audit_num_rows_in_status": 1}
+        - {"dbt_audit_row_status": "modified", "id": 2, "dbt_audit_num_rows_in_status": 1}
+        - {"dbt_audit_row_status": "modified", "id": 2, "dbt_audit_num_rows_in_status": 1}
+        - {"dbt_audit_row_status": "removed", "id": 3, "dbt_audit_num_rows_in_status": 1}
+        - {"dbt_audit_row_status": "added", "id": 4, "dbt_audit_num_rows_in_status": 1}
+    config:
+      tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-core/issues/10167
+
+  - name: reworked_compare_identical_tables_multiple_pk_cols
+    model: unit_reworked_compare
+    overrides:
+      vars:
+        reworked_compare__columns: ["id", "id_2", "col1", "col2"]
+        reworked_compare__event_time: null
+        reworked_compare__primary_key_columns: ["id", "id_2"]  # two-column key
+    given:
+      - input: ref('unit_test_model_a')
+        rows:
+          - {"id": 12, "id_2": 3, "col1": "abc", "col2": "def"}
+          - {"id": 1, "id_2": 23, "col1": "hij", "col2": "klm"}
+          - {"id": 3, "id_2": 4, "col1": "nop", "col2": "qrs"}
+      - input: ref('unit_test_model_b')
+        rows:
+          - {"id": 12, "id_2": 3, "col1": "abc", "col2": "def"}
+          - {"id": 1, "id_2": 23, "col1": "hij", "col2": "klm"}
+          - {"id": 3, "id_2": 4, "col1": "nop", "col2": "qrs"}
+    expect:
+      rows:
+        - {"dbt_audit_row_status": "identical", "id": 12, "id_2": 3, "dbt_audit_num_rows_in_status": 3}
+        - {"dbt_audit_row_status": "identical", "id": 1, "id_2": 23, "dbt_audit_num_rows_in_status": 3}
+        - {"dbt_audit_row_status": "identical", "id": 3, "id_2": 4, "dbt_audit_num_rows_in_status": 3}
+
+  - name: reworked_compare_identical_tables_single_null_pk
+    model: unit_reworked_compare
+    description: "`nonunique_pk` status checks whether a PK is unique. It's intended to avoid arbitrary comparisons, not protect against null records (that's what constraints or tests are for)."
+    # Only one row per side has a null id, so all three rows are expected as identical.
+    overrides:
+      vars:
+        reworked_compare__columns: ["id", "col1", "col2"]
+        reworked_compare__event_time: null
+        reworked_compare__primary_key_columns: ["id"]
+
+    given:
+      - input: ref('unit_test_model_a')
+        rows:
+          - {"id": null, "col1": "abc", "col2": "def"}
+          - {"id": 2, "col1": "hij", "col2": "klm"}
+          - {"id": 3, "col1": "nop", "col2": "qrs"}
+      - input: ref('unit_test_model_b')
+        rows:
+          - {"id": null, "col1": "abc", "col2": "def"}
+          - {"id": 2, "col1": "hij", "col2": "klm"}
+          - {"id": 3, "col1": "nop", "col2": "qrs"}
+
+    expect:
+      rows:
+        - {"dbt_audit_row_status": "identical", "id": null, "dbt_audit_num_rows_in_status": 3}
+        - {"dbt_audit_row_status": "identical", "id": 2, "dbt_audit_num_rows_in_status": 3}
+        - {"dbt_audit_row_status": "identical", "id": 3, "dbt_audit_num_rows_in_status": 3}
+    config:
+      tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-redshift/issues/821
+
+  - name: reworked_compare_identical_tables_multiple_null_pk
+    model: unit_reworked_compare
+    # Two rows per side share a null id, so those rows are expected as nonunique_pk.
+    overrides:
+      vars:
+        reworked_compare__columns: ["id", "col1", "col2"]
+        reworked_compare__event_time: null
+        reworked_compare__primary_key_columns: ["id"]
+
+    given:
+      - input: ref('unit_test_model_a')
+        rows:
+          - {"id": null, "col1": "abc", "col2": "def"}
+          - {"id": null, "col1": "hij", "col2": "klm"}
+          - {"id": 3, "col1": "nop", "col2": "qrs"}
+      - input: ref('unit_test_model_b')
+        rows:
+          - {"id": null, "col1": "abc", "col2": "def"}
+          - {"id": null, "col1": "hij", "col2": "klm"}
+          - {"id": 3, "col1": "nop", "col2": "qrs"}
+
+    expect:
+      rows:
+        - {"dbt_audit_row_status": "identical", "id": 3, "dbt_audit_num_rows_in_status": 1}
+        - {"dbt_audit_row_status": "nonunique_pk", "id": null, "dbt_audit_num_rows_in_status": 2}
+        - {"dbt_audit_row_status": "nonunique_pk", "id": null, "dbt_audit_num_rows_in_status": 2}
+    config:
+      tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-redshift/issues/821
+
+  - name: reworked_compare_identical_tables_multi_null_pk_dupe_rows
+    model: unit_reworked_compare
+    description: "All rows with a null ID are identical. They should be returned as individual rows instead of being combined"
+    # Model b carries one more null-id duplicate than model a; three nonunique_pk rows are expected.
+    overrides:
+      vars:
+        reworked_compare__columns: ["id", "col1", "col2"]
+        reworked_compare__event_time: null
+        reworked_compare__primary_key_columns: ["id"]
+
+    given:
+      - input: ref('unit_test_model_a')
+        rows:
+          - {"id": null, "col1": "abc", "col2": "def"}
+          - {"id": null, "col1": "abc", "col2": "def"}
+          - {"id": 3, "col1": "nop", "col2": "qrs"}
+      - input: ref('unit_test_model_b')
+        rows:
+          - {"id": null, "col1": "abc", "col2": "def"}
+          - {"id": null, "col1": "abc", "col2": "def"}
+          - {"id": null, "col1": "abc", "col2": "def"}
+          - {"id": 3, "col1": "nop", "col2": "qrs"}
+
+    expect:
+      rows:
+        - {"dbt_audit_row_status": "identical", "id": 3, "dbt_audit_num_rows_in_status": 1}
+        - {"dbt_audit_row_status": "nonunique_pk", "id": null, "dbt_audit_num_rows_in_status": 3}
+        - {"dbt_audit_row_status": "nonunique_pk", "id": null, "dbt_audit_num_rows_in_status": 3}
+        - {"dbt_audit_row_status": "nonunique_pk", "id": null, "dbt_audit_num_rows_in_status": 3}
+    config:
+      tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-redshift/issues/821
+
+  - name: reworked_compare_all_statuses_different_column_set
+    model: unit_reworked_compare
+    overrides:
+      vars:
+        reworked_compare__columns: ["id", "col1"]
+        reworked_compare__event_time: null
+        reworked_compare__primary_key_columns: ["id"]
+    given:
+      - input: ref('unit_test_model_a')
+        rows:
+          - {"id": 1, "col1": "abc", "col2": "def"}
+          - {"id": 2, "col1": "hij", "col2": "klm"}
+          - {"id": 3, "col1": "nop", "col2": "qrs"}
+      - input: ref('unit_test_model_b')
+        rows:
+          - {"id": 1, "col1": "abc"}
+          - {"id": 2, "col1": "ddd"}
+          - {"id": 4, "col1": "nop"}
+    # col2 is given only for model a and is not part of the compared column list.
+    expect:
+      rows:
+        - {"dbt_audit_row_status": "added", "id": 4, "dbt_audit_num_rows_in_status": 1}
+        - {"dbt_audit_row_status": "identical", "id": 1, "dbt_audit_num_rows_in_status": 1}
+        - {"dbt_audit_row_status": "modified", "id": 2, "dbt_audit_num_rows_in_status": 1}
+        - {"dbt_audit_row_status": "modified", "id": 2, "dbt_audit_num_rows_in_status": 1}
+        - {"dbt_audit_row_status": "removed", "id": 3, "dbt_audit_num_rows_in_status": 1}
+    config:
+      tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-core/issues/10167
diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare_struct.sql b/integration_tests/models/unit_test_wrappers/unit_reworked_compare_struct.sql
new file mode 100644
index 00000000..7aab2177
--- /dev/null
+++ b/integration_tests/models/unit_test_wrappers/unit_reworked_compare_struct.sql
@@ -0,0 +1,9 @@
+{#- Thin wrapper model: lets unit tests call reworked_compare on the struct fixture models with arguments supplied through vars. -#}
+{%- set model_a_query = "select * from " ~ ref('unit_test_struct_model_a') -%}
+{%- set model_b_query = "select * from " ~ ref('unit_test_struct_model_b') -%}
+{{ audit_helper.reworked_compare(
+    model_a_query,
+    model_b_query,
+    primary_key_columns=var('reworked_compare__primary_key_columns'),
+    columns=var('reworked_compare__columns'),
+    event_time=var('reworked_compare__event_time')) }}
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare_struct.yml b/integration_tests/models/unit_test_wrappers/unit_reworked_compare_struct.yml
new file mode 100644
index 00000000..3139a570
--- /dev/null
+++ b/integration_tests/models/unit_test_wrappers/unit_reworked_compare_struct.yml
@@ -0,0 +1,130 @@
+unit_tests:
+  - name: reworked_compare_struct
+    model: unit_reworked_compare_struct
+    overrides:
+      vars:
+        reworked_compare__columns: ["id", "col1", "col2"]
+        reworked_compare__event_time: null
+        reworked_compare__primary_key_columns: ["id"]
+    given:
+      - input: ref('unit_test_struct_model_a')
+        format: sql
+        rows: |
+          select 1 as id, 'John Doe' as col1, object_construct('street', '123 Main St', 'city', 'Anytown', 'state', 'CA') as col2
+      - input: ref('unit_test_struct_model_b')
+        format: sql
+        rows: |
+          select 1 as id, 'John Doe' as col1, object_construct('street', '123 Main St', 'city', 'Anytown', 'state', 'CA') as col2
+    expect:
+      rows:
+        - {"dbt_audit_row_status": "identical", "id": 1, "dbt_audit_num_rows_in_status": 1}
+    config:
+      tags: "{{ 'skip' if (target.type in ['redshift']) else 'runnable' }}"
+
+  - name: unit_reworked_compare_struct_identical_values_different_order
+    model: unit_reworked_compare_struct
+    description: "Snowflake sorts objects' keys alphabetically, so sort order is ignored."
+    overrides:
+      vars:
+        reworked_compare__columns: ["id", "col1", "col2"]
+        reworked_compare__event_time: null
+        reworked_compare__primary_key_columns: ["id"]
+    given:
+      - input: ref('unit_test_struct_model_a')
+        format: sql
+        rows: |
+          select 1 as id, 'John Doe' as col1, object_construct('street', '123 Main St', 'city', 'Anytown', 'state', 'CA') as col2
+      - input: ref('unit_test_struct_model_b')
+        format: sql
+        rows: |
+          select 1 as id, 'John Doe' as col1, object_construct('state', 'CA', 'street', '123 Main St', 'city', 'Anytown') as col2
+    expect:
+      rows:
+        - {"dbt_audit_row_status": "identical", "id": 1, "dbt_audit_num_rows_in_status": 1}
+
+  - name: unit_reworked_compare_struct_removed_key
+    model: unit_reworked_compare_struct
+    overrides:
+      vars:
+        reworked_compare__columns: ["id", "col1", "col2"]
+        reworked_compare__event_time: null
+        reworked_compare__primary_key_columns: ["id"]
+    given:
+      - input: ref('unit_test_struct_model_a')
+        format: sql
+        rows: |
+          select 1 as id, 'John Doe' as col1, object_construct('street', '123 Main St', 'city', 'Anytown', 'state', 'CA') as col2
+      - input: ref('unit_test_struct_model_b')
+        format: sql
+        rows: |
+          select 1 as id, 'John Doe' as col1, object_construct('street', '123 Main St', 'state', 'CA') as col2
+    expect:
+      rows:
+        - {"dbt_audit_row_status": "modified", "id": 1, "dbt_audit_num_rows_in_status": 1}
+        - {"dbt_audit_row_status": "modified", "id": 1, "dbt_audit_num_rows_in_status": 1}
+
+  - name: reworked_compare_complex_struct
+    model: unit_reworked_compare_struct
+    overrides:
+      vars:
+        reworked_compare__columns: ["id", "col1", "col2"]
+        reworked_compare__event_time: null
+        reworked_compare__primary_key_columns: ["id"]
+    given:
+      - input: ref('unit_test_struct_model_a')
+        format: sql
+        rows: |
+          select 1 as id, 'John Doe' as col1, object_construct('emails', array_construct('john.doe@example.com', 'john.d@example.com'), 'phones', array_construct(object_construct('type', 'home', 'number', '123-456-7890'), object_construct('type', 'work', 'number', '987-654-3210'))) as col2
+      - input: ref('unit_test_struct_model_b')
+        format: sql
+        rows: |
+          select 1 as id, 'John Doe' as col1, object_construct('emails', array_construct('john.doe@example.com', 'john.d@example.com'), 'phones', array_construct(object_construct('type', 'home', 'number', '123-456-7890'), object_construct('type', 'work', 'number', '987-654-3210'))) as col2
+    expect:
+      rows:
+        - {"dbt_audit_row_status": "identical", "id": 1, "dbt_audit_num_rows_in_status": 1}
+
+
+  - name: reworked_compare_complex_struct_different_values
+    model: unit_reworked_compare_struct
+    overrides:
+      vars:
+        reworked_compare__columns: ["id", "col1", "col2"]
+        reworked_compare__event_time: null
+        reworked_compare__primary_key_columns: ["id"]
+    given:
+      - input: ref('unit_test_struct_model_a')
+        format: sql
+        rows: |
+          select 1 as id, 'John Doe' as col1, object_construct('emails', array_construct('john.doe@example.com', 'john.d@example.com'), 'phones', array_construct(object_construct('type', 'home', 'number', '123-456-7890'), object_construct('type', 'work', 'number', '987-654-3210'))) as col2
+      - input: ref('unit_test_struct_model_b')
+        format: sql
+        rows: |
+          select 1 as id, 'John Doe' as col1, object_construct('emails', array_construct('john.smith@example.com', 'john.d@example.com'), 'phones', array_construct(object_construct('type', 'home', 'number', '123-456-7890'), object_construct('type', 'work', 'number', '987-654-3210'))) as col2
+    expect:
+      rows:
+        - {"dbt_audit_row_status": "modified", "id": 1, "dbt_audit_num_rows_in_status": 1}
+        - {"dbt_audit_row_status": "modified", "id": 1, "dbt_audit_num_rows_in_status": 1}
+
+  - name: unit_reworked_compare_complex_struct_identical_values_different_order
+    model: unit_reworked_compare_struct
+    description: "Snowflake sorts objects' keys alphabetically, but respects the order items are added to arrays so differences are detected."
+    overrides:
+      vars:
+        reworked_compare__columns: ["id", "col1", "col2"]
+        reworked_compare__event_time: null
+        reworked_compare__primary_key_columns: ["id"]
+    given:
+      - input: ref('unit_test_struct_model_a')
+        format: sql
+        rows: |
+          select 1 as id, 'John Doe' as col1, object_construct('emails', array_construct('john.doe@example.com', 'john.d@example.com'), 'phones', array_construct(object_construct('type', 'home', 'number', '123-456-7890'), object_construct('type', 'work', 'number', '987-654-3210'))) as col2
+      - input: ref('unit_test_struct_model_b')
+        format: sql
+        rows: |
+          select 1 as id, 'John Doe' as col1, object_construct('emails', array_construct('john.doe@example.com', 'john.d@example.com'), 'phones', array_construct(object_construct('type', 'work', 'number', '987-654-3210'), object_construct('type', 'home', 'number', '123-456-7890'))) as col2
+    expect:
+      rows:
+        - {"dbt_audit_row_status": "modified", "id": 1, "dbt_audit_num_rows_in_status": 1}
+        - {"dbt_audit_row_status": "modified", "id": 1, "dbt_audit_num_rows_in_status": 1}
+
+
diff --git a/macros/quick_are_queries_identical.sql b/macros/quick_are_queries_identical.sql
new file mode 100644
index 00000000..add26638
--- /dev/null
+++ b/macros/quick_are_queries_identical.sql
@@ -0,0 +1,51 @@
+/*
+As described by the Infinite Lambda team here: https://infinitelambda.com/data-validation-refactoring-snowflake/
+
+Some platforms let you take a hash of the whole table, which can be very very fast compared to comparing each row. 
+
+If you run this and it returns false, you still have to run the more in-depth queries to find out what specific changes there are, 
+but it's a good way to quickly verify identical results if that's what you're expecting. 
+*/
+
+{% macro quick_are_queries_identical(query_a, query_b, columns=[], event_time=None) %}{# Public entry point: forwards all four arguments to the adapter-specific implementation resolved by adapter.dispatch. #}
+    {{ return (adapter.dispatch('quick_are_queries_identical', 'audit_helper')(query_a, query_b, columns, event_time)) }}
+{% endmacro %}
+
+{% macro default__quick_are_queries_identical(query_a, query_b, columns, event_time) %}
+    {#- Emits SQL returning one boolean column, are_tables_identical: true when hash_agg over `columns` gives a single distinct value across both queries. -#}
+    {#- When event_time is set, both sides are filtered to the bounds returned by audit_helper.get_comparison_bounds; otherwise no filter is rendered. -#}
+    {% set joined_cols = columns | join(", ") %}
+    {% set event_time_props = none %}
+    {% if event_time %}
+        {% set event_time_props = audit_helper.get_comparison_bounds(query_a, query_b, event_time) %}
+    {% endif %}
+
+    select count(distinct hash_result) = 1 as are_tables_identical
+    from (
+        select hash_agg({{ joined_cols }}) as hash_result
+        from ({{ query_a }}) query_a_subq
+        {% if event_time_props %}
+            where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
+            and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
+        {% endif %}
+        union all
+        select hash_agg({{ joined_cols }}) as hash_result
+        from ({{ query_b }}) query_b_subq
+        {% if event_time_props %}
+            where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
+            and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
+        {% endif %}
+    ) as hashes
+{% endmacro %}
+
+{% macro is_quick_are_queries_identical_supported() %}{# Dispatches to the adapter-specific implementation and returns its boolean answer. #}
+    {{ return (adapter.dispatch('is_quick_are_queries_identical_supported', 'audit_helper')()) }}
+{% endmacro %}
+
+{% macro default__is_quick_are_queries_identical_supported() %}{# Fallback for adapters without their own implementation: returns False. #}
+    {{ return (False) }}
+{% endmacro %}
+
+{% macro snowflake__is_quick_are_queries_identical_supported() %}{# Snowflake implementation: returns True. #}
+    {{ return (True) }}
+{% endmacro %}
\ No newline at end of file
diff --git a/macros/reworked_compare.sql b/macros/reworked_compare.sql
new file mode 100644
index 00000000..e7838833
--- /dev/null
+++ b/macros/reworked_compare.sql
@@ -0,0 +1,63 @@
+{% macro reworked_compare(a_query, b_query, primary_key_columns=[], columns=[], event_time=None, sample_limit=20) %}
+    {#- Builds a query labelling rows from a_query and b_query with dbt_audit_row_status, dbt_audit_num_rows_in_status and dbt_audit_sample_number; rows whose sample number exceeds sample_limit are dropped unless sample_limit is falsy. -#}
+    {#- event_time_props starts as none so _generate_set_results always receives a defined value; it is replaced with comparison bounds only when event_time is provided. -#}
+    {% set event_time_props = none %}
+    {% if event_time %}
+        {% set event_time_props = audit_helper.get_comparison_bounds(a_query, b_query, event_time) %}
+    {% endif %}
+
+    with
+
+    {{ audit_helper._generate_set_results(a_query, b_query, primary_key_columns, columns, event_time_props)}}
+
+    ,
+
+    all_records as (
+
+        select
+            *,
+            true as in_a,
+            true as in_b
+        from a_intersect_b
+
+        union all
+
+        select
+            *,
+            true as in_a,
+            false as in_b
+        from a_except_b
+
+        union all
+
+        select
+            *,
+            false as in_a,
+            true as in_b
+        from b_except_a
+
+    ),
+
+
+    classified as (
+        select
+            *,
+            {{ audit_helper._classify_audit_row_status() }} as dbt_audit_row_status
+        from all_records
+    ),
+    {#- dense_rank gives rows that share a surrogate key and pk row number the same sample number within a status. -#}
+    final as (
+        select
+            *,
+            {{ audit_helper._count_num_rows_in_status() }} as dbt_audit_num_rows_in_status,
+            dense_rank() over (partition by dbt_audit_row_status order by dbt_audit_surrogate_key, dbt_audit_pk_row_num) as dbt_audit_sample_number
+        from classified
+    )
+
+    select * from final
+    {% if sample_limit %}
+        where dbt_audit_sample_number <= {{ sample_limit }}
+    {% endif %}
+    order by dbt_audit_row_status, dbt_audit_sample_number
+
+{% endmacro %}
\ No newline at end of file
diff --git a/macros/utils/_classify_audit_row_status.sql b/macros/utils/_classify_audit_row_status.sql
new file mode 100644
index 00000000..e28e3f4e
--- /dev/null
+++ b/macros/utils/_classify_audit_row_status.sql
@@ -0,0 +1,28 @@
+{% macro _classify_audit_row_status() %}{# Returns the adapter-specific CASE expression resolved by adapter.dispatch; takes no arguments. #}
+    {{ return(adapter.dispatch('_classify_audit_row_status', 'audit_helper')()) }}
+{% endmacro %}
+
+{%- macro default___classify_audit_row_status() -%}{#- Renders a CASE expression labelling each row 'nonunique_pk', 'identical', 'modified', 'removed' or 'added'; branch order matters because the first matching branch wins. -#}
+    case 
+        when max(dbt_audit_pk_row_num) over (partition by dbt_audit_surrogate_key) > 1 then 'nonunique_pk' {# some row with this surrogate key has a row number above 1 #}
+        when in_a and in_b then 'identical'
+        when {{ dbt.bool_or('in_a') }} over (partition by dbt_audit_surrogate_key, dbt_audit_pk_row_num) 
+            and {{ dbt.bool_or('in_b') }} over (partition by dbt_audit_surrogate_key, dbt_audit_pk_row_num)
+            then 'modified' {# the same key and row number occur with in_a on one row and in_b on another #}
+        when in_a then 'removed'
+        when in_b then 'added'
+    end
+{% endmacro %}
+
+
+{%- macro redshift___classify_audit_row_status() -%}
+    {#- Redshift doesn't support bitwise operations (e.g. bool_or) inside of a window function :( so the 'modified' branch uses max(case when ... then 1 else 0 end) = 1 over the same partition instead. The other branches match the default implementation. -#}
+    case 
+        when max(dbt_audit_pk_row_num) over (partition by dbt_audit_surrogate_key) > 1 then 'nonunique_pk'
+        when in_a and in_b then 'identical'
+        when max(case when in_a then 1 else 0 end) over (partition by dbt_audit_surrogate_key, dbt_audit_pk_row_num) = 1
+            and max(case when in_b then 1 else 0 end) over (partition by dbt_audit_surrogate_key, dbt_audit_pk_row_num) = 1
+            then 'modified'
+        when in_a then 'removed'
+        when in_b then 'added'
+    end{% endmacro %}
\ No newline at end of file
diff --git a/macros/utils/_count_num_rows_in_status.sql b/macros/utils/_count_num_rows_in_status.sql
new file mode 100644
index 00000000..fa81c591
--- /dev/null
+++ b/macros/utils/_count_num_rows_in_status.sql
@@ -0,0 +1,28 @@
+{% macro _count_num_rows_in_status() %}{# Returns the adapter-specific expression that counts distinct (surrogate key, pk row number) pairs per dbt_audit_row_status. #}
+    {{ return(adapter.dispatch('_count_num_rows_in_status', 'audit_helper')()) }}
+{% endmacro %}
+
+{%- macro default___count_num_rows_in_status() -%}{#- Uses a two-argument count(distinct ...) as a window function. -#}
+    count(distinct dbt_audit_surrogate_key, dbt_audit_pk_row_num) over (partition by dbt_audit_row_status)
+{% endmacro %}
+
+{%- macro bigquery___count_num_rows_in_status() -%}{#- Concatenates the two columns so count(distinct ...) receives a single argument. -#}
+    count(distinct {{ dbt.concat(["dbt_audit_surrogate_key", "dbt_audit_pk_row_num"]) }}) over (partition by dbt_audit_row_status)
+{% endmacro %}
+
+{%- macro postgres___count_num_rows_in_status() -%}{#- Delegates to the dense_rank-based helper. -#}
+    {{ audit_helper._count_num_rows_in_status_without_distinct_window_func() }}
+{% endmacro %}
+
+{%- macro databricks___count_num_rows_in_status() -%}{#- Delegates to the dense_rank-based helper. -#}
+    {{ audit_helper._count_num_rows_in_status_without_distinct_window_func() }}
+{% endmacro %}
+
+{% macro _count_num_rows_in_status_without_distinct_window_func() %}
+    {#- Some platforms don't support count(distinct) inside of window functions. -#}
+    {#- Within a partition, ascending dense_rank + descending dense_rank - 1 equals the number of distinct sort keys, assuming no nulls (we've already handled that). #}
+    {# Technique from https://stackoverflow.com/a/22347502 -#}
+    dense_rank() over (partition by dbt_audit_row_status order by dbt_audit_surrogate_key, dbt_audit_pk_row_num)
+    + dense_rank() over (partition by dbt_audit_row_status order by dbt_audit_surrogate_key desc, dbt_audit_pk_row_num desc)
+    - 1
+{% endmacro %}
\ No newline at end of file
diff --git a/macros/utils/_generate_set_results.sql b/macros/utils/_generate_set_results.sql
new file mode 100644
index 00000000..848ff9e2
--- /dev/null
+++ b/macros/utils/_generate_set_results.sql
@@ -0,0 +1,123 @@
+{#-
+    Set generation is dispatched because it's possible to get performance optimisations 
+    on some platforms, while keeping the post-processing standardised
+    See https://infinitelambda.com/data-validation-refactoring-snowflake/ for an example and background
+-#}
+
+{% macro _generate_set_results(a_query, b_query, primary_key_columns, columns, event_time_props=None) %}{# Forwards all five arguments, in order, to the adapter-specific ___generate_set_results implementation. #}
+  {% do return(adapter.dispatch('_generate_set_results', 'audit_helper')(a_query, b_query, primary_key_columns, columns, event_time_props)) %}
+{% endmacro %}
+
+{% macro default___generate_set_results(a_query, b_query, primary_key_columns, columns, event_time_props) %}
+    {% set joined_cols = columns | join(", ") %}
+    {# Renders the CTE bodies a_base, b_base, a, b, a_intersect_b, a_except_b and b_except_a (no leading "with"). a_query/b_query are embedded as subqueries; event_time_props, when truthy, must provide the keys event_time, min_event_time and max_event_time. #}
+    a_base as (
+        select 
+            {{ joined_cols }}, 
+            {{ audit_helper.generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key
+        from ( {{-  a_query  -}} ) a_base_subq
+        {% if event_time_props %}
+            where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
+            and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
+        {% endif %}
+    ),
+    {# b_base applies the same projection, surrogate key and optional event_time filter to b_query. #}
+    b_base as (
+        select 
+            {{ joined_cols }}, 
+            {{ audit_helper.generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key
+        from ( {{-  b_query  -}} ) b_base_subq
+        {% if event_time_props %}
+            where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
+            and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
+        {% endif %}
+    ),
+    {# Number the rows within each surrogate key so that rows sharing a key stay distinguishable. The window orders by its own partition key, so the query itself does not fix an order among such rows. #}
+    a as (
+        select 
+            *, 
+            row_number() over (partition by dbt_audit_surrogate_key order by dbt_audit_surrogate_key) as dbt_audit_pk_row_num
+        from a_base
+    ),
+
+    b as (
+        select 
+            *, 
+            row_number() over (partition by dbt_audit_surrogate_key order by dbt_audit_surrogate_key) as dbt_audit_pk_row_num
+        from b_base
+    ),
+    {# The set operations below compare every column of a and b, including the surrogate key and the row number. #}
+    a_intersect_b as (
+
+        select * from a
+        {{ dbt.intersect() }}
+        select * from b
+
+    ),
+
+    a_except_b as (
+
+        select * from a
+        {{ dbt.except() }}
+        select * from b
+
+    ),
+
+    b_except_a as (
+
+        select * from b
+        {{ dbt.except() }}
+        select * from a
+
+    )
+{% endmacro %}
+
+{% macro snowflake___generate_set_results(a_query, b_query, primary_key_columns, columns, event_time_props) %}{# Snowflake variant: renders the CTE bodies a, b, a_intersect_b, a_except_b and b_except_a; rows are matched on a hash() of the compared columns plus the row number. #}
+    {% set joined_cols = columns | join(", ") %}
+    a as (
+        select 
+            {{ joined_cols }}, 
+            {{ audit_helper.generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key,
+            row_number() over (partition by dbt_audit_surrogate_key order by dbt_audit_surrogate_key ) as dbt_audit_pk_row_num,
+            hash({{ joined_cols }}, dbt_audit_pk_row_num) as dbt_audit_row_hash
+        from ( {{-  a_query  -}} )
+        {% if event_time_props %}
+            where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
+            and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
+        {% endif %}
+    ),
+    {# b repeats the same projection, surrogate key, row number and row hash for b_query. #}
+    b as (
+        select 
+            {{ joined_cols }}, 
+            {{ audit_helper.generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key,
+            row_number() over (partition by dbt_audit_surrogate_key order by dbt_audit_surrogate_key ) as dbt_audit_pk_row_num,
+            hash({{ joined_cols }}, dbt_audit_pk_row_num) as dbt_audit_row_hash
+        from ( {{-  b_query  -}} )
+        {% if event_time_props %}
+            where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
+            and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
+        {% endif %}
+    ),
+    {# Membership tests on dbt_audit_row_hash take the place of intersect/except set operations. #}
+    a_intersect_b as (
+
+        select * from a
+        where a.dbt_audit_row_hash in (select b.dbt_audit_row_hash from b)
+
+    ),
+
+    a_except_b as (
+
+        select * from a
+        where a.dbt_audit_row_hash not in (select b.dbt_audit_row_hash from b)
+
+    ),
+
+    b_except_a as (
+
+        select * from b
+        where b.dbt_audit_row_hash not in (select a.dbt_audit_row_hash from a)
+
+    )
+{% endmacro %}
\ No newline at end of file
diff --git a/macros/utils/generate_null_safe_sk.sql b/macros/utils/generate_null_safe_sk.sql
new file mode 100644
index 00000000..4078c334
--- /dev/null
+++ b/macros/utils/generate_null_safe_sk.sql
@@ -0,0 +1,25 @@
+{# Taken from https://github.com/dbt-labs/dbt-utils/blob/main/macros/sql/generate_surrogate_key.sql but without the option to treat nulls as empty strings #}
+
+{%- macro generate_null_safe_surrogate_key(field_list) -%}{#- Entry point: passes field_list to the adapter-specific implementation looked up in the audit_helper namespace. -#}
+    {% do return(adapter.dispatch('generate_null_safe_surrogate_key', 'audit_helper')(field_list)) %}
+{% endmacro %}
+
+{%- macro default__generate_null_safe_surrogate_key(field_list) -%}
+{#- Hashes the '-'-separated concatenation of every field cast to a string; a null field is replaced by a fixed marker string. -#}
+{%- set null_marker = "'_dbt_audit_helper_surrogate_key_null_'" -%}
+{%- set string_type = dbt.type_string() -%}
+{%- set parts = [] -%}
+
+{%- for col in field_list -%}
+
+    {%- if not loop.first -%}
+        {%- do parts.append("'-'") -%}
+    {%- endif -%}
+
+    {%- do parts.append("coalesce(cast(" ~ col ~ " as " ~ string_type ~ "), " ~ null_marker ~ ")") -%}
+
+{%- endfor -%}
+
+{{ dbt.hash(dbt.concat(parts)) }}
+
+{%- endmacro -%}
\ No newline at end of file
diff --git a/macros/utils/get_comparison_bounds.sql b/macros/utils/get_comparison_bounds.sql
new file mode 100644
index 00000000..4f224f5f
--- /dev/null
+++ b/macros/utils/get_comparison_bounds.sql
@@ -0,0 +1,42 @@
+/*
+The idea here is that if the event_time is set, we will only compare records enclosed in both models.
+This improves performance and allows us to compare apples to apples, instead of detecting millions/billions
+of "deletions" identified due to prod having all data while CI only has a few days' worth.
+
+In the diagram below, the hatched section is the comparison bounds. You can think of it as
+                                                         
+         greatest(model_a.min_value, model_b.min_value)  
+            least(model_a.max_value, model_b.max_value)  
+                                                         
+                 ┌────────────────────────────┐          
+  a min_value    │                a max_value │        
+    └──► ┌───────┼────────────────────┐ ◄───┘ │        
+         │       │┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼│       │        
+model_a  │       │┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼│       │ model_b
+         │       │┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼│       │        
+         └───────┼────────────────────┘       │        
+            ┌──► └────────────────────────────┘ ◄────┐ 
+           b min_value                      b max_value 
+*/
+{% macro get_comparison_bounds(a_relation, b_relation, event_time) %}
+    {# Returns a dict holding "event_time" plus one lower-cased entry per column of the bounds query (first row only): the later of the two minimums and the earlier of the two maximums. #}
+    {% set bounds_sql %}
+        with min_maxes as (
+            select min({{ event_time }}) as min_event_time, max({{ event_time }}) as max_event_time
+            from {{ a_relation }}
+            union all 
+            select min({{ event_time }}) as min_event_time, max({{ event_time }}) as max_event_time
+            from {{ b_relation }}
+        )
+        select max(min_event_time) as min_event_time, min(max_event_time) as max_event_time
+        from min_maxes
+    {% endset %}
+
+    {% set bounds = dbt_utils.get_query_results_as_dict(bounds_sql) %}
+    {% set event_time_props = {"event_time": event_time} %}
+    {% for column_name, column_values in bounds.items() %}
+        {% do event_time_props.update({column_name | lower: column_values[0]}) %}
+    {% endfor %}
+
+    {% do return(event_time_props) %}
+{% endmacro %}
\ No newline at end of file
diff --git a/package-lock.yml b/package-lock.yml
new file mode 100644
index 00000000..32c6ccc0
--- /dev/null
+++ b/package-lock.yml
@@ -0,0 +1,4 @@
+packages:
+  - package: dbt-labs/dbt_utils
+    version: 1.1.1
+sha1_hash: 106400343ad0c92a7417f5156d0d6c3893bb2429

From d28f3e13c30d4b78e89e6ed21b3763e4819c9314 Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Tue, 28 May 2024 10:24:37 +1200
Subject: [PATCH 02/13] try to install core from main to get sorting fix

---
 .circleci/config.yml                          |  3 ++-
 .../unit_reworked_compare.yml                 | 20 +++++++++----------
 2 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index cde7c9e7..fb1fc0e0 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -33,7 +33,8 @@ jobs:
             . dbt_venv/bin/activate
 
             python -m pip install --upgrade pip setuptools
-            python -m pip install --pre dbt-core dbt-postgres dbt-redshift dbt-snowflake dbt-bigquery dbt-databricks
+            python -m pip install --pre dbt-postgres dbt-redshift dbt-snowflake dbt-bigquery dbt-databricks
+            python -m pip install git+https://github.com/dbt-labs/dbt-core.git@main
 
             mkdir -p ~/.dbt
             cp integration_tests/ci/sample.profiles.yml ~/.dbt/profiles.yml
diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
index fd2c0d02..e94a6176 100644
--- a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
+++ b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
@@ -81,8 +81,8 @@ unit_tests:
         - {"dbt_audit_row_status": 'modified', 'id': 2, dbt_audit_num_rows_in_status: 1}
         - {"dbt_audit_row_status": 'removed', 'id': 3, dbt_audit_num_rows_in_status: 1}
         - {"dbt_audit_row_status": 'added', 'id': 4, dbt_audit_num_rows_in_status: 1}
-    config:
-      tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-core/issues/10167
+    # config:
+    #   tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-core/issues/10167
 
   - name: reworked_compare_identical_tables_multiple_pk_cols
     model: unit_reworked_compare
@@ -135,8 +135,8 @@ unit_tests:
         reworked_compare__columns: ['id', 'col1', 'col2']
         reworked_compare__event_time:
         reworked_compare__primary_key_columns: ['id']
-    config:
-      tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-redshift/issues/821
+    # config:
+    #   tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-redshift/issues/821
 
   - name: reworked_compare_identical_tables_multiple_null_pk
     model: unit_reworked_compare
@@ -164,8 +164,8 @@ unit_tests:
         reworked_compare__columns: ['id', 'col1', 'col2']
         reworked_compare__event_time:
         reworked_compare__primary_key_columns: ['id']
-    config:
-      tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-redshift/issues/821
+    # config:
+    #   tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-redshift/issues/821
 
   - name: reworked_compare_identical_tables_multi_null_pk_dupe_rows
     description: All rows with a null ID are identical. They should be returned as individual rows instead of being combined
@@ -196,8 +196,8 @@ unit_tests:
         reworked_compare__columns: ['id', 'col1', 'col2']
         reworked_compare__event_time:
         reworked_compare__primary_key_columns: ['id']
-    config:
-      tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-redshift/issues/821
+    # config:
+    #   tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-redshift/issues/821
 
   - name: reworked_compare_all_statuses_different_column_set
     model: unit_reworked_compare
@@ -225,5 +225,5 @@ unit_tests:
         - {"dbt_audit_row_status": 'modified', 'id': 2, dbt_audit_num_rows_in_status: 1}
         - {"dbt_audit_row_status": 'modified', 'id': 2, dbt_audit_num_rows_in_status: 1}
         - {"dbt_audit_row_status": 'removed', 'id': 3, dbt_audit_num_rows_in_status: 1}
-    config:
-      tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-core/issues/10167
+    # config:
+    #   tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-core/issues/10167

From dfcdcc9bbafb2c284371a5555d7cc8222348b2e3 Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Tue, 28 May 2024 10:40:56 +1200
Subject: [PATCH 03/13] Revert "try to install core from main to get sorting
 fix"

This reverts commit d28f3e13c30d4b78e89e6ed21b3763e4819c9314.
---
 .circleci/config.yml                          |  3 +--
 .../unit_reworked_compare.yml                 | 20 +++++++++----------
 2 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index fb1fc0e0..cde7c9e7 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -33,8 +33,7 @@ jobs:
             . dbt_venv/bin/activate
 
             python -m pip install --upgrade pip setuptools
-            python -m pip install --pre dbt-postgres dbt-redshift dbt-snowflake dbt-bigquery dbt-databricks
-            python -m pip install git+https://github.com/dbt-labs/dbt-core.git@main
+            python -m pip install --pre dbt-core dbt-postgres dbt-redshift dbt-snowflake dbt-bigquery dbt-databricks
 
             mkdir -p ~/.dbt
             cp integration_tests/ci/sample.profiles.yml ~/.dbt/profiles.yml
diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
index e94a6176..fd2c0d02 100644
--- a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
+++ b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
@@ -81,8 +81,8 @@ unit_tests:
         - {"dbt_audit_row_status": 'modified', 'id': 2, dbt_audit_num_rows_in_status: 1}
         - {"dbt_audit_row_status": 'removed', 'id': 3, dbt_audit_num_rows_in_status: 1}
         - {"dbt_audit_row_status": 'added', 'id': 4, dbt_audit_num_rows_in_status: 1}
-    # config:
-    #   tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-core/issues/10167
+    config:
+      tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-core/issues/10167
 
   - name: reworked_compare_identical_tables_multiple_pk_cols
     model: unit_reworked_compare
@@ -135,8 +135,8 @@ unit_tests:
         reworked_compare__columns: ['id', 'col1', 'col2']
         reworked_compare__event_time:
         reworked_compare__primary_key_columns: ['id']
-    # config:
-    #   tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-redshift/issues/821
+    config:
+      tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-redshift/issues/821
 
   - name: reworked_compare_identical_tables_multiple_null_pk
     model: unit_reworked_compare
@@ -164,8 +164,8 @@ unit_tests:
         reworked_compare__columns: ['id', 'col1', 'col2']
         reworked_compare__event_time:
         reworked_compare__primary_key_columns: ['id']
-    # config:
-    #   tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-redshift/issues/821
+    config:
+      tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-redshift/issues/821
 
   - name: reworked_compare_identical_tables_multi_null_pk_dupe_rows
     description: All rows with a null ID are identical. They should be returned as individual rows instead of being combined
@@ -196,8 +196,8 @@ unit_tests:
         reworked_compare__columns: ['id', 'col1', 'col2']
         reworked_compare__event_time:
         reworked_compare__primary_key_columns: ['id']
-    # config:
-    #   tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-redshift/issues/821
+    config:
+      tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-redshift/issues/821
 
   - name: reworked_compare_all_statuses_different_column_set
     model: unit_reworked_compare
@@ -225,5 +225,5 @@ unit_tests:
         - {"dbt_audit_row_status": 'modified', 'id': 2, dbt_audit_num_rows_in_status: 1}
         - {"dbt_audit_row_status": 'modified', 'id': 2, dbt_audit_num_rows_in_status: 1}
         - {"dbt_audit_row_status": 'removed', 'id': 3, dbt_audit_num_rows_in_status: 1}
-    # config:
-    #   tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-core/issues/10167
+    config:
+      tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-core/issues/10167

From 9de9bb137699a6a604344a941b9e28de3aa040d3 Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Tue, 28 May 2024 22:26:03 +1200
Subject: [PATCH 04/13] Audit helper code review changes

* add BQ support for quick are queries identical

* explain why using dense_rank

* remove the compile step to avoid compilation error

* Don't throw incompatible quick compare error during parse

* add where clause to check we're not assuming its absence

* enable first basic struct tests

* Skip raising exception during parsing

* json_build_object doesn't work on rs

* changed behaviour redshift

* skip complex structs on rs for now

* temp disable all complex structs

* skip some currently failing bq tests

* Properly exclude tests to skip, add comments

* dbx too

* rename reworked_compare to compare_and_classify_query_results
---
 .circleci/config.yml                          |  5 --
 integration_tests/dbt_project.yml             |  6 +-
 .../unit_tests/struct_generation_macros.sql   | 12 +++
 .../unit_test_struct_model_a.sql              | 17 +++-
 .../unit_test_struct_model_b.sql              | 17 +++-
 .../unit_compare_queries.sql                  |  4 +-
 .../unit_quick_are_queries_identical.sql      |  6 +-
 .../unit_reworked_compare.sql                 | 12 +--
 .../unit_reworked_compare.yml                 | 64 +++++++-------
 .../unit_reworked_compare_struct.sql          | 12 +--
 .../unit_reworked_compare_struct.yml          | 83 ++++++++++---------
 .../tests/fixtures/simple_struct.sql          | 14 ++++
 .../simple_struct_different_order.sql         | 14 ++++
 .../fixtures/simple_struct_removed_key.sql    | 14 ++++
 macros/quick_are_queries_identical.sql        | 55 +++++++++---
 macros/reworked_compare.sql                   |  5 +-
 16 files changed, 224 insertions(+), 116 deletions(-)
 create mode 100644 integration_tests/macros/unit_tests/struct_generation_macros.sql
 create mode 100644 integration_tests/tests/fixtures/simple_struct.sql
 create mode 100644 integration_tests/tests/fixtures/simple_struct_different_order.sql
 create mode 100644 integration_tests/tests/fixtures/simple_struct_removed_key.sql

diff --git a/.circleci/config.yml b/.circleci/config.yml
index cde7c9e7..950a6cfd 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -51,7 +51,6 @@ jobs:
             cd integration_tests
             dbt deps --target postgres
             dbt seed --target postgres --full-refresh
-            dbt compile --target postgres
             dbt run --target postgres --exclude tag:skip+ tag:temporary_skip+
             dbt test --target postgres --exclude tag:skip+ tag:temporary_skip+
 
@@ -63,7 +62,6 @@ jobs:
             cd integration_tests
             dbt deps --target redshift
             dbt seed --target redshift --full-refresh
-            dbt compile --target redshift
             dbt run --target redshift --exclude tag:skip+ tag:temporary_skip+
             dbt test --target redshift --exclude tag:skip+ tag:temporary_skip+
 
@@ -75,7 +73,6 @@ jobs:
             cd integration_tests
             dbt deps --target snowflake
             dbt seed --target snowflake --full-refresh
-            dbt compile --target snowflake
             dbt run --target snowflake --exclude tag:skip+ tag:temporary_skip+
             dbt test --target snowflake --exclude tag:skip+ tag:temporary_skip+
 
@@ -90,7 +87,6 @@ jobs:
             cd integration_tests
             dbt deps --target bigquery
             dbt seed --target bigquery --full-refresh
-            dbt compile --target bigquery
             dbt run --target bigquery --full-refresh --exclude tag:skip+ tag:temporary_skip+
             dbt test --target bigquery --exclude tag:skip+ tag:temporary_skip+
 
@@ -102,7 +98,6 @@ jobs:
             cd integration_tests
             dbt deps --target databricks
             dbt seed --target databricks --full-refresh
-            dbt compile --target databricks
             dbt run --target databricks --exclude tag:skip+ tag:temporary_skip+
             dbt test --target databricks --exclude tag:skip+ tag:temporary_skip+
 
diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml
index ef906729..873057e5 100644
--- a/integration_tests/dbt_project.yml
+++ b/integration_tests/dbt_project.yml
@@ -20,9 +20,9 @@ seeds:
 
 vars:
   compare_queries_summarize: true
-  reworked_compare__primary_key_columns: ['col1']
-  reworked_compare__columns: ['col1']
-  reworked_compare__event_time:
+  compare_classify__primary_key_columns: ['col1']
+  compare_classify__columns: ['col1']
+  compare_classify__event_time:
   quick_are_queries_identical_cols: ['col1']
   quick_are_queries_identical_event_time:
 
diff --git a/integration_tests/macros/unit_tests/struct_generation_macros.sql b/integration_tests/macros/unit_tests/struct_generation_macros.sql
new file mode 100644
index 00000000..187c64b4
--- /dev/null
+++ b/integration_tests/macros/unit_tests/struct_generation_macros.sql
@@ -0,0 +1,12 @@
+{%- macro _basic_json_function() -%}
+    {#- Emits the name of the key/value object constructor for the current adapter type. -#}
+    {%- set constructors = {'snowflake': 'object_construct',
+                            'bigquery': 'json_object',
+                            'databricks': 'map'} -%}
+    {%- if target.type in constructors -%}
+        {{- constructors[target.type] -}}
+    {%- elif execute -%}
+        {#- Raise only at execution time; during parsing an unlisted adapter renders nothing. -#}
+        {%- do exceptions.raise_compiler_error("Unknown adapter '"~ target.type ~ "'")-%}
+    {%- endif -%}
+{%- endmacro -%}
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_a.sql b/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_a.sql
index 24d584e8..1cfabba6 100644
--- a/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_a.sql
+++ b/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_a.sql
@@ -1,3 +1,16 @@
-{{ config(tags=['skip' if (target.type in ['redshift', 'bigquery', 'postgres', 'databricks']) else 'runnable']) }}
+{{ config(tags=['skip' if (target.type in ['postgres']) else 'runnable']) }}
 
-select 1 as id, 'John Doe' as col1, object_construct('street', '123 Main St', 'city', 'Anytown', 'state', 'CA') as col2
\ No newline at end of file
+{% if target.type != 'redshift' %}
+{# Branch on the adapter type (not the user-chosen target name), matching the config() tag check above. Non-Redshift adapters build col2 with the function named by _basic_json_function. #}
+select 
+    1 as id, 
+    'John Doe' as col1, 
+    {{ audit_helper_integration_tests._basic_json_function() -}}('street', '123 Main St', 'city', 'Anytown', 'state', 'CA') as col2
+
+{% else %}
+{# Redshift: col2 comes from json_parse on a JSON string literal. #}
+select 
+  1 AS id, 
+  'John Doe' AS col1, 
+  json_parse('{"street": "123 Main St", "city": "Anytown", "state": "CA"}') AS col2
+{% endif %}
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_b.sql b/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_b.sql
index 24d584e8..1cfabba6 100644
--- a/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_b.sql
+++ b/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_b.sql
@@ -1,3 +1,16 @@
-{{ config(tags=['skip' if (target.type in ['redshift', 'bigquery', 'postgres', 'databricks']) else 'runnable']) }}
+{{ config(tags=['skip' if (target.type in ['postgres']) else 'runnable']) }}
 
-select 1 as id, 'John Doe' as col1, object_construct('street', '123 Main St', 'city', 'Anytown', 'state', 'CA') as col2
\ No newline at end of file
+{% if target.type != 'redshift' %}
+{# Branch on the adapter type (not the user-chosen target name), matching the config() tag check above. Non-Redshift adapters build col2 with the function named by _basic_json_function. #}
+select 
+    1 as id, 
+    'John Doe' as col1, 
+    {{ audit_helper_integration_tests._basic_json_function() -}}('street', '123 Main St', 'city', 'Anytown', 'state', 'CA') as col2
+
+{% else %}
+{# Redshift: col2 comes from json_parse on a JSON string literal. #}
+select 
+  1 AS id, 
+  'John Doe' AS col1, 
+  json_parse('{"street": "123 Main St", "city": "Anytown", "state": "CA"}') AS col2
+{% endif %}
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_wrappers/unit_compare_queries.sql b/integration_tests/models/unit_test_wrappers/unit_compare_queries.sql
index c589ee53..03272c9f 100644
--- a/integration_tests/models/unit_test_wrappers/unit_compare_queries.sql
+++ b/integration_tests/models/unit_test_wrappers/unit_compare_queries.sql
@@ -1,8 +1,8 @@
 
 {{ 
     audit_helper.compare_queries(
-        "select * from " ~ ref('unit_test_model_a'),
-        "select * from " ~ ref('unit_test_model_b'),
+        "select * from " ~ ref('unit_test_model_a') ~ " where 1=1",
+        "select * from " ~ ref('unit_test_model_b') ~ " where 1=1",
         summarize = var('compare_queries_summarize')
     ) 
 }}
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.sql b/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.sql
index e969b1e2..5eea8d81 100644
--- a/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.sql
+++ b/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.sql
@@ -1,9 +1,9 @@
-{{ config(tags=['skip' if (target.type in ['redshift', 'bigquery', 'postgres', 'databricks']) else 'runnable']) }}
+{{ config(tags=['skip' if (target.type in ['redshift', 'postgres', 'databricks']) else 'runnable']) }}
 
 {{ 
     audit_helper.quick_are_queries_identical(
-        "select * from " ~ ref('unit_test_model_a'),
-        "select * from " ~ ref('unit_test_model_b'),
+        "select * from " ~ ref('unit_test_model_a') ~ " where 1=1",
+        "select * from " ~ ref('unit_test_model_b') ~ " where 1=1",
         columns=var('quick_are_queries_identical_cols'),
         event_time=var('quick_are_queries_identical_event_time')
     ) 
diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.sql b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.sql
index 37473546..fbef7dcb 100644
--- a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.sql
+++ b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.sql
@@ -1,9 +1,9 @@
 {{ 
-    audit_helper.reworked_compare(
-        "select * from " ~ ref('unit_test_model_a'),
-        "select * from " ~ ref('unit_test_model_b'),
-        primary_key_columns=var('reworked_compare__primary_key_columns'),
-        columns=var('reworked_compare__columns'),
-        event_time=var('reworked_compare__event_time')
+    audit_helper.compare_and_classify_query_results(
+        "select * from " ~ ref('unit_test_model_a') ~ " where 1=1",
+        "select * from " ~ ref('unit_test_model_b') ~ " where 1=1",
+        primary_key_columns=var('compare_classify__primary_key_columns'),
+        columns=var('compare_classify__columns'),
+        event_time=var('compare_classify__event_time')
     ) 
 }}
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
index fd2c0d02..ffa30c9a 100644
--- a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
+++ b/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
@@ -1,5 +1,5 @@
 unit_tests:
-  - name: reworked_compare_identical_tables
+  - name: compare_classify_identical_tables
     model: unit_reworked_compare
     
     given:
@@ -22,17 +22,17 @@ unit_tests:
 
     overrides:
       vars:
-        reworked_compare__columns: ['id', 'col1', 'col2']
-        reworked_compare__event_time:
-        reworked_compare__primary_key_columns: ['id']
+        compare_classify__columns: ['id', 'col1', 'col2']
+        compare_classify__event_time:
+        compare_classify__primary_key_columns: ['id']
 
-  - name: reworked_compare_identical_tables_event_time_filter
+  - name: compare_classify_identical_tables_event_time_filter
     model: unit_reworked_compare
     overrides:
       vars:
-        reworked_compare__columns: ['id', 'col1', 'col2', 'created_at']
-        reworked_compare__event_time: 'created_at'
-        reworked_compare__primary_key_columns: ['id']
+        compare_classify__columns: ['id', 'col1', 'col2', 'created_at']
+        compare_classify__event_time: 'created_at'
+        compare_classify__primary_key_columns: ['id']
       macros: 
         audit_helper.get_comparison_bounds:
           "min_event_time": "2024-01-02"
@@ -55,13 +55,13 @@ unit_tests:
         - {"dbt_audit_row_status": 'identical', 'id': 2, dbt_audit_num_rows_in_status: 2}
         - {"dbt_audit_row_status": 'identical', 'id': 3, dbt_audit_num_rows_in_status: 2}
     
-  - name: reworked_compare_all_statuses
+  - name: compare_classify_all_statuses
     model: unit_reworked_compare
     overrides:
       vars:
-        reworked_compare__columns: ['id', 'col1', 'col2']
-        reworked_compare__event_time:
-        reworked_compare__primary_key_columns: ['id']
+        compare_classify__columns: ['id', 'col1', 'col2']
+        compare_classify__event_time:
+        compare_classify__primary_key_columns: ['id']
     given:
       - input: ref('unit_test_model_a')
         rows:
@@ -84,13 +84,13 @@ unit_tests:
     config:
       tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-core/issues/10167
 
-  - name: reworked_compare_identical_tables_multiple_pk_cols
+  - name: compare_classify_identical_tables_multiple_pk_cols
     model: unit_reworked_compare
     overrides:
       vars:
-        reworked_compare__columns: ['id', 'id_2', 'col1', 'col2']
-        reworked_compare__event_time:
-        reworked_compare__primary_key_columns: ['id', 'id_2']
+        compare_classify__columns: ['id', 'id_2', 'col1', 'col2']
+        compare_classify__event_time:
+        compare_classify__primary_key_columns: ['id', 'id_2']
     given:
       - input: ref('unit_test_model_a')
         rows:
@@ -108,7 +108,7 @@ unit_tests:
         - {"dbt_audit_row_status": 'identical', 'id': 1, "id_2": 23, "dbt_audit_num_rows_in_status": 3}
         - {"dbt_audit_row_status": 'identical', 'id': 3, "id_2": 4, "dbt_audit_num_rows_in_status": 3}
 
-  - name: reworked_compare_identical_tables_single_null_pk
+  - name: compare_classify_identical_tables_single_null_pk
     model: unit_reworked_compare
     description: "`nonunique_pk` status checks whether a PK is unique. It's intended to avoid arbitrary comparisons, not protect against null records (that's what constraints or tests are for)."
     
@@ -132,13 +132,13 @@ unit_tests:
 
     overrides:
       vars:
-        reworked_compare__columns: ['id', 'col1', 'col2']
-        reworked_compare__event_time:
-        reworked_compare__primary_key_columns: ['id']
+        compare_classify__columns: ['id', 'col1', 'col2']
+        compare_classify__event_time:
+        compare_classify__primary_key_columns: ['id']
     config:
       tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-redshift/issues/821
 
-  - name: reworked_compare_identical_tables_multiple_null_pk
+  - name: compare_classify_identical_tables_multiple_null_pk
     model: unit_reworked_compare
     
     given:
@@ -161,13 +161,13 @@ unit_tests:
 
     overrides:
       vars:
-        reworked_compare__columns: ['id', 'col1', 'col2']
-        reworked_compare__event_time:
-        reworked_compare__primary_key_columns: ['id']
+        compare_classify__columns: ['id', 'col1', 'col2']
+        compare_classify__event_time:
+        compare_classify__primary_key_columns: ['id']
     config:
       tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-redshift/issues/821
 
-  - name: reworked_compare_identical_tables_multi_null_pk_dupe_rows
+  - name: compare_classify_identical_tables_multi_null_pk_dupe_rows
     description: All rows with a null ID are identical. They should be returned as individual rows instead of being combined
     model: unit_reworked_compare
     
@@ -193,19 +193,19 @@ unit_tests:
 
     overrides:
       vars:
-        reworked_compare__columns: ['id', 'col1', 'col2']
-        reworked_compare__event_time:
-        reworked_compare__primary_key_columns: ['id']
+        compare_classify__columns: ['id', 'col1', 'col2']
+        compare_classify__event_time:
+        compare_classify__primary_key_columns: ['id']
     config:
       tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-redshift/issues/821
 
-  - name: reworked_compare_all_statuses_different_column_set
+  - name: compare_classify_all_statuses_different_column_set
     model: unit_reworked_compare
     overrides:
       vars:
-        reworked_compare__primary_key_columns: ['id']
-        reworked_compare__columns: ['id', 'col1']
-        reworked_compare__event_time:
+        compare_classify__primary_key_columns: ['id']
+        compare_classify__columns: ['id', 'col1']
+        compare_classify__event_time:
     given:
       - input: ref('unit_test_model_a')
         rows:
diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare_struct.sql b/integration_tests/models/unit_test_wrappers/unit_reworked_compare_struct.sql
index 7aab2177..82b04ea4 100644
--- a/integration_tests/models/unit_test_wrappers/unit_reworked_compare_struct.sql
+++ b/integration_tests/models/unit_test_wrappers/unit_reworked_compare_struct.sql
@@ -1,9 +1,9 @@
 {{ 
-    audit_helper.reworked_compare(
-        "select * from " ~ ref('unit_test_struct_model_a'),
-        "select * from " ~ ref('unit_test_struct_model_b'),
-        primary_key_columns=var('reworked_compare__primary_key_columns'),
-        columns=var('reworked_compare__columns'),
-        event_time=var('reworked_compare__event_time')
+    audit_helper.compare_and_classify_query_results(
+        "select * from " ~ ref('unit_test_struct_model_a') ~ " where 1=1",
+        "select * from " ~ ref('unit_test_struct_model_b') ~ " where 1=1",
+        primary_key_columns=var('compare_classify__primary_key_columns'),
+        columns=var('compare_classify__columns'),
+        event_time=var('compare_classify__event_time')
     ) 
 }}
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare_struct.yml b/integration_tests/models/unit_test_wrappers/unit_reworked_compare_struct.yml
index 3139a570..ac4cf8f3 100644
--- a/integration_tests/models/unit_test_wrappers/unit_reworked_compare_struct.yml
+++ b/integration_tests/models/unit_test_wrappers/unit_reworked_compare_struct.yml
@@ -1,69 +1,67 @@
 unit_tests:
-  - name: reworked_compare_struct
+  - name: compare_classify_simple_struct
     model: unit_reworked_compare_struct
     given:
       - input: ref('unit_test_struct_model_a')
         format: sql
-        rows: |
-          select 1 as id, 'John Doe' as col1, object_construct('street', '123 Main St', 'city', 'Anytown', 'state', 'CA') as col2
+        fixture: simple_struct
       - input: ref('unit_test_struct_model_b')
         format: sql
-        rows: |
-          select 1 as id, 'John Doe' as col1, object_construct('street', '123 Main St', 'city', 'Anytown', 'state', 'CA') as col2
+        fixture: simple_struct
     expect:
       rows:
         - {"id": 1, "dbt_audit_row_status": "identical", "dbt_audit_num_rows_in_status": 1}
     overrides:
       vars:
-        reworked_compare__columns: ['id', 'col1', 'col2']
-        reworked_compare__event_time:
-        reworked_compare__primary_key_columns: ['id']
+        compare_classify__columns: ['id', 'col1', 'col2']
+        compare_classify__event_time:
+        compare_classify__primary_key_columns: ['id']
     config:
-      tags: "{{ 'skip' if (target.type in ['redshift']) else 'runnable' }}"
+      tags: "{{ 'skip' if (target.type in ['bigquery', 'databricks']) else 'runnable' }}" #Can't do set operations on even simple JSON cols
 
-  - name: unit_reworked_compare_struct_identical_values_different_order
+  - name: unit_compare_classify_struct_identical_values_different_order
     model: unit_reworked_compare_struct
-    description: Snowflake sorts objects' keys alphabetically, so sort order is ignored.
+    description: Objects' keys are sorted alphabetically, so sort order is ignored.
     given:
       - input: ref('unit_test_struct_model_a')
         format: sql
-        rows: |
-          select 1 as id, 'John Doe' as col1, object_construct('street', '123 Main St', 'city', 'Anytown', 'state', 'CA') as col2
+        fixture: simple_struct
       - input: ref('unit_test_struct_model_b')
         format: sql
-        rows: |
-          select 1 as id, 'John Doe' as col1, object_construct('state', 'CA', 'street', '123 Main St', 'city', 'Anytown') as col2
+        fixture: simple_struct_different_order
     expect:
       rows:
         - {"id": 1, "dbt_audit_row_status": "identical", "dbt_audit_num_rows_in_status": 1}
     overrides:
       vars:
-        reworked_compare__columns: ['id', 'col1', 'col2']
-        reworked_compare__event_time:
-        reworked_compare__primary_key_columns: ['id']
+        compare_classify__columns: ['id', 'col1', 'col2']
+        compare_classify__event_time:
+        compare_classify__primary_key_columns: ['id']
+    config:
+      tags: "{{ 'skip' if (target.type in ['bigquery', 'databricks']) else 'runnable' }}" #Can't do set operations on even simple JSON cols
 
-  - name: unit_reworked_compare_struct_removed_key
+  - name: unit_compare_classify_struct_removed_key
     model: unit_reworked_compare_struct
     given:
       - input: ref('unit_test_struct_model_a')
         format: sql
-        rows: |
-          select 1 as id, 'John Doe' as col1, object_construct('street', '123 Main St', 'city', 'Anytown', 'state', 'CA') as col2
+        fixture: simple_struct
       - input: ref('unit_test_struct_model_b')
         format: sql
-        rows: |
-          select 1 as id, 'John Doe' as col1, object_construct('street', '123 Main St', 'state', 'CA') as col2
+        fixture: simple_struct_removed_key
     expect:
       rows:
         - {"id": 1, "dbt_audit_row_status": "modified", "dbt_audit_num_rows_in_status": 1}
         - {"id": 1, "dbt_audit_row_status": "modified", "dbt_audit_num_rows_in_status": 1}
     overrides:
       vars:
-        reworked_compare__columns: ['id', 'col1', 'col2']
-        reworked_compare__event_time:
-        reworked_compare__primary_key_columns: ['id']
+        compare_classify__columns: ['id', 'col1', 'col2']
+        compare_classify__event_time:
+        compare_classify__primary_key_columns: ['id']
+    config:
+      tags: "{{ 'skip' if (target.type in ['bigquery', 'databricks']) else 'runnable' }}" #Can't do set operations on even simple JSON cols
 
-  - name: reworked_compare_complex_struct
+  - name: compare_classify_complex_struct
     model: unit_reworked_compare_struct
     given:
       - input: ref('unit_test_struct_model_a')
@@ -79,12 +77,13 @@ unit_tests:
         - {"id": 1, "dbt_audit_row_status": "identical", "dbt_audit_num_rows_in_status": 1}
     overrides:
       vars:
-        reworked_compare__columns: ['id', 'col1', 'col2']
-        reworked_compare__event_time:
-        reworked_compare__primary_key_columns: ['id']
-
+        compare_classify__columns: ['id', 'col1', 'col2']
+        compare_classify__event_time:
+        compare_classify__primary_key_columns: ['id']
+    config:
+      tags: "{{ 'skip' if (target.type in ['redshift', 'bigquery', 'databricks']) else 'runnable' }}" #haven't ported these to be multi-warehouse yet
 
-  - name: reworked_compare_complex_struct_different_values
+  - name: compare_classify_complex_struct_different_values
     model: unit_reworked_compare_struct
     given:
       - input: ref('unit_test_struct_model_a')
@@ -101,11 +100,13 @@ unit_tests:
         - {"id": 1, "dbt_audit_row_status": "modified", "dbt_audit_num_rows_in_status": 1}
     overrides:
       vars:
-        reworked_compare__columns: ['id', 'col1', 'col2']
-        reworked_compare__event_time:
-        reworked_compare__primary_key_columns: ['id']
+        compare_classify__columns: ['id', 'col1', 'col2']
+        compare_classify__event_time:
+        compare_classify__primary_key_columns: ['id']
+    config:
+      tags: "{{ 'skip' if (target.type in ['redshift', 'bigquery', 'databricks']) else 'runnable' }}" #haven't ported these to be multi-warehouse yet
 
-  - name: unit_reworked_compare_complex_struct_identical_values_different_order
+  - name: unit_compare_classify_complex_struct_identical_values_different_order
     model: unit_reworked_compare_struct
     description: Snowflake sorts objects' keys alphabetically, but respects the order items are added to arrays so differences are detected.
     given:
@@ -123,8 +124,8 @@ unit_tests:
         - {"id": 1, "dbt_audit_row_status": "modified", "dbt_audit_num_rows_in_status": 1}
     overrides:
       vars:
-        reworked_compare__columns: ['id', 'col1', 'col2']
-        reworked_compare__event_time:
-        reworked_compare__primary_key_columns: ['id']
-
-
+        compare_classify__columns: ['id', 'col1', 'col2']
+        compare_classify__event_time:
+        compare_classify__primary_key_columns: ['id']
+    config:
+      tags: "{{ 'skip' if (target.type in ['redshift', 'bigquery', 'databricks']) else 'runnable' }}" #haven't ported these to be multi-warehouse yet
diff --git a/integration_tests/tests/fixtures/simple_struct.sql b/integration_tests/tests/fixtures/simple_struct.sql
new file mode 100644
index 00000000..006e62f6
--- /dev/null
+++ b/integration_tests/tests/fixtures/simple_struct.sql
@@ -0,0 +1,14 @@
+{% if target.type != 'redshift' %}
+{# Branch on the adapter type rather than the user-chosen target name, matching the `target.type` checks in the unit test tags. #}
+select 
+    1 as id, 
+    'John Doe' as col1, 
+    {{ audit_helper_integration_tests._basic_json_function() -}}('street', '123 Main St', 'city', 'Anytown', 'state', 'CA') as col2
+
+{% else %}
+{# Redshift branch: builds the same street/city/state object from a JSON literal via json_parse. #}
+select 
+  1 AS id, 
+  'John Doe' AS col1, 
+  json_parse('{"street": "123 Main St", "city": "Anytown", "state": "CA"}') AS col2
+{% endif %}
\ No newline at end of file
diff --git a/integration_tests/tests/fixtures/simple_struct_different_order.sql b/integration_tests/tests/fixtures/simple_struct_different_order.sql
new file mode 100644
index 00000000..ee89fb70
--- /dev/null
+++ b/integration_tests/tests/fixtures/simple_struct_different_order.sql
@@ -0,0 +1,14 @@
+{% if target.type != 'redshift' %}
+{# Branch on the adapter type rather than the user-chosen target name, matching the `target.type` checks in the unit test tags. #}
+select 
+    1 as id, 
+    'John Doe' as col1, 
+    {{ audit_helper_integration_tests._basic_json_function() -}}( 'state', 'CA', 'street', '123 Main St', 'city', 'Anytown') as col2
+
+{% else %}
+{# Redshift branch: same keys and values as above, supplied in state/street/city order via json_parse. #}
+select 
+  1 AS id, 
+  'John Doe' AS col1, 
+  json_parse('{"state": "CA", "street": "123 Main St", "city": "Anytown"}') AS col2
+{% endif %}
\ No newline at end of file
diff --git a/integration_tests/tests/fixtures/simple_struct_removed_key.sql b/integration_tests/tests/fixtures/simple_struct_removed_key.sql
new file mode 100644
index 00000000..ae3084bd
--- /dev/null
+++ b/integration_tests/tests/fixtures/simple_struct_removed_key.sql
@@ -0,0 +1,14 @@
+{% if target.type != 'redshift' %}
+{# Branch on the adapter type rather than the user-chosen target name, matching the `target.type` checks in the unit test tags. #}
+select 
+    1 as id, 
+    'John Doe' as col1, 
+    {{ audit_helper_integration_tests._basic_json_function() -}}('street', '123 Main St', 'state', 'CA') as col2
+
+{% else %}
+{# Redshift branch: the object deliberately has no `city` key; built from a JSON literal via json_parse. #}
+select 
+  1 AS id, 
+  'John Doe' AS col1, 
+  json_parse('{"street": "123 Main St", "state": "CA"}') AS col2
+{% endif %}
\ No newline at end of file
diff --git a/macros/quick_are_queries_identical.sql b/macros/quick_are_queries_identical.sql
index add26638..0a049ad4 100644
--- a/macros/quick_are_queries_identical.sql
+++ b/macros/quick_are_queries_identical.sql
@@ -12,6 +12,49 @@ but it's a good way to quickly verify identical results if that's what you're ex
 {% endmacro %}
 
 {% macro default__quick_are_queries_identical(query_a, query_b, columns, event_time) %}
+    {% if execute %}
+        {# Only raise this error when the macro is actually being used, not during the initial parse phase. #}
+        {# If/when unit tests get support for the `enabled` config, this check can be removed, as they won't be supplied for parse anyway. #}
+        {% do exceptions.raise_compiler_error("quick_are_queries_identical() is not implemented for adapter '"~ target.type ~ "'" ) %}
+    {% endif %}
+{% endmacro %}
+
+{% macro bigquery__quick_are_queries_identical(query_a, query_b, columns, event_time) %}
+    {# BigQuery implementation. Renders a query returning one row with a boolean `are_tables_identical` column. #}
+    {% set joined_cols = columns | join(", ") %}
+    {% if event_time %}
+        {# Bounds are computed from this macro's own arguments (query_a / query_b); no other query names exist in this scope. #}
+        {% set event_time_props = audit_helper.get_comparison_bounds(query_a, query_b, event_time) %}
+    {% endif %}
+    with query_a as (
+        select {{ joined_cols }}
+        from ({{ query_a }})
+        {% if event_time_props %}
+            where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
+            and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
+        {% endif %}
+    ), 
+    query_b as (
+        select {{ joined_cols }}
+        from ({{ query_b }})
+        {% if event_time_props %}
+            where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
+            and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
+        {% endif %}
+    )
+
+    {# Each row is serialised to JSON and fingerprinted; bit_xor folds the per-row fingerprints into one value per query. #}
+    select count(distinct hash_result) = 1 as are_tables_identical
+    from (
+        select bit_xor(farm_fingerprint(to_json_string(query_a))) as hash_result
+        from query_a
+        union all
+        select bit_xor(farm_fingerprint(to_json_string(query_b))) as hash_result
+        from query_b
+    ) as hashes
+{% endmacro %}
+
+{% macro snowflake__quick_are_queries_identical(query_a, query_b, columns, event_time) %}
     {% set joined_cols = columns | join(", ") %}
     {% if event_time %}
         {% set event_time_props = audit_helper.get_comparison_bounds(a_query, b_query, event_time) %}
@@ -36,16 +79,4 @@ but it's a good way to quickly verify identical results if that's what you're ex
         {% endif %}
 
     ) as hashes
-{% endmacro %}
-
-{% macro is_quick_are_queries_identical_supported() %}
-    {{ return (adapter.dispatch('is_quick_are_queries_identical_supported', 'audit_helper')()) }}
-{% endmacro %}
-
-{% macro default__is_quick_are_queries_identical_supported() %}
-    {{ return (False) }}
-{% endmacro %}
-
-{% macro snowflake__is_quick_are_queries_identical_supported() %}
-    {{ return (True) }}
 {% endmacro %}
\ No newline at end of file
diff --git a/macros/reworked_compare.sql b/macros/reworked_compare.sql
index e7838833..0025bf21 100644
--- a/macros/reworked_compare.sql
+++ b/macros/reworked_compare.sql
@@ -1,4 +1,4 @@
-{% macro reworked_compare(a_query, b_query, primary_key_columns=[], columns=[], event_time=None, sample_limit=20) %}
+{% macro compare_and_classify_query_results(a_query, b_query, primary_key_columns=[], columns=[], event_time=None, sample_limit=20) %}
     
     {% set joined_cols = columns | join(", ") %}
 
@@ -38,7 +38,6 @@
 
     ),
 
-
     classified as (
         select 
             *,
@@ -50,6 +49,8 @@
         select 
             *,
             {{ audit_helper._count_num_rows_in_status() }} as dbt_audit_num_rows_in_status,
+            -- using dense_rank so that modified rows (which have a full row for both the left and right side) both get picked up in the sample. 
+            -- For every other type this is equivalent to a row_number()
             dense_rank() over (partition by dbt_audit_row_status order by dbt_audit_surrogate_key, dbt_audit_pk_row_num) as dbt_audit_sample_number
         from classified
     )

From 98d0c8ab36afa3323f9f1fac998b53ca764ad5a4 Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Tue, 28 May 2024 22:40:38 +1200
Subject: [PATCH 05/13] Rename files

---
 ...ked_compare.sql => unit_compare_classify.sql} |  0
 ...ked_compare.yml => unit_compare_classify.yml} | 16 ++++++++--------
 ...ruct.sql => unit_compare_classify_struct.sql} |  0
 ...ruct.yml => unit_compare_classify_struct.yml} | 12 ++++++------
 4 files changed, 14 insertions(+), 14 deletions(-)
 rename integration_tests/models/unit_test_wrappers/{unit_reworked_compare.sql => unit_compare_classify.sql} (100%)
 rename integration_tests/models/unit_test_wrappers/{unit_reworked_compare.yml => unit_compare_classify.yml} (97%)
 rename integration_tests/models/unit_test_wrappers/{unit_reworked_compare_struct.sql => unit_compare_classify_struct.sql} (100%)
 rename integration_tests/models/unit_test_wrappers/{unit_reworked_compare_struct.yml => unit_compare_classify_struct.yml} (96%)

diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.sql b/integration_tests/models/unit_test_wrappers/unit_compare_classify.sql
similarity index 100%
rename from integration_tests/models/unit_test_wrappers/unit_reworked_compare.sql
rename to integration_tests/models/unit_test_wrappers/unit_compare_classify.sql
diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml b/integration_tests/models/unit_test_wrappers/unit_compare_classify.yml
similarity index 97%
rename from integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
rename to integration_tests/models/unit_test_wrappers/unit_compare_classify.yml
index ffa30c9a..4dbf6c59 100644
--- a/integration_tests/models/unit_test_wrappers/unit_reworked_compare.yml
+++ b/integration_tests/models/unit_test_wrappers/unit_compare_classify.yml
@@ -1,6 +1,6 @@
 unit_tests:
   - name: compare_classify_identical_tables
-    model: unit_reworked_compare
+    model: unit_compare_classify
     
     given:
       - input: ref('unit_test_model_a')
@@ -27,7 +27,7 @@ unit_tests:
         compare_classify__primary_key_columns: ['id']
 
   - name: compare_classify_identical_tables_event_time_filter
-    model: unit_reworked_compare
+    model: unit_compare_classify
     overrides:
       vars:
         compare_classify__columns: ['id', 'col1', 'col2', 'created_at']
@@ -56,7 +56,7 @@ unit_tests:
         - {"dbt_audit_row_status": 'identical', 'id': 3, dbt_audit_num_rows_in_status: 2}
     
   - name: compare_classify_all_statuses
-    model: unit_reworked_compare
+    model: unit_compare_classify
     overrides:
       vars:
         compare_classify__columns: ['id', 'col1', 'col2']
@@ -85,7 +85,7 @@ unit_tests:
       tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-core/issues/10167
 
   - name: compare_classify_identical_tables_multiple_pk_cols
-    model: unit_reworked_compare
+    model: unit_compare_classify
     overrides:
       vars:
         compare_classify__columns: ['id', 'id_2', 'col1', 'col2']
@@ -109,7 +109,7 @@ unit_tests:
         - {"dbt_audit_row_status": 'identical', 'id': 3, "id_2": 4, "dbt_audit_num_rows_in_status": 3}
 
   - name: compare_classify_identical_tables_single_null_pk
-    model: unit_reworked_compare
+    model: unit_compare_classify
     description: "`nonunique_pk` status checks whether a PK is unique. It's intended to avoid arbitrary comparisons, not protect against null records (that's what constraints or tests are for)."
     
     given:
@@ -139,7 +139,7 @@ unit_tests:
       tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-redshift/issues/821
 
   - name: compare_classify_identical_tables_multiple_null_pk
-    model: unit_reworked_compare
+    model: unit_compare_classify
     
     given:
       - input: ref('unit_test_model_a')
@@ -169,7 +169,7 @@ unit_tests:
 
   - name: compare_classify_identical_tables_multi_null_pk_dupe_rows
     description: All rows with a null ID are identical. They should be returned as individual rows instead of being combined
-    model: unit_reworked_compare
+    model: unit_compare_classify
     
     given:
       - input: ref('unit_test_model_a')
@@ -200,7 +200,7 @@ unit_tests:
       tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-redshift/issues/821
 
   - name: compare_classify_all_statuses_different_column_set
-    model: unit_reworked_compare
+    model: unit_compare_classify
     overrides:
       vars:
         compare_classify__primary_key_columns: ['id']
diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare_struct.sql b/integration_tests/models/unit_test_wrappers/unit_compare_classify_struct.sql
similarity index 100%
rename from integration_tests/models/unit_test_wrappers/unit_reworked_compare_struct.sql
rename to integration_tests/models/unit_test_wrappers/unit_compare_classify_struct.sql
diff --git a/integration_tests/models/unit_test_wrappers/unit_reworked_compare_struct.yml b/integration_tests/models/unit_test_wrappers/unit_compare_classify_struct.yml
similarity index 96%
rename from integration_tests/models/unit_test_wrappers/unit_reworked_compare_struct.yml
rename to integration_tests/models/unit_test_wrappers/unit_compare_classify_struct.yml
index ac4cf8f3..f44d5e1a 100644
--- a/integration_tests/models/unit_test_wrappers/unit_reworked_compare_struct.yml
+++ b/integration_tests/models/unit_test_wrappers/unit_compare_classify_struct.yml
@@ -1,6 +1,6 @@
 unit_tests:
   - name: compare_classify_simple_struct
-    model: unit_reworked_compare_struct
+    model: unit_compare_classify_struct
     given:
       - input: ref('unit_test_struct_model_a')
         format: sql
@@ -20,7 +20,7 @@ unit_tests:
       tags: "{{ 'skip' if (target.type in ['bigquery', 'databricks']) else 'runnable' }}" #Can't do set operations on even simple JSON cols
 
   - name: unit_compare_classify_struct_identical_values_different_order
-    model: unit_reworked_compare_struct
+    model: unit_compare_classify_struct
     description: Objects' keys are sorted alphabetically, so sort order is ignored.
     given:
       - input: ref('unit_test_struct_model_a')
@@ -41,7 +41,7 @@ unit_tests:
       tags: "{{ 'skip' if (target.type in ['bigquery', 'databricks']) else 'runnable' }}" #Can't do set operations on even simple JSON cols
 
   - name: unit_compare_classify_struct_removed_key
-    model: unit_reworked_compare_struct
+    model: unit_compare_classify_struct
     given:
       - input: ref('unit_test_struct_model_a')
         format: sql
@@ -62,7 +62,7 @@ unit_tests:
       tags: "{{ 'skip' if (target.type in ['bigquery', 'databricks']) else 'runnable' }}" #Can't do set operations on even simple JSON cols
 
   - name: compare_classify_complex_struct
-    model: unit_reworked_compare_struct
+    model: unit_compare_classify_struct
     given:
       - input: ref('unit_test_struct_model_a')
         format: sql
@@ -84,7 +84,7 @@ unit_tests:
       tags: "{{ 'skip' if (target.type in ['redshift', 'bigquery', 'databricks']) else 'runnable' }}" #haven't ported these to be multi-warehouse yet
 
   - name: compare_classify_complex_struct_different_values
-    model: unit_reworked_compare_struct
+    model: unit_compare_classify_struct
     given:
       - input: ref('unit_test_struct_model_a')
         format: sql
@@ -107,7 +107,7 @@ unit_tests:
       tags: "{{ 'skip' if (target.type in ['redshift', 'bigquery', 'databricks']) else 'runnable' }}" #haven't ported these to be multi-warehouse yet
 
   - name: unit_compare_classify_complex_struct_identical_values_different_order
-    model: unit_reworked_compare_struct
+    model: unit_compare_classify_struct
     description: Snowflake sorts objects' keys alphabetically, but respects the order items are added to arrays so differences are detected.
     given:
       - input: ref('unit_test_struct_model_a')

From d744647897914b75f507c55e88ec3159803fafbc Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Tue, 28 May 2024 22:41:03 +1200
Subject: [PATCH 06/13] rename macro file

---
 ...eworked_compare.sql => compare_and_classify_query_results.sql} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename macros/{reworked_compare.sql => compare_and_classify_query_results.sql} (100%)

diff --git a/macros/reworked_compare.sql b/macros/compare_and_classify_query_results.sql
similarity index 100%
rename from macros/reworked_compare.sql
rename to macros/compare_and_classify_query_results.sql

From 6835ec90d1d9a998bb5b29ca5cc63b81331beab2 Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Wed, 29 May 2024 13:48:29 +1200
Subject: [PATCH 07/13] Add relation_focused macros

---
 macros/compare_and_classify_relation_rows.sql    | 16 ++++++++++++++++
 macros/quick_are_relations_identical.sql         | 14 ++++++++++++++
 .../_get_intersecting_columns_from_relations.sql | 13 +++++++++++++
 3 files changed, 43 insertions(+)
 create mode 100644 macros/compare_and_classify_relation_rows.sql
 create mode 100644 macros/quick_are_relations_identical.sql
 create mode 100644 macros/utils/_get_intersecting_columns_from_relations.sql

diff --git a/macros/compare_and_classify_relation_rows.sql b/macros/compare_and_classify_relation_rows.sql
new file mode 100644
index 00000000..d7c6f0f3
--- /dev/null
+++ b/macros/compare_and_classify_relation_rows.sql
@@ -0,0 +1,16 @@
+{% macro compare_and_classify_relation_rows(a_relation, b_relation, primary_key_columns=[], columns=None, event_time=None, sample_limit=20) %}
+    {#- Relation-based entry point: wraps each relation in a `select *` query and delegates to compare_and_classify_query_results. When no columns are given, the columns found in both relations are used. -#}
+    {%- if not columns -%}
+        {%- set columns = audit_helper._get_intersecting_columns_from_relations(a_relation, b_relation) -%}
+    {%- endif -%}
+    {%- set a_query = "select * from " ~ a_relation -%}
+    {%- set b_query = "select * from " ~ b_relation -%}
+    {{ audit_helper.compare_and_classify_query_results(
+        a_query,
+        b_query,
+        primary_key_columns=primary_key_columns,
+        columns=columns,
+        event_time=event_time,
+        sample_limit=sample_limit
+    ) }}
+{% endmacro %}
\ No newline at end of file
diff --git a/macros/quick_are_relations_identical.sql b/macros/quick_are_relations_identical.sql
new file mode 100644
index 00000000..2b3173da
--- /dev/null
+++ b/macros/quick_are_relations_identical.sql
@@ -0,0 +1,14 @@
+{% macro quick_are_relations_identical(a_relation, b_relation, columns=None, event_time=None) %}
+    {# Relation-based entry point: wraps each relation in a `select *` query and delegates to quick_are_queries_identical. When no columns are given, the columns found in both relations are used. #}
+    {% if not columns %}
+        {% set columns = audit_helper._get_intersecting_columns_from_relations(a_relation, b_relation) %}
+    {% endif %}
+    {% set left_query = "select * from " ~ a_relation %}
+    {% set right_query = "select * from " ~ b_relation %}
+    {{ audit_helper.quick_are_queries_identical(
+        left_query,
+        right_query,
+        columns,
+        event_time
+    ) }}
+{% endmacro %}
\ No newline at end of file
diff --git a/macros/utils/_get_intersecting_columns_from_relations.sql b/macros/utils/_get_intersecting_columns_from_relations.sql
new file mode 100644
index 00000000..18d2ccb1
--- /dev/null
+++ b/macros/utils/_get_intersecting_columns_from_relations.sql
@@ -0,0 +1,13 @@
+{% macro _get_intersecting_columns_from_relations(a_relation, b_relation) %}
+    {#- Returns the column names reported for a_relation that are also reported for b_relation, in a_relation's order. -#}
+    {%- set left_cols = dbt_utils.get_filtered_columns_in_relation(a_relation) -%}
+    {%- set right_cols = dbt_utils.get_filtered_columns_in_relation(b_relation) -%}
+    {%- set shared_cols = [] -%}
+
+    {#- The inline loop filter keeps only names that also appear in the right-hand list. -#}
+    {%- for col_name in left_cols if col_name in right_cols -%}
+        {%- do shared_cols.append(col_name) -%}
+    {%- endfor -%}
+
+    {% do return(shared_cols) %}
+{% endmacro %}
\ No newline at end of file

From da194ec1f65f74fb2a237c56c2d11a8dac101834 Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Wed, 5 Jun 2024 15:46:19 +1200
Subject: [PATCH 08/13] Add BQ-specific generate_set_results for hashes, enable
 json tests

---
 .../unit_compare_classify_struct.yml          | 12 ++--
 macros/utils/_generate_set_results.sql        | 64 +++++++++++++++++++
 2 files changed, 70 insertions(+), 6 deletions(-)

diff --git a/integration_tests/models/unit_test_wrappers/unit_compare_classify_struct.yml b/integration_tests/models/unit_test_wrappers/unit_compare_classify_struct.yml
index f44d5e1a..b7f82a48 100644
--- a/integration_tests/models/unit_test_wrappers/unit_compare_classify_struct.yml
+++ b/integration_tests/models/unit_test_wrappers/unit_compare_classify_struct.yml
@@ -17,7 +17,7 @@ unit_tests:
         compare_classify__event_time:
         compare_classify__primary_key_columns: ['id']
     config:
-      tags: "{{ 'skip' if (target.type in ['bigquery', 'databricks']) else 'runnable' }}" #Can't do set operations on even simple JSON cols
+      tags: "{{ 'skip' if (target.type in ['databricks']) else 'runnable' }}" #Can't do set operations on even simple JSON cols
 
   - name: unit_compare_classify_struct_identical_values_different_order
     model: unit_compare_classify_struct
@@ -38,7 +38,7 @@ unit_tests:
         compare_classify__event_time:
         compare_classify__primary_key_columns: ['id']
     config:
-      tags: "{{ 'skip' if (target.type in ['bigquery', 'databricks']) else 'runnable' }}" #Can't do set operations on even simple JSON cols
+      tags: "{{ 'skip' if (target.type in ['databricks']) else 'runnable' }}" #Can't do set operations on even simple JSON cols
 
   - name: unit_compare_classify_struct_removed_key
     model: unit_compare_classify_struct
@@ -59,7 +59,7 @@ unit_tests:
         compare_classify__event_time:
         compare_classify__primary_key_columns: ['id']
     config:
-      tags: "{{ 'skip' if (target.type in ['bigquery', 'databricks']) else 'runnable' }}" #Can't do set operations on even simple JSON cols
+      tags: "{{ 'skip' if (target.type in ['databricks']) else 'runnable' }}" #Can't do set operations on even simple JSON cols
 
   - name: compare_classify_complex_struct
     model: unit_compare_classify_struct
@@ -81,7 +81,7 @@ unit_tests:
         compare_classify__event_time:
         compare_classify__primary_key_columns: ['id']
     config:
-      tags: "{{ 'skip' if (target.type in ['redshift', 'bigquery', 'databricks']) else 'runnable' }}" #haven't ported these to be multi-warehouse yet
+      tags: "{{ 'skip' if (target.type in ['redshift', 'databricks']) else 'runnable' }}" #haven't ported these to be multi-warehouse yet
 
   - name: compare_classify_complex_struct_different_values
     model: unit_compare_classify_struct
@@ -104,7 +104,7 @@ unit_tests:
         compare_classify__event_time:
         compare_classify__primary_key_columns: ['id']
     config:
-      tags: "{{ 'skip' if (target.type in ['redshift', 'bigquery', 'databricks']) else 'runnable' }}" #haven't ported these to be multi-warehouse yet
+      tags: "{{ 'skip' if (target.type in ['redshift', 'databricks']) else 'runnable' }}" #haven't ported these to be multi-warehouse yet
 
   - name: unit_compare_classify_complex_struct_identical_values_different_order
     model: unit_compare_classify_struct
@@ -128,4 +128,4 @@ unit_tests:
         compare_classify__event_time:
         compare_classify__primary_key_columns: ['id']
     config:
-      tags: "{{ 'skip' if (target.type in ['redshift', 'bigquery', 'databricks']) else 'runnable' }}" #haven't ported these to be multi-warehouse yet
+      tags: "{{ 'skip' if (target.type in ['redshift', 'databricks']) else 'runnable' }}" #haven't ported these to be multi-warehouse yet
diff --git a/macros/utils/_generate_set_results.sql b/macros/utils/_generate_set_results.sql
index 848ff9e2..a5400714 100644
--- a/macros/utils/_generate_set_results.sql
+++ b/macros/utils/_generate_set_results.sql
@@ -72,6 +72,70 @@
     )
 {% endmacro %}
 
+
+
+{% macro bigquery___generate_set_results(a_query, b_query, primary_key_columns, columns, event_time_props) %}
+    {% set joined_cols = columns | join(", ") %}
+    subset_columns_a as (
+        select 
+            {{ joined_cols }}, 
+            {{ audit_helper.generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key,
+            row_number() over (partition by dbt_audit_surrogate_key order by dbt_audit_surrogate_key ) as dbt_audit_pk_row_num
+        from ( {{-  a_query  -}} )
+        {% if event_time_props %}
+            where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
+            and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
+        {% endif %}
+    ),
+
+    subset_columns_b as (
+        select 
+            {{ joined_cols }}, 
+            {{ audit_helper.generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key,
+            row_number() over (partition by dbt_audit_surrogate_key order by dbt_audit_surrogate_key ) as dbt_audit_pk_row_num
+        from ( {{-  b_query  -}} )
+        {% if event_time_props %}
+            where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
+            and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
+        {% endif %}
+    ),
+
+    a as (
+        select
+            *,
+            farm_fingerprint(to_json_string(subset_columns_a)) as dbt_audit_row_hash
+        from subset_columns_a
+    ), 
+
+    b as (
+        select
+            *,
+            farm_fingerprint(to_json_string(subset_columns_b)) as dbt_audit_row_hash
+        from subset_columns_b
+    ),
+
+    a_intersect_b as (
+
+        select * from a
+        where a.dbt_audit_row_hash in (select b.dbt_audit_row_hash from b)
+
+    ),
+
+    a_except_b as (
+
+        select * from a
+        where a.dbt_audit_row_hash not in (select b.dbt_audit_row_hash from b)
+
+    ),
+
+    b_except_a as (
+
+        select * from b
+        where b.dbt_audit_row_hash not in (select a.dbt_audit_row_hash from a)
+
+    )
+{% endmacro %}
+
 {% macro snowflake___generate_set_results(a_query, b_query, primary_key_columns, columns, event_time_props) %}
     {% set joined_cols = columns | join(", ") %}
     a as (

From eb1e6227b8d77e6a6edcad587c7fa3444325176f Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Thu, 6 Jun 2024 13:38:54 +1200
Subject: [PATCH 09/13] Implement hash comparisons for BQ and DBX (#103)

* disable tests for unrelated adapters

* Avoid lateral column aliasing

* First cross-db complex struct fixture

* Add final fixtures

* Initial work on dbx compatibility

* remove lateral column alias dbx

* cast everything as string before hashing

* add comment, enable all tests again

* rename to dbt_audit_in_a instead of in_a

* Protect against missing PK columns

* gitignore package-lock.yml

* add dbx variant of simple structs
---
 .gitignore                                    |  2 +
 .../unit_tests/struct_generation_macros.sql   | 18 ++++-
 .../unit_compare_classify.yml                 | 29 +++++++-
 .../unit_compare_classify_struct.yml          | 67 +++++++++++-------
 .../tests/fixtures/complex_struct.sql         |  8 +++
 .../complex_struct_different_order.sql        |  8 +++
 .../complex_struct_different_value.sql        |  8 +++
 macros/compare_and_classify_query_results.sql | 13 ++--
 macros/utils/_classify_audit_row_status.sql   | 20 +++---
 .../_ensure_all_pks_are_in_column_set.sql     | 19 ++++++
 macros/utils/_generate_set_results.sql        | 68 +++++++++++++++++--
 11 files changed, 211 insertions(+), 49 deletions(-)
 create mode 100644 integration_tests/tests/fixtures/complex_struct.sql
 create mode 100644 integration_tests/tests/fixtures/complex_struct_different_order.sql
 create mode 100644 integration_tests/tests/fixtures/complex_struct_different_value.sql
 create mode 100644 macros/utils/_ensure_all_pks_are_in_column_set.sql

diff --git a/.gitignore b/.gitignore
index 0606e5c3..20eb2532 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,5 @@ dbt_packages/
 logs/
 logfile
 .DS_Store
+package-lock.yml
+integration_tests/package-lock.yml
diff --git a/integration_tests/macros/unit_tests/struct_generation_macros.sql b/integration_tests/macros/unit_tests/struct_generation_macros.sql
index 187c64b4..2c6767e4 100644
--- a/integration_tests/macros/unit_tests/struct_generation_macros.sql
+++ b/integration_tests/macros/unit_tests/struct_generation_macros.sql
@@ -7,6 +7,20 @@
         map
     {%- elif execute -%}
         {# Only raise exception if it's actually being called, not during parsing #}
-        {%- do exceptions.raise_compiler_error("Unknown adapter '"~ target.type ~ "'")-%}
+        {%- do exceptions.raise_compiler_error("Unknown adapter '"~ target.type ~ "'") -%}
     {%- endif -%}
-{%- endmacro -%}
\ No newline at end of file
+{%- endmacro -%}
+
+{% macro _complex_json_function(json) %}
+
+    {% if target.type == 'redshift' %}
+        json_parse({{ json }})
+    {% elif target.type == 'databricks' %}
+        from_json({{ json }}, schema_of_json({{ json }}))
+    {% elif target.type in ['snowflake', 'bigquery'] %}
+        parse_json({{ json }})
+    {% elif execute %}
+        {# Only raise exception if it's actually being called, not during parsing #}
+        {%- do exceptions.raise_compiler_error("Unknown adapter '"~ target.type ~ "'") -%}    
+    {% endif %}
+{% endmacro %}
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_wrappers/unit_compare_classify.yml b/integration_tests/models/unit_test_wrappers/unit_compare_classify.yml
index 4dbf6c59..c38753b1 100644
--- a/integration_tests/models/unit_test_wrappers/unit_compare_classify.yml
+++ b/integration_tests/models/unit_test_wrappers/unit_compare_classify.yml
@@ -155,9 +155,9 @@ unit_tests:
         
     expect:
       rows:
-        - {"dbt_audit_row_status": 'identical', 'id': 3, dbt_audit_num_rows_in_status: 1}
         - {"dbt_audit_row_status": 'nonunique_pk', 'id': , dbt_audit_num_rows_in_status: 2}
         - {"dbt_audit_row_status": 'nonunique_pk', 'id': , dbt_audit_num_rows_in_status: 2}
+        - {"dbt_audit_row_status": 'identical', 'id': 3, dbt_audit_num_rows_in_status: 1}
 
     overrides:
       vars:
@@ -227,3 +227,30 @@ unit_tests:
         - {"dbt_audit_row_status": 'removed', 'id': 3, dbt_audit_num_rows_in_status: 1}
     config:
       tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-core/issues/10167
+
+  - name: compare_classify_identical_tables_without_pk_in_cols_list
+    model: unit_compare_classify
+    
+    given:
+      - input: ref('unit_test_model_a')
+        rows:
+          - { "id": 1, "col1": "abc", "col2": "def" }
+          - { "id": 2, "col1": "hij", "col2": "klm" }
+          - { "id": 3, "col1": "nop", "col2": "qrs" }
+      - input: ref('unit_test_model_b')
+        rows:
+          - { "id": 1, "col1": "abc", "col2": "def" }
+          - { "id": 2, "col1": "hij", "col2": "klm" }
+          - { "id": 3, "col1": "nop", "col2": "qrs" }
+        
+    expect:
+      rows:
+        - {"dbt_audit_row_status": 'identical', 'id': 1, dbt_audit_num_rows_in_status: 3}
+        - {"dbt_audit_row_status": 'identical', 'id': 3, dbt_audit_num_rows_in_status: 3}
+        - {"dbt_audit_row_status": 'identical', 'id': 2, dbt_audit_num_rows_in_status: 3}
+
+    overrides:
+      vars:
+        compare_classify__columns: ['col1', 'col2']
+        compare_classify__event_time:
+        compare_classify__primary_key_columns: ['id']
diff --git a/integration_tests/models/unit_test_wrappers/unit_compare_classify_struct.yml b/integration_tests/models/unit_test_wrappers/unit_compare_classify_struct.yml
index b7f82a48..98f634d1 100644
--- a/integration_tests/models/unit_test_wrappers/unit_compare_classify_struct.yml
+++ b/integration_tests/models/unit_test_wrappers/unit_compare_classify_struct.yml
@@ -16,12 +16,12 @@ unit_tests:
         compare_classify__columns: ['id', 'col1', 'col2']
         compare_classify__event_time:
         compare_classify__primary_key_columns: ['id']
-    config:
-      tags: "{{ 'skip' if (target.type in ['databricks']) else 'runnable' }}" #Can't do set operations on even simple JSON cols
+    # config:
+    #   tags: "{{ 'skip' if (target.type in ['databricks']) else 'runnable' }}" #Can't do set operations on even simple JSON cols
 
   - name: unit_compare_classify_struct_identical_values_different_order
     model: unit_compare_classify_struct
-    description: Objects' keys are sorted alphabetically, so sort order is ignored.
+    description: Objects' keys are generally sorted alphabetically, so sort order is ignored.
     given:
       - input: ref('unit_test_struct_model_a')
         format: sql
@@ -38,7 +38,31 @@ unit_tests:
         compare_classify__event_time:
         compare_classify__primary_key_columns: ['id']
     config:
-      tags: "{{ 'skip' if (target.type in ['databricks']) else 'runnable' }}" #Can't do set operations on even simple JSON cols
+      #Databricks cares about the order and considers it a difference. We're not trying to have identical behaviour across warehouses so that's OK.
+      tags: "{{ 'skip' if (target.type in ['databricks']) else 'runnable' }}"
+
+  - name: unit_compare_classify_struct_identical_values_different_order_dbx
+    model: unit_compare_classify_struct
+    description: Most platforms don't care about sort order. Databricks does.
+    given:
+      - input: ref('unit_test_struct_model_a')
+        format: sql
+        fixture: simple_struct
+      - input: ref('unit_test_struct_model_b')
+        format: sql
+        fixture: simple_struct_different_order
+    expect:
+      rows:
+        - {"id": 1, "dbt_audit_row_status": "modified", "dbt_audit_num_rows_in_status": 1}
+        - {"id": 1, "dbt_audit_row_status": "modified", "dbt_audit_num_rows_in_status": 1}
+    overrides:
+      vars:
+        compare_classify__columns: ['id', 'col1', 'col2']
+        compare_classify__event_time:
+        compare_classify__primary_key_columns: ['id']
+    config:
+      #Only for databricks
+      tags: "{{ 'skip' if (target.type not in ['databricks']) else 'runnable' }}"
 
   - name: unit_compare_classify_struct_removed_key
     model: unit_compare_classify_struct
@@ -58,20 +82,18 @@ unit_tests:
         compare_classify__columns: ['id', 'col1', 'col2']
         compare_classify__event_time:
         compare_classify__primary_key_columns: ['id']
-    config:
-      tags: "{{ 'skip' if (target.type in ['databricks']) else 'runnable' }}" #Can't do set operations on even simple JSON cols
+    # config:
+    #   tags: "{{ 'skip' if (target.type in ['databricks']) else 'runnable' }}" #Can't do set operations on even simple JSON cols
 
   - name: compare_classify_complex_struct
     model: unit_compare_classify_struct
     given:
       - input: ref('unit_test_struct_model_a')
         format: sql
-        rows: |
-          select 1 as id, 'John Doe' as col1, object_construct('emails', array_construct('john.doe@example.com', 'john.d@example.com'), 'phones', array_construct(object_construct('type', 'home', 'number', '123-456-7890'), object_construct('type', 'work', 'number', '987-654-3210'))) as col2
+        fixture: complex_struct
       - input: ref('unit_test_struct_model_b')
         format: sql
-        rows: |
-          select 1 as id, 'John Doe' as col1, object_construct('emails', array_construct('john.doe@example.com', 'john.d@example.com'), 'phones', array_construct(object_construct('type', 'home', 'number', '123-456-7890'), object_construct('type', 'work', 'number', '987-654-3210'))) as col2
+        fixture: complex_struct
     expect:
       rows:
         - {"id": 1, "dbt_audit_row_status": "identical", "dbt_audit_num_rows_in_status": 1}
@@ -80,20 +102,19 @@ unit_tests:
         compare_classify__columns: ['id', 'col1', 'col2']
         compare_classify__event_time:
         compare_classify__primary_key_columns: ['id']
-    config:
-      tags: "{{ 'skip' if (target.type in ['redshift', 'databricks']) else 'runnable' }}" #haven't ported these to be multi-warehouse yet
+    # config:
+    #   tags: "{{ 'skip' if (target.type in ['redshift', 'databricks']) else 'runnable' }}" #haven't ported these to be multi-warehouse yet
 
   - name: compare_classify_complex_struct_different_values
     model: unit_compare_classify_struct
     given:
       - input: ref('unit_test_struct_model_a')
         format: sql
-        rows: |
-          select 1 as id, 'John Doe' as col1, object_construct('emails', array_construct('john.doe@example.com', 'john.d@example.com'), 'phones', array_construct(object_construct('type', 'home', 'number', '123-456-7890'), object_construct('type', 'work', 'number', '987-654-3210'))) as col2
+        fixture: complex_struct
       - input: ref('unit_test_struct_model_b')
         format: sql
-        rows: |
-          select 1 as id, 'John Doe' as col1, object_construct('emails', array_construct('john.smith@example.com', 'john.d@example.com'), 'phones', array_construct(object_construct('type', 'home', 'number', '123-456-7890'), object_construct('type', 'work', 'number', '987-654-3210'))) as col2
+        fixture: complex_struct_different_value
+          
     expect:
       rows:
         - {"id": 1, "dbt_audit_row_status": "modified", "dbt_audit_num_rows_in_status": 1}
@@ -103,8 +124,8 @@ unit_tests:
         compare_classify__columns: ['id', 'col1', 'col2']
         compare_classify__event_time:
         compare_classify__primary_key_columns: ['id']
-    config:
-      tags: "{{ 'skip' if (target.type in ['redshift', 'databricks']) else 'runnable' }}" #haven't ported these to be multi-warehouse yet
+    # config:
+    #   tags: "{{ 'skip' if (target.type in ['redshift', 'databricks']) else 'runnable' }}" #haven't ported these to be multi-warehouse yet
 
   - name: unit_compare_classify_complex_struct_identical_values_different_order
     model: unit_compare_classify_struct
@@ -112,12 +133,10 @@ unit_tests:
     given:
       - input: ref('unit_test_struct_model_a')
         format: sql
-        rows: |
-          select 1 as id, 'John Doe' as col1, object_construct('emails', array_construct('john.doe@example.com', 'john.d@example.com'), 'phones', array_construct(object_construct('type', 'home', 'number', '123-456-7890'), object_construct('type', 'work', 'number', '987-654-3210'))) as col2
+        fixture: complex_struct
       - input: ref('unit_test_struct_model_b')
         format: sql
-        rows: |
-          select 1 as id, 'John Doe' as col1, object_construct('emails', array_construct('john.doe@example.com', 'john.d@example.com'), 'phones', array_construct(object_construct('type', 'work', 'number', '987-654-3210'), object_construct('type', 'home', 'number', '123-456-7890'))) as col2
+        fixture: complex_struct_different_order
     expect:
       rows:
         - {"id": 1, "dbt_audit_row_status": "modified", "dbt_audit_num_rows_in_status": 1}
@@ -127,5 +146,5 @@ unit_tests:
         compare_classify__columns: ['id', 'col1', 'col2']
         compare_classify__event_time:
         compare_classify__primary_key_columns: ['id']
-    config:
-      tags: "{{ 'skip' if (target.type in ['redshift', 'databricks']) else 'runnable' }}" #haven't ported these to be multi-warehouse yet
+    # config:
+    #   tags: "{{ 'skip' if (target.type in ['redshift', 'databricks']) else 'runnable' }}" #haven't ported these to be multi-warehouse yet
diff --git a/integration_tests/tests/fixtures/complex_struct.sql b/integration_tests/tests/fixtures/complex_struct.sql
new file mode 100644
index 00000000..b96206c3
--- /dev/null
+++ b/integration_tests/tests/fixtures/complex_struct.sql
@@ -0,0 +1,8 @@
+{% set json %}
+    '{"emails":["john.doe@example.com","john.d@example.com"],"phones":[{"number":"123-456-7890","type":"home"},{"number":"987-654-3210","type":"work"}]}'
+{% endset %}
+
+select 
+    1 as id, 
+    'John Doe' as col1, 
+    {{ audit_helper_integration_tests._complex_json_function(json) }} as col2
\ No newline at end of file
diff --git a/integration_tests/tests/fixtures/complex_struct_different_order.sql b/integration_tests/tests/fixtures/complex_struct_different_order.sql
new file mode 100644
index 00000000..24ead4fc
--- /dev/null
+++ b/integration_tests/tests/fixtures/complex_struct_different_order.sql
@@ -0,0 +1,8 @@
+{% set json %}
+    '{"emails":["john.doe@example.com","john.d@example.com"],"phones":[{"number":"987-654-3210","type":"work"}, {"number":"123-456-7890","type":"home"}]}'
+{% endset %}
+
+select 
+    1 as id, 
+    'John Doe' as col1, 
+    {{ audit_helper_integration_tests._complex_json_function(json) }} as col2
\ No newline at end of file
diff --git a/integration_tests/tests/fixtures/complex_struct_different_value.sql b/integration_tests/tests/fixtures/complex_struct_different_value.sql
new file mode 100644
index 00000000..5446b11a
--- /dev/null
+++ b/integration_tests/tests/fixtures/complex_struct_different_value.sql
@@ -0,0 +1,8 @@
+{% set json %}
+'{"emails":["john.smith@example.com","john.s@example.com"],"phones":[{"number":"123-456-7890","type":"home"},{"number":"987-654-3210","type":"work"}]}'
+{% endset %}
+
+select 
+    1 as id, 
+    'John Doe' as col1, 
+    {{ audit_helper_integration_tests._complex_json_function(json) }} as col2
\ No newline at end of file
diff --git a/macros/compare_and_classify_query_results.sql b/macros/compare_and_classify_query_results.sql
index 0025bf21..b5b40c58 100644
--- a/macros/compare_and_classify_query_results.sql
+++ b/macros/compare_and_classify_query_results.sql
@@ -1,5 +1,6 @@
 {% macro compare_and_classify_query_results(a_query, b_query, primary_key_columns=[], columns=[], event_time=None, sample_limit=20) %}
     
+    {% set columns = audit_helper._ensure_all_pks_are_in_column_set(primary_key_columns, columns) %}
     {% set joined_cols = columns | join(", ") %}
 
     {% if event_time %}
@@ -16,24 +17,24 @@
 
         select
             *,
-            true as in_a,
-            true as in_b
+            true as dbt_audit_in_a,
+            true as dbt_audit_in_b
         from a_intersect_b
 
         union all
 
         select
             *,
-            true as in_a,
-            false as in_b
+            true as dbt_audit_in_a,
+            false as dbt_audit_in_b
         from a_except_b
 
         union all
 
         select
             *,
-            false as in_a,
-            true as in_b
+            false as dbt_audit_in_a,
+            true as dbt_audit_in_b
         from b_except_a
 
     ),
diff --git a/macros/utils/_classify_audit_row_status.sql b/macros/utils/_classify_audit_row_status.sql
index e28e3f4e..73dd631d 100644
--- a/macros/utils/_classify_audit_row_status.sql
+++ b/macros/utils/_classify_audit_row_status.sql
@@ -5,12 +5,12 @@
 {%- macro default___classify_audit_row_status() -%}
     case 
         when max(dbt_audit_pk_row_num) over (partition by dbt_audit_surrogate_key) > 1 then 'nonunique_pk'
-        when in_a and in_b then 'identical'
-        when {{ dbt.bool_or('in_a') }} over (partition by dbt_audit_surrogate_key, dbt_audit_pk_row_num) 
-            and {{ dbt.bool_or('in_b') }} over (partition by dbt_audit_surrogate_key, dbt_audit_pk_row_num)
+        when dbt_audit_in_a and dbt_audit_in_b then 'identical'
+        when {{ dbt.bool_or('dbt_audit_in_a') }} over (partition by dbt_audit_surrogate_key, dbt_audit_pk_row_num) 
+            and {{ dbt.bool_or('dbt_audit_in_b') }} over (partition by dbt_audit_surrogate_key, dbt_audit_pk_row_num)
             then 'modified'
-        when in_a then 'removed'
-        when in_b then 'added'
+        when dbt_audit_in_a then 'removed'
+        when dbt_audit_in_b then 'added'
     end
 {% endmacro %}
 
@@ -19,10 +19,10 @@
     {#- Redshift doesn't support bitwise operations (e.g. bool_or) inside of a window function :( -#}
     case 
         when max(dbt_audit_pk_row_num) over (partition by dbt_audit_surrogate_key) > 1 then 'nonunique_pk'
-        when in_a and in_b then 'identical'
-        when max(case when in_a then 1 else 0 end) over (partition by dbt_audit_surrogate_key, dbt_audit_pk_row_num) = 1
-            and max(case when in_b then 1 else 0 end) over (partition by dbt_audit_surrogate_key, dbt_audit_pk_row_num) = 1
+        when dbt_audit_in_a and dbt_audit_in_b then 'identical'
+        when max(case when dbt_audit_in_a then 1 else 0 end) over (partition by dbt_audit_surrogate_key, dbt_audit_pk_row_num) = 1
+            and max(case when dbt_audit_in_b then 1 else 0 end) over (partition by dbt_audit_surrogate_key, dbt_audit_pk_row_num) = 1
             then 'modified'
-        when in_a then 'removed'
-        when in_b then 'added'
+        when dbt_audit_in_a then 'removed'
+        when dbt_audit_in_b then 'added'
     end{% endmacro %}
\ No newline at end of file
diff --git a/macros/utils/_ensure_all_pks_are_in_column_set.sql b/macros/utils/_ensure_all_pks_are_in_column_set.sql
new file mode 100644
index 00000000..5c190541
--- /dev/null
+++ b/macros/utils/_ensure_all_pks_are_in_column_set.sql
@@ -0,0 +1,19 @@
+{# Returns `columns` with any primary key columns the caller left out prepended to it. Membership is checked case-insensitively; added names keep the casing given in primary_key_columns. #}
+{# PKs are assumed to be the most important columns, so missing ones go to the front of the list. When nothing is missing, `columns` is returned unchanged. #}
+
+{% macro _ensure_all_pks_are_in_column_set(primary_key_columns, columns) %}
+    {% set lower_cols = columns | map('lower') | list %}
+    {% set missing_pks = [] %}
+
+    {% for pk in primary_key_columns %}
+        {% if pk | lower not in lower_cols %}
+            {% do missing_pks.append(pk) %}
+        {% endif %}
+    {% endfor %}
+
+    {% if missing_pks | length > 0 %}
+        {% set columns = missing_pks + columns %}
+    {% endif %}
+    
+    {% do return (columns) %}
+{% endmacro %}
\ No newline at end of file
diff --git a/macros/utils/_generate_set_results.sql b/macros/utils/_generate_set_results.sql
index a5400714..2705061a 100644
--- a/macros/utils/_generate_set_results.sql
+++ b/macros/utils/_generate_set_results.sql
@@ -72,15 +72,14 @@
     )
 {% endmacro %}
 
-
-
 {% macro bigquery___generate_set_results(a_query, b_query, primary_key_columns, columns, event_time_props) %}
     {% set joined_cols = columns | join(", ") %}
+    {% set surrogate_key = audit_helper.generate_null_safe_surrogate_key(primary_key_columns) %}
     subset_columns_a as (
         select 
             {{ joined_cols }}, 
-            {{ audit_helper.generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key,
-            row_number() over (partition by dbt_audit_surrogate_key order by dbt_audit_surrogate_key ) as dbt_audit_pk_row_num
+            {{ surrogate_key }} as dbt_audit_surrogate_key,
+            row_number() over (partition by {{ surrogate_key }} order by 1 ) as dbt_audit_pk_row_num
         from ( {{-  a_query  -}} )
         {% if event_time_props %}
             where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
@@ -91,8 +90,8 @@
     subset_columns_b as (
         select 
             {{ joined_cols }}, 
-            {{ audit_helper.generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key,
-            row_number() over (partition by dbt_audit_surrogate_key order by dbt_audit_surrogate_key ) as dbt_audit_pk_row_num
+            {{ surrogate_key }} as dbt_audit_surrogate_key,
+            row_number() over (partition by {{ surrogate_key }} order by 1 ) as dbt_audit_pk_row_num
         from ( {{-  b_query  -}} )
         {% if event_time_props %}
             where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
@@ -136,6 +135,63 @@
     )
 {% endmacro %}
 
+{% macro databricks___generate_set_results(a_query, b_query, primary_key_columns, columns, event_time_props) %}
+    {% set cast_columns = [] %}
+    {# Map types can't be compared by default on Databricks (doing so requires opting in to a legacy behaviour flag), #}
+    {# so every column is cast to a string before it is selected and hashed. The row hash also includes dbt_audit_pk_row_num. #}
+    {% for col in columns %}
+        {% do cast_columns.append(dbt.cast(col, api.Column.translate_type("string"))) %}
+    {% endfor %}
+    {% set joined_cols = cast_columns | join(", ") %}
+    {% set surrogate_key = audit_helper.generate_null_safe_surrogate_key(primary_key_columns) %}
+    a as (
+        select 
+            {{ joined_cols }}, 
+            {{ surrogate_key }} as dbt_audit_surrogate_key,
+            row_number() over (partition by {{ surrogate_key }} order by 1 ) as dbt_audit_pk_row_num,
+            xxhash64({{ joined_cols }}, dbt_audit_pk_row_num) as dbt_audit_row_hash
+        from ( {{-  a_query  -}} )
+        {% if event_time_props %}
+            where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
+            and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
+        {% endif %}
+    ),
+
+    b as (
+        select 
+            {{ joined_cols }}, 
+            {{ surrogate_key }} as dbt_audit_surrogate_key,
+            row_number() over (partition by {{ surrogate_key }} order by 1 ) as dbt_audit_pk_row_num,
+            xxhash64({{ joined_cols }}, dbt_audit_pk_row_num) as dbt_audit_row_hash
+        from ( {{-  b_query  -}} )
+        {% if event_time_props %}
+            where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
+            and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
+        {% endif %}
+    ),
+
+    a_intersect_b as (
+
+        select * from a
+        where a.dbt_audit_row_hash in (select b.dbt_audit_row_hash from b)
+
+    ),
+
+    a_except_b as (
+
+        select * from a
+        where a.dbt_audit_row_hash not in (select b.dbt_audit_row_hash from b)
+
+    ),
+
+    b_except_a as (
+
+        select * from b
+        where b.dbt_audit_row_hash not in (select a.dbt_audit_row_hash from a)
+
+    )   
+{% endmacro %}
+
 {% macro snowflake___generate_set_results(a_query, b_query, primary_key_columns, columns, event_time_props) %}
     {% set joined_cols = columns | join(", ") %}
     a as (

From f0ebf2f7dcadc34f2a297dcd2968ca9e298b4f30 Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Thu, 6 Jun 2024 14:11:39 +1200
Subject: [PATCH 10/13] Rename private macros to have _ prefix

---
 .../unit_compare_classify.yml                 |  2 +-
 .../unit_quick_are_queries_identical.yml      |  2 +-
 macros/compare_and_classify_query_results.sql |  2 +-
 macros/quick_are_queries_identical.sql        |  4 +-
 ...safe_sk.sql => _generate_null_safe_sk.sql} |  6 +-
 macros/utils/_generate_set_results.sql        | 66 +++++++++++++++++--
 ..._bounds.sql => _get_comparison_bounds.sql} |  2 +-
 7 files changed, 71 insertions(+), 13 deletions(-)
 rename macros/utils/{generate_null_safe_sk.sql => _generate_null_safe_sk.sql} (70%)
 rename macros/utils/{get_comparison_bounds.sql => _get_comparison_bounds.sql} (97%)

diff --git a/integration_tests/models/unit_test_wrappers/unit_compare_classify.yml b/integration_tests/models/unit_test_wrappers/unit_compare_classify.yml
index 4dbf6c59..2e8a24ff 100644
--- a/integration_tests/models/unit_test_wrappers/unit_compare_classify.yml
+++ b/integration_tests/models/unit_test_wrappers/unit_compare_classify.yml
@@ -34,7 +34,7 @@ unit_tests:
         compare_classify__event_time: 'created_at'
         compare_classify__primary_key_columns: ['id']
       macros: 
-        audit_helper.get_comparison_bounds:
+        audit_helper._get_comparison_bounds:
           "min_event_time": "2024-01-02"
           "max_event_time": "2024-01-03"
           "event_time": 'created_at'
diff --git a/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.yml b/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.yml
index 0d953506..ddfddc2d 100644
--- a/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.yml
+++ b/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.yml
@@ -30,7 +30,7 @@ unit_tests:
         quick_are_queries_identical_cols: ['id', 'col1', 'col2', 'created_at']
         quick_are_queries_identical_event_time: 'created_at'
       macros: 
-        audit_helper.get_comparison_bounds:
+        audit_helper._get_comparison_bounds:
           "min_event_time": "2024-01-02"
           "max_event_time": "2024-01-03"
           "event_time": 'created_at'
diff --git a/macros/compare_and_classify_query_results.sql b/macros/compare_and_classify_query_results.sql
index 0025bf21..c81f8b58 100644
--- a/macros/compare_and_classify_query_results.sql
+++ b/macros/compare_and_classify_query_results.sql
@@ -3,7 +3,7 @@
     {% set joined_cols = columns | join(", ") %}
 
     {% if event_time %}
-        {% set event_time_props = audit_helper.get_comparison_bounds(a_query, b_query, event_time) %}
+        {% set event_time_props = audit_helper._get_comparison_bounds(a_query, b_query, event_time) %}
     {% endif %}
 
     with 
diff --git a/macros/quick_are_queries_identical.sql b/macros/quick_are_queries_identical.sql
index 0a049ad4..af00c8cd 100644
--- a/macros/quick_are_queries_identical.sql
+++ b/macros/quick_are_queries_identical.sql
@@ -22,7 +22,7 @@ but it's a good way to quickly verify identical results if that's what you're ex
 {% macro bigquery__quick_are_queries_identical(query_a, query_b, columns, event_time) %}
     {% set joined_cols = columns | join(", ") %}
     {% if event_time %}
-        {% set event_time_props = audit_helper.get_comparison_bounds(a_query, b_query, event_time) %}
+        {% set event_time_props = audit_helper._get_comparison_bounds(query_a, query_b, event_time) %}
     {% endif %}
 
     with query_a as (
@@ -57,7 +57,7 @@ but it's a good way to quickly verify identical results if that's what you're ex
 {% macro snowflake__quick_are_queries_identical(query_a, query_b, columns, event_time) %}
     {% set joined_cols = columns | join(", ") %}
     {% if event_time %}
-        {% set event_time_props = audit_helper.get_comparison_bounds(a_query, b_query, event_time) %}
+        {% set event_time_props = audit_helper._get_comparison_bounds(query_a, query_b, event_time) %}
     {% endif %}
 
     select count(distinct hash_result) = 1 as are_tables_identical
diff --git a/macros/utils/generate_null_safe_sk.sql b/macros/utils/_generate_null_safe_sk.sql
similarity index 70%
rename from macros/utils/generate_null_safe_sk.sql
rename to macros/utils/_generate_null_safe_sk.sql
index 4078c334..26ed6450 100644
--- a/macros/utils/generate_null_safe_sk.sql
+++ b/macros/utils/_generate_null_safe_sk.sql
@@ -1,10 +1,10 @@
 {# Taken from https://github.com/dbt-labs/dbt-utils/blob/main/macros/sql/generate_surrogate_key.sql but without the option to treat nulls as empty strings #}
 
-{%- macro generate_null_safe_surrogate_key(field_list) -%}
-    {{ return(adapter.dispatch('generate_null_safe_surrogate_key', 'audit_helper')(field_list)) }}
+{%- macro _generate_null_safe_surrogate_key(field_list) -%}
+    {{ return(adapter.dispatch('_generate_null_safe_surrogate_key', 'audit_helper')(field_list)) }}
 {% endmacro %}
 
-{%- macro default__generate_null_safe_surrogate_key(field_list) -%}
+{%- macro default___generate_null_safe_surrogate_key(field_list) -%}
 
 {%- set fields = [] -%}
 
diff --git a/macros/utils/_generate_set_results.sql b/macros/utils/_generate_set_results.sql
index a5400714..0bc409f0 100644
--- a/macros/utils/_generate_set_results.sql
+++ b/macros/utils/_generate_set_results.sql
@@ -14,7 +14,7 @@
     a_base as (
         select 
             {{ joined_cols }}, 
-            {{ audit_helper.generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key
+            {{ audit_helper._generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key
         from ( {{-  a_query  -}} ) a_base_subq
         {% if event_time_props %}
             where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
@@ -25,7 +25,7 @@
     b_base as (
         select 
             {{ joined_cols }}, 
-            {{ audit_helper.generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key
+            {{ audit_helper._generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key
         from ( {{-  b_query  -}} ) b_base_subq
         {% if event_time_props %}
             where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
@@ -76,6 +76,7 @@
 
 {% macro bigquery___generate_set_results(a_query, b_query, primary_key_columns, columns, event_time_props) %}
     {% set joined_cols = columns | join(", ") %}
+    {% set surrogate_key = audit_helper._generate_null_safe_surrogate_key(primary_key_columns) %}
     subset_columns_a as (
         select 
             {{ joined_cols }}, 
@@ -136,12 +137,69 @@
     )
 {% endmacro %}
 
+{% macro databricks___generate_set_results(a_query, b_query, primary_key_columns, columns, event_time_props) %}
+    {% set cast_columns = [] %}
+    {# Builds the a, b, a_intersect_b, a_except_b and b_except_a CTEs by comparing a per-row xxhash64 of the selected columns. #}
+    {# Map types can't be compared by default (you need to opt in to a legacy behaviour flag), so every column is cast to a string first :( #}
+    {% for col in columns %}
+        {% do cast_columns.append(dbt.cast(col, api.Column.translate_type("string"))) %}
+    {% endfor %}
+    {% set joined_cols = cast_columns | join(", ") %}
+    {% set surrogate_key = audit_helper._generate_null_safe_surrogate_key(primary_key_columns) %}
+    a as (
+        select 
+            {{ joined_cols }}, 
+            {{ surrogate_key }} as dbt_audit_surrogate_key,
+            row_number() over (partition by {{ surrogate_key }} order by 1 ) as dbt_audit_pk_row_num,
+            xxhash64({{ joined_cols }}, dbt_audit_pk_row_num) as dbt_audit_row_hash
+        from ( {{-  a_query  -}} )
+        {% if event_time_props %}
+            where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
+            and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
+        {% endif %}
+    ),
+
+    b as (
+        select 
+            {{ joined_cols }}, 
+            {{ surrogate_key }} as dbt_audit_surrogate_key,
+            row_number() over (partition by {{ surrogate_key }} order by 1 ) as dbt_audit_pk_row_num,
+            xxhash64({{ joined_cols }}, dbt_audit_pk_row_num) as dbt_audit_row_hash
+        from ( {{-  b_query  -}} )
+        {% if event_time_props %}
+            where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
+            and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
+        {% endif %}
+    ),
+
+    a_intersect_b as (
+
+        select * from a
+        where a.dbt_audit_row_hash in (select b.dbt_audit_row_hash from b)
+
+    ),
+
+    a_except_b as (
+
+        select * from a
+        where a.dbt_audit_row_hash not in (select b.dbt_audit_row_hash from b)
+
+    ),
+
+    b_except_a as (
+
+        select * from b
+        where b.dbt_audit_row_hash not in (select a.dbt_audit_row_hash from a)
+
+    )   
+{% endmacro %}
+
 {% macro snowflake___generate_set_results(a_query, b_query, primary_key_columns, columns, event_time_props) %}
     {% set joined_cols = columns | join(", ") %}
     a as (
         select 
             {{ joined_cols }}, 
-            {{ audit_helper.generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key,
+            {{ audit_helper._generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key,
             row_number() over (partition by dbt_audit_surrogate_key order by dbt_audit_surrogate_key ) as dbt_audit_pk_row_num,
             hash({{ joined_cols }}, dbt_audit_pk_row_num) as dbt_audit_row_hash
         from ( {{-  a_query  -}} )
@@ -154,7 +212,7 @@
     b as (
         select 
             {{ joined_cols }}, 
-            {{ audit_helper.generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key,
+            {{ audit_helper._generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key,
             row_number() over (partition by dbt_audit_surrogate_key order by dbt_audit_surrogate_key ) as dbt_audit_pk_row_num,
             hash({{ joined_cols }}, dbt_audit_pk_row_num) as dbt_audit_row_hash
         from ( {{-  b_query  -}} )
diff --git a/macros/utils/get_comparison_bounds.sql b/macros/utils/_get_comparison_bounds.sql
similarity index 97%
rename from macros/utils/get_comparison_bounds.sql
rename to macros/utils/_get_comparison_bounds.sql
index 4f224f5f..75369071 100644
--- a/macros/utils/get_comparison_bounds.sql
+++ b/macros/utils/_get_comparison_bounds.sql
@@ -18,7 +18,7 @@ model_a  │       │┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼
             ┌──► └────────────────────────────┘ ◄────┐ 
            b min_value                      b max_value 
 */
-{% macro get_comparison_bounds(a_relation, b_relation, event_time) %}
+{% macro _get_comparison_bounds(a_relation, b_relation, event_time) %}
     {% set min_max_queries %}
         with min_maxes as (
             select min({{ event_time }}) as min_event_time, max({{ event_time }}) as max_event_time

From 593f2177c726b89508b03b9348eb708ac5e4163a Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Thu, 6 Jun 2024 15:17:06 +1200
Subject: [PATCH 11/13] Fix get comparison bounds (#104)

* change to getting comparison bounds for queries not relations

* add test for introspective queries
---
 .../data_tests/compare_and_classify_query_results.sql | 11 +++++++++++
 macros/utils/_get_comparison_bounds.sql               |  9 ++++++---
 2 files changed, 17 insertions(+), 3 deletions(-)
 create mode 100644 integration_tests/models/data_tests/compare_and_classify_query_results.sql

diff --git a/integration_tests/models/data_tests/compare_and_classify_query_results.sql b/integration_tests/models/data_tests/compare_and_classify_query_results.sql
new file mode 100644
index 00000000..747f146f
--- /dev/null
+++ b/integration_tests/models/data_tests/compare_and_classify_query_results.sql
@@ -0,0 +1,11 @@
+-- this has no tests, it's just making sure that the introspective queries for event_time actually run
+
+{{
+    audit_helper.compare_and_classify_query_results(
+        a_query="select * from " ~ ref('unit_test_model_a') ~ " where 1=1",
+        b_query="select * from " ~ ref('unit_test_model_b') ~ " where 1=1",
+        primary_key_columns=['id'],
+        columns=['id', 'col1', 'col2'],
+        event_time='created_at'
+    )
+}}
\ No newline at end of file
diff --git a/macros/utils/_get_comparison_bounds.sql b/macros/utils/_get_comparison_bounds.sql
index 75369071..c644f062 100644
--- a/macros/utils/_get_comparison_bounds.sql
+++ b/macros/utils/_get_comparison_bounds.sql
@@ -18,14 +18,14 @@ model_a  │       │┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼
             ┌──► └────────────────────────────┘ ◄────┐ 
            b min_value                      b max_value 
 */
-{% macro _get_comparison_bounds(a_relation, b_relation, event_time) %}
+{% macro _get_comparison_bounds(a_query, b_query, event_time) %}
     {% set min_max_queries %}
         with min_maxes as (
             select min({{ event_time }}) as min_event_time, max({{ event_time }}) as max_event_time
-            from {{ a_relation }}
+            from ({{ a_query }}) a_subq
             union all 
             select min({{ event_time }}) as min_event_time, max({{ event_time }}) as max_event_time
-            from {{ b_relation }}
+            from ({{ b_query }}) b_subq
         )
         select max(min_event_time) as min_event_time, min(max_event_time) as max_event_time
         from min_maxes
@@ -34,6 +34,9 @@ model_a  │       │┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼
     {% set query_response = dbt_utils.get_query_results_as_dict(min_max_queries) %}
     
     {% set event_time_props = {"event_time": event_time} %}
+    
+    {# query_response.keys() are only `min_event_time` and `max_event_time`, but they have indeterminate capitalisation #}
+    {# hence the dynamic approach for what is otherwise just two well-known values #}
     {% for k in query_response.keys() %}
         {% do event_time_props.update({k | lower: query_response[k][0]}) %}
     {% endfor %}

From dee003bdd9e3dc1ea86e45aa963493ebef8c653d Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Thu, 13 Jun 2024 16:18:07 +1200
Subject: [PATCH 12/13] Make compare query columns multi pk (#105)

---
 integration_tests/dbt_project.yml             |   7 +-
 .../compare_which_columns_differ.sql          |   4 +-
 ...pare_which_columns_differ_exclude_cols.sql |  13 +-
 .../unit_compare_classify.sql                 |   6 +-
 .../unit_compare_classify.yml                 |  54 ++++----
 .../unit_compare_classify_struct.sql          |   6 +-
 .../unit_compare_classify_struct.yml          |  44 +++---
 ...nit_compare_which_query_columns_differ.sql |  17 +++
 ...nit_compare_which_query_columns_differ.yml | 124 +++++++++++++++++
 .../unit_ensure_all_pks_are_in_column_set.sql |  19 +++
 .../unit_ensure_all_pks_are_in_column_set.yml | 130 ++++++++++++++++++
 .../unit_quick_are_queries_identical.sql      |   2 +-
 .../unit_quick_are_queries_identical.yml      |   8 +-
 macros/compare_which_columns_differ.sql       |  46 -------
 macros/compare_which_query_columns_differ.sql |  64 +++++++++
 .../compare_which_relation_columns_differ.sql |  15 ++
 macros/quick_are_queries_identical.sql        |  20 +--
 macros/utils/_generate_set_results.sql        |  40 ++----
 macros/utils/_get_comparison_bounds.sql       |   7 +
 19 files changed, 462 insertions(+), 164 deletions(-)
 create mode 100644 integration_tests/models/unit_test_wrappers/unit_compare_which_query_columns_differ.sql
 create mode 100644 integration_tests/models/unit_test_wrappers/unit_compare_which_query_columns_differ.yml
 create mode 100644 integration_tests/models/unit_test_wrappers/unit_ensure_all_pks_are_in_column_set.sql
 create mode 100644 integration_tests/models/unit_test_wrappers/unit_ensure_all_pks_are_in_column_set.yml
 delete mode 100644 macros/compare_which_columns_differ.sql
 create mode 100644 macros/compare_which_query_columns_differ.sql
 create mode 100644 macros/compare_which_relation_columns_differ.sql

diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml
index 873057e5..2f4cb84d 100644
--- a/integration_tests/dbt_project.yml
+++ b/integration_tests/dbt_project.yml
@@ -20,11 +20,10 @@ seeds:
 
 vars:
   compare_queries_summarize: true
-  compare_classify__primary_key_columns: ['col1']
-  compare_classify__columns: ['col1']
-  compare_classify__event_time:
+  primary_key_columns_var: ['col1']
+  columns_var: ['col1']
+  event_time_var:
   quick_are_queries_identical_cols: ['col1']
-  quick_are_queries_identical_event_time:
 
 flags:
   send_anonymous_usage_stats: False
diff --git a/integration_tests/models/data_tests/compare_which_columns_differ.sql b/integration_tests/models/data_tests/compare_which_columns_differ.sql
index a68523d3..ef158803 100644
--- a/integration_tests/models/data_tests/compare_which_columns_differ.sql
+++ b/integration_tests/models/data_tests/compare_which_columns_differ.sql
@@ -9,9 +9,9 @@ select
     has_difference
 from (
 
-    {{ audit_helper.compare_which_columns_differ(
+    {{ audit_helper.compare_which_relation_columns_differ(
         a_relation=a_relation,
         b_relation=b_relation,
-        primary_key="id"
+        primary_key_columns=["id"]
     ) }}
 ) as macro_output
diff --git a/integration_tests/models/data_tests/compare_which_columns_differ_exclude_cols.sql b/integration_tests/models/data_tests/compare_which_columns_differ_exclude_cols.sql
index 7630f549..8d2d5aa2 100644
--- a/integration_tests/models/data_tests/compare_which_columns_differ_exclude_cols.sql
+++ b/integration_tests/models/data_tests/compare_which_columns_differ_exclude_cols.sql
@@ -2,17 +2,24 @@
 
 {% set b_relation=ref('data_compare_which_columns_differ_b') %}
 
+{% set pk_cols = ['id'] %}
+{% set cols = ['id','value_changes','becomes_not_null','does_not_change'] %}
+
+{% if target.type == 'snowflake' %}
+    {% set pk_cols = pk_cols | map("upper") | list %}
+    {% set cols = cols | map("upper") | list %}
+{% endif %}
 
 select 
     lower(column_name) as column_name,
     has_difference
 from (
 
-    {{ audit_helper.compare_which_columns_differ(
+    {{ audit_helper.compare_which_relation_columns_differ(
         a_relation=a_relation,
         b_relation=b_relation,
-        primary_key="id",
-        exclude_columns=["becomes_null"]
+        primary_key_columns=pk_cols,
+        columns=cols
     ) }}
 
 ) as macro_output
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_wrappers/unit_compare_classify.sql b/integration_tests/models/unit_test_wrappers/unit_compare_classify.sql
index fbef7dcb..e2c707a8 100644
--- a/integration_tests/models/unit_test_wrappers/unit_compare_classify.sql
+++ b/integration_tests/models/unit_test_wrappers/unit_compare_classify.sql
@@ -2,8 +2,8 @@
     audit_helper.compare_and_classify_query_results(
         "select * from " ~ ref('unit_test_model_a') ~ " where 1=1",
         "select * from " ~ ref('unit_test_model_b') ~ " where 1=1",
-        primary_key_columns=var('compare_classify__primary_key_columns'),
-        columns=var('compare_classify__columns'),
-        event_time=var('compare_classify__event_time')
+        primary_key_columns=var('primary_key_columns_var'),
+        columns=var('columns_var'),
+        event_time=var('event_time_var')
     ) 
 }}
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_wrappers/unit_compare_classify.yml b/integration_tests/models/unit_test_wrappers/unit_compare_classify.yml
index f808f7e8..759526fe 100644
--- a/integration_tests/models/unit_test_wrappers/unit_compare_classify.yml
+++ b/integration_tests/models/unit_test_wrappers/unit_compare_classify.yml
@@ -22,17 +22,17 @@ unit_tests:
 
     overrides:
       vars:
-        compare_classify__columns: ['id', 'col1', 'col2']
-        compare_classify__event_time:
-        compare_classify__primary_key_columns: ['id']
+        columns_var: ['id', 'col1', 'col2']
+        event_time_var:
+        primary_key_columns_var: ['id']
 
   - name: compare_classify_identical_tables_event_time_filter
     model: unit_compare_classify
     overrides:
       vars:
-        compare_classify__columns: ['id', 'col1', 'col2', 'created_at']
-        compare_classify__event_time: 'created_at'
-        compare_classify__primary_key_columns: ['id']
+        columns_var: ['id', 'col1', 'col2', 'created_at']
+        event_time_var: 'created_at'
+        primary_key_columns_var: ['id']
       macros: 
         audit_helper._get_comparison_bounds:
           "min_event_time": "2024-01-02"
@@ -59,9 +59,9 @@ unit_tests:
     model: unit_compare_classify
     overrides:
       vars:
-        compare_classify__columns: ['id', 'col1', 'col2']
-        compare_classify__event_time:
-        compare_classify__primary_key_columns: ['id']
+        columns_var: ['id', 'col1', 'col2']
+        event_time_var:
+        primary_key_columns_var: ['id']
     given:
       - input: ref('unit_test_model_a')
         rows:
@@ -88,9 +88,9 @@ unit_tests:
     model: unit_compare_classify
     overrides:
       vars:
-        compare_classify__columns: ['id', 'id_2', 'col1', 'col2']
-        compare_classify__event_time:
-        compare_classify__primary_key_columns: ['id', 'id_2']
+        columns_var: ['id', 'id_2', 'col1', 'col2']
+        event_time_var:
+        primary_key_columns_var: ['id', 'id_2']
     given:
       - input: ref('unit_test_model_a')
         rows:
@@ -132,9 +132,9 @@ unit_tests:
 
     overrides:
       vars:
-        compare_classify__columns: ['id', 'col1', 'col2']
-        compare_classify__event_time:
-        compare_classify__primary_key_columns: ['id']
+        columns_var: ['id', 'col1', 'col2']
+        event_time_var:
+        primary_key_columns_var: ['id']
     config:
       tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-redshift/issues/821
 
@@ -161,9 +161,9 @@ unit_tests:
 
     overrides:
       vars:
-        compare_classify__columns: ['id', 'col1', 'col2']
-        compare_classify__event_time:
-        compare_classify__primary_key_columns: ['id']
+        columns_var: ['id', 'col1', 'col2']
+        event_time_var:
+        primary_key_columns_var: ['id']
     config:
       tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-redshift/issues/821
 
@@ -193,9 +193,9 @@ unit_tests:
 
     overrides:
       vars:
-        compare_classify__columns: ['id', 'col1', 'col2']
-        compare_classify__event_time:
-        compare_classify__primary_key_columns: ['id']
+        columns_var: ['id', 'col1', 'col2']
+        event_time_var:
+        primary_key_columns_var: ['id']
     config:
       tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" #https://github.com/dbt-labs/dbt-redshift/issues/821
 
@@ -203,9 +203,9 @@ unit_tests:
     model: unit_compare_classify
     overrides:
       vars:
-        compare_classify__primary_key_columns: ['id']
-        compare_classify__columns: ['id', 'col1']
-        compare_classify__event_time:
+        primary_key_columns_var: ['id']
+        columns_var: ['id', 'col1']
+        event_time_var:
     given:
       - input: ref('unit_test_model_a')
         rows:
@@ -251,6 +251,6 @@ unit_tests:
 
     overrides:
       vars:
-        compare_classify__columns: ['col1', 'col2']
-        compare_classify__event_time:
-        compare_classify__primary_key_columns: ['id']
+        columns_var: ['col1', 'col2']
+        event_time_var:
+        primary_key_columns_var: ['id']
diff --git a/integration_tests/models/unit_test_wrappers/unit_compare_classify_struct.sql b/integration_tests/models/unit_test_wrappers/unit_compare_classify_struct.sql
index 82b04ea4..4184f3dc 100644
--- a/integration_tests/models/unit_test_wrappers/unit_compare_classify_struct.sql
+++ b/integration_tests/models/unit_test_wrappers/unit_compare_classify_struct.sql
@@ -2,8 +2,8 @@
     audit_helper.compare_and_classify_query_results(
         "select * from " ~ ref('unit_test_struct_model_a') ~ " where 1=1",
         "select * from " ~ ref('unit_test_struct_model_b') ~ " where 1=1",
-        primary_key_columns=var('compare_classify__primary_key_columns'),
-        columns=var('compare_classify__columns'),
-        event_time=var('compare_classify__event_time')
+        primary_key_columns=var('primary_key_columns_var'),
+        columns=var('columns_var'),
+        event_time=var('event_time_var')
     ) 
 }}
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_wrappers/unit_compare_classify_struct.yml b/integration_tests/models/unit_test_wrappers/unit_compare_classify_struct.yml
index 98f634d1..ab86ed44 100644
--- a/integration_tests/models/unit_test_wrappers/unit_compare_classify_struct.yml
+++ b/integration_tests/models/unit_test_wrappers/unit_compare_classify_struct.yml
@@ -13,11 +13,9 @@ unit_tests:
         - {"id": 1, "dbt_audit_row_status": "identical", "dbt_audit_num_rows_in_status": 1}
     overrides:
       vars:
-        compare_classify__columns: ['id', 'col1', 'col2']
-        compare_classify__event_time:
-        compare_classify__primary_key_columns: ['id']
-    # config:
-    #   tags: "{{ 'skip' if (target.type in ['databricks']) else 'runnable' }}" #Can't do set operations on even simple JSON cols
+        columns_var: ['id', 'col1', 'col2']
+        event_time_var:
+        primary_key_columns_var: ['id']
 
   - name: unit_compare_classify_struct_identical_values_different_order
     model: unit_compare_classify_struct
@@ -34,9 +32,9 @@ unit_tests:
         - {"id": 1, "dbt_audit_row_status": "identical", "dbt_audit_num_rows_in_status": 1}
     overrides:
       vars:
-        compare_classify__columns: ['id', 'col1', 'col2']
-        compare_classify__event_time:
-        compare_classify__primary_key_columns: ['id']
+        columns_var: ['id', 'col1', 'col2']
+        event_time_var:
+        primary_key_columns_var: ['id']
     config:
       #Databricks cares about the order and considers it a difference. We're not trying to have identical behaviour across warehouses so that's OK.
       tags: "{{ 'skip' if (target.type in ['databricks']) else 'runnable' }}"
@@ -57,9 +55,9 @@ unit_tests:
         - {"id": 1, "dbt_audit_row_status": "modified", "dbt_audit_num_rows_in_status": 1}
     overrides:
       vars:
-        compare_classify__columns: ['id', 'col1', 'col2']
-        compare_classify__event_time:
-        compare_classify__primary_key_columns: ['id']
+        columns_var: ['id', 'col1', 'col2']
+        event_time_var:
+        primary_key_columns_var: ['id']
     config:
       #Only for databricks
       tags: "{{ 'skip' if (target.type not in ['databricks']) else 'runnable' }}"
@@ -79,9 +77,9 @@ unit_tests:
         - {"id": 1, "dbt_audit_row_status": "modified", "dbt_audit_num_rows_in_status": 1}
     overrides:
       vars:
-        compare_classify__columns: ['id', 'col1', 'col2']
-        compare_classify__event_time:
-        compare_classify__primary_key_columns: ['id']
+        columns_var: ['id', 'col1', 'col2']
+        event_time_var:
+        primary_key_columns_var: ['id']
     # config:
     #   tags: "{{ 'skip' if (target.type in ['databricks']) else 'runnable' }}" #Can't do set operations on even simple JSON cols
 
@@ -99,9 +97,9 @@ unit_tests:
         - {"id": 1, "dbt_audit_row_status": "identical", "dbt_audit_num_rows_in_status": 1}
     overrides:
       vars:
-        compare_classify__columns: ['id', 'col1', 'col2']
-        compare_classify__event_time:
-        compare_classify__primary_key_columns: ['id']
+        columns_var: ['id', 'col1', 'col2']
+        event_time_var:
+        primary_key_columns_var: ['id']
     # config:
     #   tags: "{{ 'skip' if (target.type in ['redshift', 'databricks']) else 'runnable' }}" #haven't ported these to be multi-warehouse yet
 
@@ -121,9 +119,9 @@ unit_tests:
         - {"id": 1, "dbt_audit_row_status": "modified", "dbt_audit_num_rows_in_status": 1}
     overrides:
       vars:
-        compare_classify__columns: ['id', 'col1', 'col2']
-        compare_classify__event_time:
-        compare_classify__primary_key_columns: ['id']
+        columns_var: ['id', 'col1', 'col2']
+        event_time_var:
+        primary_key_columns_var: ['id']
     # config:
     #   tags: "{{ 'skip' if (target.type in ['redshift', 'databricks']) else 'runnable' }}" #haven't ported these to be multi-warehouse yet
 
@@ -143,8 +141,8 @@ unit_tests:
         - {"id": 1, "dbt_audit_row_status": "modified", "dbt_audit_num_rows_in_status": 1}
     overrides:
       vars:
-        compare_classify__columns: ['id', 'col1', 'col2']
-        compare_classify__event_time:
-        compare_classify__primary_key_columns: ['id']
+        columns_var: ['id', 'col1', 'col2']
+        event_time_var:
+        primary_key_columns_var: ['id']
     # config:
     #   tags: "{{ 'skip' if (target.type in ['redshift', 'databricks']) else 'runnable' }}" #haven't ported these to be multi-warehouse yet
diff --git a/integration_tests/models/unit_test_wrappers/unit_compare_which_query_columns_differ.sql b/integration_tests/models/unit_test_wrappers/unit_compare_which_query_columns_differ.sql
new file mode 100644
index 00000000..d2c12fde
--- /dev/null
+++ b/integration_tests/models/unit_test_wrappers/unit_compare_which_query_columns_differ.sql
@@ -0,0 +1,17 @@
+{% set pk_cols = var('primary_key_columns_var') %}
+{% set cols = var('columns_var') %}
+
+{% if target.type == 'snowflake' and flags.WHICH == 'run' %}
+    {% set pk_cols = pk_cols | map("upper") | list %}
+    {% set cols = cols | map("upper") | list %}
+{% endif %}
+
+{{ 
+    audit_helper.compare_which_query_columns_differ(
+        a_query = "select * from " ~ ref('unit_test_model_a') ~ " where 1=1",
+        b_query = "select * from " ~ ref('unit_test_model_b') ~ " where 1=1",
+        primary_key_columns = pk_cols, 
+        columns = cols,
+        event_time = var('event_time_var')
+    )
+}}
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_wrappers/unit_compare_which_query_columns_differ.yml b/integration_tests/models/unit_test_wrappers/unit_compare_which_query_columns_differ.yml
new file mode 100644
index 00000000..49007dd1
--- /dev/null
+++ b/integration_tests/models/unit_test_wrappers/unit_compare_which_query_columns_differ.yml
@@ -0,0 +1,124 @@
+unit_tests:
+  - name: compare_cols_identical_tables
+    model: unit_compare_which_query_columns_differ
+    
+    given:
+      - input: ref('unit_test_model_a')
+        rows:
+          - { "id": 1, "col1": "abc", "col2": "def" }
+          - { "id": 2, "col1": "hij", "col2": "klm" }
+          - { "id": 3, "col1": "nop", "col2": "qrs" }
+      - input: ref('unit_test_model_b')
+        rows:
+          - { "id": 1, "col1": "abc", "col2": "def" }
+          - { "id": 2, "col1": "hij", "col2": "klm" }
+          - { "id": 3, "col1": "nop", "col2": "qrs" }
+        
+    expect:
+      rows:
+        - {"column_name": 'id', 'has_difference': false}
+        - {"column_name": 'col1', 'has_difference': false}
+        - {"column_name": 'col2', 'has_difference': false}
+
+    overrides:
+      vars:
+        columns_var: ['id', 'col1', 'col2']
+        event_time_var:
+        primary_key_columns_var: ['id']
+    config:
+      tags: "{{ 'skip' if (target.type in ['snowflake']) else 'runnable' }}" #Case sensitivity 
+
+  - name: compare_cols_identical_tables_event_time_filter
+    model: unit_compare_which_query_columns_differ
+    overrides:
+      vars:
+        columns_var: ['id', 'col1', 'col2', 'created_at']
+        event_time_var: 'created_at'
+        primary_key_columns_var: ['id']
+      macros: 
+        audit_helper._get_comparison_bounds:
+          "min_event_time": "2024-01-02"
+          "max_event_time": "2024-01-03"
+          "event_time": 'created_at'
+      
+    given:
+      - input: ref('unit_test_model_a')
+        rows:
+          - { "id": 1, "col1": "abc", "col2": "def", "created_at": '2024-01-01' }
+          - { "id": 2, "col1": "hij", "col2": "klm", "created_at": '2024-01-02' }
+          - { "id": 3, "col1": "nop", "col2": "qrs", "created_at": '2024-01-03' }
+      - input: ref('unit_test_model_b')
+        rows:
+          - { "id": 2, "col1": "hij", "col2": "klm", "created_at": '2024-01-02' }
+          - { "id": 3, "col1": "nop", "col2": "qrs", "created_at": '2024-01-03' }
+        
+    expect:
+      rows:
+        - {"column_name": 'id', "has_difference": false}
+        - {"column_name": 'col1', "has_difference": false}
+        - {"column_name": 'col2', "has_difference": false}
+        - {"column_name": 'created_at', "has_difference": false}
+    config:
+      tags: "{{ 'skip' if (target.type in ['snowflake']) else 'runnable' }}" #Case sensitivity 
+
+  - name: compare_cols_identical_tables_snowflake
+    model: unit_compare_which_query_columns_differ
+    
+    given:
+      - input: ref('unit_test_model_a')
+        rows:
+          - { "id": 1, "col1": "abc", "col2": "def" }
+          - { "id": 2, "col1": "hij", "col2": "klm" }
+          - { "id": 3, "col1": "nop", "col2": "qrs" }
+      - input: ref('unit_test_model_b')
+        rows:
+          - { "id": 1, "col1": "abc", "col2": "def" }
+          - { "id": 2, "col1": "hij", "col2": "klm" }
+          - { "id": 3, "col1": "nop", "col2": "qrs" }
+        
+    expect:
+      rows:
+        - {"column_name": 'ID', 'has_difference': false}
+        - {"column_name": 'COL1', 'has_difference': false}
+        - {"column_name": 'COL2', 'has_difference': false}
+
+    overrides:
+      vars:
+        columns_var: ['ID', 'COL1', 'COL2']
+        event_time_var:
+        primary_key_columns_var: ['ID']
+    config:
+      tags: "{{ 'skip' if (target.type not in ['snowflake']) else 'runnable' }}" #Case sensitivity 
+
+  - name: compare_cols_identical_tables_event_time_filter_snowflake
+    model: unit_compare_which_query_columns_differ
+    overrides:
+      vars:
+        columns_var: ['ID', 'COL1', 'COL2', 'CREATED_AT']
+        event_time_var: 'CREATED_AT'
+        primary_key_columns_var: ['ID']
+      macros: 
+        audit_helper._get_comparison_bounds:
+          "min_event_time": "2024-01-02"
+          "max_event_time": "2024-01-03"
+          "event_time": 'created_at'
+      
+    given:
+      - input: ref('unit_test_model_a')
+        rows:
+          - { "id": 1, "col1": "abc", "col2": "def", "created_at": '2024-01-01' }
+          - { "id": 2, "col1": "hij", "col2": "klm", "created_at": '2024-01-02' }
+          - { "id": 3, "col1": "nop", "col2": "qrs", "created_at": '2024-01-03' }
+      - input: ref('unit_test_model_b')
+        rows:
+          - { "id": 2, "col1": "hij", "col2": "klm", "created_at": '2024-01-02' }
+          - { "id": 3, "col1": "nop", "col2": "qrs", "created_at": '2024-01-03' }
+        
+    expect:
+      rows:
+        - {"column_name": 'ID', "has_difference": false}
+        - {"column_name": 'COL1', "has_difference": false}
+        - {"column_name": 'COL2', "has_difference": false}
+        - {"column_name": 'CREATED_AT', "has_difference": false}
+    config:
+      tags: "{{ 'skip' if (target.type not in ['snowflake']) else 'runnable' }}" #Case sensitivity 
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_wrappers/unit_ensure_all_pks_are_in_column_set.sql b/integration_tests/models/unit_test_wrappers/unit_ensure_all_pks_are_in_column_set.sql
new file mode 100644
index 00000000..a76f30cd
--- /dev/null
+++ b/integration_tests/models/unit_test_wrappers/unit_ensure_all_pks_are_in_column_set.sql
@@ -0,0 +1,19 @@
+{% set results = 
+    audit_helper._ensure_all_pks_are_in_column_set(
+        primary_key_columns=var('primary_key_columns_var', ['a_column_with_a_large_unwieldy_name']),
+        columns=var('columns_var', ['b_column_with_a_large_unwieldy_name']),
+    )
+%}
+
+{% if (var('primary_key_columns_var') | length == 0) and (var('columns_var') | length == 0) %}
+-- both var lists are empty: emit a zero-row select so the model still provides a table shape (col, row_index)
+select 'abcdefabcdef' as col, 1 as row_index
+limit 0
+{% endif %}
+
+{% for result in results %}
+    select '{{ result }}' as col, {{ loop.index }} as row_index
+    {% if not loop.last %}
+    union all 
+    {% endif %}
+{% endfor %}
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_wrappers/unit_ensure_all_pks_are_in_column_set.yml b/integration_tests/models/unit_test_wrappers/unit_ensure_all_pks_are_in_column_set.yml
new file mode 100644
index 00000000..35767c9d
--- /dev/null
+++ b/integration_tests/models/unit_test_wrappers/unit_ensure_all_pks_are_in_column_set.yml
@@ -0,0 +1,130 @@
+unit_tests:
+  - name: ensure_all_pks_in_columns
+    model: unit_ensure_all_pks_are_in_column_set
+    given: []
+    overrides:
+      vars:
+        primary_key_columns_var: ['pk1', 'pk2']
+        columns_var: ['pk1', 'pk2', 'column_a', 'column_b']
+    # Baseline: both PKs already lead columns_var, so the expected output is columns_var unchanged.
+    expect:
+      rows:
+        - {"col": 'pk1', "row_index": 1}
+        - {"col": 'pk2', "row_index": 2}
+        - {"col": 'column_a', "row_index": 3}
+        - {"col": 'column_b', "row_index": 4}
+  
+  - name: ensure_all_pks_in_columns_pks_at_end
+    model: unit_ensure_all_pks_are_in_column_set
+    description: PKs already listed at the end of `columns` stay where they are; no PK is moved or duplicated
+    given: []
+    overrides:
+      vars:
+        primary_key_columns_var: ['pk1', 'pk2']
+        columns_var: ['column_a', 'column_b', 'pk1', 'pk2']
+        
+    expect:
+      rows:
+        - {"col": 'column_a', "row_index": 1}
+        - {"col": 'column_b', "row_index": 2}
+        - {"col": 'pk1', "row_index": 3}
+        - {"col": 'pk2', "row_index": 4}
+      
+  - name: ensure_all_pks_in_columns_one_missing_pk
+    model: unit_ensure_all_pks_are_in_column_set
+    description: A PK already in `columns` keeps its position (here, the end of the list); a PK missing from `columns` is added at the front
+    given: []
+    overrides:
+      vars:
+        primary_key_columns_var: ['pk1', 'pk2']
+        columns_var: ['column_a', 'column_b', 'pk2']
+        
+    expect:
+      rows:
+        - {"col": 'pk1', "row_index": 1}
+        - {"col": 'column_a', "row_index": 2}
+        - {"col": 'column_b', "row_index": 3}
+        - {"col": 'pk2', "row_index": 4}
+      
+  - name: ensure_all_pks_in_columns_empty_sets
+    model: unit_ensure_all_pks_are_in_column_set
+    given: []
+    overrides:
+      vars:
+        primary_key_columns_var: []
+        columns_var: []
+    # Both inputs empty: the wrapper model's "limit 0" branch supplies the table shape, so no rows are expected.
+    expect:
+      rows: []
+
+  - name: ensure_all_pks_in_columns_no_pks
+    model: unit_ensure_all_pks_are_in_column_set
+    given: []
+    overrides:
+      vars:
+        primary_key_columns_var: []
+        columns_var: ['column_a', 'column_b']
+    # No PKs to enforce: the expected output is columns_var unchanged.
+    expect:
+      rows:
+        - {"col": 'column_a', "row_index": 1}
+        - {"col": 'column_b', "row_index": 2}
+
+  - name: ensure_all_pks_in_columns_no_cols
+    model: unit_ensure_all_pks_are_in_column_set
+    given: []
+    overrides:
+      vars:
+        primary_key_columns_var: ['pk1', 'pk2']
+        columns_var: []
+    # No columns supplied: the expected output is just the PKs, in the order they were given.
+    expect:
+      rows:
+        - {"col": 'pk1', "row_index": 1}
+        - {"col": 'pk2', "row_index": 2}
+
+  - name: ensure_all_pks_in_columns_caps_pk
+    model: unit_ensure_all_pks_are_in_column_set
+    given: []
+    overrides:
+      vars:
+        primary_key_columns_var: ['pk2', 'PK1']
+        columns_var: ['pk1', 'pk2', 'column_a', 'column_b']
+    # 'PK1' must match 'pk1' in columns_var regardless of case: nothing is added, and columns_var's casing and order are kept.
+    expect:
+      rows:
+        - {"col": 'pk1', "row_index": 1}
+        - {"col": 'pk2', "row_index": 2}
+        - {"col": 'column_a', "row_index": 3}
+        - {"col": 'column_b', "row_index": 4}
+  
+  - name: ensure_all_pks_in_columns_caps_col
+    model: unit_ensure_all_pks_are_in_column_set
+    given: []
+    overrides:
+      vars:
+        primary_key_columns_var: ['pk2', 'pk1']
+        columns_var: ['pk1', 'pk2', 'COLUMN_A', 'column_b']
+    # Casing of a non-PK column ('COLUMN_A') must pass through untouched; the PK list's order does not reorder columns_var.
+    expect:
+      rows:
+        - {"col": 'pk1', "row_index": 1}
+        - {"col": 'pk2', "row_index": 2}
+        - {"col": 'COLUMN_A', "row_index": 3}
+        - {"col": 'column_b', "row_index": 4}
+  
+  - name: ensure_all_pks_in_columns_caps_pk_in_both
+    model: unit_ensure_all_pks_are_in_column_set
+    given: []
+    overrides:
+      vars:
+        primary_key_columns_var: ['pk2', 'PK1']
+        columns_var: ['PK1', 'pk2', 'column_a', 'column_b']
+    # Upper-case 'PK1' on both sides: it is recognised as already present and comes back with its original casing.
+    expect:
+      rows:
+        - {"col": 'PK1', "row_index": 1}
+        - {"col": 'pk2', "row_index": 2}
+        - {"col": 'column_a', "row_index": 3}
+        - {"col": 'column_b', "row_index": 4}
+  
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.sql b/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.sql
index 5eea8d81..92661c51 100644
--- a/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.sql
+++ b/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.sql
@@ -5,6 +5,6 @@
         "select * from " ~ ref('unit_test_model_a') ~ " where 1=1",
         "select * from " ~ ref('unit_test_model_b') ~ " where 1=1",
         columns=var('quick_are_queries_identical_cols'),
-        event_time=var('quick_are_queries_identical_event_time')
+        event_time=var('event_time_var')
     ) 
 }}  
\ No newline at end of file
diff --git a/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.yml b/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.yml
index ddfddc2d..66f56a27 100644
--- a/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.yml
+++ b/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.yml
@@ -21,14 +21,14 @@ unit_tests:
     overrides:
       vars:
         quick_are_queries_identical_cols: ['id', 'col1', 'col2']
-        quick_are_queries_identical_event_time:
+        event_time_var:
 
   - name: quick_are_queries_identical_identical_tables_event_time_filter
     model: unit_quick_are_queries_identical
     overrides:
       vars:
         quick_are_queries_identical_cols: ['id', 'col1', 'col2', 'created_at']
-        quick_are_queries_identical_event_time: 'created_at'
+        event_time_var: 'created_at'
       macros: 
         audit_helper._get_comparison_bounds:
           "min_event_time": "2024-01-02"
@@ -55,7 +55,7 @@ unit_tests:
     overrides:
       vars:
         quick_are_queries_identical_cols: ['id', 'col1', 'col2']
-        quick_are_queries_identical_event_time:
+        event_time_var:
     given:
       - input: ref('unit_test_model_a')
         rows:
@@ -94,4 +94,4 @@ unit_tests:
     overrides:
       vars:
         quick_are_queries_identical_cols: ['id', 'col1', 'col2']
-        quick_are_queries_identical_event_time:
+        event_time_var:
diff --git a/macros/compare_which_columns_differ.sql b/macros/compare_which_columns_differ.sql
deleted file mode 100644
index bc7c16cc..00000000
--- a/macros/compare_which_columns_differ.sql
+++ /dev/null
@@ -1,46 +0,0 @@
-{% macro compare_which_columns_differ(a_relation, b_relation, primary_key, exclude_columns=[]) %}
-    {{ return(adapter.dispatch('compare_which_columns_differ', 'audit_helper')(a_relation, b_relation, primary_key, exclude_columns)) }}
-{% endmacro %}
-
-{% macro default__compare_which_columns_differ(a_relation, b_relation, primary_key, exclude_columns=[]) %}  
-
-{% set column_names = dbt_utils.get_filtered_columns_in_relation(from=a_relation, except=exclude_columns) %}
-
-with bool_or as (
-
-    select 
-        true as anchor
-        {% for column in column_names %}
-            {% set column_name = adapter.quote(column) %}
-            {% set compare_statement %}
-                ((a.{{ column_name }} != b.{{ column_name }})
-                or (a.{{ column_name }} is null and b.{{ column_name }} is not null)
-                or (a.{{ column_name }} is not null and b.{{ column_name }} is null))
-            {% endset %}
-        
-        , {{ dbt.bool_or(compare_statement) }} as {{ column | lower }}_has_difference
-    
-        {% endfor %}
-    from {{ a_relation }} as a
-    inner join {{ b_relation }} as b
-        on a.{{ primary_key }} = b.{{ primary_key }}
-
-)
-
-{% for column in column_names %}
-    
-    select 
-        '{{ column }}' as column_name, 
-        {{ column | lower }}_has_difference as has_difference
-    
-    from bool_or
-
-    {% if not loop.last %}
-        
-    union all 
-
-    {% endif %}
-
-{% endfor %}
-
-{% endmacro %}
diff --git a/macros/compare_which_query_columns_differ.sql b/macros/compare_which_query_columns_differ.sql
new file mode 100644
index 00000000..139b8c17
--- /dev/null
+++ b/macros/compare_which_query_columns_differ.sql
@@ -0,0 +1,64 @@
+{% macro compare_which_query_columns_differ(a_query, b_query, primary_key_columns=[], columns=[], event_time=None) %}
+    {#
+        Reports, for each compared column, whether any pair of rows matched on the primary key holds different values.
+        a_query / b_query: SQL select statements to compare (each is wrapped in a subquery).
+        primary_key_columns: columns identifying a row; used to build the join key.
+        columns: columns to compare; primary key columns are added if missing.
+        event_time: optional column name; when set, both queries are filtered to the bounds from _get_comparison_bounds.
+        Renders SQL yielding one row per column: column_name, has_difference.
+    #}
+    {{ return(adapter.dispatch('compare_which_query_columns_differ', 'audit_helper')(a_query, b_query, primary_key_columns, columns, event_time)) }}
+{% endmacro %}
+
+{% macro default__compare_which_query_columns_differ(a_query, b_query, primary_key_columns, columns, event_time) %}
+    {% set columns = audit_helper._ensure_all_pks_are_in_column_set(primary_key_columns, columns) %}
+    {# Always bind event_time_props (none without an event_time) so event_time_filter never receives an undefined name. #}
+    {% set event_time_props = audit_helper._get_comparison_bounds(event_time) if event_time else none %}
+    {% set joined_cols = columns | join (", ") %}
+
+        with a as (
+            select 
+                {{ joined_cols }},
+                {{ audit_helper._generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key
+            from ({{ a_query }}) as a_subq
+            {{ audit_helper.event_time_filter(event_time_props) }}
+        ),
+        b as (
+            select 
+                {{ joined_cols }},
+                {{ audit_helper._generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key
+            from ({{ b_query }}) as b_subq
+            {{ audit_helper.event_time_filter(event_time_props) }}
+        ),
+        {# One boolean per column: true when at least one joined row pair differs. Rows present on only one side are dropped by the inner join. #}
+        calculated as (
+            select 
+                {% for column in columns %}
+                    {# Reference columns exactly as the a/b CTEs selected them (unquoted). Quoting here made the name case-sensitive, so it failed to resolve whenever its casing differed from the warehouse's normalised form. #}
+                    {% set compare_statement %}
+                        (
+                            (a.{{ column }} != b.{{ column }})
+                            or (a.{{ column }} is null and b.{{ column }} is not null)
+                            or (a.{{ column }} is not null and b.{{ column }} is null)
+                        )
+                    {% endset %}
+                {{ dbt.bool_or(compare_statement) }} as {{ column | lower }}_has_difference
+
+                {%- if not loop.last %}, {% endif %}
+                {% endfor %}
+            from a
+            inner join b on a.dbt_audit_surrogate_key = b.dbt_audit_surrogate_key
+        )
+
+    {# Unpivot the single row of booleans into one (column_name, has_difference) row per compared column. #}
+    {% for column in columns %}
+    select 
+        '{{ column }}' as column_name, 
+        {{ column | lower }}_has_difference as has_difference
+    from calculated
+    {% if not loop.last %}
+    union all 
+    {% endif %}
+    {% endfor %}
+
+{% endmacro %}
diff --git a/macros/compare_which_relation_columns_differ.sql b/macros/compare_which_relation_columns_differ.sql
new file mode 100644
index 00000000..ac6efe12
--- /dev/null
+++ b/macros/compare_which_relation_columns_differ.sql
@@ -0,0 +1,15 @@
+{% macro compare_which_relation_columns_differ(a_relation, b_relation, primary_key_columns=[], columns=[], event_time=None) %}
+    {#- Relation-based entry point: wraps each relation in a `select *` query and delegates to compare_which_query_columns_differ. -#}
+    {#- When no columns are supplied, compare the columns the two relations have in common. -#}
+    {%- set compared_columns = columns or audit_helper._get_intersecting_columns_from_relations(a_relation, b_relation) -%}
+    {%- set a_query = "select * from " ~ a_relation -%}
+    {%- set b_query = "select * from " ~ b_relation -%}
+
+    {{ audit_helper.compare_which_query_columns_differ(
+        a_query,
+        b_query,
+        primary_key_columns,
+        compared_columns,
+        event_time
+    ) }}
+{% endmacro %}
\ No newline at end of file
diff --git a/macros/quick_are_queries_identical.sql b/macros/quick_are_queries_identical.sql
index af00c8cd..a0800c3d 100644
--- a/macros/quick_are_queries_identical.sql
+++ b/macros/quick_are_queries_identical.sql
@@ -28,18 +28,12 @@ but it's a good way to quickly verify identical results if that's what you're ex
     with query_a as (
         select {{ joined_cols }}
         from ({{ query_a }})
-        {% if event_time_props %}
-            where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
-            and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
-        {% endif %}
+        {{ audit_helper.event_time_filter(event_time_props) }}
     ), 
     query_b as (
         select {{ joined_cols }}
         from ({{ query_b }})
-        {% if event_time_props %}
-            where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
-            and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
-        {% endif %}
+        {{ audit_helper.event_time_filter(event_time_props) }}
     )
 
     select count(distinct hash_result) = 1 as are_tables_identical
@@ -64,19 +58,13 @@ but it's a good way to quickly verify identical results if that's what you're ex
     from (
         select hash_agg({{ joined_cols }}) as hash_result
         from ({{ query_a }}) query_a_subq
-        {% if event_time_props %}
-            where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
-            and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
-        {% endif %}
+        {{ audit_helper.event_time_filter(event_time_props) }}
 
         union all
         
         select hash_agg({{ joined_cols }}) as hash_result
         from ({{ query_b }}) query_b_subq
-        {% if event_time_props %}
-            where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
-            and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
-        {% endif %}
+        {{ audit_helper.event_time_filter(event_time_props) }}
 
     ) as hashes
 {% endmacro %}
\ No newline at end of file
diff --git a/macros/utils/_generate_set_results.sql b/macros/utils/_generate_set_results.sql
index b4fcc473..890cb9d1 100644
--- a/macros/utils/_generate_set_results.sql
+++ b/macros/utils/_generate_set_results.sql
@@ -16,10 +16,7 @@
             {{ joined_cols }}, 
             {{ audit_helper._generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key
         from ( {{-  a_query  -}} ) a_base_subq
-        {% if event_time_props %}
-            where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
-            and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
-        {% endif %}
+        {{ audit_helper.event_time_filter(event_time_props) }}
     ),
 
     b_base as (
@@ -27,10 +24,7 @@
             {{ joined_cols }}, 
             {{ audit_helper._generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key
         from ( {{-  b_query  -}} ) b_base_subq
-        {% if event_time_props %}
-            where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
-            and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
-        {% endif %}
+        {{ audit_helper.event_time_filter(event_time_props) }}
     ),
 
     a as (
@@ -81,10 +75,7 @@
             {{ surrogate_key }} as dbt_audit_surrogate_key,
             row_number() over (partition by {{ surrogate_key }} order by 1 ) as dbt_audit_pk_row_num
         from ( {{-  a_query  -}} )
-        {% if event_time_props %}
-            where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
-            and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
-        {% endif %}
+        {{ audit_helper.event_time_filter(event_time_props) }}
     ),
 
     subset_columns_b as (
@@ -93,10 +84,7 @@
             {{ surrogate_key }} as dbt_audit_surrogate_key,
             row_number() over (partition by {{ surrogate_key }} order by 1 ) as dbt_audit_pk_row_num
         from ( {{-  b_query  -}} )
-        {% if event_time_props %}
-            where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
-            and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
-        {% endif %}
+        {{ audit_helper.event_time_filter(event_time_props) }}
     ),
 
     a as (
@@ -151,10 +139,7 @@
             row_number() over (partition by {{ surrogate_key }} order by 1 ) as dbt_audit_pk_row_num,
             xxhash64({{ joined_cols }}, dbt_audit_pk_row_num) as dbt_audit_row_hash
         from ( {{-  a_query  -}} )
-        {% if event_time_props %}
-            where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
-            and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
-        {% endif %}
+        {{ audit_helper.event_time_filter(event_time_props) }}
     ),
 
     b as (
@@ -164,10 +149,7 @@
             row_number() over (partition by {{ surrogate_key }} order by 1 ) as dbt_audit_pk_row_num,
             xxhash64({{ joined_cols }}, dbt_audit_pk_row_num) as dbt_audit_row_hash
         from ( {{-  b_query  -}} )
-        {% if event_time_props %}
-            where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
-            and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
-        {% endif %}
+        {{ audit_helper.event_time_filter(event_time_props) }}
     ),
 
     a_intersect_b as (
@@ -201,10 +183,7 @@
             row_number() over (partition by dbt_audit_surrogate_key order by dbt_audit_surrogate_key ) as dbt_audit_pk_row_num,
             hash({{ joined_cols }}, dbt_audit_pk_row_num) as dbt_audit_row_hash
         from ( {{-  a_query  -}} )
-        {% if event_time_props %}
-            where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
-            and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
-        {% endif %}
+        {{ audit_helper.event_time_filter(event_time_props) }}
     ),
 
     b as (
@@ -214,10 +193,7 @@
             row_number() over (partition by dbt_audit_surrogate_key order by dbt_audit_surrogate_key ) as dbt_audit_pk_row_num,
             hash({{ joined_cols }}, dbt_audit_pk_row_num) as dbt_audit_row_hash
         from ( {{-  b_query  -}} )
-        {% if event_time_props %}
-            where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
-            and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
-        {% endif %}
+        {{ audit_helper.event_time_filter(event_time_props) }}
     ),
 
     a_intersect_b as (
diff --git a/macros/utils/_get_comparison_bounds.sql b/macros/utils/_get_comparison_bounds.sql
index c644f062..8ac05731 100644
--- a/macros/utils/_get_comparison_bounds.sql
+++ b/macros/utils/_get_comparison_bounds.sql
@@ -42,4 +42,11 @@ model_a  │       │┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼
     {% endfor %}
     
     {% do return(event_time_props) %}
+{% endmacro %}
+{# Renders a `where` clause keeping rows whose event_time column lies between min_event_time and max_event_time (both inclusive); renders nothing when event_time_props is empty or none. #}
+{% macro event_time_filter(event_time_props) %}
+    {% if event_time_props %}
+        where {{ event_time_props["event_time"] }} >= '{{ event_time_props["min_event_time"] }}'
+        and {{ event_time_props["event_time"] }} <= '{{ event_time_props["max_event_time"] }}'
+    {% endif %}
 {% endmacro %}
\ No newline at end of file

From 2c3cc94441939ea0ea078960d70b121c8e7ce2c4 Mon Sep 17 00:00:00 2001
From: Joel Labes <joel.labes@dbtlabs.com>
Date: Fri, 14 Jun 2024 08:55:32 +1200
Subject: [PATCH 13/13] rm packagelock.yml

---
 package-lock.yml | 4 ----
 1 file changed, 4 deletions(-)
 delete mode 100644 package-lock.yml

diff --git a/package-lock.yml b/package-lock.yml
deleted file mode 100644
index 32c6ccc0..00000000
--- a/package-lock.yml
+++ /dev/null
@@ -1,4 +0,0 @@
-packages:
-  - package: dbt-labs/dbt_utils
-    version: 1.1.1
-sha1_hash: 106400343ad0c92a7417f5156d0d6c3893bb2429