From 2a329b78400490a0a61af220a67a1d08cc408519 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Wed, 18 May 2022 12:47:58 +0200 Subject: [PATCH 01/15] Take some educated guesses --- .circleci/config.yml | 8 +- dev-requirements.txt | 3 + macros/dbt_utils/cross_db_utils/dateadd.sql | 61 +--------- macros/dbt_utils/cross_db_utils/datediff.sql | 106 +----------------- pytest.ini | 8 ++ tests/conftest.py | 111 +++++++++++++++++++ tests/functional/test_utils.py | 37 +++++++ 7 files changed, 168 insertions(+), 166 deletions(-) create mode 100644 dev-requirements.txt create mode 100644 pytest.ini create mode 100644 tests/conftest.py create mode 100644 tests/functional/test_utils.py diff --git a/.circleci/config.yml b/.circleci/config.yml index 7f31af4..faf88a3 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -30,10 +30,16 @@ jobs: python3.8 -m venv venv . venv/bin/activate pip install --upgrade pip setuptools - pip install --pre --upgrade dbt-spark[ODBC] + pip install -r dev-requirements.txt mkdir -p ~/.dbt cp integration_tests/ci/sample.profiles.yml ~/.dbt/profiles.yml + - run: + name: "Run Functional Tests" + command: | + . venv/bin/activate + python3 pytest tests/functional --profile databricks_sql_endpoint + - run: name: "Run Tests - dbt-utils" diff --git a/dev-requirements.txt b/dev-requirements.txt new file mode 100644 index 0000000..ff10434 --- /dev/null +++ b/dev-requirements.txt @@ -0,0 +1,3 @@ +pytest +git+https://github.com/dbt-labs/dbt-spark.git@jerco/utils-lift-shift#egg=dbt-spark[ODBC] +git+https://github.com/dbt-labs/dbt-core.git@jerco/utils-lift-shift#egg=dbt-tests-adapter&subdirectory=tests/adapter \ No newline at end of file diff --git a/macros/dbt_utils/cross_db_utils/dateadd.sql b/macros/dbt_utils/cross_db_utils/dateadd.sql index e433bc7..b144a2d 100644 --- a/macros/dbt_utils/cross_db_utils/dateadd.sql +++ b/macros/dbt_utils/cross_db_utils/dateadd.sql @@ -1,62 +1,3 @@ {% macro spark__dateadd(datepart, interval, from_date_or_timestamp) %} - - {%- set clock_component -%} - {# make sure the dates + timestamps are real, otherwise raise an error asap #} - to_unix_timestamp({{ spark_utils.assert_not_null('to_timestamp', from_date_or_timestamp) }}) - - to_unix_timestamp({{ spark_utils.assert_not_null('date', from_date_or_timestamp) }}) - {%- endset -%} - - {%- if datepart in ['day', 'week'] -%} - - {%- set multiplier = 7 if datepart == 'week' else 1 -%} - - to_timestamp( - to_unix_timestamp( - date_add( - {{ spark_utils.assert_not_null('date', from_date_or_timestamp) }}, - cast({{interval}} * {{multiplier}} as int) - ) - ) + {{clock_component}} - ) - - {%- elif datepart in ['month', 'quarter', 'year'] -%} - - {%- set multiplier -%} - {%- if datepart == 'month' -%} 1 - {%- elif datepart == 'quarter' -%} 3 - {%- elif datepart == 'year' -%} 12 - {%- endif -%} - {%- endset -%} - - to_timestamp( - to_unix_timestamp( - add_months( - {{ spark_utils.assert_not_null('date', from_date_or_timestamp) }}, - cast({{interval}} * {{multiplier}} as int) - ) - ) + {{clock_component}} - ) - - {%- elif datepart in ('hour', 'minute', 'second', 'millisecond', 'microsecond') -%} - - {%- set multiplier -%} - {%- if datepart == 'hour' -%} 3600 - {%- elif datepart == 'minute' -%} 60 - {%- elif datepart == 'second' -%} 1 - {%- elif datepart == 'millisecond' -%} (1/1000000) - {%- elif datepart == 'microsecond' -%} (1/1000000) - {%- endif -%} - {%- endset -%} - - to_timestamp( - {{ spark_utils.assert_not_null('to_unix_timestamp', from_date_or_timestamp) }} - + cast({{interval}} * {{multiplier}} as 
int) - ) - - {%- else -%} - - {{ exceptions.raise_compiler_error("macro dateadd not implemented for datepart ~ '" ~ datepart ~ "' ~ on Spark") }} - - {%- endif -%} - + {{ return(adapter.dispatch('dateadd', 'dbt')(datepart, interval, from_date_or_timestamp)) }} {% endmacro %} diff --git a/macros/dbt_utils/cross_db_utils/datediff.sql b/macros/dbt_utils/cross_db_utils/datediff.sql index 0496cfa..190f29f 100644 --- a/macros/dbt_utils/cross_db_utils/datediff.sql +++ b/macros/dbt_utils/cross_db_utils/datediff.sql @@ -1,107 +1,3 @@ {% macro spark__datediff(first_date, second_date, datepart) %} - - {%- if datepart in ['day', 'week', 'month', 'quarter', 'year'] -%} - - {# make sure the dates are real, otherwise raise an error asap #} - {% set first_date = spark_utils.assert_not_null('date', first_date) %} - {% set second_date = spark_utils.assert_not_null('date', second_date) %} - - {%- endif -%} - - {%- if datepart == 'day' -%} - - datediff({{second_date}}, {{first_date}}) - - {%- elif datepart == 'week' -%} - - case when {{first_date}} < {{second_date}} - then floor(datediff({{second_date}}, {{first_date}})/7) - else ceil(datediff({{second_date}}, {{first_date}})/7) - end - - -- did we cross a week boundary (Sunday)? - + case - when {{first_date}} < {{second_date}} and dayofweek({{second_date}}) < dayofweek({{first_date}}) then 1 - when {{first_date}} > {{second_date}} and dayofweek({{second_date}}) > dayofweek({{first_date}}) then -1 - else 0 end - - {%- elif datepart == 'month' -%} - - case when {{first_date}} < {{second_date}} - then floor(months_between(date({{second_date}}), date({{first_date}}))) - else ceil(months_between(date({{second_date}}), date({{first_date}}))) - end - - -- did we cross a month boundary? - + case - when {{first_date}} < {{second_date}} and dayofmonth({{second_date}}) < dayofmonth({{first_date}}) then 1 - when {{first_date}} > {{second_date}} and dayofmonth({{second_date}}) > dayofmonth({{first_date}}) then -1 - else 0 end - - {%- elif datepart == 'quarter' -%} - - case when {{first_date}} < {{second_date}} - then floor(months_between(date({{second_date}}), date({{first_date}}))/3) - else ceil(months_between(date({{second_date}}), date({{first_date}}))/3) - end - - -- did we cross a quarter boundary? 
- + case - when {{first_date}} < {{second_date}} and ( - (dayofyear({{second_date}}) - (quarter({{second_date}}) * 365/4)) - < (dayofyear({{first_date}}) - (quarter({{first_date}}) * 365/4)) - ) then 1 - when {{first_date}} > {{second_date}} and ( - (dayofyear({{second_date}}) - (quarter({{second_date}}) * 365/4)) - > (dayofyear({{first_date}}) - (quarter({{first_date}}) * 365/4)) - ) then -1 - else 0 end - - {%- elif datepart == 'year' -%} - - year({{second_date}}) - year({{first_date}}) - - {%- elif datepart in ('hour', 'minute', 'second', 'millisecond', 'microsecond') -%} - - {%- set divisor -%} - {%- if datepart == 'hour' -%} 3600 - {%- elif datepart == 'minute' -%} 60 - {%- elif datepart == 'second' -%} 1 - {%- elif datepart == 'millisecond' -%} (1/1000) - {%- elif datepart == 'microsecond' -%} (1/1000000) - {%- endif -%} - {%- endset -%} - - case when {{first_date}} < {{second_date}} - then ceil(( - {# make sure the timestamps are real, otherwise raise an error asap #} - {{ spark_utils.assert_not_null('to_unix_timestamp', spark_utils.assert_not_null('to_timestamp', second_date)) }} - - {{ spark_utils.assert_not_null('to_unix_timestamp', spark_utils.assert_not_null('to_timestamp', first_date)) }} - ) / {{divisor}}) - else floor(( - {{ spark_utils.assert_not_null('to_unix_timestamp', spark_utils.assert_not_null('to_timestamp', second_date)) }} - - {{ spark_utils.assert_not_null('to_unix_timestamp', spark_utils.assert_not_null('to_timestamp', first_date)) }} - ) / {{divisor}}) - end - - {% if datepart == 'millisecond' %} - + cast(date_format({{second_date}}, 'SSS') as int) - - cast(date_format({{first_date}}, 'SSS') as int) - {% endif %} - - {% if datepart == 'microsecond' %} - {% set capture_str = '[0-9]{4}-[0-9]{2}-[0-9]{2}.[0-9]{2}:[0-9]{2}:[0-9]{2}.([0-9]{6})' %} - -- Spark doesn't really support microseconds, so this is a massive hack! 
-    -- It will only work if the timestamp-string is of the format
-    -- 'yyyy-MM-dd-HH mm.ss.SSSSSS'
-    + cast(regexp_extract({{second_date}}, '{{capture_str}}', 1) as int)
-    - cast(regexp_extract({{first_date}}, '{{capture_str}}', 1) as int)
-    {% endif %}
-
-    {%- else -%}
-
-    {{ exceptions.raise_compiler_error("macro datediff not implemented for datepart ~ '" ~ datepart ~ "' ~ on Spark") }}
-
-    {%- endif -%}
-
+    {{ return(adapter.dispatch('dateadd', 'dbt')(datepart, interval, from_date_or_timestamp)) }}
 {% endmacro %}
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 0000000..c0ef765
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,8 @@
+[pytest]
+filterwarnings =
+    ignore:.*'soft_unicode' has been renamed to 'soft_str'*:DeprecationWarning
+    ignore:unclosed file .*:ResourceWarning
+env_files =
+    test.env
+testpaths =
+    tests/functional
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..0c62471
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,111 @@
+import pytest
+import os
+
+pytest_plugins = ["dbt.tests.fixtures.project"]
+
+
+def pytest_addoption(parser):
+    parser.addoption("--profile", action="store", default="apache_spark", type=str)
+
+
+# Using @pytest.mark.skip_profile('apache_spark') uses the 'skip_by_profile_type'
+# autouse fixture below
+def pytest_configure(config):
+    config.addinivalue_line(
+        "markers",
+        "skip_profile(profile): skip test for the given profile",
+    )
+
+
+@pytest.fixture(scope="session")
+def dbt_profile_target(request):
+    profile_type = request.config.getoption("--profile")
+    if profile_type == "databricks_cluster":
+        target = databricks_cluster_target()
+    elif profile_type == "databricks_sql_endpoint":
+        target = databricks_sql_endpoint_target()
+    elif profile_type == "apache_spark":
+        target = apache_spark_target()
+    elif profile_type == "databricks_http_cluster":
+        target = databricks_http_cluster_target()
+    elif profile_type == "spark_session":
+        target = spark_session_target()
+    else:
+        raise ValueError(f"Invalid profile type '{profile_type}'")
+    return target
+
+
+def apache_spark_target():
+    return {
+        "type": "spark",
+        "host": "localhost",
+        "user": "dbt",
+        "method": "thrift",
+        "port": 10000,
+        "connect_retries": 3,
+        "connect_timeout": 5,
+        "retry_all": True,
+    }
+
+
+def databricks_cluster_target():
+    return {
+        "type": "spark",
+        "method": "odbc",
+        "host": os.getenv("DBT_DATABRICKS_HOST_NAME"),
+        "cluster": os.getenv("DBT_DATABRICKS_CLUSTER_NAME"),
+        "token": os.getenv("DBT_DATABRICKS_TOKEN"),
+        "driver": os.getenv("ODBC_DRIVER"),
+        "port": 443,
+        "connect_retries": 3,
+        "connect_timeout": 5,
+        "retry_all": True,
+    }
+
+
+def databricks_sql_endpoint_target():
+    return {
+        "type": "spark",
+        "method": "odbc",
+        "host": os.getenv("DBT_DATABRICKS_HOST_NAME"),
+        "endpoint": os.getenv("DBT_DATABRICKS_ENDPOINT"),
+        "token": os.getenv("DBT_DATABRICKS_TOKEN"),
+        "driver": os.getenv("ODBC_DRIVER"),
+        "port": 443,
+        "connect_retries": 3,
+        "connect_timeout": 5,
+        "retry_all": True,
+    }
+
+
+def databricks_http_cluster_target():
+    return {
+        "type": "spark",
+        "host": os.getenv('DBT_DATABRICKS_HOST_NAME'),
+        "cluster": os.getenv('DBT_DATABRICKS_CLUSTER_NAME'),
+        "token": os.getenv('DBT_DATABRICKS_TOKEN'),
+        "method": "http",
+        "port": 443,
+        # more retries + longer timeout to handle unavailability while cluster is restarting
+        # return failures quickly in dev, retry all failures in CI (up to 5 min)
+        "connect_retries": 5,
+        "connect_timeout": 60,
+        "retry_all": bool(os.getenv('DBT_DATABRICKS_RETRY_ALL', False)),
+    }
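+
+# NOTE: the *_target() functions each return the connection dict used as the
+# dbt profile target for the matching --profile option. Databricks credentials
+# are read from environment variables; local runs can define them in test.env
+# (see the env_files setting in pytest.ini).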
+
+
+def spark_session_target():
+    return {
+        "type": "spark",
+        "host": "localhost",
+        "method": "session",
+    }
+
+
+@pytest.fixture(autouse=True)
+def skip_by_profile_type(request):
+    profile_type = request.config.getoption("--profile")
+    if request.node.get_closest_marker("skip_profile"):
+        for skip_profile_type in request.node.get_closest_marker("skip_profile").args:
+            if skip_profile_type == profile_type:
+                pytest.skip(f"skipped on '{profile_type}' profile")
diff --git a/tests/functional/test_utils.py b/tests/functional/test_utils.py
new file mode 100644
index 0000000..483347a
--- /dev/null
+++ b/tests/functional/test_utils.py
@@ -0,0 +1,37 @@
+from dbt.tests.adapter.utils.test_dateadd import BaseDateAdd
+from dbt.tests.adapter.utils.test_datediff import BaseDateDiff
+
+
+class BaseSparkUtilsBackCompat(BaseUtils):
+    # install this repo as a package
+    @pytest.fixture(scope="class")
+    def packages(self):
+        return {"packages": [{"local": os.getcwd()}]}
+
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {
+            "dispatch": [{
+                "macro_namespace": "dbt_utils",
+                "search_order": ["spark_utils", "dbt_utils"]
+            }]
+        }
+
+    # call the macros from the 'dbt_utils' namespace
+    # instead of the unspecified / global namespace
+    def macro_namespace(self):
+        return "dbt_utils"
+
+
+    # actual test sequence needs to run 'deps' first
+    def test_build_assert_equal(self, project):
+        run_dbt(['deps'])
+        super().test_build_assert_equal(project)
+
+
+class TestDateAdd(BaseDateAdd):
+    pass
+
+
+class TestDateDiff(BaseDateDiff):
+    pass

From b41d8b65d9b637ea84c1d574af4c7051400189a7 Mon Sep 17 00:00:00 2001
From: Jeremy Cohen
Date: Wed, 18 May 2022 12:52:44 +0200
Subject: [PATCH 02/15] Install dbt-core first

---
 dev-requirements.txt | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/dev-requirements.txt b/dev-requirements.txt
index ff10434..a303a73 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -1,3 +1,4 @@
 pytest
-git+https://github.com/dbt-labs/dbt-spark.git@jerco/utils-lift-shift#egg=dbt-spark[ODBC]
-git+https://github.com/dbt-labs/dbt-core.git@jerco/utils-lift-shift#egg=dbt-tests-adapter&subdirectory=tests/adapter
\ No newline at end of file
+git+https://github.com/dbt-labs/dbt-core.git@jerco/utils-lift-shift#egg=dbt-core&subdirectory=core
+git+https://github.com/dbt-labs/dbt-core.git@jerco/utils-lift-shift#egg=dbt-tests-adapter&subdirectory=tests/adapter
+git+https://github.com/dbt-labs/dbt-spark.git@jerco/utils-lift-shift#egg=dbt-spark[ODBC]
\ No newline at end of file

From b63d6d467af93e4a20fec579efff376951792d7f Mon Sep 17 00:00:00 2001
From: Jeremy Cohen
Date: Wed, 18 May 2022 12:56:16 +0200
Subject: [PATCH 03/15] Fixing it live

---
 .circleci/config.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index faf88a3..8c24a96 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -38,7 +38,7 @@
           name: "Run Functional Tests"
           command: |
             . 
venv/bin/activate
-            python3 pytest tests/functional --profile databricks_sql_endpoint
+            python3 -m pytest tests/functional --profile databricks_sql_endpoint
 
       - run:
           name: "Run Tests - dbt-utils"

From b1e1022a970c1079e3982b296fb11bce6335eee4 Mon Sep 17 00:00:00 2001
From: Jeremy Cohen
Date: Wed, 18 May 2022 12:58:54 +0200
Subject: [PATCH 04/15] Just keep swimming

---
 tests/functional/test_utils.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/functional/test_utils.py b/tests/functional/test_utils.py
index 483347a..b21fdae 100644
--- a/tests/functional/test_utils.py
+++ b/tests/functional/test_utils.py
@@ -1,3 +1,7 @@
+import os
+import pytest
+from dbt.tests.util import run_dbt
+from dbt.tests.adapter.utils.base_utils import BaseUtils
 from dbt.tests.adapter.utils.test_dateadd import BaseDateAdd
 from dbt.tests.adapter.utils.test_datediff import BaseDateDiff
 

From d0f33a21b35dd7347aaaa65f63f8f86deb4182fc Mon Sep 17 00:00:00 2001
From: Jeremy Cohen
Date: Thu, 16 Jun 2022 12:47:47 +0200
Subject: [PATCH 05/15] More macros

---
 macros/dbt_utils/cross_db_utils/concat.sql   |  2 +-
 macros/dbt_utils/cross_db_utils/dateadd.sql  |  2 +-
 macros/dbt_utils/cross_db_utils/datediff.sql |  2 +-
 .../dbt_utils/cross_db_utils/split_part.sql  | 22 +------------------
 tests/functional/test_utils.py               | 14 ++++++++++--
 5 files changed, 16 insertions(+), 26 deletions(-)

diff --git a/macros/dbt_utils/cross_db_utils/concat.sql b/macros/dbt_utils/cross_db_utils/concat.sql
index 30f1a42..0a62eb9 100644
--- a/macros/dbt_utils/cross_db_utils/concat.sql
+++ b/macros/dbt_utils/cross_db_utils/concat.sql
@@ -1,3 +1,3 @@
 {% macro spark__concat(fields) -%}
-    concat({{ fields|join(', ') }})
+    {{ return(dbt.concat(fields)) }}
 {%- endmacro %}
diff --git a/macros/dbt_utils/cross_db_utils/dateadd.sql b/macros/dbt_utils/cross_db_utils/dateadd.sql
index b144a2d..17f3d07 100644
--- a/macros/dbt_utils/cross_db_utils/dateadd.sql
+++ b/macros/dbt_utils/cross_db_utils/dateadd.sql
@@ -1,3 +1,3 @@
 {% macro spark__dateadd(datepart, interval, from_date_or_timestamp) %}
-    {{ return(adapter.dispatch('dateadd', 'dbt')(datepart, interval, from_date_or_timestamp)) }}
+    {{ return(dbt.dateadd(datepart, interval, from_date_or_timestamp)) }}
 {% endmacro %}
diff --git a/macros/dbt_utils/cross_db_utils/datediff.sql b/macros/dbt_utils/cross_db_utils/datediff.sql
index 190f29f..e35be61 100644
--- a/macros/dbt_utils/cross_db_utils/datediff.sql
+++ b/macros/dbt_utils/cross_db_utils/datediff.sql
@@ -1,3 +1,3 @@
 {% macro spark__datediff(first_date, second_date, datepart) %}
-    {{ return(adapter.dispatch('dateadd', 'dbt')(datepart, interval, from_date_or_timestamp)) }}
+    {{ return(dbt.dateadd(datepart, interval, from_date_or_timestamp)) }}
 {% endmacro %}
diff --git a/macros/dbt_utils/cross_db_utils/split_part.sql b/macros/dbt_utils/cross_db_utils/split_part.sql
index b476e05..966fdb7 100644
--- a/macros/dbt_utils/cross_db_utils/split_part.sql
+++ b/macros/dbt_utils/cross_db_utils/split_part.sql
@@ -1,23 +1,3 @@
 {% macro spark__split_part(string_text, delimiter_text, part_number) %}
-
-    {% set delimiter_expr %}
-
-        -- escape if starts with a special character
-        case when regexp_extract({{ delimiter_text }}, '([^A-Za-z0-9])(.*)', 1) != '_'
-            then concat('\\', {{ delimiter_text }})
-            else {{ delimiter_text }} end
-
-    {% endset %}
-
-    {% set split_part_expr %}
-
-    split(
-        {{ string_text }},
-        {{ delimiter_expr }}
-        )[({{ part_number - 1 }})]
-
-    {% endset %}
-
-    {{ return(split_part_expr) }}
-
+    {{ return(dbt.split_part(string_text, delimiter_text, 
part_number)) }} {% endmacro %} diff --git a/tests/functional/test_utils.py b/tests/functional/test_utils.py index b21fdae..b034deb 100644 --- a/tests/functional/test_utils.py +++ b/tests/functional/test_utils.py @@ -2,8 +2,10 @@ import pytest from dbt.tests.util import run_dbt from dbt.tests.adapter.utils.base_utils import BaseUtils +from dbt.tests.adapter.utils.test_concat import BaseConcat from dbt.tests.adapter.utils.test_dateadd import BaseDateAdd from dbt.tests.adapter.utils.test_datediff import BaseDateDiff +from dbt.tests.adapter.utils.test_split_part import BaseSplitPart class BaseSparkUtilsBackCompat(BaseUtils): @@ -33,9 +35,17 @@ def test_build_assert_equal(self, project): super().test_build_assert_equal(project) -class TestDateAdd(BaseDateAdd): +class TestConcat(BaseSparkUtilsBackCompat, BaseConcat): pass -class TestDateDiff(BaseDateDiff): +class TestDateAdd(BaseSparkUtilsBackCompat, BaseDateAdd): + pass + + +class TestDateDiff(BaseSparkUtilsBackCompat, BaseDateDiff): + pass + + +class TestSplitPart(BaseSparkUtilsBackCompat, BaseSplitPart): pass From 506f1bb1fd5555412d535a5da1698aabac8dcbe3 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Thu, 16 Jun 2022 13:19:18 +0200 Subject: [PATCH 06/15] Update branch, submodule pointers --- dbt-utils | 2 +- dev-requirements.txt | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dbt-utils b/dbt-utils index ac072a3..1360427 160000 --- a/dbt-utils +++ b/dbt-utils @@ -1 +1 @@ -Subproject commit ac072a3c4b78d43a1c013e7de8b8fa6e290b544e +Subproject commit 1360427fc5a6bf06c305836322f0b12433d15216 diff --git a/dev-requirements.txt b/dev-requirements.txt index a303a73..ffa42bc 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,4 +1,4 @@ pytest -git+https://github.com/dbt-labs/dbt-core.git@jerco/utils-lift-shift#egg=dbt-core&subdirectory=core -git+https://github.com/dbt-labs/dbt-core.git@jerco/utils-lift-shift#egg=dbt-tests-adapter&subdirectory=tests/adapter +git+https://github.com/dbt-labs/dbt-core.git@dbeatty/utils-lift-shift#egg=dbt-core&subdirectory=core +git+https://github.com/dbt-labs/dbt-core.git@dbeatty/utils-lift-shift#egg=dbt-tests-adapter&subdirectory=tests/adapter git+https://github.com/dbt-labs/dbt-spark.git@jerco/utils-lift-shift#egg=dbt-spark[ODBC] \ No newline at end of file From 08aa4c562d4927b575a27c65501482941c0795f8 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Thu, 16 Jun 2022 13:50:46 +0200 Subject: [PATCH 07/15] Fix packages, install dbt-utils --- tests/functional/test_utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/functional/test_utils.py b/tests/functional/test_utils.py index b034deb..056a449 100644 --- a/tests/functional/test_utils.py +++ b/tests/functional/test_utils.py @@ -12,7 +12,11 @@ class BaseSparkUtilsBackCompat(BaseUtils): # install this repo as a package @pytest.fixture(scope="class") def packages(self): - return {"packages": [{"local": os.getcwd()}]} + return { + "packages": [ + {"local": os.getcwd()}, + {"local": f"{os.getcwd()}/dbt-utils"} + ]} @pytest.fixture(scope="class") def project_config_update(self): From c103b95979d02081e28179130b69b6537cf40a1d Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Wed, 6 Jul 2022 14:57:10 +0200 Subject: [PATCH 08/15] Try updating pointers --- dbt-utils | 2 +- dev-requirements.txt | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dbt-utils b/dbt-utils index 1360427..dcd85fb 160000 --- a/dbt-utils +++ b/dbt-utils @@ -1 +1 @@ -Subproject commit 
1360427fc5a6bf06c305836322f0b12433d15216 +Subproject commit dcd85fb97de13b7da3c47237b1c0a5d390cfd170 diff --git a/dev-requirements.txt b/dev-requirements.txt index ffa42bc..6484e43 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,4 +1,4 @@ pytest -git+https://github.com/dbt-labs/dbt-core.git@dbeatty/utils-lift-shift#egg=dbt-core&subdirectory=core -git+https://github.com/dbt-labs/dbt-core.git@dbeatty/utils-lift-shift#egg=dbt-tests-adapter&subdirectory=tests/adapter -git+https://github.com/dbt-labs/dbt-spark.git@jerco/utils-lift-shift#egg=dbt-spark[ODBC] \ No newline at end of file +git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core +git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=tests/adapter +git+https://github.com/dbt-labs/dbt-spark.git#egg=dbt-spark[ODBC] \ No newline at end of file From d5c7dbc01c92a8945dca170a3fee960d1bee3eba Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Fri, 22 Jul 2022 14:40:46 +0200 Subject: [PATCH 09/15] Is it ready --- .gitignore | 2 ++ macros/dbt_utils/cross_db_utils/datatypes.sql | 2 +- macros/dbt_utils/cross_db_utils/datediff.sql | 2 +- tests/functional/test_utils.py | 27 +++++++++++++++---- 4 files changed, 26 insertions(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index a0e4833..14e076f 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,5 @@ /**/dbt_packages/ /**/logs/ /**/env/ +/**/__pycache__/ +test.env diff --git a/macros/dbt_utils/cross_db_utils/datatypes.sql b/macros/dbt_utils/cross_db_utils/datatypes.sql index c935d02..3e4a0e7 100644 --- a/macros/dbt_utils/cross_db_utils/datatypes.sql +++ b/macros/dbt_utils/cross_db_utils/datatypes.sql @@ -1,5 +1,5 @@ {# numeric ------------------------------------------------ #} {% macro spark__type_numeric() %} - decimal(28, 6) + {{ return(dbt.type_numeric()) }} {% endmacro %} diff --git a/macros/dbt_utils/cross_db_utils/datediff.sql b/macros/dbt_utils/cross_db_utils/datediff.sql index e35be61..a356bb3 100644 --- a/macros/dbt_utils/cross_db_utils/datediff.sql +++ b/macros/dbt_utils/cross_db_utils/datediff.sql @@ -1,3 +1,3 @@ {% macro spark__datediff(first_date, second_date, datepart) %} - {{ return(dbt.dateadd(datepart, interval, from_date_or_timestamp)) }} + {{ return(dbt.datediff(first_date, second_date, datepart)) }} {% endmacro %} diff --git a/tests/functional/test_utils.py b/tests/functional/test_utils.py index 056a449..8387c3b 100644 --- a/tests/functional/test_utils.py +++ b/tests/functional/test_utils.py @@ -1,14 +1,18 @@ import os import pytest from dbt.tests.util import run_dbt + from dbt.tests.adapter.utils.base_utils import BaseUtils from dbt.tests.adapter.utils.test_concat import BaseConcat from dbt.tests.adapter.utils.test_dateadd import BaseDateAdd from dbt.tests.adapter.utils.test_datediff import BaseDateDiff from dbt.tests.adapter.utils.test_split_part import BaseSplitPart +from dbt.tests.adapter.utils.data_types.base_data_type_macro import BaseDataTypeMacro +from dbt.tests.adapter.utils.data_types.test_type_numeric import BaseTypeNumeric + -class BaseSparkUtilsBackCompat(BaseUtils): +class BaseSparkUtilsBackCompat: # install this repo as a package @pytest.fixture(scope="class") def packages(self): @@ -33,23 +37,36 @@ def macro_namespace(self): return "dbt_utils" +class BaseSparkUtilsBackCompatUtil(BaseSparkUtilsBackCompat, BaseUtils): # actual test sequence needs to run 'deps' first def test_build_assert_equal(self, project): run_dbt(['deps']) super().test_build_assert_equal(project) -class 
TestConcat(BaseSparkUtilsBackCompat, BaseConcat): +class BaseSparkUtilsBackCompatDataType(BaseSparkUtilsBackCompat, BaseDataTypeMacro): + # actual test sequence needs to run 'deps' first + def test_check_types_assert_match(self, project): + run_dbt(['deps']) + super().test_check_types_assert_match(project) + + +class TestConcat(BaseSparkUtilsBackCompatUtil, BaseConcat): pass -class TestDateAdd(BaseSparkUtilsBackCompat, BaseDateAdd): +class TestDateAdd(BaseSparkUtilsBackCompatUtil, BaseDateAdd): pass -class TestDateDiff(BaseSparkUtilsBackCompat, BaseDateDiff): +class TestDateDiff(BaseSparkUtilsBackCompatUtil, BaseDateDiff): pass -class TestSplitPart(BaseSparkUtilsBackCompat, BaseSplitPart): +class TestSplitPart(BaseSparkUtilsBackCompatUtil, BaseSplitPart): pass + + +class TestTypeNumeric(BaseSparkUtilsBackCompatDataType, BaseTypeNumeric): + def numeric_fixture_type(self): + return "decimal(28,6)" From bcc00eda366c07c3ca6ffa5fba22f4fcbcd98742 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Fri, 22 Jul 2022 15:07:53 +0200 Subject: [PATCH 10/15] Try pinning pyodbc --- dev-requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/dev-requirements.txt b/dev-requirements.txt index 6484e43..866f0f3 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,4 +1,5 @@ pytest +pyodbc==4.0.32 git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=tests/adapter git+https://github.com/dbt-labs/dbt-spark.git#egg=dbt-spark[ODBC] \ No newline at end of file From 3b7367ea4f7158607e8f2401774c35762ed759be Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Fri, 22 Jul 2022 15:11:10 +0200 Subject: [PATCH 11/15] Update dbt-utils submodule --- dbt-utils | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt-utils b/dbt-utils index dcd85fb..fbbc0fb 160000 --- a/dbt-utils +++ b/dbt-utils @@ -1 +1 @@ -Subproject commit dcd85fb97de13b7da3c47237b1c0a5d390cfd170 +Subproject commit fbbc0fb82c9e7298cfe7fb305aa316e533977112 From c1e8d3d2486acf5cc0dbe069591c5160febfce99 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Fri, 22 Jul 2022 16:03:03 +0200 Subject: [PATCH 12/15] Slightly better --- integration_tests/dbt_utils/dbt_project.yml | 2 ++ macros/dbt_utils/cross_db_utils/concat.sql | 3 --- macros/dbt_utils/cross_db_utils/dateadd.sql | 3 --- macros/dbt_utils/cross_db_utils/datediff.sql | 3 --- .../cross_db_utils/deprecated/assert_not_null.sql | 3 +++ macros/dbt_utils/cross_db_utils/deprecated/concat.sql | 3 +++ .../cross_db_utils/{ => deprecated}/datatypes.sql | 2 +- macros/dbt_utils/cross_db_utils/deprecated/dateadd.sql | 6 ++++++ macros/dbt_utils/cross_db_utils/deprecated/datediff.sql | 6 ++++++ .../dbt_utils/cross_db_utils/deprecated/split_part.sql | 3 +++ macros/dbt_utils/cross_db_utils/split_part.sql | 3 --- macros/etc/assert_not_null.sql | 9 --------- tests/functional/test_utils.py | 2 +- 13 files changed, 25 insertions(+), 23 deletions(-) delete mode 100644 macros/dbt_utils/cross_db_utils/concat.sql delete mode 100644 macros/dbt_utils/cross_db_utils/dateadd.sql delete mode 100644 macros/dbt_utils/cross_db_utils/datediff.sql create mode 100644 macros/dbt_utils/cross_db_utils/deprecated/assert_not_null.sql create mode 100644 macros/dbt_utils/cross_db_utils/deprecated/concat.sql rename macros/dbt_utils/cross_db_utils/{ => deprecated}/datatypes.sql (66%) create mode 100644 macros/dbt_utils/cross_db_utils/deprecated/dateadd.sql create mode 100644 
macros/dbt_utils/cross_db_utils/deprecated/datediff.sql create mode 100644 macros/dbt_utils/cross_db_utils/deprecated/split_part.sql delete mode 100644 macros/dbt_utils/cross_db_utils/split_part.sql delete mode 100644 macros/etc/assert_not_null.sql diff --git a/integration_tests/dbt_utils/dbt_project.yml b/integration_tests/dbt_utils/dbt_project.yml index ecfe622..79a5e65 100644 --- a/integration_tests/dbt_utils/dbt_project.yml +++ b/integration_tests/dbt_utils/dbt_project.yml @@ -17,6 +17,8 @@ clean-targets: # directories to be removed by `dbt clean` - "dbt_modules" dispatch: + - macro_namespace: dbt + search_order: ['dbt'] - macro_namespace: dbt_utils search_order: - spark_utils diff --git a/macros/dbt_utils/cross_db_utils/concat.sql b/macros/dbt_utils/cross_db_utils/concat.sql deleted file mode 100644 index 0a62eb9..0000000 --- a/macros/dbt_utils/cross_db_utils/concat.sql +++ /dev/null @@ -1,3 +0,0 @@ -{% macro spark__concat(fields) -%} - {{ return(dbt.concat(fields)) }} -{%- endmacro %} diff --git a/macros/dbt_utils/cross_db_utils/dateadd.sql b/macros/dbt_utils/cross_db_utils/dateadd.sql deleted file mode 100644 index 17f3d07..0000000 --- a/macros/dbt_utils/cross_db_utils/dateadd.sql +++ /dev/null @@ -1,3 +0,0 @@ -{% macro spark__dateadd(datepart, interval, from_date_or_timestamp) %} - {{ return(dbt.dateadd(datepart, interval, from_date_or_timestamp)) }} -{% endmacro %} diff --git a/macros/dbt_utils/cross_db_utils/datediff.sql b/macros/dbt_utils/cross_db_utils/datediff.sql deleted file mode 100644 index a356bb3..0000000 --- a/macros/dbt_utils/cross_db_utils/datediff.sql +++ /dev/null @@ -1,3 +0,0 @@ -{% macro spark__datediff(first_date, second_date, datepart) %} - {{ return(dbt.datediff(first_date, second_date, datepart)) }} -{% endmacro %} diff --git a/macros/dbt_utils/cross_db_utils/deprecated/assert_not_null.sql b/macros/dbt_utils/cross_db_utils/deprecated/assert_not_null.sql new file mode 100644 index 0000000..cbfe19b --- /dev/null +++ b/macros/dbt_utils/cross_db_utils/deprecated/assert_not_null.sql @@ -0,0 +1,3 @@ +{% macro assert_not_null(function, arg) -%} + {{ return(adapter.dispatch('assert_not_null', 'dbt')(function, arg)) }} +{%- endmacro %} diff --git a/macros/dbt_utils/cross_db_utils/deprecated/concat.sql b/macros/dbt_utils/cross_db_utils/deprecated/concat.sql new file mode 100644 index 0000000..13c316c --- /dev/null +++ b/macros/dbt_utils/cross_db_utils/deprecated/concat.sql @@ -0,0 +1,3 @@ +{% macro spark__concat(fields) -%} + {{ return(adapter.dispatch('concat', 'dbt')(fields)) }} +{%- endmacro %} diff --git a/macros/dbt_utils/cross_db_utils/datatypes.sql b/macros/dbt_utils/cross_db_utils/deprecated/datatypes.sql similarity index 66% rename from macros/dbt_utils/cross_db_utils/datatypes.sql rename to macros/dbt_utils/cross_db_utils/deprecated/datatypes.sql index 3e4a0e7..b418221 100644 --- a/macros/dbt_utils/cross_db_utils/datatypes.sql +++ b/macros/dbt_utils/cross_db_utils/deprecated/datatypes.sql @@ -1,5 +1,5 @@ {# numeric ------------------------------------------------ #} {% macro spark__type_numeric() %} - {{ return(dbt.type_numeric()) }} + {{ return(adapter.dispatch('type_numeric', 'dbt')()) }} {% endmacro %} diff --git a/macros/dbt_utils/cross_db_utils/deprecated/dateadd.sql b/macros/dbt_utils/cross_db_utils/deprecated/dateadd.sql new file mode 100644 index 0000000..964ad98 --- /dev/null +++ b/macros/dbt_utils/cross_db_utils/deprecated/dateadd.sql @@ -0,0 +1,6 @@ +{% macro spark__dateadd(datepart, interval, from_date_or_timestamp) %} + -- dispatch here gets very 
very confusing
+    -- we just need to hint to dbt that this is a required macro for resolving dbt.spark__dateadd()
+    -- {{ assert_not_null() }}
+    {{ return(adapter.dispatch('dateadd', 'dbt')(datepart, interval, from_date_or_timestamp)) }}
+{% endmacro %}
diff --git a/macros/dbt_utils/cross_db_utils/deprecated/datediff.sql b/macros/dbt_utils/cross_db_utils/deprecated/datediff.sql
new file mode 100644
index 0000000..46b406f
--- /dev/null
+++ b/macros/dbt_utils/cross_db_utils/deprecated/datediff.sql
@@ -0,0 +1,6 @@
+{% macro spark__datediff(first_date, second_date, datepart) %}
+    -- dispatch here gets very very confusing
+    -- we just need to hint to dbt that this is a required macro for resolving dbt.spark__datediff()
+    -- {{ assert_not_null() }}
+    {{ return(adapter.dispatch('datediff', 'dbt')(first_date, second_date, datepart)) }}
+{% endmacro %}
diff --git a/macros/dbt_utils/cross_db_utils/deprecated/split_part.sql b/macros/dbt_utils/cross_db_utils/deprecated/split_part.sql
new file mode 100644
index 0000000..114a131
--- /dev/null
+++ b/macros/dbt_utils/cross_db_utils/deprecated/split_part.sql
@@ -0,0 +1,3 @@
+{% macro spark__split_part(string_text, delimiter_text, part_number) %}
+    {{ return(adapter.dispatch('split_part', 'dbt')(string_text, delimiter_text, part_number)) }}
+{% endmacro %}
diff --git a/macros/dbt_utils/cross_db_utils/split_part.sql b/macros/dbt_utils/cross_db_utils/split_part.sql
deleted file mode 100644
index 966fdb7..0000000
--- a/macros/dbt_utils/cross_db_utils/split_part.sql
+++ /dev/null
@@ -1,3 +0,0 @@
-{% macro spark__split_part(string_text, delimiter_text, part_number) %}
-    {{ return(dbt.split_part(string_text, delimiter_text, part_number)) }}
-{% endmacro %}
diff --git a/macros/etc/assert_not_null.sql b/macros/etc/assert_not_null.sql
deleted file mode 100644
index e4692de..0000000
--- a/macros/etc/assert_not_null.sql
+++ /dev/null
@@ -1,9 +0,0 @@
-{% macro assert_not_null(function, arg) -%}
-    {{ return(adapter.dispatch('assert_not_null', 'spark_utils')(function, arg)) }}
-{%- endmacro %}
-
-{% macro default__assert_not_null(function, arg) %}
-
-coalesce({{function}}({{arg}}), nvl2({{function}}({{arg}}), assert_true({{function}}({{arg}}) is not null), null))
-
-{% endmacro %}
diff --git a/tests/functional/test_utils.py b/tests/functional/test_utils.py
index 8387c3b..9353f66 100644
--- a/tests/functional/test_utils.py
+++ b/tests/functional/test_utils.py
@@ -19,7 +19,7 @@ def packages(self):
         return {
             "packages": [
                 {"local": os.getcwd()},
-                {"local": f"{os.getcwd()}/dbt-utils"}
+                {"git": "https://github.com/dbt-labs/dbt-utils"}
             ]}
 
     @pytest.fixture(scope="class")

From c103b95979d02081e28179130b69b6537cf40a1d Mon Sep 17 00:00:00 2001
From: Jeremy Cohen
Date: Wed, 3 Aug 2022 11:51:40 +0200
Subject: [PATCH 13/15] Keep going

---
 integration_tests/dbt_utils/dbt_project.yml   | 27 ++++++++++++++++---
 .../dbt_utils/models/test_recency.sql         |  2 ++
 2 files changed, 25 insertions(+), 4 deletions(-)
 create mode 100644 integration_tests/dbt_utils/models/test_recency.sql

diff --git a/integration_tests/dbt_utils/dbt_project.yml b/integration_tests/dbt_utils/dbt_project.yml
index 79a5e65..0854dd1 100644
--- a/integration_tests/dbt_utils/dbt_project.yml
+++ b/integration_tests/dbt_utils/dbt_project.yml
@@ -32,6 +32,19 @@ seeds:
 models:
   dbt_utils_integration_tests:
     +file_format: delta
+
+    # these were added after the cross-db migration
+    # see https://github.com/dbt-labs/dbt-core/issues/5520
+    # the compatibility will be properly added in dbt-spark
+    # just disable them for now
+    
cross_db_utils: + test_array_append: + +enabled: false + test_array_concat: + +enabled: false + test_array_construct: + +enabled: false + sql: # macro doesn't work for this integration test (schema pattern) test_get_relations_by_pattern: @@ -43,13 +56,19 @@ models: test_pivot_apostrophe: +enabled: false generic_tests: - # integration test doesn't work + # default version of this integration test uses an explicit cast to 'datetime' + # which SparkSQL does not support test_recency: +enabled: false cross_db_utils: # integration test doesn't work test_any_value: +enabled: false - # integration test doesn't work - test_listagg: - +enabled: false \ No newline at end of file + +tests: + dbt_utils_integration_tests: + cross_db_utils: + # expect exactly two failures + # (both use "order by", which isn't supported in SparkSQL) + assert_equal_test_listagg_actual__expected: + error_if: ">2" diff --git a/integration_tests/dbt_utils/models/test_recency.sql b/integration_tests/dbt_utils/models/test_recency.sql new file mode 100644 index 0000000..d44b5bc --- /dev/null +++ b/integration_tests/dbt_utils/models/test_recency.sql @@ -0,0 +1,2 @@ +select + {{ dbt_utils.date_trunc('day', dbt_utils.current_timestamp()) }} as today From a943272c783d88f779442e7bb94ee1fc13b0bb58 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Wed, 3 Aug 2022 12:20:00 +0200 Subject: [PATCH 14/15] Cleanup after rebase --- integration_tests/dbt_utils/dbt_project.yml | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/integration_tests/dbt_utils/dbt_project.yml b/integration_tests/dbt_utils/dbt_project.yml index 0854dd1..18198c3 100644 --- a/integration_tests/dbt_utils/dbt_project.yml +++ b/integration_tests/dbt_utils/dbt_project.yml @@ -17,8 +17,6 @@ clean-targets: # directories to be removed by `dbt clean` - "dbt_modules" dispatch: - - macro_namespace: dbt - search_order: ['dbt'] - macro_namespace: dbt_utils search_order: - spark_utils @@ -32,18 +30,6 @@ seeds: models: dbt_utils_integration_tests: +file_format: delta - - # these were added after the cross-db migration - # see https://github.com/dbt-labs/dbt-core/issues/5520 - # the compatibility will be properly added in dbt-spark - # just disable them for now - cross_db_utils: - test_array_append: - +enabled: false - test_array_concat: - +enabled: false - test_array_construct: - +enabled: false sql: # macro doesn't work for this integration test (schema pattern) @@ -57,7 +43,7 @@ models: +enabled: false generic_tests: # default version of this integration test uses an explicit cast to 'datetime' - # which SparkSQL does not support + # which SparkSQL does not support. override with our own version test_recency: +enabled: false cross_db_utils: From 1dbafb6723d9437f9afdf3344a58b985e3792eb1 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Wed, 3 Aug 2022 12:40:20 +0200 Subject: [PATCH 15/15] Update version requirement --- dbt_project.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt_project.yml b/dbt_project.yml index 14f6bec..cda6511 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -1,5 +1,5 @@ name: 'spark_utils' version: '0.3.0' config-version: 2 -require-dbt-version: [">=1.0.0", "<2.0.0"] +require-dbt-version: [">=1.2.0", "<2.0.0"] macro-paths: ["macros"] \ No newline at end of file