diff --git a/CHANGELOG.md b/CHANGELOG.md index 93d027e7b..28f0dcf03 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,11 @@ ### Under the hood - Port testing framework changes from [dbt-labs/dbt-spark#299](https://github.com/dbt-labs/dbt-spark/pull/299) and [dbt-labs/dbt-spark#314](https://github.com/dbt-labs/dbt-spark/pull/314) ([#70](https://github.com/databricks/dbt-databricks/pull/70)) +## dbt-databricks 1.0.3 (Release TBD) + +### Fixes +- Make internal macros use macro dispatch pattern ([#72](https://github.com/databricks/dbt-databricks/pull/72)) + ## dbt-databricks 1.0.2 (March 31, 2022) ### Features diff --git a/dbt/include/databricks/macros/adapters.sql b/dbt/include/databricks/macros/adapters.sql index 7a57a2a1c..330d931f7 100644 --- a/dbt/include/databricks/macros/adapters.sql +++ b/dbt/include/databricks/macros/adapters.sql @@ -1,19 +1,11 @@ -{% macro dbt_databricks_file_format_clause() %} +{% macro databricks__file_format_clause() %} {%- set file_format = config.get('file_format', default='delta') -%} {%- if file_format is not none %} using {{ file_format }} {%- endif %} {%- endmacro -%} -{% macro dbt_databricks_location_clause() %} - {%- set location_root = config.get('location_root', validator=validation.any[basestring]) -%} - {%- set identifier = model['alias'] -%} - {%- if location_root is not none %} - location '{{ location_root }}/{{ identifier }}' - {%- endif %} -{%- endmacro -%} - -{% macro dbt_databricks_options_clause() -%} +{% macro databricks__options_clause() -%} {%- set options = config.get('options') -%} {%- if config.get('file_format', default='delta') == 'hudi' -%} {%- set unique_key = config.get('unique_key') -%} @@ -35,51 +27,12 @@ {%- endif %} {%- endmacro -%} -{% macro dbt_databricks_comment_clause() %} - {%- set raw_persist_docs = config.get('persist_docs', {}) -%} - {%- if raw_persist_docs is mapping -%} - {%- set raw_relation = raw_persist_docs.get('relation', false) -%} - {%- if raw_relation -%} - comment '{{ model.description | replace("'", "\\'") }}' - {% endif %} - {%- elif raw_persist_docs -%} - {{ exceptions.raise_compiler_error("Invalid value provided for 'persist_docs'. Expected dict but got value: " ~ raw_persist_docs) }} - {% endif %} +{% macro tblproperties_clause() -%} + {{ return(adapter.dispatch('tblproperties_clause', 'dbt')()) }} {%- endmacro -%} -{% macro dbt_databricks_partition_cols(label, required=false) %} - {%- set cols = config.get('partition_by', validator=validation.any[list, basestring]) -%} - {%- if cols is not none %} - {%- if cols is string -%} - {%- set cols = [cols] -%} - {%- endif -%} - {{ label }} ( - {%- for item in cols -%} - {{ item }} - {%- if not loop.last -%},{%- endif -%} - {%- endfor -%} - ) - {%- endif %} -{%- endmacro -%} - -{% macro dbt_databricks_clustered_cols(label, required=false) %} - {%- set cols = config.get('clustered_by', validator=validation.any[list, basestring]) -%} - {%- set buckets = config.get('buckets', validator=validation.any[int]) -%} - {%- if (cols is not none) and (buckets is not none) %} - {%- if cols is string -%} - {%- set cols = [cols] -%} - {%- endif -%} - {{ label }} ( - {%- for item in cols -%} - {{ item }} - {%- if not loop.last -%},{%- endif -%} - {%- endfor -%} - ) into {{ buckets }} buckets - {%- endif %} -{%- endmacro -%} - -{% macro dbt_databricks_tblproperties_clause() -%} +{% macro databricks__tblproperties_clause() -%} {%- set tblproperties = config.get('tblproperties') -%} {%- if tblproperties is not none %} tblproperties ( @@ -90,28 +43,23 @@ {%- endif %} {%- endmacro -%} -{#-- We can't use temporary tables with `create ... as ()` syntax #} -{% macro dbt_databricks_create_temporary_view(relation, sql) -%} - create temporary view {{ relation.include(schema=false) }} as - {{ sql }} -{% endmacro %} {% macro databricks__create_table_as(temporary, relation, sql) -%} {% if temporary -%} - {{ dbt_databricks_create_temporary_view(relation, sql) }} + {{ create_temporary_view(relation, sql) }} {%- else -%} {% if config.get('file_format', default='delta') == 'delta' %} create or replace table {{ relation }} {% else %} create table {{ relation }} {% endif %} - {{ dbt_databricks_file_format_clause() }} - {{ dbt_databricks_options_clause() }} - {{ dbt_databricks_partition_cols(label="partitioned by") }} - {{ dbt_databricks_clustered_cols(label="clustered by") }} - {{ dbt_databricks_location_clause() }} - {{ dbt_databricks_comment_clause() }} - {{ dbt_databricks_tblproperties_clause() }} + {{ file_format_clause() }} + {{ options_clause() }} + {{ partition_cols(label="partitioned by") }} + {{ clustered_cols(label="clustered by") }} + {{ location_clause() }} + {{ comment_clause() }} + {{ tblproperties_clause() }} as {{ sql }} {%- endif %} @@ -119,8 +67,8 @@ {% macro databricks__create_view_as(relation, sql) -%} create or replace view {{ relation }} - {{ dbt_databricks_comment_clause() }} - {{ dbt_databricks_tblproperties_clause() }} + {{ comment_clause() }} + {{ tblproperties_clause() }} as {{ sql }} {% endmacro %} diff --git a/dbt/include/databricks/macros/materializations/incremental/incremental.sql b/dbt/include/databricks/macros/materializations/incremental/incremental.sql index 306eac732..4eb28432d 100644 --- a/dbt/include/databricks/macros/materializations/incremental/incremental.sql +++ b/dbt/include/databricks/macros/materializations/incremental/incremental.sql @@ -34,7 +34,7 @@ {% else %} {% do run_query(create_table_as(True, tmp_relation, sql)) %} {% do process_schema_changes(on_schema_change, tmp_relation, existing_relation) %} - {% set build_sql = dbt_databricks_get_incremental_sql(strategy, tmp_relation, target_relation, unique_key) %} + {% set build_sql = dbt_spark_get_incremental_sql(strategy, tmp_relation, target_relation, unique_key) %} {% endif %} {%- call statement('main') -%} diff --git a/dbt/include/databricks/macros/materializations/incremental/strategies.sql b/dbt/include/databricks/macros/materializations/incremental/strategies.sql deleted file mode 100644 index 074fc7002..000000000 --- a/dbt/include/databricks/macros/materializations/incremental/strategies.sql +++ /dev/null @@ -1,29 +0,0 @@ -{% macro dbt_databricks_get_insert_overwrite_sql(source_relation, target_relation) %} - - {%- set dest_columns = adapter.get_columns_in_relation(target_relation) -%} - {%- set dest_cols_csv = dest_columns | map(attribute='quoted') | join(', ') -%} - insert overwrite table {{ target_relation }} - {{ dbt_databricks_partition_cols(label="partition") }} - select {{dest_cols_csv}} from {{ source_relation.include(database=false, schema=false) }} - -{% endmacro %} - - -{% macro dbt_databricks_get_incremental_sql(strategy, source, target, unique_key) %} - {%- if strategy == 'append' -%} - {#-- insert new records into existing table, without updating or overwriting #} - {{ get_insert_into_sql(source, target) }} - {%- elif strategy == 'insert_overwrite' -%} - {#-- insert statements don't like CTEs, so support them via a temp view #} - {{ dbt_databricks_get_insert_overwrite_sql(source, target) }} - {%- elif strategy == 'merge' -%} - {#-- merge all columns with databricks delta - schema changes are handled for us #} - {{ get_merge_sql(target, source, unique_key, dest_columns=none, predicates=none) }} - {%- else -%} - {% set no_sql_for_strategy_msg -%} - No known SQL for the incremental strategy provided: {{ strategy }} - {%- endset %} - {%- do exceptions.raise_compiler_error(no_sql_for_strategy_msg) -%} - {%- endif -%} - -{% endmacro %} diff --git a/dbt/include/databricks/macros/materializations/seed.sql b/dbt/include/databricks/macros/materializations/seed.sql index 4e8f87ada..fc305f9d2 100644 --- a/dbt/include/databricks/macros/materializations/seed.sql +++ b/dbt/include/databricks/macros/materializations/seed.sql @@ -15,12 +15,12 @@ {{ adapter.quote_seed_column(column_name, quote_seed_column) }} {{ type }} {%- if not loop.last -%}, {%- endif -%} {%- endfor -%} ) - {{ dbt_databricks_file_format_clause() }} - {{ dbt_databricks_partition_cols(label="partitioned by") }} - {{ dbt_databricks_clustered_cols(label="clustered by") }} - {{ dbt_databricks_location_clause() }} - {{ dbt_databricks_comment_clause() }} - {{ dbt_databricks_tblproperties_clause() }} + {{ file_format_clause() }} + {{ partition_cols(label="partitioned by") }} + {{ clustered_cols(label="clustered by") }} + {{ location_clause() }} + {{ comment_clause() }} + {{ tblproperties_clause() }} {% endset %} {% call statement('_') -%} diff --git a/tests/unit/test_macros.py b/tests/unit/test_macros.py index a3abd273b..882db6904 100644 --- a/tests/unit/test_macros.py +++ b/tests/unit/test_macros.py @@ -1,16 +1,16 @@ import unittest from unittest import mock import re -from jinja2 import Environment, FileSystemLoader +from jinja2 import Environment, FileSystemLoader, PackageLoader class TestSparkMacros(unittest.TestCase): def setUp(self): + self.parent_jinja_env = Environment( + loader=PackageLoader("dbt.include.spark", "macros"), extensions=["jinja2.ext.do"] + ) self.jinja_env = Environment( - loader=FileSystemLoader("dbt/include/databricks/macros"), - extensions=[ - "jinja2.ext.do", - ], + loader=FileSystemLoader("dbt/include/databricks/macros"), extensions=["jinja2.ext.do"] ) self.config = {} @@ -19,16 +19,29 @@ def setUp(self): "model": mock.Mock(), "exceptions": mock.Mock(), "config": mock.Mock(), + "adapter": mock.Mock(), + "return": lambda r: r, } self.default_context["config"].get = lambda key, default=None, **kwargs: self.config.get( key, default ) def __get_template(self, template_filename): + parent = self.parent_jinja_env.get_template(template_filename, globals=self.default_context) + self.default_context.update(parent.module.__dict__) return self.jinja_env.get_template(template_filename, globals=self.default_context) def __run_macro(self, template, name, temporary, relation, sql): self.default_context["model"].alias = relation + + def dispatch(macro_name, macro_namespace=None, packages=None): + if hasattr(template.module, f"databricks__{macro_name}"): + return getattr(template.module, f"databricks__{macro_name}") + else: + return self.default_context[f"spark__{macro_name}"] + + self.default_context["adapter"].dispatch = dispatch + if temporary is not None: value = getattr(template.module, name)(temporary, relation, sql) else: diff --git a/tox.ini b/tox.ini index 42589c9d9..4823c5061 100644 --- a/tox.ini +++ b/tox.ini @@ -9,8 +9,8 @@ commands = /bin/bash -c '$(which black) --config black.ini --check .' /bin/bash -c '$(which mypy) --config-file mypy.ini --namespace-packages --explicit-package-bases dbt tests' passenv = DBT_* PYTEST_ADDOPTS deps = - -r{toxinidir}/requirements.txt -r{toxinidir}/dev_requirements.txt + -r{toxinidir}/requirements.txt [testenv:black] basepython = python3 @@ -24,8 +24,8 @@ basepython = python3 commands = /bin/bash -c '{envpython} -m pytest -v {posargs} tests/unit' passenv = DBT_* PYTEST_ADDOPTS deps = - -r{toxinidir}/requirements.txt -r{toxinidir}/dev_requirements.txt + -r{toxinidir}/requirements.txt [testenv:integration-databricks-cluster] basepython = python3 @@ -33,8 +33,8 @@ commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_cluster t /bin/bash -c '{envpython} -m pytest -v -m profile_databricks_cluster -n4 tests/integration/* {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret' passenv = DBT_* PYTEST_ADDOPTS deps = - -r{toxinidir}/requirements.txt -r{toxinidir}/dev_requirements.txt + -r{toxinidir}/requirements.txt -e. [testenv:integration-databricks-uc-cluster] @@ -43,8 +43,8 @@ commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_uc_cluste /bin/bash -c '{envpython} -m pytest -v -m profile_databricks_uc_cluster -n4 tests/integration/* {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret' passenv = DBT_* PYTEST_ADDOPTS deps = - -r{toxinidir}/requirements.txt -r{toxinidir}/dev_requirements.txt + -r{toxinidir}/requirements.txt -e. [testenv:integration-databricks-sql-endpoint] @@ -53,8 +53,8 @@ commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_sql_endpo /bin/bash -c '{envpython} -m pytest -v -m profile_databricks_sql_endpoint -n4 tests/integration/* {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret' passenv = DBT_* PYTEST_ADDOPTS deps = - -r{toxinidir}/requirements.txt -r{toxinidir}/dev_requirements.txt + -r{toxinidir}/requirements.txt -e. [testenv:integration-databricks-uc-sql-endpoint] @@ -63,6 +63,6 @@ commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_uc_sql_en /bin/bash -c '{envpython} -m pytest -v -m profile_databricks_uc_sql_endpoint -n4 tests/integration/* {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret' passenv = DBT_* PYTEST_ADDOPTS deps = - -r{toxinidir}/requirements.txt -r{toxinidir}/dev_requirements.txt + -r{toxinidir}/requirements.txt -e.