
Make internal macros use macro dispatch pattern #72

Merged · 9 commits · Apr 11, 2022
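
This PR renames the adapter's internal helper macros from the dbt_databricks_* prefix to databricks__* names, so callers invoke the unprefixed macro and adapter.dispatch resolves the adapter-specific implementation. A minimal sketch of the pattern: the dispatching wrapper below is modeled on the tblproperties_clause wrapper added in this diff (the equivalent wrappers for the other clauses are assumed to live upstream in dbt-spark), and the databricks__file_format_clause body is taken from adapters.sql in this diff.

{% macro file_format_clause() -%}
  {#-- Callers use the unprefixed name; dispatch picks the implementation #}
  {{ return(adapter.dispatch('file_format_clause', 'dbt')()) }}
{%- endmacro -%}

{#-- Resolved in preference order: databricks__file_format_clause,
     spark__file_format_clause, default__file_format_clause #}
{% macro databricks__file_format_clause() %}
  {%- set file_format = config.get('file_format', default='delta') -%}
  {%- if file_format is not none %}
    using {{ file_format }}
  {%- endif %}
{%- endmacro -%}
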
5 changes: 5 additions & 0 deletions CHANGELOG.md
@@ -3,6 +3,11 @@
### Under the hood
- Port testing framework changes from [dbt-labs/dbt-spark#299](https://github.com/dbt-labs/dbt-spark/pull/299) and [dbt-labs/dbt-spark#314](https://github.com/dbt-labs/dbt-spark/pull/314) ([#70](https://github.com/databricks/dbt-databricks/pull/70))

## dbt-databricks 1.0.3 (Release TBD)

### Fixes
- Make internal macros use macro dispatch pattern ([#72](https://github.com/databricks/dbt-databricks/pull/72))

## dbt-databricks 1.0.2 (March 31, 2022)

### Features
82 changes: 15 additions & 67 deletions dbt/include/databricks/macros/adapters.sql
@@ -1,19 +1,11 @@
{% macro dbt_databricks_file_format_clause() %}
{% macro databricks__file_format_clause() %}
Review comment (Collaborator):
Just curious, if a user has re-defined the file_format_clause in an existing project, for example

{% macro file_format_clause() %}
...
{% endmacro %}

Will this change (and the upstream change in dbt-spark) break the existing pipeline when the user upgrades the adapter version?

Reply (Collaborator, PR author):
According to @jtcohen6's explanation in #72 (comment), the user's file_format_clause will be used.

  1. Macros in the user's own project — they can always override the builtins, and they can do it by defining a macro named any of these: file_format_clause, databricks__file_format_clause, spark__file_format_clause, default__file_format_clause
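To make that concrete, an illustrative sketch (not part of this PR) of such a project-level override: per the explanation above, any of those names defined in the user's own project wins dispatch resolution, so this macro would be used instead of databricks__file_format_clause. The macros/overrides.sql path and the parquet default are hypothetical.

{#-- macros/overrides.sql in the user's own project (hypothetical example) #}
{% macro file_format_clause() %}
  {%- set file_format = config.get('file_format', default='parquet') -%}
  {%- if file_format is not none %}
    using {{ file_format }}
  {%- endif %}
{% endmacro %}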

{%- set file_format = config.get('file_format', default='delta') -%}
{%- if file_format is not none %}
using {{ file_format }}
{%- endif %}
{%- endmacro -%}

{% macro dbt_databricks_location_clause() %}
{%- set location_root = config.get('location_root', validator=validation.any[basestring]) -%}
{%- set identifier = model['alias'] -%}
{%- if location_root is not none %}
location '{{ location_root }}/{{ identifier }}'
{%- endif %}
{%- endmacro -%}

{% macro dbt_databricks_options_clause() -%}
{% macro databricks__options_clause() -%}
{%- set options = config.get('options') -%}
{%- if config.get('file_format', default='delta') == 'hudi' -%}
{%- set unique_key = config.get('unique_key') -%}
@@ -35,51 +27,12 @@
{%- endif %}
{%- endmacro -%}

{% macro dbt_databricks_comment_clause() %}
{%- set raw_persist_docs = config.get('persist_docs', {}) -%}

{%- if raw_persist_docs is mapping -%}
{%- set raw_relation = raw_persist_docs.get('relation', false) -%}
{%- if raw_relation -%}
comment '{{ model.description | replace("'", "\\'") }}'
{% endif %}
{%- elif raw_persist_docs -%}
{{ exceptions.raise_compiler_error("Invalid value provided for 'persist_docs'. Expected dict but got value: " ~ raw_persist_docs) }}
{% endif %}
{% macro tblproperties_clause() -%}
{{ return(adapter.dispatch('tblproperties_clause', 'dbt')()) }}
{%- endmacro -%}

{% macro dbt_databricks_partition_cols(label, required=false) %}
{%- set cols = config.get('partition_by', validator=validation.any[list, basestring]) -%}
{%- if cols is not none %}
{%- if cols is string -%}
{%- set cols = [cols] -%}
{%- endif -%}
{{ label }} (
{%- for item in cols -%}
{{ item }}
{%- if not loop.last -%},{%- endif -%}
{%- endfor -%}
)
{%- endif %}
{%- endmacro -%}

{% macro dbt_databricks_clustered_cols(label, required=false) %}
{%- set cols = config.get('clustered_by', validator=validation.any[list, basestring]) -%}
{%- set buckets = config.get('buckets', validator=validation.any[int]) -%}
{%- if (cols is not none) and (buckets is not none) %}
{%- if cols is string -%}
{%- set cols = [cols] -%}
{%- endif -%}
{{ label }} (
{%- for item in cols -%}
{{ item }}
{%- if not loop.last -%},{%- endif -%}
{%- endfor -%}
) into {{ buckets }} buckets
{%- endif %}
{%- endmacro -%}

{% macro dbt_databricks_tblproperties_clause() -%}
{% macro databricks__tblproperties_clause() -%}
{%- set tblproperties = config.get('tblproperties') -%}
{%- if tblproperties is not none %}
tblproperties (
@@ -90,37 +43,32 @@
{%- endif %}
{%- endmacro -%}

{#-- We can't use temporary tables with `create ... as ()` syntax #}
{% macro dbt_databricks_create_temporary_view(relation, sql) -%}
create temporary view {{ relation.include(schema=false) }} as
{{ sql }}
{% endmacro %}

{% macro databricks__create_table_as(temporary, relation, sql) -%}
{% if temporary -%}
{{ dbt_databricks_create_temporary_view(relation, sql) }}
{{ create_temporary_view(relation, sql) }}
{%- else -%}
{% if config.get('file_format', default='delta') == 'delta' %}
create or replace table {{ relation }}
{% else %}
create table {{ relation }}
{% endif %}
{{ dbt_databricks_file_format_clause() }}
{{ dbt_databricks_options_clause() }}
{{ dbt_databricks_partition_cols(label="partitioned by") }}
{{ dbt_databricks_clustered_cols(label="clustered by") }}
{{ dbt_databricks_location_clause() }}
{{ dbt_databricks_comment_clause() }}
{{ dbt_databricks_tblproperties_clause() }}
{{ file_format_clause() }}
{{ options_clause() }}
{{ partition_cols(label="partitioned by") }}
{{ clustered_cols(label="clustered by") }}
{{ location_clause() }}
{{ comment_clause() }}
{{ tblproperties_clause() }}
as
{{ sql }}
{%- endif %}
{%- endmacro -%}

{% macro databricks__create_view_as(relation, sql) -%}
create or replace view {{ relation }}
{{ dbt_databricks_comment_clause() }}
{{ dbt_databricks_tblproperties_clause() }}
{{ comment_clause() }}
{{ tblproperties_clause() }}
as
{{ sql }}
{% endmacro %}
@@ -34,7 +34,7 @@
{% else %}
{% do run_query(create_table_as(True, tmp_relation, sql)) %}
{% do process_schema_changes(on_schema_change, tmp_relation, existing_relation) %}
{% set build_sql = dbt_databricks_get_incremental_sql(strategy, tmp_relation, target_relation, unique_key) %}
{% set build_sql = dbt_spark_get_incremental_sql(strategy, tmp_relation, target_relation, unique_key) %}
{% endif %}

{%- call statement('main') -%}

This file was deleted.

12 changes: 6 additions & 6 deletions dbt/include/databricks/macros/materializations/seed.sql
@@ -15,12 +15,12 @@
{{ adapter.quote_seed_column(column_name, quote_seed_column) }} {{ type }} {%- if not loop.last -%}, {%- endif -%}
{%- endfor -%}
)
{{ dbt_databricks_file_format_clause() }}
{{ dbt_databricks_partition_cols(label="partitioned by") }}
{{ dbt_databricks_clustered_cols(label="clustered by") }}
{{ dbt_databricks_location_clause() }}
{{ dbt_databricks_comment_clause() }}
{{ dbt_databricks_tblproperties_clause() }}
{{ file_format_clause() }}
{{ partition_cols(label="partitioned by") }}
{{ clustered_cols(label="clustered by") }}
{{ location_clause() }}
{{ comment_clause() }}
{{ tblproperties_clause() }}
{% endset %}

{% call statement('_') -%}
23 changes: 18 additions & 5 deletions tests/unit/test_macros.py
@@ -1,16 +1,16 @@
import unittest
from unittest import mock
import re
from jinja2 import Environment, FileSystemLoader
from jinja2 import Environment, FileSystemLoader, PackageLoader


class TestSparkMacros(unittest.TestCase):
def setUp(self):
self.parent_jinja_env = Environment(
loader=PackageLoader("dbt.include.spark", "macros"), extensions=["jinja2.ext.do"]
)
self.jinja_env = Environment(
loader=FileSystemLoader("dbt/include/databricks/macros"),
extensions=[
"jinja2.ext.do",
],
loader=FileSystemLoader("dbt/include/databricks/macros"), extensions=["jinja2.ext.do"]
)

self.config = {}
@@ -19,16 +19,29 @@ def setUp(self):
"model": mock.Mock(),
"exceptions": mock.Mock(),
"config": mock.Mock(),
"adapter": mock.Mock(),
"return": lambda r: r,
}
self.default_context["config"].get = lambda key, default=None, **kwargs: self.config.get(
key, default
)

def __get_template(self, template_filename):
parent = self.parent_jinja_env.get_template(template_filename, globals=self.default_context)
self.default_context.update(parent.module.__dict__)
return self.jinja_env.get_template(template_filename, globals=self.default_context)

def __run_macro(self, template, name, temporary, relation, sql):
self.default_context["model"].alias = relation

def dispatch(macro_name, macro_namespace=None, packages=None):
if hasattr(template.module, f"databricks__{macro_name}"):
return getattr(template.module, f"databricks__{macro_name}")
else:
return self.default_context[f"spark__{macro_name}"]

self.default_context["adapter"].dispatch = dispatch

if temporary is not None:
value = getattr(template.module, name)(temporary, relation, sql)
else:
12 changes: 6 additions & 6 deletions tox.ini
@@ -9,8 +9,8 @@ commands = /bin/bash -c '$(which black) --config black.ini --check .'
/bin/bash -c '$(which mypy) --config-file mypy.ini --namespace-packages --explicit-package-bases dbt tests'
passenv = DBT_* PYTEST_ADDOPTS
deps =
-r{toxinidir}/requirements.txt
-r{toxinidir}/dev_requirements.txt
-r{toxinidir}/requirements.txt
Review comment (Collaborator):
Is this reordering needed because the latest dbt-spark version has to be installed first?

Reply (Collaborator, PR author):

Yes.


[testenv:black]
basepython = python3
@@ -24,17 +24,17 @@
commands = /bin/bash -c '{envpython} -m pytest -v {posargs} tests/unit'
passenv = DBT_* PYTEST_ADDOPTS
deps =
-r{toxinidir}/requirements.txt
-r{toxinidir}/dev_requirements.txt
-r{toxinidir}/requirements.txt

[testenv:integration-databricks-cluster]
basepython = python3
commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_cluster tests/functional/adapter/test_basic.py {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'
/bin/bash -c '{envpython} -m pytest -v -m profile_databricks_cluster -n4 tests/integration/* {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'
passenv = DBT_* PYTEST_ADDOPTS
deps =
-r{toxinidir}/requirements.txt
-r{toxinidir}/dev_requirements.txt
-r{toxinidir}/requirements.txt
-e.

[testenv:integration-databricks-uc-cluster]
@@ -43,8 +43,8 @@ commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_uc_cluste
/bin/bash -c '{envpython} -m pytest -v -m profile_databricks_uc_cluster -n4 tests/integration/* {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'
passenv = DBT_* PYTEST_ADDOPTS
deps =
-r{toxinidir}/requirements.txt
-r{toxinidir}/dev_requirements.txt
-r{toxinidir}/requirements.txt
-e.

[testenv:integration-databricks-sql-endpoint]
@@ -53,8 +53,8 @@ commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_sql_endpo
/bin/bash -c '{envpython} -m pytest -v -m profile_databricks_sql_endpoint -n4 tests/integration/* {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'
passenv = DBT_* PYTEST_ADDOPTS
deps =
-r{toxinidir}/requirements.txt
-r{toxinidir}/dev_requirements.txt
-r{toxinidir}/requirements.txt
-e.

[testenv:integration-databricks-uc-sql-endpoint]
@@ -63,6 +63,6 @@ commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_uc_sql_en
/bin/bash -c '{envpython} -m pytest -v -m profile_databricks_uc_sql_endpoint -n4 tests/integration/* {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'
passenv = DBT_* PYTEST_ADDOPTS
deps =
-r{toxinidir}/requirements.txt
-r{toxinidir}/dev_requirements.txt
-r{toxinidir}/requirements.txt
-e.