[#2401] Configs in schema files
gshank committed Aug 10, 2021
1 parent e7b8488 commit 757a4be
Showing 27 changed files with 563 additions and 288 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -10,6 +10,7 @@
### Features
- Add `dbt build` command to run models, tests, seeds, and snapshots in DAG order. ([#2743](https://github.com/dbt-labs/dbt/issues/2743), [#3490](https://github.com/dbt-labs/dbt/issues/3490), [#3608](https://github.com/dbt-labs/dbt/issues/3608))
- Introduce `on_schema_change` config to detect and handle schema changes on incremental models ([#1132](https://github.com/fishtown-analytics/dbt/issues/1132), [#3387](https://github.com/fishtown-analytics/dbt/issues/3387))
- Enable setting configs in schema files ([#2401](https://github.com/dbt-labs/dbt/issues/2401), [#3616](https://github.com/dbt-labs/dbt/pull/3616))

### Fixes
- Fix docs generation for cross-db sources in REDSHIFT RA3 node ([#3236](https://github.com/fishtown-analytics/dbt/issues/3236), [#3408](https://github.com/fishtown-analytics/dbt/pull/3408))
14 changes: 14 additions & 0 deletions core/dbt/context/context_config.py
@@ -125,6 +125,7 @@ def calculate_node_config(
resource_type: NodeType,
project_name: str,
base: bool,
patch_config_dict: Dict[str, Any] = None
) -> BaseConfig:
own_config = self.get_node_project(project_name)

@@ -134,6 +135,12 @@
for fqn_config in project_configs:
result = self._update_from_config(result, fqn_config)

# Config in a schema file patch has lower precedence than config set in
# the model itself (config_call_dict), so the patch_config_dict is applied
# before the config_call_dict
if patch_config_dict:
result = self._update_from_config(result, patch_config_dict)

# config_calls are created in the 'experimental' model parser and
# the ParseConfigObject (via add_config_call)
result = self._update_from_config(result, config_call_dict)
@@ -153,6 +160,7 @@ def calculate_node_config_dict(
resource_type: NodeType,
project_name: str,
base: bool,
patch_config_dict: Dict[str, Any],
) -> Dict[str, Any]:
...

@@ -192,13 +200,15 @@ def calculate_node_config_dict(
resource_type: NodeType,
project_name: str,
base: bool,
patch_config_dict: dict = None
) -> Dict[str, Any]:
config = self.calculate_node_config(
config_call_dict=config_call_dict,
fqn=fqn,
resource_type=resource_type,
project_name=project_name,
base=base,
patch_config_dict=patch_config_dict
)
finalized = config.finalize_and_validate()
return finalized.to_dict(omit_none=True)
@@ -215,13 +225,15 @@ def calculate_node_config_dict(
resource_type: NodeType,
project_name: str,
base: bool,
patch_config_dict: dict = None
) -> Dict[str, Any]:
return self.calculate_node_config(
config_call_dict=config_call_dict,
fqn=fqn,
resource_type=resource_type,
project_name=project_name,
base=base,
patch_config_dict=patch_config_dict
)

def initial_result(
@@ -284,6 +296,7 @@ def build_config_dict(
base: bool = False,
*,
rendered: bool = True,
patch_config_dict: dict = None
) -> Dict[str, Any]:
if rendered:
src = ContextConfigGenerator(self._active_project)
@@ -296,4 +309,5 @@
resource_type=self._resource_type,
project_name=self._project_name,
base=base,
patch_config_dict=patch_config_dict
)
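
The hunks above thread a new `patch_config_dict` argument from `build_config_dict` down to `calculate_node_config`, where it is merged after the `dbt_project.yml` configs but before the in-model `config()` calls. Below is a minimal, standalone sketch of that precedence; it uses a plain `dict.update()` in place of `_update_from_config()`, so dbt's append/update merge behaviors are deliberately ignored and all names ending in `_sketch` are illustrative, not dbt's.

```python
from typing import Any, Dict, List, Optional


def calculate_node_config_sketch(
    project_configs: List[Dict[str, Any]],
    config_call_dict: Dict[str, Any],
    patch_config_dict: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    result: Dict[str, Any] = {}
    for fqn_config in project_configs:   # configs from dbt_project.yml, applied first
        result.update(fqn_config)
    if patch_config_dict:                # config declared in the schema (.yml) file
        result.update(patch_config_dict)
    result.update(config_call_dict)      # config() calls inside the model win last
    return result


print(calculate_node_config_sketch(
    project_configs=[{"materialized": "view", "tags": ["daily"]}],
    patch_config_dict={"materialized": "table"},
    config_call_dict={"materialized": "incremental"},
))
# -> {'materialized': 'incremental', 'tags': ['daily']}
```

Running the sketch shows the in-model `config()` call overriding the schema-file patch, which in turn overrides `dbt_project.yml`, matching the precedence comment in the diff.
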
4 changes: 2 additions & 2 deletions core/dbt/context/providers.py
@@ -1243,15 +1243,15 @@ class ModelContext(ProviderContext):

@contextproperty
def pre_hooks(self) -> List[Dict[str, Any]]:
if isinstance(self.model, ParsedSourceDefinition):
if self.model.resource_type in [NodeType.Source, NodeType.Test]:
return []
return [
h.to_dict(omit_none=True) for h in self.model.config.pre_hook
]

@contextproperty
def post_hooks(self) -> List[Dict[str, Any]]:
if isinstance(self.model, ParsedSourceDefinition):
if self.model.resource_type in [NodeType.Source, NodeType.Test]:
return []
return [
h.to_dict(omit_none=True) for h in self.model.config.post_hook
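
The `pre_hooks`/`post_hooks` properties above switch from an `isinstance` check on `ParsedSourceDefinition` to a resource-type check that also covers tests. A plausible reading, as far as this diff shows, is that tests now carry a config derived from the new `NodeAndTestConfig` base, which does not declare `pre_hook`/`post_hook`, so reading those attributes on a test's config would fail. The sketch below uses simplified stand-in classes (names are illustrative, not dbt's) to show why the early return is needed.

```python
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Dict, List


class NodeType(Enum):
    Model = "model"
    Source = "source"
    Test = "test"


@dataclass
class NodeAndTestConfigSketch:
    # shared config fields only; no hook fields here
    meta: Dict[str, Any] = field(default_factory=dict)


@dataclass
class NodeConfigSketch(NodeAndTestConfigSketch):
    # node-only config adds the hook fields
    pre_hook: List[str] = field(default_factory=list)
    post_hook: List[str] = field(default_factory=list)


def pre_hooks_sketch(resource_type: NodeType, config) -> List[Any]:
    # Sources and tests carry configs without hook fields, so return early.
    if resource_type in (NodeType.Source, NodeType.Test):
        return []
    return list(config.pre_hook)


print(pre_hooks_sketch(NodeType.Test, NodeAndTestConfigSketch()))               # []
print(pre_hooks_sketch(NodeType.Model, NodeConfigSketch(pre_hook=["grant"])))   # ['grant']
```
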
8 changes: 6 additions & 2 deletions core/dbt/contracts/graph/compiled.py
@@ -109,15 +109,19 @@ class CompiledSnapshotNode(CompiledNode):
@dataclass
class CompiledDataTestNode(CompiledNode):
resource_type: NodeType = field(metadata={'restrict': [NodeType.Test]})
config: TestConfig = field(default_factory=TestConfig)
# Was not able to make mypy happy and keep the code working. We need to
# refactor the various configs.
config: TestConfig = field(default_factory=TestConfig) # type:ignore


@dataclass
class CompiledSchemaTestNode(CompiledNode, HasTestMetadata):
# keep this in sync with ParsedSchemaTestNode!
resource_type: NodeType = field(metadata={'restrict': [NodeType.Test]})
column_name: Optional[str] = None
config: TestConfig = field(default_factory=TestConfig)
# Was not able to make mypy happy and keep the code working. We need to
# refactor the various configs.
config: TestConfig = field(default_factory=TestConfig) # type:ignore

def same_contents(self, other) -> bool:
if other is None:
60 changes: 2 additions & 58 deletions core/dbt/contracts/graph/manifest.py
@@ -14,7 +14,7 @@
CompileResultNode, ManifestNode, NonSourceCompiledNode, GraphMemberNode
)
from dbt.contracts.graph.parsed import (
ParsedMacro, ParsedDocumentation, ParsedNodePatch, ParsedMacroPatch,
ParsedMacro, ParsedDocumentation,
ParsedSourceDefinition, ParsedExposure, HasUniqueID,
UnpatchedSourceDefinition, ManifestNodes
)
@@ -26,9 +26,7 @@
from dbt.dataclass_schema import dbtClassMixin
from dbt.exceptions import (
CompilationException,
raise_duplicate_resource_name, raise_compiler_error, warn_or_error,
raise_duplicate_patch_name,
raise_duplicate_macro_patch_name, raise_duplicate_source_patch_name,
raise_duplicate_resource_name, raise_compiler_error,
)
from dbt.helper_types import PathSet
from dbt.logger import GLOBAL_LOGGER as logger
@@ -718,60 +716,6 @@ def get_resource_fqns(self) -> Mapping[str, PathSet]:
resource_fqns[resource_type_plural].add(tuple(resource.fqn))
return resource_fqns

# This is called by 'parse_patch' in the NodePatchParser
def add_patch(
self, source_file: SchemaSourceFile, patch: ParsedNodePatch,
) -> None:
if patch.yaml_key in ['models', 'seeds', 'snapshots']:
unique_id = self.ref_lookup.get_unique_id(patch.name, None)
elif patch.yaml_key == 'analyses':
unique_id = self.analysis_lookup.get_unique_id(patch.name, None)
else:
raise dbt.exceptions.InternalException(
f'Unexpected yaml_key {patch.yaml_key} for patch in '
f'file {source_file.path.original_file_path}'
)
if unique_id is None:
# This will usually happen when a node is disabled
return

# patches can't be overwritten
node = self.nodes.get(unique_id)
if node:
if node.patch_path:
package_name, existing_file_path = node.patch_path.split('://')
raise_duplicate_patch_name(patch, existing_file_path)
source_file.append_patch(patch.yaml_key, unique_id)
node.patch(patch)

def add_macro_patch(
self, source_file: SchemaSourceFile, patch: ParsedMacroPatch,
) -> None:
# macros are fully namespaced
unique_id = f'macro.{patch.package_name}.{patch.name}'
macro = self.macros.get(unique_id)
if not macro:
warn_or_error(
f'WARNING: Found documentation for macro "{patch.name}" '
f'which was not found'
)
return
if macro.patch_path:
package_name, existing_file_path = macro.patch_path.split('://')
raise_duplicate_macro_patch_name(patch, existing_file_path)
source_file.macro_patches[patch.name] = unique_id
macro.patch(patch)

def add_source_patch(
self, source_file: SchemaSourceFile, patch: SourcePatch,
) -> None:
# source patches must be unique
key = (patch.overrides, patch.name)
if key in self.source_patches:
raise_duplicate_source_patch_name(patch, self.source_patches[key])
self.source_patches[key] = patch
source_file.source_patches.append(key)

def get_used_schemas(self, resource_types=None):
return frozenset({
(node.database, node.schema) for node in
63 changes: 34 additions & 29 deletions core/dbt/contracts/graph/model_config.py
@@ -268,9 +268,10 @@ def same_contents(
return True

# This is used in 'add_config_call' to create the combined config_call_dict.
# 'meta' moved here from node
mergebehavior = {
"append": ['pre-hook', 'pre_hook', 'post-hook', 'post_hook', 'tags'],
"update": ['quoting', 'column_types'],
"update": ['quoting', 'column_types', 'meta'],
}

@classmethod
@@ -355,10 +356,38 @@ class SourceConfig(BaseConfig):


@dataclass
class NodeConfig(BaseConfig):
class NodeAndTestConfig(BaseConfig):
enabled: bool = True
# these fields are included in serialized output, but are not part of
# config comparison (they are part of database_representation)
alias: Optional[str] = field(
default=None,
metadata=CompareBehavior.Exclude.meta(),
)
schema: Optional[str] = field(
default=None,
metadata=CompareBehavior.Exclude.meta(),
)
database: Optional[str] = field(
default=None,
metadata=CompareBehavior.Exclude.meta(),
)
tags: Union[List[str], str] = field(
default_factory=list_str,
metadata=metas(ShowBehavior.Hide,
MergeBehavior.Append,
CompareBehavior.Exclude),
)
meta: Dict[str, Any] = field(
default_factory=dict,
metadata=MergeBehavior.Update.meta(),
)


@dataclass
class NodeConfig(NodeAndTestConfig):
# Note: if any new fields are added with MergeBehavior, also update the
# 'mergebehavior' dictionary
enabled: bool = True
materialized: str = 'view'
persist_docs: Dict[str, Any] = field(default_factory=dict)
post_hook: List[Hook] = field(
@@ -369,11 +398,6 @@ class NodeConfig(BaseConfig):
default_factory=list,
metadata=MergeBehavior.Append.meta(),
)
# this only applies for config v1, so it doesn't participate in comparison
vars: Dict[str, Any] = field(
default_factory=dict,
metadata=metas(CompareBehavior.Exclude, MergeBehavior.Update),
)
quoting: Dict[str, Any] = field(
default_factory=dict,
metadata=MergeBehavior.Update.meta(),
@@ -384,26 +408,6 @@
default_factory=dict,
metadata=MergeBehavior.Update.meta(),
)
# these fields are included in serialized output, but are not part of
# config comparison (they are part of database_representation)
alias: Optional[str] = field(
default=None,
metadata=CompareBehavior.Exclude.meta(),
)
schema: Optional[str] = field(
default=None,
metadata=CompareBehavior.Exclude.meta(),
)
database: Optional[str] = field(
default=None,
metadata=CompareBehavior.Exclude.meta(),
)
tags: Union[List[str], str] = field(
default_factory=list_str,
metadata=metas(ShowBehavior.Hide,
MergeBehavior.Append,
CompareBehavior.Exclude),
)
full_refresh: Optional[bool] = None
on_schema_change: Optional[str] = 'ignore'

@@ -447,7 +451,8 @@ class SeedConfig(NodeConfig):


@dataclass
class TestConfig(NodeConfig):
class TestConfig(NodeAndTestConfig):
# this is repeated because of a different default
schema: Optional[str] = field(
default='dbt_test__audit',
metadata=CompareBehavior.Exclude.meta(),
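
The model_config.py changes above split the shared fields into a new `NodeAndTestConfig` base (now including `meta` with update-merge behavior), keep node-only fields such as `materialized` and the hooks on `NodeConfig`, and have `TestConfig` redeclare `schema` solely to change its default. A simplified, runnable sketch of that shape follows; it uses plain dataclasses with dbt's field metadata and merge machinery omitted, and the `*Sketch` names are illustrative.

```python
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional


@dataclass
class NodeAndTestConfigSketch:
    # fields shared by nodes and tests
    enabled: bool = True
    alias: Optional[str] = None
    schema: Optional[str] = None
    database: Optional[str] = None
    tags: List[str] = field(default_factory=list)
    meta: Dict[str, Any] = field(default_factory=dict)


@dataclass
class NodeConfigSketch(NodeAndTestConfigSketch):
    # node-only fields
    materialized: str = "view"
    pre_hook: List[str] = field(default_factory=list)
    post_hook: List[str] = field(default_factory=list)


@dataclass
class TestConfigSketch(NodeAndTestConfigSketch):
    # redeclared only to change the default; otherwise identical to the base field
    schema: Optional[str] = "dbt_test__audit"


print(NodeConfigSketch().schema)   # None
print(TestConfigSketch().schema)   # dbt_test__audit
```

Redeclaring a field in a dataclass subclass is the standard way to change only its default, which matches the "this is repeated because of a different default" comment in the diff.
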