Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

convert cli-style strings in selectors to normalized dictionaries #2895

Merged
merged 1 commit into from
Nov 18, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
- Added native python 're' module for regex in jinja templates [#2851](https://github.com/fishtown-analytics/dbt/pull/2851)
- Store resolved node names in manifest ([#2647](https://github.com/fishtown-analytics/dbt/issues/2647), [#2837](https://github.com/fishtown-analytics/dbt/pull/2837))
- Save selectors dictionary to manifest, allow descriptions ([#2693](https://github.com/fishtown-analytics/dbt/issues/2693), [#2866](https://github.com/fishtown-analytics/dbt/pull/2866))
- Normalize cli-style-strings in manifest selectors dictionary ([#2879](https://github.com/fishtown-analytics/dbt/issues/2879), [#2895](https://github.com/fishtown-analytics/dbt/pull/2895))

### Fixes
- Respect --project-dir in dbt clean command ([#2840](https://github.com/fishtown-analytics/dbt/issues/2840), [#2841](https://github.com/fishtown-analytics/dbt/pull/2841))
Expand Down
11 changes: 5 additions & 6 deletions core/dbt/config/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from dbt.version import get_installed_version
from dbt.utils import MultiDict
from dbt.node_types import NodeType
from dbt.config.selectors import SelectorDict

from dbt.contracts.project import (
Project as ProjectContract,
Expand Down Expand Up @@ -369,15 +370,13 @@ def create_project(self, rendered: RenderComponents) -> 'Project':
query_comment = _query_comment_from_cfg(cfg.query_comment)

packages = package_config_from_data(rendered.packages_dict)
selectors = selector_config_from_data(rendered.selectors_dict)
manifest_selectors: Dict[str, Any] = {}
if rendered.selectors_dict:
if rendered.selectors_dict and rendered.selectors_dict['selectors']:
# this is a dict with a single key 'selectors' pointing to a list
# of dicts.
if rendered.selectors_dict['selectors']:
# for each selector dict, transform into 'name': { }
for sel in rendered.selectors_dict['selectors']:
manifest_selectors[sel['name']] = sel
selectors = selector_config_from_data(rendered.selectors_dict)
manifest_selectors = SelectorDict.parse_from_selectors_list(
rendered.selectors_dict['selectors'])

project = Project(
project_name=name,
Expand Down
65 changes: 65 additions & 0 deletions core/dbt/config/selectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from dbt.contracts.selection import SelectorFile
from dbt.exceptions import DbtSelectorsError, RuntimeException
from dbt.graph import parse_from_selectors_definition, SelectionSpec
from dbt.graph.selector_spec import SelectionCriteria

MALFORMED_SELECTOR_ERROR = """\
The selectors.yml file in this project is malformed. Please double check
Expand Down Expand Up @@ -113,3 +114,67 @@ def selector_config_from_data(
result_type='invalid_selector',
) from e
return selectors


# These are utilities to clean up the dictionary created from
# selectors.yml by turning the cli-string format entries into
# normalized dictionary entries. It parallels the flow in
# dbt/graph/cli.py. If changes are made there, it might
# be necessary to make changes here. Ideally it would be
# good to combine the two flows into one at some point.
gshank marked this conversation as resolved.
Show resolved Hide resolved
class SelectorDict:
    """Normalize selector definitions loaded from selectors.yml.

    Cli-style string entries (for example ``'tag:nightly'``) and single
    ``key: value`` shorthand entries are rewritten into dictionaries with
    explicit ``method`` / ``value`` keys. All methods are classmethods;
    the class carries no state.
    """

    @classmethod
    def _parse_list_value(cls, value):
        """Normalize every element of *value* if it is a list.

        Any other kind of value is returned unchanged.
        """
        if isinstance(value, list):
            return [cls.parse_from_definition(item) for item in value]
        return value

    @classmethod
    def parse_dict_definition(cls, definition):
        """Normalize a dictionary that is neither a union nor an intersection.

        * ``{}`` is returned as is.
        * ``{'exclude': [...]}`` keeps its key; the list items are normalized.
        * Any other single-key dict ``{key: value}`` becomes
          ``{'method': key, 'value': value}``.
        * A multi-key dict is returned as a shallow copy; if it carries an
          ``exclude`` list, the items of that list are normalized.

        The input dictionary is never modified.
        """
        if not definition:
            # Nothing to normalize; also avoids indexing into an empty dict.
            return definition

        if len(definition) == 1:
            key, value = next(iter(definition.items()))
            value = cls._parse_list_value(value)
            if key == 'exclude':
                return {key: value}
            return {'method': key, 'value': value}

        # Multi-key form. Handle 'exclude' wherever it appears in the
        # mapping so the result does not depend on key order and no
        # sibling key is dropped.
        normalized = dict(definition)
        if 'exclude' in normalized:
            normalized['exclude'] = cls._parse_list_value(
                normalized['exclude'])
        return normalized

    @classmethod
    def parse_a_definition(cls, def_type, definition):
        """Normalize the list stored under ``definition[def_type]``.

        *def_type* is the key to read (``'union'`` or ``'intersection'``
        when called from ``parse_from_definition``). Dict and string
        elements are normalized; elements of any other type are kept
        unchanged. Returns a new ``{def_type: [...]}`` dictionary.
        """
        # this definition must be a list
        parsed = []
        for sel_def in definition[def_type]:
            if isinstance(sel_def, (dict, str)):
                sel_def = cls.parse_from_definition(sel_def)
            parsed.append(sel_def)
        return {def_type: parsed}

    @classmethod
    def parse_from_definition(cls, definition):
        """Normalize one selector definition of any supported shape.

        Strings are converted with
        ``SelectionCriteria.dict_from_single_spec``; dictionaries are
        dispatched on the presence of a ``union`` or ``intersection`` key.
        Values that are neither a string nor a dictionary are returned
        unchanged.
        """
        if isinstance(definition, str):
            return SelectionCriteria.dict_from_single_spec(definition)
        if not isinstance(definition, dict):
            # Only dictionaries can be inspected for keys below.
            return definition
        if 'union' in definition:
            return cls.parse_a_definition('union', definition)
        if 'intersection' in definition:
            return cls.parse_a_definition('intersection', definition)
        return cls.parse_dict_definition(definition)

    # This is the normal entrypoint of this code. Give it the
    # list of selectors generated from the selectors.yml file.
    @classmethod
    def parse_from_selectors_list(cls, selectors):
        """Build a ``{name: selector}`` mapping with normalized definitions.

        *selectors* is a list of dictionaries, each with at least a
        ``name`` and a ``definition`` key. Each entry is shallow-copied
        before its ``definition`` is replaced, so the caller's
        dictionaries are left untouched. If a name occurs more than once,
        the last entry wins.
        """
        selector_dict = {}
        for selector in selectors:
            sel_name = selector['name']
            normalized = dict(selector)
            normalized['definition'] = cls.parse_from_definition(
                selector['definition'])
            selector_dict[sel_name] = normalized
        return selector_dict
20 changes: 20 additions & 0 deletions core/dbt/graph/selector_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,26 @@ def from_dict(cls, raw: Any, dct: Dict[str, Any]) -> 'SelectionCriteria':
children_depth=children_depth,
)

@classmethod
def dict_from_single_spec(cls, raw: str):
    """Convert one cli-style selector string into a plain dictionary.

    The string is matched against ``RAW_SELECTOR_PATTERN``. When it does
    not match, ``{'error': 'Invalid selector spec'}`` is returned instead
    of raising. Otherwise the named groups of the match form the result:
    the ``method`` entry is replaced by the parsed method name (joined
    with its arguments by dots, when there are any), entries whose value
    is ``None`` or an empty string are dropped, and the
    ``childrens_parents``, ``parents`` and ``children`` entries that
    remain are converted to booleans.
    """
    match = RAW_SELECTOR_PATTERN.match(raw)
    if match is None:
        return {'error': 'Invalid selector spec'}

    groups: Dict[str, Any] = match.groupdict()
    method_name, method_arguments = cls.parse_method(groups)
    full_method = str(method_name)
    if method_arguments:
        full_method = '.'.join([full_method, '.'.join(method_arguments)])
    groups['method'] = full_method

    # Keep only the groups that actually captured something.
    spec: Dict[str, Any] = {}
    for key, val in groups.items():
        if val is None or val == '':
            continue
        spec[key] = val

    # These groups act as flags: keep only the fact that they were present.
    for flag in ('childrens_parents', 'parents', 'children'):
        if flag in spec:
            spec[flag] = bool(spec[flag])
    return spec

@classmethod
def from_single_spec(cls, raw: str) -> 'SelectionCriteria':
result = RAW_SELECTOR_PATTERN.match(raw)
Expand Down
115 changes: 115 additions & 0 deletions test/unit/test_manifest_selectors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
import dbt.exceptions
import textwrap
import yaml
import unittest
from dbt.config.selectors import SelectorDict


def get_selector_dict(txt: str) -> dict:
    """Dedent a YAML snippet and load it with ``yaml.safe_load``."""
    return yaml.safe_load(textwrap.dedent(txt))


class SelectorUnitTest(unittest.TestCase):

def test_compare_cli_non_cli(self):
# Defines the same selector twice: once with cli-style strings and once
# as a full YAML specification. After normalization both definitions
# are expected to compare equal.
dct = get_selector_dict('''\
selectors:
- name: nightly_diet_snowplow
description: "This uses more CLI-style syntax"
definition:
union:
- intersection:
- '@source:snowplow'
- 'tag:nightly'
- 'models/export'
- exclude:
- intersection:
- 'package:snowplow'
- 'config.materialized:incremental'
- export_performance_timing
- name: nightly_diet_snowplow_full
description: "This is a fuller YAML specification"
definition:
union:
- intersection:
- method: source
value: snowplow
childrens_parents: true
- method: tag
value: nightly
- method: path
value: models/export
- exclude:
- intersection:
- method: package
value: snowplow
- method: config.materialized
value: incremental
- method: fqn
value: export_performance_timing
''')

sel_dict = SelectorDict.parse_from_selectors_list(dct['selectors'])
assert(sel_dict)
# The cli-style definition must normalize to the explicit one.
with_strings = sel_dict['nightly_diet_snowplow']['definition']
no_strings = sel_dict['nightly_diet_snowplow_full']['definition']
self.assertEqual(with_strings, no_strings)
gshank marked this conversation as resolved.
Show resolved Hide resolved

def test_single_string_definition(self):
# A definition given as a bare cli-style string ('tag:nightly') is
# expected to normalize to a method/value dictionary.
dct = get_selector_dict('''\
selectors:
- name: nightly_selector
definition:
'tag:nightly'
''')

sel_dict = SelectorDict.parse_from_selectors_list(dct['selectors'])
assert(sel_dict)
expected = {'method': 'tag', 'value': 'nightly'}
definition = sel_dict['nightly_selector']['definition']
self.assertEqual(expected, definition)


def test_single_key_value_definition(self):
# A definition given as a single 'key: value' mapping (tag: nightly) is
# expected to normalize to the same method/value dictionary as the
# string form.
dct = get_selector_dict('''\
selectors:
- name: nightly_selector
definition:
tag: nightly
''')

sel_dict = SelectorDict.parse_from_selectors_list(dct['selectors'])
assert(sel_dict)
expected = {'method': 'tag', 'value': 'nightly'}
definition = sel_dict['nightly_selector']['definition']
self.assertEqual(expected, definition)

def test_parent_definition(self):
# A leading '+' on the cli-style string is expected to show up as
# 'parents': True next to the method and value.
dct = get_selector_dict('''\
selectors:
- name: kpi_nightly_selector
definition:
'+exposure:kpi_nightly'
''')

sel_dict = SelectorDict.parse_from_selectors_list(dct['selectors'])
assert(sel_dict)
expected = {'method': 'exposure', 'value': 'kpi_nightly', 'parents': True}
definition = sel_dict['kpi_nightly_selector']['definition']
self.assertEqual(expected, definition)

def test_plus_definition(self):
# 'my_model+2' names no method. The expected result uses method 'fqn',
# sets 'children' to True and keeps the depth as the string '2'.
dct = get_selector_dict('''\
selectors:
- name: my_model_children_selector
definition:
'my_model+2'
''')

sel_dict = SelectorDict.parse_from_selectors_list(dct['selectors'])
assert(sel_dict)
expected = {'method': 'fqn', 'value': 'my_model', 'children': True, 'children_depth': '2'}
definition = sel_dict['my_model_children_selector']['definition']
self.assertEqual(expected, definition)