Skip to content

Commit

Permalink
Merge pull request #2895 from fishtown-analytics/string_selectors
Browse files Browse the repository at this point in the history
convert cli-style strings in selectors to normalized dictionaries
  • Loading branch information
gshank authored Nov 18, 2020
2 parents ec0f3d2 + bb83435 commit a8765d5
Show file tree
Hide file tree
Showing 5 changed files with 206 additions and 6 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
- Added native python 're' module for regex in jinja templates [#2851](https://github.com/fishtown-analytics/dbt/pull/2851)
- Store resolved node names in manifest ([#2647](https://github.com/fishtown-analytics/dbt/issues/2647), [#2837](https://github.com/fishtown-analytics/dbt/pull/2837))
- Save selectors dictionary to manifest, allow descriptions ([#2693](https://github.com/fishtown-analytics/dbt/issues/2693), [#2866](https://github.com/fishtown-analytics/dbt/pull/2866))
- Normalize cli-style-strings in manifest selectors dictionary ([#2879](https://github.com/fishtown-analytics/dbt/issues/2879), [#2895](https://github.com/fishtown-analytics/dbt/pull/2895))

### Fixes
- Respect --project-dir in dbt clean command ([#2840](https://github.com/fishtown-analytics/dbt/issues/2840), [#2841](https://github.com/fishtown-analytics/dbt/pull/2841))
Expand Down
11 changes: 5 additions & 6 deletions core/dbt/config/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from dbt.version import get_installed_version
from dbt.utils import MultiDict
from dbt.node_types import NodeType
from dbt.config.selectors import SelectorDict

from dbt.contracts.project import (
Project as ProjectContract,
Expand Down Expand Up @@ -369,15 +370,13 @@ def create_project(self, rendered: RenderComponents) -> 'Project':
query_comment = _query_comment_from_cfg(cfg.query_comment)

packages = package_config_from_data(rendered.packages_dict)
selectors = selector_config_from_data(rendered.selectors_dict)
manifest_selectors: Dict[str, Any] = {}
if rendered.selectors_dict:
if rendered.selectors_dict and rendered.selectors_dict['selectors']:
# this is a dict with a single key 'selectors' pointing to a list
# of dicts.
if rendered.selectors_dict['selectors']:
# for each selector dict, transform into 'name': { }
for sel in rendered.selectors_dict['selectors']:
manifest_selectors[sel['name']] = sel
selectors = selector_config_from_data(rendered.selectors_dict)
manifest_selectors = SelectorDict.parse_from_selectors_list(
rendered.selectors_dict['selectors'])

project = Project(
project_name=name,
Expand Down
65 changes: 65 additions & 0 deletions core/dbt/config/selectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from dbt.contracts.selection import SelectorFile
from dbt.exceptions import DbtSelectorsError, RuntimeException
from dbt.graph import parse_from_selectors_definition, SelectionSpec
from dbt.graph.selector_spec import SelectionCriteria

MALFORMED_SELECTOR_ERROR = """\
The selectors.yml file in this project is malformed. Please double check
Expand Down Expand Up @@ -113,3 +114,67 @@ def selector_config_from_data(
result_type='invalid_selector',
) from e
return selectors


# These are utilities to clean up the dictionary created from
# selectors.yml by turning the cli-string format entries into
# normalized dictionary entries. It parallels the flow in
# dbt/graph/cli.py. If changes are made there, it might
# be necessary to make changes here. Ideally it would be
# good to combine the two flows into one at some point.
class SelectorDict:
    """Normalize selector definitions taken from a selectors list.

    The class is a namespace of classmethods. String entries are expanded
    through ``SelectionCriteria.dict_from_single_spec`` and single
    ``key: value`` dictionaries are rewritten as
    ``{'method': key, 'value': value}``, so that string-style and
    dictionary-style definitions end up in the same shape.
    """

    @classmethod
    def parse_dict_definition(cls, definition):
        """Normalize a dictionary that is not a union or an intersection.

        Only the first key of ``definition`` is inspected:

        * if its value is a list, every item is normalized recursively;
        * an ``exclude`` key yields ``{'exclude': value}``;
        * a dictionary with exactly one key yields
          ``{'method': key, 'value': value}``;
        * anything else is returned unchanged.
        """
        if not definition:
            # An empty dictionary has no key to inspect; hand it back
            # untouched instead of failing with an IndexError.
            return definition
        key = next(iter(definition))
        value = definition[key]
        if isinstance(value, list):
            value = [cls.parse_from_definition(item) for item in value]
        if key == 'exclude':
            return {key: value}
        if len(definition) == 1:
            return {'method': key, 'value': value}
        return definition

    @classmethod
    def parse_a_definition(cls, def_type, definition):
        """Normalize every entry of the list stored under ``def_type``.

        ``def_type`` is the key to read (``'union'`` or ``'intersection'``
        when called from ``parse_from_definition``). The result is a new
        dictionary holding only that key.
        """
        return {
            def_type: [
                cls.parse_from_definition(entry)
                for entry in definition[def_type]
            ]
        }

    @classmethod
    def parse_from_definition(cls, definition):
        """Normalize one definition of any supported shape.

        Strings are converted with
        ``SelectionCriteria.dict_from_single_spec``; dictionaries are
        dispatched on the presence of a ``union`` or ``intersection`` key;
        values of any other type are returned unchanged.
        """
        if isinstance(definition, str):
            return SelectionCriteria.dict_from_single_spec(definition)
        if not isinstance(definition, dict):
            # Not something this normalizer understands (e.g. None);
            # leave it alone rather than raising on the key checks below.
            return definition
        if 'union' in definition:
            return cls.parse_a_definition('union', definition)
        if 'intersection' in definition:
            return cls.parse_a_definition('intersection', definition)
        return cls.parse_dict_definition(definition)

    # This is the normal entrypoint of this code. Give it the
    # list of selectors generated from the selectors.yml file.
    @classmethod
    def parse_from_selectors_list(cls, selectors):
        """Build a ``{name: selector}`` mapping with normalized definitions.

        Each item of ``selectors`` must provide ``'name'`` and
        ``'definition'`` keys (``KeyError`` otherwise). Every selector is
        shallow-copied before its ``'definition'`` is replaced, so the
        dictionaries passed in by the caller are not modified.
        """
        selector_dict = {}
        for selector in selectors:
            sel_name = selector['name']
            normalized = dict(selector)
            normalized['definition'] = cls.parse_from_definition(
                selector['definition'])
            selector_dict[sel_name] = normalized
        return selector_dict
20 changes: 20 additions & 0 deletions core/dbt/graph/selector_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,26 @@ def from_dict(cls, raw: Any, dct: Dict[str, Any]) -> 'SelectionCriteria':
children_depth=children_depth,
)

@classmethod
def dict_from_single_spec(cls, raw: str):
    """Convert one raw selector string into a plain dictionary.

    ``raw`` is matched against ``RAW_SELECTOR_PATTERN``. When it does not
    match, ``{'error': 'Invalid selector spec'}`` is returned. Otherwise
    the named groups of the match form the result, with these adjustments:

    * ``'method'`` is set to the method name from ``cls.parse_method``,
      followed by its arguments joined with ``'.'`` when there are any;
    * entries whose value is ``None`` or ``''`` are dropped;
    * ``'childrens_parents'``, ``'parents'`` and ``'children'``, when
      still present, are converted to ``bool``.
    """
    match = RAW_SELECTOR_PATTERN.match(raw)
    if match is None:
        return {'error': 'Invalid selector spec'}

    groups: Dict[str, Any] = match.groupdict()
    method_name, method_arguments = cls.parse_method(groups)

    # Fold any method arguments back into a dotted method name.
    qualified_method = str(method_name)
    if method_arguments:
        qualified_method = '.'.join([qualified_method, *method_arguments])
    groups['method'] = qualified_method

    # Keep only the groups that actually captured something.
    spec = {
        name: captured
        for name, captured in groups.items()
        if captured is not None and captured != ''
    }

    # Graph-operator groups are reported as flags, not as captured text.
    for flag in ('childrens_parents', 'parents', 'children'):
        if flag in spec:
            spec[flag] = bool(spec[flag])
    return spec

@classmethod
def from_single_spec(cls, raw: str) -> 'SelectionCriteria':
result = RAW_SELECTOR_PATTERN.match(raw)
Expand Down
115 changes: 115 additions & 0 deletions test/unit/test_manifest_selectors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
import dbt.exceptions
import textwrap
import yaml
import unittest
from dbt.config.selectors import SelectorDict


def get_selector_dict(txt: str) -> dict:
    """Strip common leading whitespace from YAML text and parse it."""
    return yaml.safe_load(textwrap.dedent(txt))


class SelectorUnitTest(unittest.TestCase):
    """Tests for ``SelectorDict.parse_from_selectors_list``."""

    def _parse(self, txt):
        """Load YAML text and return the normalized selectors mapping."""
        dct = get_selector_dict(txt)
        sel_dict = SelectorDict.parse_from_selectors_list(dct['selectors'])
        # unittest assertion instead of a bare ``assert``: it is not
        # stripped under ``python -O`` and reports through the test runner.
        self.assertTrue(sel_dict)
        return sel_dict

    def test_compare_cli_non_cli(self):
        # The same selector written with CLI-style strings and with full
        # dictionaries must normalize to the same definition.
        sel_dict = self._parse('''\
            selectors:
              - name: nightly_diet_snowplow
                description: "This uses more CLI-style syntax"
                definition:
                  union:
                    - intersection:
                        - '@source:snowplow'
                        - 'tag:nightly'
                    - 'models/export'
                    - exclude:
                        - intersection:
                            - 'package:snowplow'
                            - 'config.materialized:incremental'
                        - export_performance_timing
              - name: nightly_diet_snowplow_full
                description: "This is a fuller YAML specification"
                definition:
                  union:
                    - intersection:
                        - method: source
                          value: snowplow
                          childrens_parents: true
                        - method: tag
                          value: nightly
                    - method: path
                      value: models/export
                    - exclude:
                        - intersection:
                            - method: package
                              value: snowplow
                            - method: config.materialized
                              value: incremental
                        - method: fqn
                          value: export_performance_timing
            ''')

        with_strings = sel_dict['nightly_diet_snowplow']['definition']
        no_strings = sel_dict['nightly_diet_snowplow_full']['definition']
        self.assertEqual(with_strings, no_strings)

    def test_single_string_definition(self):
        sel_dict = self._parse('''\
            selectors:
              - name: nightly_selector
                definition:
                  'tag:nightly'
            ''')

        expected = {'method': 'tag', 'value': 'nightly'}
        definition = sel_dict['nightly_selector']['definition']
        self.assertEqual(expected, definition)

    def test_single_key_value_definition(self):
        sel_dict = self._parse('''\
            selectors:
              - name: nightly_selector
                definition:
                  tag: nightly
            ''')

        expected = {'method': 'tag', 'value': 'nightly'}
        definition = sel_dict['nightly_selector']['definition']
        self.assertEqual(expected, definition)

    def test_parent_definition(self):
        sel_dict = self._parse('''\
            selectors:
              - name: kpi_nightly_selector
                definition:
                  '+exposure:kpi_nightly'
            ''')

        expected = {
            'method': 'exposure',
            'value': 'kpi_nightly',
            'parents': True,
        }
        definition = sel_dict['kpi_nightly_selector']['definition']
        self.assertEqual(expected, definition)

    def test_plus_definition(self):
        sel_dict = self._parse('''\
            selectors:
              - name: my_model_children_selector
                definition:
                  'my_model+2'
            ''')

        expected = {
            'method': 'fqn',
            'value': 'my_model',
            'children': True,
            'children_depth': '2',
        }
        definition = sel_dict['my_model_children_selector']['definition']
        self.assertEqual(expected, definition)

0 comments on commit a8765d5

Please sign in to comment.