From 8f41453f7bbed9af7d24d229880207eef36ac301 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Thu, 28 Oct 2021 11:44:41 +0200 Subject: [PATCH] Use enum for IndirectSelection --- core/dbt/flags.py | 6 +++--- core/dbt/graph/cli.py | 15 ++++++++------- core/dbt/graph/selector.py | 17 ++++++++++++----- core/dbt/graph/selector_spec.py | 20 +++++++++++++------- core/dbt/main.py | 1 + 5 files changed, 37 insertions(+), 22 deletions(-) diff --git a/core/dbt/flags.py b/core/dbt/flags.py index a05c7d0ecd8..011ca434a67 100644 --- a/core/dbt/flags.py +++ b/core/dbt/flags.py @@ -17,7 +17,7 @@ STRICT_MODE = False # Only here for backwards compatibility FULL_REFRESH = False # subcommand STORE_FAILURES = False # subcommand -EAGER_INDIRECT_SELECTION = True # subcommand +INDIRECT_SELECTION = 'eager' # subcommand # Global CLI commands USE_EXPERIMENTAL_PARSER = None @@ -95,7 +95,7 @@ def _get_context(): def set_from_args(args, user_config): global STRICT_MODE, FULL_REFRESH, WARN_ERROR, \ USE_EXPERIMENTAL_PARSER, STATIC_PARSER, WRITE_JSON, PARTIAL_PARSE, \ - USE_COLORS, STORE_FAILURES, PROFILES_DIR, DEBUG, LOG_FORMAT, EAGER_INDIRECT_SELECTION, \ + USE_COLORS, STORE_FAILURES, PROFILES_DIR, DEBUG, LOG_FORMAT, INDIRECT_SELECTION, \ VERSION_CHECK, FAIL_FAST, SEND_ANONYMOUS_USAGE_STATS, PRINTER_WIDTH, \ WHICH @@ -103,7 +103,7 @@ def set_from_args(args, user_config): # cli args without user_config or env var option FULL_REFRESH = getattr(args, 'full_refresh', FULL_REFRESH) STORE_FAILURES = getattr(args, 'store_failures', STORE_FAILURES) - EAGER_INDIRECT_SELECTION = getattr(args, 'indirect_selection', 'eager') != 'cautious' + INDIRECT_SELECTION = getattr(args, 'indirect_selection', INDIRECT_SELECTION) WHICH = getattr(args, 'which', WHICH) # global cli flags with env var and user_config alternatives diff --git a/core/dbt/graph/cli.py b/core/dbt/graph/cli.py index 9ac3c011c84..a0e9e980c9d 100644 --- a/core/dbt/graph/cli.py +++ b/core/dbt/graph/cli.py @@ -16,6 +16,7 @@ SelectionIntersection, 
SelectionDifference, SelectionCriteria, + IndirectSelection ) INTERSECTION_DELIMITER = ',' @@ -25,7 +26,7 @@ def parse_union( - components: List[str], expect_exists: bool, eagerly_expand: bool = True + components: List[str], expect_exists: bool, indirect_selection: IndirectSelection = IndirectSelection.Eager ) -> SelectionUnion: # turn ['a b', 'c'] -> ['a', 'b', 'c'] raw_specs = itertools.chain.from_iterable( @@ -36,7 +37,7 @@ def parse_union( # ['a', 'b', 'c,d'] -> union('a', 'b', intersection('c', 'd')) for raw_spec in raw_specs: intersection_components: List[SelectionSpec] = [ - SelectionCriteria.from_single_spec(part, eagerly_expand=eagerly_expand) + SelectionCriteria.from_single_spec(part, indirect_selection=indirect_selection) for part in raw_spec.split(INTERSECTION_DELIMITER) ] union_components.append(SelectionIntersection( @@ -52,14 +53,14 @@ def parse_union( def parse_union_from_default( - raw: Optional[List[str]], default: List[str], eagerly_expand: bool = True + raw: Optional[List[str]], default: List[str], indirect_selection: IndirectSelection = IndirectSelection.Eager ) -> SelectionUnion: components: List[str] expect_exists: bool if raw is None: - return parse_union(components=default, expect_exists=False, eagerly_expand=eagerly_expand) + return parse_union(components=default, expect_exists=False, indirect_selection=indirect_selection) else: - return parse_union(components=raw, expect_exists=True, eagerly_expand=eagerly_expand) + return parse_union(components=raw, expect_exists=True, indirect_selection=indirect_selection) def parse_difference( @@ -68,9 +69,9 @@ def parse_difference( included = parse_union_from_default( include, DEFAULT_INCLUDES, - eagerly_expand=flags.EAGER_INDIRECT_SELECTION + indirect_selection=IndirectSelection(flags.INDIRECT_SELECTION) ) - excluded = parse_union_from_default(exclude, DEFAULT_EXCLUDES, eagerly_expand=True) + excluded = parse_union_from_default(exclude, DEFAULT_EXCLUDES, indirect_selection=IndirectSelection.Eager) 
return SelectionDifference(components=[included, excluded]) diff --git a/core/dbt/graph/selector.py b/core/dbt/graph/selector.py index 369f07f107b..69e24e9e99b 100644 --- a/core/dbt/graph/selector.py +++ b/core/dbt/graph/selector.py @@ -3,7 +3,7 @@ from .graph import Graph, UniqueId from .queue import GraphQueue from .selector_methods import MethodManager -from .selector_spec import SelectionCriteria, SelectionSpec +from .selector_spec import SelectionCriteria, SelectionSpec, IndirectSelection from dbt.logger import GLOBAL_LOGGER as logger from dbt.node_types import NodeType @@ -95,7 +95,7 @@ def get_nodes_from_criteria( neighbors = self.collect_specified_neighbors(spec, collected) direct_nodes, indirect_nodes = self.expand_selection( selected=(collected | neighbors), - eagerly_expand=spec.eagerly_expand + indirect_selection=spec.indirect_selection ) return direct_nodes, indirect_nodes @@ -199,7 +199,7 @@ def filter_selection(self, selected: Set[UniqueId]) -> Set[UniqueId]: } def expand_selection( - self, selected: Set[UniqueId], eagerly_expand: bool = True + self, selected: Set[UniqueId], indirect_selection: IndirectSelection = IndirectSelection.Eager ) -> Tuple[Set[UniqueId], Set[UniqueId]]: # Test selection by default expands to include an implicitly/indirectly selected tests. # `dbt test -m model_a` also includes tests that directly depend on `model_a`. @@ -212,7 +212,7 @@ def expand_selection( # - If ANY parent is missing, return it separately. We'll keep it around # for later and see if its other parents show up. # Users can opt out of inclusive EAGER mode by passing --indirect-selection cautious - # CLI argument or by specifying `eagerly_expand: true` in a yaml selector + # CLI argument or by specifying `indirect_selection: cautious` in a yaml selector direct_nodes = set(selected) indirect_nodes = set() @@ -222,7 +222,10 @@ def expand_selection( node = self.manifest.nodes[unique_id] if can_select_indirectly(node): # should we add it in directly? 
- if eagerly_expand or set(node.depends_on.nodes) <= set(selected): + if ( + indirect_selection == IndirectSelection.Eager + or set(node.depends_on.nodes) <= set(selected) + ): direct_nodes.add(unique_id) # if not: else: @@ -236,6 +239,10 @@ def incorporate_indirect_nodes( # Check tests previously selected indirectly to see if ALL their # parents are now present. + # performance: if identical, skip the processing below + if set(direct_nodes) == set(indirect_nodes): + return direct_nodes + selected = set(direct_nodes) for unique_id in indirect_nodes: diff --git a/core/dbt/graph/selector_spec.py b/core/dbt/graph/selector_spec.py index 31e6a71c621..58b53afb002 100644 --- a/core/dbt/graph/selector_spec.py +++ b/core/dbt/graph/selector_spec.py @@ -1,7 +1,9 @@ import os import re +import enum from abc import ABCMeta, abstractmethod from dataclasses import dataclass +from dbt.dataclass_schema import StrEnum from typing import ( Set, Iterator, List, Optional, Dict, Union, Any, Iterable, Tuple @@ -21,6 +23,10 @@ ) SELECTOR_METHOD_SEPARATOR = '.' +class IndirectSelection(StrEnum): + Eager = 'eager' + Cautious = 'cautious' + def _probably_path(value: str): """Decide if value is probably a path. 
Windows has two path separators, so @@ -66,7 +72,7 @@ class SelectionCriteria: parents_depth: Optional[int] children: bool children_depth: Optional[int] - eagerly_expand: bool = True + indirect_selection: IndirectSelection = IndirectSelection.Eager def __post_init__(self): if self.children and self.childrens_parents: @@ -104,7 +110,7 @@ def parse_method( @classmethod def selection_criteria_from_dict( - cls, raw: Any, dct: Dict[str, Any], eagerly_expand: bool = True + cls, raw: Any, dct: Dict[str, Any], indirect_selection: IndirectSelection = IndirectSelection.Eager ) -> 'SelectionCriteria': if 'value' not in dct: raise RuntimeException( @@ -124,7 +130,7 @@ def selection_criteria_from_dict( parents_depth=parents_depth, children=bool(dct.get('children')), children_depth=children_depth, - eagerly_expand=(eagerly_expand or bool(dct.get('eagerly_expand'))), + indirect_selection=(IndirectSelection(dct.get('indirect_selection') or indirect_selection)), ) @classmethod @@ -145,12 +151,12 @@ def dict_from_single_spec(cls, raw: str): dct['parents'] = bool(dct.get('parents')) if 'children' in dct: dct['children'] = bool(dct.get('children')) - if 'eagerly_expand' in dct: - dct['eagerly_expand'] = bool(dct.get('eagerly_expand')) + if 'indirect_selection' in dct: + dct['indirect_selection'] = bool(dct.get('indirect_selection')) return dct @classmethod - def from_single_spec(cls, raw: str, eagerly_expand: bool = True) -> 'SelectionCriteria': + def from_single_spec(cls, raw: str, indirect_selection: bool = True) -> 'SelectionCriteria': result = RAW_SELECTOR_PATTERN.match(raw) if result is None: # bad spec! 
@@ -159,7 +165,7 @@ def from_single_spec(cls, raw: str, eagerly_expand: bool = True) -> 'SelectionCr return cls.selection_criteria_from_dict( raw, result.groupdict(), - eagerly_expand=eagerly_expand + indirect_selection=indirect_selection ) diff --git a/core/dbt/main.py b/core/dbt/main.py index 43c75b10eb6..43a0547dc52 100644 --- a/core/dbt/main.py +++ b/core/dbt/main.py @@ -740,6 +740,7 @@ def _build_test_subparser(subparsers, base_subparser): sub.add_argument( '--indirect-selection', choices=['eager', 'cautious'], + default='eager', dest='indirect_selection', help=''' Select all tests that are adjacent to selected resources,