From 49b80aa1249a7d734127af59bbadfac54e91b470 Mon Sep 17 00:00:00 2001
From: Tobie Tusing
Date: Fri, 9 Aug 2024 14:38:15 -0700
Subject: [PATCH] Improve tree traversal of select_children (#10526)

* update children search

* update search to include children in original selector

* add changie

* remove unused function

* fix wrong function call

* fix depth
---
 .../Under the Hood-20240809-130234.yaml       |  6 +++
 core/dbt/graph/graph.py                       | 38 +++++++++++++++----
 2 files changed, 36 insertions(+), 8 deletions(-)
 create mode 100644 .changes/unreleased/Under the Hood-20240809-130234.yaml

diff --git a/.changes/unreleased/Under the Hood-20240809-130234.yaml b/.changes/unreleased/Under the Hood-20240809-130234.yaml
new file mode 100644
index 00000000000..964dd2fedf2
--- /dev/null
+++ b/.changes/unreleased/Under the Hood-20240809-130234.yaml
@@ -0,0 +1,6 @@
+kind: Under the Hood
+body: Improve speed of tree traversal when finding children, increasing build speed for some selectors
+time: 2024-08-09T13:02:34.759905-07:00
+custom:
+  Author: ttusing
+  Issue: "10434"
diff --git a/core/dbt/graph/graph.py b/core/dbt/graph/graph.py
index 122e2f4d29a..9f108663dcd 100644
--- a/core/dbt/graph/graph.py
+++ b/core/dbt/graph/graph.py
@@ -58,18 +58,40 @@ def select_childrens_parents(self, selected: Set[UniqueId]) -> Set[UniqueId]:
     def select_children(
         self, selected: Set[UniqueId], max_depth: Optional[int] = None
     ) -> Set[UniqueId]:
-        descendants: Set[UniqueId] = set()
-        for node in selected:
-            descendants.update(self.descendants(node, max_depth))
-        return descendants
+        """Returns all nodes which are descendants of the 'selected' set.
+        Nodes in the 'selected' set are counted as children only if
+        they are descendants of other nodes in the 'selected' set."""
+        children: Set[UniqueId] = set()
+        i = 0
+        while len(selected) > 0 and (max_depth is None or i < max_depth):
+            next_layer: Set[UniqueId] = set()
+            for node in selected:
+                next_layer.update(self.descendants(node, 1))
+            next_layer = next_layer - children  # Avoid re-searching
+            children.update(next_layer)
+            selected = next_layer
+            i += 1
+
+        return children
 
     def select_parents(
         self, selected: Set[UniqueId], max_depth: Optional[int] = None
     ) -> Set[UniqueId]:
-        ancestors: Set[UniqueId] = set()
-        for node in selected:
-            ancestors.update(self.ancestors(node, max_depth))
-        return ancestors
+        """Returns all nodes which are ancestors of the 'selected' set.
+        Nodes in the 'selected' set are counted as parents only if
+        they are ancestors of other nodes in the 'selected' set."""
+        parents: Set[UniqueId] = set()
+        i = 0
+        while len(selected) > 0 and (max_depth is None or i < max_depth):
+            next_layer: Set[UniqueId] = set()
+            for node in selected:
+                next_layer.update(self.ancestors(node, 1))
+            next_layer = next_layer - parents  # Avoid re-searching
+            parents.update(next_layer)
+            selected = next_layer
+            i += 1
+
+        return parents
 
     def select_successors(self, selected: Set[UniqueId]) -> Set[UniqueId]:
         successors: Set[UniqueId] = set()