Skip to content

Commit

Permalink
feat: Implement the exclude_resource_type filter (#1580)
Browse files Browse the repository at this point in the history
Add the `exclude_resource_type` selector 

Closes: #1576
  • Loading branch information
AlexandrKhabarov authored Mar 3, 2025
1 parent ddea39c commit c84cb1e
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 2 deletions.
31 changes: 30 additions & 1 deletion cosmos/dbt/selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
CONFIG_SELECTOR = "config."
SOURCE_SELECTOR = "source:"
RESOURCE_TYPE_SELECTOR = "resource_type:"
EXCLUDE_RESOURCE_TYPE_SELECTOR = "exclude_resource_type:"
PLUS_SELECTOR = "+"
AT_SELECTOR = "@"
GRAPH_SELECTOR_REGEX = r"^(@|[0-9]*\+)?([^\+]+)(\+[0-9]*)?$|"
Expand All @@ -45,6 +46,7 @@ class GraphSelector:
+config.materialized:view
resource_type:resource_name
source:source_name
exclude_resource_type:resource_name
https://docs.getdbt.com/reference/node-selection/graph-operators
"""
Expand Down Expand Up @@ -290,6 +292,7 @@ def __init__(self, project_dir: Path | None, statement: str):
self.graph_selectors: list[GraphSelector] = []
self.sources: list[str] = []
self.resource_types: list[str] = []
self.exclude_resource_types: list[str] = []
self.load_from_statement(statement)

@property
Expand All @@ -302,6 +305,7 @@ def is_empty(self) -> bool:
or self.other
or self.sources
or self.resource_types
or self.exclude_resource_types
)

def load_from_statement(self, statement: str) -> None:
Expand Down Expand Up @@ -341,6 +345,8 @@ def _handle_no_precursors_or_descendants(self, item: str, node_name: str) -> Non
self._parse_source_selector(item)
elif node_name.startswith(RESOURCE_TYPE_SELECTOR):
self._parse_resource_type_selector(item)
elif node_name.startswith(EXCLUDE_RESOURCE_TYPE_SELECTOR):
self._parse_exclude_resource_type_selector(item)
else:
self._parse_unknown_selector(item)

Expand Down Expand Up @@ -375,13 +381,28 @@ def _parse_resource_type_selector(self, item: str) -> None:
resource_type_value = item[index:].strip()
self.resource_types.append(resource_type_value)

def _parse_exclude_resource_type_selector(self, item: str) -> None:
    """
    Record the resource type named by an ``exclude_resource_type:`` selector.

    The first ``len(EXCLUDE_RESOURCE_TYPE_SELECTOR)`` characters of ``item`` are dropped,
    surrounding whitespace is trimmed, and the remainder is appended to
    ``self.exclude_resource_types``.

    :param item: the raw selector text
    """
    prefix_length = len(EXCLUDE_RESOURCE_TYPE_SELECTOR)
    self.exclude_resource_types.append(item[prefix_length:].strip())

def _parse_source_selector(self, item: str) -> None:
    """
    Record the source named by a ``source:`` selector.

    The first ``len(SOURCE_SELECTOR)`` characters of ``item`` are dropped, surrounding
    whitespace is trimmed, and the remainder is appended to ``self.sources``.

    :param item: the raw selector text
    """
    prefix_length = len(SOURCE_SELECTOR)
    self.sources.append(item[prefix_length:].strip())

def __repr__(self) -> str:
return f"SelectorConfig(paths={self.paths}, tags={self.tags}, config={self.config}, sources={self.sources}, resource={self.resource_types}, other={self.other}, graph_selectors={self.graph_selectors})"
return (
"SelectorConfig("
+ f"paths={self.paths}, "
+ f"tags={self.tags}, "
+ f"config={self.config}, "
+ f"sources={self.sources}, "
+ f"resource={self.resource_types}, "
+ f"exclude_resource={self.exclude_resource_types}, "
+ f"other={self.other}, "
+ f"graph_selectors={self.graph_selectors})"
)


class NodeSelector:
Expand Down Expand Up @@ -474,6 +495,9 @@ def _should_include_node(self, node_id: str, node: DbtNode) -> bool:
if self.config.resource_types and not self._is_resource_type_matching(node):
return False

if self.config.exclude_resource_types and self._is_exclude_resource_type_matching(node):
return False

if self.config.sources and not self._is_source_matching(node):
return False

Expand All @@ -485,6 +509,10 @@ def _is_resource_type_matching(self, node: DbtNode) -> bool:
return False
return True

def _is_exclude_resource_type_matching(self, node: DbtNode) -> bool:
"""Checks if the node's resource type is a subset of the config's exclude resource type."""
return node.resource_type.value in self.config.exclude_resource_types

def _is_source_matching(self, node: DbtNode) -> bool:
"""Checks if the node's source is a subset of the config's source."""
if node.resource_type != DbtResourceType.SOURCE:
Expand Down Expand Up @@ -619,6 +647,7 @@ def validate_filters(exclude: list[str], select: list[str]) -> None:
filter_parameter.startswith(PATH_SELECTOR)
or filter_parameter.startswith(TAG_SELECTOR)
or filter_parameter.startswith(RESOURCE_TYPE_SELECTOR)
or filter_parameter.startswith(EXCLUDE_RESOURCE_TYPE_SELECTOR)
or filter_parameter.startswith(SOURCE_SELECTOR)
or PLUS_SELECTOR in filter_parameter
or any([filter_parameter.startswith(CONFIG_SELECTOR + config + ":") for config in SUPPORTED_CONFIG])
Expand Down
3 changes: 2 additions & 1 deletion docs/configuration/selecting-excluding.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ The ``select`` and ``exclude`` parameters are lists, with values like the follow
- ``@node_name`` (@ operator): include/exclude the node with name ``node_name``, all its descendants, and all ancestors of those descendants. This is useful in CI environments where you want to build a model and all its descendants, but you need the ancestors of those descendants to exist first.
- ``tag:my_tag,+node_name`` (intersection): include/exclude ``node_name`` and its parents if they have the tag ``my_tag`` (`dbt set operator docs <https://docs.getdbt.com/reference/node-selection/set-operators>`_)
- ``['tag:first_tag', 'tag:second_tag']`` (union): include/exclude nodes that have either ``tag:first_tag`` or ``tag:second_tag``
- ``resource_type:<resource>``: include/exclude nodes with the resource type ``seed, snapshots, model, test, source``. For example, ``resource_type:source`` returns only nodes where resource_type == SOURCE
- ``resource_type:<resource>``: include nodes with the given resource type, one of ``seed, snapshot, model, test, source``. For example, ``resource_type:source`` returns only nodes where resource_type == SOURCE
- ``exclude_resource_type:<resource>``: exclude nodes with the resource type ``analysis, exposure, metric, model, saved_query, seed, semantic_model, snapshot, source, test, unit_test``. For example, ``exclude_resource_type:source`` returns only nodes where resource_type != SOURCE
- ``source:my_source``: include/exclude nodes that have the source ``my_source`` and are of resource_type ``source``
- ``source:my_source+``: include/exclude nodes that have the source ``my_source`` and their children
- ``source:my_source.my_table``: include/exclude nodes that have the source ``my_source`` and the table ``my_table``
Expand Down
59 changes: 59 additions & 0 deletions tests/dbt/test_selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -802,6 +802,44 @@ def test_select_nodes_by_resource_type_source():
assert selected == expected


def test_select_nodes_by_exclude_resource_type_model():
    """
    Selecting with 'exclude_resource_type:model' must keep the added source node while
    leaving out the added model node and every entry of ``sample_nodes``.
    """
    project_name = SAMPLE_PROJ_PATH.stem

    source_node = DbtNode(
        unique_id=f"{DbtResourceType.SOURCE.value}.{project_name}.my_source.my_table",
        resource_type=DbtResourceType.SOURCE,
        depends_on=[],
        file_path=SAMPLE_PROJ_PATH / "sources/my_source.yml",
        tags=[],
        config={},
    )
    model_node = DbtNode(
        unique_id=f"{DbtResourceType.MODEL.value}.{project_name}.model_from_source",
        resource_type=DbtResourceType.MODEL,
        depends_on=[source_node.unique_id],
        file_path=SAMPLE_PROJ_PATH / "models/model_from_source.sql",
        tags=["depends_on_source"],
        config={"materialized": "table", "tags": ["depends_on_source"]},
    )

    # Work on a copy so the shared fixture is not mutated by this test.
    local_nodes = dict(sample_nodes)
    local_nodes[source_node.unique_id] = source_node
    local_nodes[model_node.unique_id] = model_node

    selected = select_nodes(
        project_dir=SAMPLE_PROJ_PATH,
        nodes=local_nodes,
        select=["exclude_resource_type:model"],
    )

    assert source_node.unique_id in selected
    assert model_node.unique_id not in selected
    for model_id in sample_nodes:
        assert model_id not in selected


def test_select_nodes_by_source_name():
"""
Test selecting a single source node by exact name 'source:my_source.my_table'.
Expand Down Expand Up @@ -849,6 +887,27 @@ def test_exclude_nodes_by_resource_type_seed():
assert model_id in selected


def test_exclude_nodes_by_exclude_resource_type_seed():
    """
    Passing 'exclude_resource_type:seed' through ``exclude`` must keep the added seed node
    and leave out every entry of ``sample_nodes``.
    """
    seed_node = DbtNode(
        unique_id=f"{DbtResourceType.SEED.value}.{SAMPLE_PROJ_PATH.stem}.my_seed",
        resource_type=DbtResourceType.SEED,
        depends_on=[],
        tags=[],
        config={},
        file_path=SAMPLE_PROJ_PATH / "models/my_seed.yml",
    )

    # Work on a copy so the shared fixture is not mutated by this test.
    local_nodes = dict(sample_nodes)
    local_nodes[seed_node.unique_id] = seed_node

    selected = select_nodes(
        project_dir=SAMPLE_PROJ_PATH,
        nodes=local_nodes,
        exclude=["exclude_resource_type:seed"],
    )

    assert seed_node.unique_id in selected
    for model_id in sample_nodes:
        assert model_id not in selected


def test_source_selector():
"""
Covers:
Expand Down

0 comments on commit c84cb1e

Please sign in to comment.