diff --git a/datatree/datatree.py b/datatree/datatree.py index f858cea..f2618d2 100644 --- a/datatree/datatree.py +++ b/datatree/datatree.py @@ -1242,8 +1242,13 @@ def filter(self: DataTree, filterfunc: Callable[[DataTree], bool]) -> DataTree: filterfunc: function A function which accepts only one DataTree - the node on which filterfunc will be called. + Returns + ------- + DataTree + See Also -------- + match pipe map_over_subtree """ @@ -1252,6 +1257,51 @@ def filter(self: DataTree, filterfunc: Callable[[DataTree], bool]) -> DataTree: } return DataTree.from_dict(filtered_nodes, name=self.root.name) + def match(self, pattern: str) -> DataTree: + """ + Return nodes with paths matching pattern. + + Uses unix glob-like syntax for pattern-matching. + + Parameters + ---------- + pattern: str + A pattern to match each node path against. + + Returns + ------- + DataTree + + See Also + -------- + filter + pipe + map_over_subtree + + Examples + -------- + >>> dt = DataTree.from_dict( + ... { + ... "/a/A": None, + ... "/a/B": None, + ... "/b/A": None, + ... "/b/B": None, + ... } + ... ) + >>> dt.match("*/B") + DataTree('None', parent=None) + ├── DataTree('a') + │ └── DataTree('B') + └── DataTree('b') + └── DataTree('B') + """ + matching_nodes = { + node.path: node.ds + for node in self.subtree + if NodePath(node.path).match(pattern) + } + return DataTree.from_dict(matching_nodes, name=self.root.name) + def map_over_subtree( self, func: Callable, diff --git a/datatree/tests/test_datatree.py b/datatree/tests/test_datatree.py index 726925f..26fd0e5 100644 --- a/datatree/tests/test_datatree.py +++ b/datatree/tests/test_datatree.py @@ -678,6 +678,25 @@ def f(x, tree, y): class TestSubset: + def test_match(self): + # TODO is this example going to cause problems with case sensitivity? 
+ dt = DataTree.from_dict( + { + "/a/A": None, + "/a/B": None, + "/b/A": None, + "/b/B": None, + } + ) + result = dt.match("*/B") + expected = DataTree.from_dict( + { + "/a/B": None, + "/b/B": None, + } + ) + dtt.assert_identical(result, expected) + def test_filter(self): simpsons = DataTree.from_dict( d={ diff --git a/docs/source/api.rst b/docs/source/api.rst index 9a34bdd..54a9863 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -102,6 +102,7 @@ For manipulating, traversing, navigating, or mapping over the tree structure. DataTree.find_common_ancestor map_over_subtree DataTree.pipe + DataTree.match DataTree.filter DataTree Contents diff --git a/docs/source/hierarchical-data.rst b/docs/source/hierarchical-data.rst index 7795c9e..f74a635 100644 --- a/docs/source/hierarchical-data.rst +++ b/docs/source/hierarchical-data.rst @@ -379,7 +379,23 @@ Subsetting Tree Nodes We can subset our tree to select only nodes of interest in various ways. -The :py:meth:`DataTree.filter` method can be used to retain only the nodes of a tree that meet a certain condition. +As on a real filesystem, matching nodes by common patterns in their paths is often useful. +We can use :py:meth:`DataTree.match` for this: + +.. ipython:: python + + dt = DataTree.from_dict( + { + "/a/A": None, + "/a/B": None, + "/b/A": None, + "/b/B": None, + } + ) + result = dt.match("*/B") + +We can also subset trees by the contents of the nodes. +:py:meth:`DataTree.filter` retains only the nodes of a tree that meet a certain condition. 
For example, we could recreate the Simpson's family tree with the ages of each individual, then filter for only the adults: First lets recreate the tree but with an `age` data variable in every node: diff --git a/docs/source/whats-new.rst b/docs/source/whats-new.rst index 4af1691..eb2c034 100644 --- a/docs/source/whats-new.rst +++ b/docs/source/whats-new.rst @@ -23,6 +23,8 @@ v0.0.13 (unreleased) New Features ~~~~~~~~~~~~ +- New :py:meth:`DataTree.match` method for glob-like pattern matching of node paths. (:pull:`267`) + By `Tom Nicholas <https://github.com/TomNicholas>`_. - Indicate which node caused the problem if error encountered while applying user function using :py:func:`map_over_subtree` (:issue:`190`, :pull:`264`). Only works when using python 3.11 or later. By `Tom Nicholas <https://github.com/TomNicholas>`_.