From 49972d0fe648a2cca7edb73666a72287364fff20 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 24 Oct 2023 11:41:35 -0400 Subject: [PATCH 01/10] test --- datatree/tests/test_datatree.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/datatree/tests/test_datatree.py b/datatree/tests/test_datatree.py index 56ca1cbf..61ec27e0 100644 --- a/datatree/tests/test_datatree.py +++ b/datatree/tests/test_datatree.py @@ -678,6 +678,25 @@ def f(x, tree, y): class TestSubset: + def test_match(self): + # TODO is this example going to cause problems with case sensitivity? + dt = DataTree.from_dict( + { + "/a/A": None, + "/a/B": None, + "/b/A": None, + "/a/B": None, + } + ) + result = dt.match("*/B") + expected = DataTree.from_dict( + { + "/a/B": None, + "/a/B": None, + } + ) + dtt.assert_identical(result, expected) + def test_filter(self): simpsons = DataTree.from_dict( d={ From 32de35edf128f50287fd14c30e7631d31712a082 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 24 Oct 2023 11:41:47 -0400 Subject: [PATCH 02/10] implementation --- datatree/datatree.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/datatree/datatree.py b/datatree/datatree.py index b6bf8ac0..6a06a642 100644 --- a/datatree/datatree.py +++ b/datatree/datatree.py @@ -1177,6 +1177,7 @@ def filter(self: DataTree, filterfunc: Callable[[DataTree], bool]) -> DataTree: See Also -------- + search pipe map_over_subtree """ @@ -1185,6 +1186,28 @@ def filter(self: DataTree, filterfunc: Callable[[DataTree], bool]) -> DataTree: } return DataTree.from_dict(filtered_nodes, name=self.root.name) + def match(self, pattern: str) -> DataTree: + """ + Return nodes with paths matching pattern. + + Uses unix glob-like syntax for pattern-matching. + + Parameters + ---------- + pattern: str + A pattern to match each node path against. 
+ + See Also + -------- + filter + pipe + map_over_subtree + """ + matching_nodes = { + node.path: node.ds for node in self.subtree if NodePath(node.path).match(pattern) + } + return DataTree.from_dict(matching_nodes, name=self.root.name) + def map_over_subtree( self, func: Callable, From a62b562b1d4c6393099f2288ed86e9591fdc6ae0 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 24 Oct 2023 11:42:06 -0400 Subject: [PATCH 03/10] documentation --- docs/source/hierarchical-data.rst | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/docs/source/hierarchical-data.rst b/docs/source/hierarchical-data.rst index 7795c9e2..7cb6f5cc 100644 --- a/docs/source/hierarchical-data.rst +++ b/docs/source/hierarchical-data.rst @@ -379,7 +379,23 @@ Subsetting Tree Nodes We can subset our tree to select only nodes of interest in various ways. -The :py:meth:`DataTree.filter` method can be used to retain only the nodes of a tree that meet a certain condition. +As on a real filesystem, it is often useful to match nodes by common patterns in their paths. +We can use :py:meth:`DataTree.match` for this: + +.. ipython:: python + + dt = DataTree.from_dict( + { + "/a/A": None, + "/a/B": None, + "/b/A": None, + "/a/B": None, + } + ) + result = dt.match("*/B") + +We can also subset trees by the contents of the nodes. +:py:meth:`DataTree.filter` retains only the nodes of a tree that meet a certain condition. 
For example, we could recreate the Simpson's family tree with the ages of each individual, then filter for only the adults: First lets recreate the tree but with an `age` data variable in every node: From 6dde34ee6733bde79b2080f8c52a84b3f14ede03 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 24 Oct 2023 15:43:46 +0000 Subject: [PATCH 04/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- datatree/datatree.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/datatree/datatree.py b/datatree/datatree.py index 6a06a642..26b33044 100644 --- a/datatree/datatree.py +++ b/datatree/datatree.py @@ -1204,7 +1204,9 @@ def match(self, pattern: str) -> DataTree: map_over_subtree """ matching_nodes = { - node.path: node.ds for node in self.subtree if NodePath(node.path).match(pattern) + node.path: node.ds + for node in self.subtree + if NodePath(node.path).match(pattern) } return DataTree.from_dict(matching_nodes, name=self.root.name) From caf55944cd562042aff1c6873d7fce67a4a5f57a Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 24 Oct 2023 11:44:00 -0400 Subject: [PATCH 05/10] whatsnew --- docs/source/whats-new.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/source/whats-new.rst b/docs/source/whats-new.rst index 5d70f914..5b417043 100644 --- a/docs/source/whats-new.rst +++ b/docs/source/whats-new.rst @@ -23,6 +23,8 @@ v0.0.13 (unreleased) New Features ~~~~~~~~~~~~ +- New :py:meth:`DataTree.match` method for glob-like pattern matching of node paths. (:pull:`267`) + By `Tom Nicholas <https://github.com/TomNicholas>`_. - Indicate which node caused the problem if error encountered while applying user function using :py:func:`map_over_subtree` (:issue:`190`, :pull:`264`). Only works when using python 3.11 or later. By `Tom Nicholas <https://github.com/TomNicholas>`_. 
From 8f63e3afa59c6c932492cbbb9602fbf0b7b4c378 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 24 Oct 2023 11:45:06 -0400 Subject: [PATCH 06/10] API --- docs/source/api.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/api.rst b/docs/source/api.rst index 9a34bdd0..54a98639 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -102,6 +102,7 @@ For manipulating, traversing, navigating, or mapping over the tree structure. DataTree.find_common_ancestor map_over_subtree DataTree.pipe + DataTree.match DataTree.filter DataTree Contents From 85c25031c77e9974d1076ae7b34f8839bfde5036 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 24 Oct 2023 11:51:46 -0400 Subject: [PATCH 07/10] correct faulty test --- datatree/datatree.py | 26 ++++++++++++++++++++++++++ datatree/tests/test_datatree.py | 4 ++-- docs/source/hierarchical-data.rst | 2 +- 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/datatree/datatree.py b/datatree/datatree.py index 26b33044..9dc0d5cf 100644 --- a/datatree/datatree.py +++ b/datatree/datatree.py @@ -1175,6 +1175,10 @@ def filter(self: DataTree, filterfunc: Callable[[DataTree], bool]) -> DataTree: filterfunc: function A function which accepts only one DataTree - the node on which filterfunc will be called. + Returns + ------- + DataTree + See Also -------- search @@ -1197,11 +1201,33 @@ def match(self, pattern: str) -> DataTree: pattern: str A pattern to match each node path against. 
+ Returns + ------- + DataTree + See Also -------- filter pipe map_over_subtree + + Examples + -------- + + >>> dt = DataTree.from_dict( + >>> { + >>> "/a/A": None, + >>> "/a/B": None, + >>> "/b/A": None, + >>> "/b/B": None, + >>> } + >>> ) + >>> dt.match("*/B") + DataTree('None', parent=None) + ├── DataTree('a') + │ └── DataTree('B') + └── DataTree('b') + └── DataTree('B') """ matching_nodes = { node.path: node.ds diff --git a/datatree/tests/test_datatree.py b/datatree/tests/test_datatree.py index 61ec27e0..9dd601c8 100644 --- a/datatree/tests/test_datatree.py +++ b/datatree/tests/test_datatree.py @@ -685,14 +685,14 @@ def test_match(self): "/a/A": None, "/a/B": None, "/b/A": None, - "/a/B": None, + "/b/B": None, } ) result = dt.match("*/B") expected = DataTree.from_dict( { "/a/B": None, - "/a/B": None, + "/b/B": None, } ) dtt.assert_identical(result, expected) diff --git a/docs/source/hierarchical-data.rst b/docs/source/hierarchical-data.rst index 7cb6f5cc..f74a635d 100644 --- a/docs/source/hierarchical-data.rst +++ b/docs/source/hierarchical-data.rst @@ -389,7 +389,7 @@ We can use :py:meth:`DataTree.match` for this: "/a/A": None, "/a/B": None, "/b/A": None, - "/a/B": None, + "/b/B": None, } ) result = dt.match("*/B") From f8bd1b7ea2d74f3462ac5e91d197d8db311d5abc Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 24 Oct 2023 11:54:53 -0400 Subject: [PATCH 08/10] remove newline --- datatree/datatree.py | 1 - 1 file changed, 1 deletion(-) diff --git a/datatree/datatree.py b/datatree/datatree.py index 9dc0d5cf..d0cd0fb2 100644 --- a/datatree/datatree.py +++ b/datatree/datatree.py @@ -1213,7 +1213,6 @@ def match(self, pattern: str) -> DataTree: Examples -------- - >>> dt = DataTree.from_dict( >>> { >>> "/a/A": None, From 29773cf874a441282b1292d046e809839ebed1d0 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Tue, 24 Oct 2023 11:55:42 -0400 Subject: [PATCH 09/10] search-> match --- datatree/datatree.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/datatree/datatree.py b/datatree/datatree.py index d0cd0fb2..9c87a73e 100644 --- a/datatree/datatree.py +++ b/datatree/datatree.py @@ -1181,7 +1181,7 @@ def filter(self: DataTree, filterfunc: Callable[[DataTree], bool]) -> DataTree: See Also -------- - search + match pipe map_over_subtree """ From 767c1a7b3251774617e4d320a2aca9bdf9e6e29f Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 24 Oct 2023 13:32:44 -0400 Subject: [PATCH 10/10] format continuation lines correctly --- datatree/datatree.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/datatree/datatree.py b/datatree/datatree.py index 9c87a73e..9f188a52 100644 --- a/datatree/datatree.py +++ b/datatree/datatree.py @@ -1214,13 +1214,13 @@ def match(self, pattern: str) -> DataTree: Examples -------- >>> dt = DataTree.from_dict( - >>> { - >>> "/a/A": None, - >>> "/a/B": None, - >>> "/b/A": None, - >>> "/b/B": None, - >>> } - >>> ) + ... { + ... "/a/A": None, + ... "/a/B": None, + ... "/b/A": None, + ... "/b/B": None, + ... } + ... ) >>> dt.match("*/B") DataTree('None', parent=None) ├── DataTree('a')