From 8bd456cdb5a873684ed18e77fe4f6983738084db Mon Sep 17 00:00:00 2001 From: Etienne Schalk <45271239+etienneschalk@users.noreply.github.com> Date: Thu, 12 Sep 2024 22:48:24 +0200 Subject: [PATCH] Make illegal path-like variable names when constructing a DataTree from a Dataset (#9378) * Make illegal path-like variable names when constructing a DataTree from a Dataset * Updated whats-new.rst * PR comments * Revert diff * Update xarray/core/datatree.py Co-authored-by: Tom Nicholas * Update xarray/core/datatree.py Co-authored-by: Tom Nicholas * Update xarray/tests/test_datatree.py Co-authored-by: Tom Nicholas * Update expected Exception message in test * Merge changes from #9476 * Fix --------- Co-authored-by: Tom Nicholas --- doc/whats-new.rst | 3 +++ xarray/core/datatree.py | 13 +++++++++++++ xarray/tests/test_datatree.py | 17 +++++++++++++++++ 3 files changed, 33 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 9e436b69579..f9522a5db91 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -100,6 +100,9 @@ Breaking changes Bug fixes ~~~~~~~~~ +- Make illegal path-like variable names when constructing a DataTree from a Dataset + (:issue:`9339`, :pull:`9378`) + By `Etienne Schalk <https://github.com/etienneschalk>`_. - Fix bug with rechunking to a frequency when some periods contain no data (:issue:`9360`). By `Deepak Cherian <https://github.com/dcherian>`_. - Fix bug causing `DataTree.from_dict` to be sensitive to insertion order (:issue:`9276`, :pull:`9292`). 
diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py index 5715dca486f..9d53c544aa1 100644 --- a/xarray/core/datatree.py +++ b/xarray/core/datatree.py @@ -156,6 +156,18 @@ def check_alignment( check_alignment(child_path, child_ds, base_ds, child.children) +def _check_for_slashes_in_names(variables: Iterable[Hashable]) -> None: + offending_variable_names = [ + name for name in variables if isinstance(name, str) and "/" in name + ] + if len(offending_variable_names) > 0: + raise ValueError( + "Given variables have names containing the '/' character: " + f"{offending_variable_names}. " + "Variables stored in DataTree objects cannot have names containing '/' characters, as this would make path-like access to variables ambiguous." + ) + + class DatasetView(Dataset): """ An immutable Dataset-like view onto the data in a single DataTree node. @@ -453,6 +465,7 @@ def __init__( super().__init__(name=name, children=children) def _set_node_data(self, dataset: Dataset): + _check_for_slashes_in_names(dataset.variables) data_vars, coord_vars = _collect_data_and_coord_variables(dataset) self._data_variables = data_vars self._node_coord_variables = coord_vars diff --git a/xarray/tests/test_datatree.py b/xarray/tests/test_datatree.py index 83d1dcded88..39939a23f91 100644 --- a/xarray/tests/test_datatree.py +++ b/xarray/tests/test_datatree.py @@ -83,6 +83,23 @@ def test_child_gets_named_on_attach(self): mary = DataTree(children={"Sue": sue}) # noqa assert mary.children["Sue"].name == "Sue" + def test_dataset_containing_slashes(self): + xda: xr.DataArray = xr.DataArray( + [[1, 2]], + coords={"label": ["a"], "R30m/y": [30, 60]}, + ) + xds: xr.Dataset = xr.Dataset({"group/subgroup/my_variable": xda}) + with pytest.raises( + ValueError, + match=re.escape( + "Given variables have names containing the '/' character: " + "['R30m/y', 'group/subgroup/my_variable']. 
" + "Variables stored in DataTree objects cannot have names containing '/' characters, " + "as this would make path-like access to variables ambiguous." + ), + ): + DataTree(xds) + class TestPaths: def test_path_property(self):