Skip to content

Commit

Permalink
Raise exception in to_dataset if resulting variable is also the name …
Browse files Browse the repository at this point in the history
…of a coordinate (#8433)

* Add tests for issue #7823

* Use 2D array to properly test to_dataset error handling logic

* Raise exception if a variable is also a coordinate in to_dataset

Co-authored-by: Deepak Cherian <[email protected]>

* Update whats-new

* Ensure that coordinates are in the original order

---------

Co-authored-by: Deepak Cherian <[email protected]>
  • Loading branch information
mgunyho and dcherian authored Nov 14, 2023
1 parent f0ade3d commit 59378cc
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 4 deletions.
3 changes: 3 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ Bug fixes
- Fix to once again support date offset strings as input to the loffset
parameter of resample and test this functionality (:pull:`8422`, :issue:`8399`).
By `Katelyn FitzGerald <https://github.com/kafitzgerald>`_.
- Fix a bug where :py:meth:`DataArray.to_dataset` silently drops a variable
if a coordinate with the same name already exists (:pull:`8433`, :issue:`7823`).
By `András Gunyhó <https://github.com/mgunyho>`_.

Documentation
~~~~~~~~~~~~~
Expand Down
19 changes: 17 additions & 2 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -574,9 +574,24 @@ def subset(dim, label):
array.attrs = {}
return as_variable(array)

variables = {label: subset(dim, label) for label in self.get_index(dim)}
variables.update({k: v for k, v in self._coords.items() if k != dim})
variables_from_split = {
label: subset(dim, label) for label in self.get_index(dim)
}
coord_names = set(self._coords) - {dim}

ambiguous_vars = set(variables_from_split) & coord_names
if ambiguous_vars:
rename_msg_fmt = ", ".join([f"{v}=..." for v in sorted(ambiguous_vars)])
raise ValueError(
f"Splitting along the dimension {dim!r} would produce the variables "
f"{tuple(sorted(ambiguous_vars))} which are also existing coordinate "
f"variables. Use DataArray.rename({rename_msg_fmt}) or "
f"DataArray.assign_coords({dim}=...) to resolve this ambiguity."
)

variables = variables_from_split | {
k: v for k, v in self._coords.items() if k != dim
}
indexes = filter_indexes_from_coords(self._indexes, coord_names)
dataset = Dataset._construct_direct(
variables, coord_names, indexes=indexes, attrs=self.attrs
Expand Down
57 changes: 55 additions & 2 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -3686,8 +3686,16 @@ def test_to_dataset_whole(self) -> None:
actual = named.to_dataset("bar")

def test_to_dataset_split(self) -> None:
array = DataArray([1, 2, 3], coords=[("x", list("abc"))], attrs={"a": 1})
expected = Dataset({"a": 1, "b": 2, "c": 3}, attrs={"a": 1})
array = DataArray(
[[1, 2], [3, 4], [5, 6]],
coords=[("x", list("abc")), ("y", [0.0, 0.1])],
attrs={"a": 1},
)
expected = Dataset(
{"a": ("y", [1, 2]), "b": ("y", [3, 4]), "c": ("y", [5, 6])},
coords={"y": [0.0, 0.1]},
attrs={"a": 1},
)
actual = array.to_dataset("x")
assert_identical(expected, actual)

Expand Down Expand Up @@ -3715,6 +3723,51 @@ def test_to_dataset_retains_keys(self) -> None:

assert_equal(array, result)

def test_to_dataset_coord_value_is_dim(self) -> None:
# github issue #7823

array = DataArray(
np.zeros((3, 3)),
coords={
# 'a' is both a coordinate value and the name of a coordinate
"x": ["a", "b", "c"],
"a": [1, 2, 3],
},
)

with pytest.raises(
ValueError,
match=(
re.escape("dimension 'x' would produce the variables ('a',)")
+ ".*"
+ re.escape("DataArray.rename(a=...) or DataArray.assign_coords(x=...)")
),
):
array.to_dataset("x")

# test error message formatting when there are multiple ambiguous
# values/coordinates
array2 = DataArray(
np.zeros((3, 3, 2)),
coords={
"x": ["a", "b", "c"],
"a": [1, 2, 3],
"b": [0.0, 0.1],
},
)

with pytest.raises(
ValueError,
match=(
re.escape("dimension 'x' would produce the variables ('a', 'b')")
+ ".*"
+ re.escape(
"DataArray.rename(a=..., b=...) or DataArray.assign_coords(x=...)"
)
),
):
array2.to_dataset("x")

def test__title_for_slice(self) -> None:
array = DataArray(
np.ones((4, 3, 2)),
Expand Down

0 comments on commit 59378cc

Please sign in to comment.