Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sort when encoding coordinates for deterministic outputs #8034

Merged
merged 3 commits into from
Aug 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,9 @@ Internal Changes

- :py:func:`as_variable` now consistently includes the variable name in any exceptions
raised. (:pull:`7995`). By `Peter Hill <https://github.com/ZedThree>`_
- :py:func:`encode_dataset_coordinates` now sorts coordinates automatically assigned to
`coordinates` attributes during serialization (:issue:`8026`, :pull:`8034`).
`By Ian Carroll <https://github.com/itcarroll>`_.

.. _whats-new.2023.07.0:

Expand Down
4 changes: 2 additions & 2 deletions xarray/conventions.py
Original file line number Diff line number Diff line change
Expand Up @@ -694,7 +694,7 @@ def _encode_coordinates(variables, attributes, non_dim_coord_names):
if not coords_str and variable_coordinates[name]:
coordinates_text = " ".join(
str(coord_name)
for coord_name in variable_coordinates[name]
for coord_name in sorted(variable_coordinates[name])
if coord_name not in not_technically_coordinates
)
if coordinates_text:
Expand All @@ -719,7 +719,7 @@ def _encode_coordinates(variables, attributes, non_dim_coord_names):
SerializationWarning,
)
else:
attributes["coordinates"] = " ".join(map(str, global_coordinates))
attributes["coordinates"] = " ".join(sorted(map(str, global_coordinates)))

return variables, attributes

Expand Down
21 changes: 17 additions & 4 deletions xarray/tests/test_conventions.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,9 @@ def test_multidimensional_coordinates(self) -> None:
foo1_coords = enc["foo1"].attrs.get("coordinates", "")
foo2_coords = enc["foo2"].attrs.get("coordinates", "")
foo3_coords = enc["foo3"].attrs.get("coordinates", "")
assert set(foo1_coords.split()) == {"lat1", "lon1"}
assert set(foo2_coords.split()) == {"lat2", "lon2"}
assert set(foo3_coords.split()) == {"lat3", "lon3"}
assert foo1_coords == "lon1 lat1"
itcarroll marked this conversation as resolved.
Show resolved Hide resolved
assert foo2_coords == "lon2 lat2"
assert foo3_coords == "lon3 lat3"
# Should not have any global coordinates.
assert "coordinates" not in attrs

Expand All @@ -150,11 +150,12 @@ def test_var_with_coord_attr(self) -> None:
enc, attrs = conventions.encode_dataset_coordinates(orig)
# Make sure we have the right coordinates for each variable.
values_coords = enc["values"].attrs.get("coordinates", "")
assert set(values_coords.split()) == {"time", "lat", "lon"}
assert values_coords == "time lon lat"
# Should not have any global coordinates.
assert "coordinates" not in attrs

def test_do_not_overwrite_user_coordinates(self) -> None:
# don't overwrite user-defined "coordinates" encoding
orig = Dataset(
coords={"x": [0, 1, 2], "y": ("x", [5, 6, 7]), "z": ("x", [8, 9, 10])},
data_vars={"a": ("x", [1, 2, 3]), "b": ("x", [3, 5, 6])},
Expand All @@ -168,6 +169,18 @@ def test_do_not_overwrite_user_coordinates(self) -> None:
with pytest.raises(ValueError, match=r"'coordinates' found in both attrs"):
conventions.encode_dataset_coordinates(orig)

def test_deterministic_coords_encoding(self) -> None:
# the coordinates attribute is sorted when set by xarray.conventions ...
# ... on a variable's coordinates attribute
ds = Dataset({"foo": 0}, coords={"baz": 0, "bar": 0})
vars, attrs = conventions.encode_dataset_coordinates(ds)
assert vars["foo"].attrs["coordinates"] == "bar baz"
assert attrs.get("coordinates") is None
# ... on the global coordinates attribute
ds = ds.drop_vars("foo")
vars, attrs = conventions.encode_dataset_coordinates(ds)
assert attrs["coordinates"] == "bar baz"

@pytest.mark.filterwarnings("ignore:Converting non-nanosecond")
def test_emit_coordinates_attribute_in_attrs(self) -> None:
orig = Dataset(
Expand Down