Skip to content

Commit

Permalink
sort when encoding coordinates for deterministic outputs (#8034)
Browse files Browse the repository at this point in the history
* sort assignments to .attr["coordinates"]

* [skip-ci] Apply suggestions from code review

Co-authored-by: Michael Niklas  <[email protected]>

---------

Co-authored-by: Deepak Cherian <[email protected]>
Co-authored-by: Michael Niklas <[email protected]>
  • Loading branch information
3 people authored Aug 3, 2023
1 parent 92c8b33 commit fb6bbbe
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 6 deletions.
3 changes: 3 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,9 @@ Internal Changes

- :py:func:`as_variable` now consistently includes the variable name in any exceptions
raised. (:pull:`7995`). By `Peter Hill <https://github.com/ZedThree>`_
- :py:func:`encode_dataset_coordinates` now sorts coordinates automatically assigned to
`coordinates` attributes during serialization (:issue:`8026`, :pull:`8034`).
`By Ian Carroll <https://github.com/itcarroll>`_.

.. _whats-new.2023.07.0:

Expand Down
4 changes: 2 additions & 2 deletions xarray/conventions.py
Original file line number Diff line number Diff line change
Expand Up @@ -694,7 +694,7 @@ def _encode_coordinates(variables, attributes, non_dim_coord_names):
if not coords_str and variable_coordinates[name]:
coordinates_text = " ".join(
str(coord_name)
for coord_name in variable_coordinates[name]
for coord_name in sorted(variable_coordinates[name])
if coord_name not in not_technically_coordinates
)
if coordinates_text:
Expand All @@ -719,7 +719,7 @@ def _encode_coordinates(variables, attributes, non_dim_coord_names):
SerializationWarning,
)
else:
attributes["coordinates"] = " ".join(map(str, global_coordinates))
attributes["coordinates"] = " ".join(sorted(map(str, global_coordinates)))

return variables, attributes

Expand Down
21 changes: 17 additions & 4 deletions xarray/tests/test_conventions.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,9 @@ def test_multidimensional_coordinates(self) -> None:
foo1_coords = enc["foo1"].attrs.get("coordinates", "")
foo2_coords = enc["foo2"].attrs.get("coordinates", "")
foo3_coords = enc["foo3"].attrs.get("coordinates", "")
assert set(foo1_coords.split()) == {"lat1", "lon1"}
assert set(foo2_coords.split()) == {"lat2", "lon2"}
assert set(foo3_coords.split()) == {"lat3", "lon3"}
assert foo1_coords == "lon1 lat1"
assert foo2_coords == "lon2 lat2"
assert foo3_coords == "lon3 lat3"
# Should not have any global coordinates.
assert "coordinates" not in attrs

Expand All @@ -150,11 +150,12 @@ def test_var_with_coord_attr(self) -> None:
enc, attrs = conventions.encode_dataset_coordinates(orig)
# Make sure we have the right coordinates for each variable.
values_coords = enc["values"].attrs.get("coordinates", "")
assert set(values_coords.split()) == {"time", "lat", "lon"}
assert values_coords == "time lon lat"
# Should not have any global coordinates.
assert "coordinates" not in attrs

def test_do_not_overwrite_user_coordinates(self) -> None:
# don't overwrite user-defined "coordinates" encoding
orig = Dataset(
coords={"x": [0, 1, 2], "y": ("x", [5, 6, 7]), "z": ("x", [8, 9, 10])},
data_vars={"a": ("x", [1, 2, 3]), "b": ("x", [3, 5, 6])},
Expand All @@ -168,6 +169,18 @@ def test_do_not_overwrite_user_coordinates(self) -> None:
with pytest.raises(ValueError, match=r"'coordinates' found in both attrs"):
conventions.encode_dataset_coordinates(orig)

def test_deterministic_coords_encoding(self) -> None:
# the coordinates attribute is sorted when set by xarray.conventions ...
# ... on a variable's coordinates attribute
ds = Dataset({"foo": 0}, coords={"baz": 0, "bar": 0})
vars, attrs = conventions.encode_dataset_coordinates(ds)
assert vars["foo"].attrs["coordinates"] == "bar baz"
assert attrs.get("coordinates") is None
# ... on the global coordinates attribute
ds = ds.drop_vars("foo")
vars, attrs = conventions.encode_dataset_coordinates(ds)
assert attrs["coordinates"] == "bar baz"

@pytest.mark.filterwarnings("ignore:Converting non-nanosecond")
def test_emit_coordinates_attribute_in_attrs(self) -> None:
orig = Dataset(
Expand Down

0 comments on commit fb6bbbe

Please sign in to comment.