diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 3bab430f6f2..ca0e913f773 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -96,6 +96,9 @@ Internal Changes - :py:func:`as_variable` now consistently includes the variable name in any exceptions raised. (:pull:`7995`). By `Peter Hill `_ +- :py:func:`encode_dataset_coordinates` now sorts coordinates automatically assigned to + `coordinates` attributes during serialization (:issue:`8026`, :pull:`8034`). + `By Ian Carroll `_. .. _whats-new.2023.07.0: diff --git a/xarray/conventions.py b/xarray/conventions.py index 053863ace2a..5a6675d60c1 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -694,7 +694,7 @@ def _encode_coordinates(variables, attributes, non_dim_coord_names): if not coords_str and variable_coordinates[name]: coordinates_text = " ".join( str(coord_name) - for coord_name in variable_coordinates[name] + for coord_name in sorted(variable_coordinates[name]) if coord_name not in not_technically_coordinates ) if coordinates_text: @@ -719,7 +719,7 @@ def _encode_coordinates(variables, attributes, non_dim_coord_names): SerializationWarning, ) else: - attributes["coordinates"] = " ".join(map(str, global_coordinates)) + attributes["coordinates"] = " ".join(sorted(map(str, global_coordinates))) return variables, attributes diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py index 424b7db5ac4..4dae7809be9 100644 --- a/xarray/tests/test_conventions.py +++ b/xarray/tests/test_conventions.py @@ -129,9 +129,9 @@ def test_multidimensional_coordinates(self) -> None: foo1_coords = enc["foo1"].attrs.get("coordinates", "") foo2_coords = enc["foo2"].attrs.get("coordinates", "") foo3_coords = enc["foo3"].attrs.get("coordinates", "") - assert set(foo1_coords.split()) == {"lat1", "lon1"} - assert set(foo2_coords.split()) == {"lat2", "lon2"} - assert set(foo3_coords.split()) == {"lat3", "lon3"} + assert foo1_coords == "lon1 lat1" + assert foo2_coords == "lon2 lat2" + assert foo3_coords == "lon3 lat3" # Should not have any global coordinates. assert "coordinates" not in attrs @@ -150,11 +150,12 @@ def test_var_with_coord_attr(self) -> None: enc, attrs = conventions.encode_dataset_coordinates(orig) # Make sure we have the right coordinates for each variable. values_coords = enc["values"].attrs.get("coordinates", "") - assert set(values_coords.split()) == {"time", "lat", "lon"} + assert values_coords == "time lon lat" # Should not have any global coordinates. assert "coordinates" not in attrs def test_do_not_overwrite_user_coordinates(self) -> None: + # don't overwrite user-defined "coordinates" encoding orig = Dataset( coords={"x": [0, 1, 2], "y": ("x", [5, 6, 7]), "z": ("x", [8, 9, 10])}, data_vars={"a": ("x", [1, 2, 3]), "b": ("x", [3, 5, 6])}, @@ -168,6 +169,18 @@ def test_do_not_overwrite_user_coordinates(self) -> None: with pytest.raises(ValueError, match=r"'coordinates' found in both attrs"): conventions.encode_dataset_coordinates(orig) + def test_deterministic_coords_encoding(self) -> None: + # the coordinates attribute is sorted when set by xarray.conventions ... + # ... on a variable's coordinates attribute + ds = Dataset({"foo": 0}, coords={"baz": 0, "bar": 0}) + vars, attrs = conventions.encode_dataset_coordinates(ds) + assert vars["foo"].attrs["coordinates"] == "bar baz" + assert attrs.get("coordinates") is None + # ... on the global coordinates attribute + ds = ds.drop_vars("foo") + vars, attrs = conventions.encode_dataset_coordinates(ds) + assert attrs["coordinates"] == "bar baz" + @pytest.mark.filterwarnings("ignore:Converting non-nanosecond") def test_emit_coordinates_attribute_in_attrs(self) -> None: orig = Dataset(