xarray-contrib · martinfleis · Jul 17, 2024 · Jul 1, 2024 · Jul 1, 2024 · Jul 1, 2024
diff --git a/.gitignore b/.gitignore
@@ -139,10 +139,11 @@ dmypy.json
 
 # sphinx
 doc/source/generated
+doc/source/geo-encoded*
 
 # ruff
 .ruff_cache
 doc/source/cube.joblib.compressed
 doc/source/cube.pickle
 
-cache/
+cache/
diff --git a/doc/source/io.ipynb b/doc/source/io.ipynb
diff --git a/pyproject.toml b/pyproject.toml
@@ -30,6 +30,7 @@ dependencies = [
     "xarray >= 2022.12.0",
     "pyproj >= 3.0.0",
     "shapely >= 2.0b1",
+    "cf_xarray >= 0.9.2",
 ]
 
 [project.urls]

diff --git a/xvec/accessor.py b/xvec/accessor.py
@@ -1258,6 +1258,74 @@ def extract_points(
                 )
         return result
 
+    def encode_cf(self) -> xr.Dataset:
+        """Encode geometry coordinates and their CRS following CF conventions.
+
+        Every distinct CRS on the indexed geometry coordinates is stored as a scalar
+        grid-mapping coordinate ("spatial_ref", or "spatial_ref_<i>" if several),
+        referenced via "grid_mapping" attrs; coordinates without a CRS get none.
+        Operates on a copy of the wrapped object; returns the encoded Dataset.
+        """
+        import cf_xarray as cfxr
+
+        ds = self._obj.copy()
+        coords = self.geom_coords_indexed
+
+        # Adapted from rioxarray (TODO: could use geoxarray). Sorted name order keeps
+        # the numbering stable for roundtripping; indexes without a CRS are skipped.
+        unique_crs = []
+        for _, xi in sorted(coords.xindexes.items()):
+            if xi.crs is not None and xi.crs not in unique_crs:
+                unique_crs.append(xi.crs)
+        if len(unique_crs) == 1:
+            grid_mappings = {unique_crs[0]: "spatial_ref"}
+        else:
+            grid_mappings = {
+                crs_: f"spatial_ref_{i}" for i, crs_ in enumerate(unique_crs)
+            }
+
+        for crs, grid_mapping in grid_mappings.items():
+            # TODO: not every CRS is representable as a CF grid mapping (no
+            # "grid_mapping_name" in the attrs); allowed for now, WKT is kept.
+            wkt_str = crs.to_wkt()
+            attrs = {**crs.to_cf(), "spatial_ref": wkt_str, "crs_wkt": wkt_str}
+            ds.coords[grid_mapping] = xr.Variable(dims=(), data=0, attrs=attrs)
+
+        for name, coord in coords.items():
+            grid_mapping = grid_mappings.get(coords.xindexes[name].crs)
+            if grid_mapping is None:
+                continue
+            dims = set(coord.dims)
+            for var in ds._variables.values():
+                if dims & set(var.dims):
+                    var.attrs["grid_mapping"] = grid_mapping
+        return cfxr.geometry.encode_geometries(ds)
+
+    def decode_cf(self) -> xr.Dataset:
+        """Decode CF-encoded geometries and grid mappings into indexed geometries."""
+        import cf_xarray as cfxr
+
+        decoded = cfxr.geometry.decode_geometries(self._obj.copy())
+        # from_cf also handles attrs that carry "grid_mapping_name" but no "crs_wkt"
+        crs = {
+            name: CRS.from_cf(var.attrs)
+            for name, var in decoded._variables.items()
+            if "crs_wkt" in var.attrs or "grid_mapping_name" in var.attrs
+        }
+        dims = decoded.xvec.geom_coords.dims
+        for dim in dims:
+            if dim not in decoded._indexes:
+                decoded = decoded.set_xindex(dim)
+            grid_mapping = decoded[dim].attrs.get("grid_mapping")
+            decoded = decoded.xvec.set_geom_indexes(dim, crs=crs.get(grid_mapping))
+        # remove spatial_ref so the coordinate system is only stored on the index
+        for name in crs:
+            del decoded[name]
+        for var in decoded._variables.values():
+            if set(dims) & set(var.dims):
+                var.attrs.pop("grid_mapping", None)
+        return decoded
+
 
 def _resolve_input(
     positional: Mapping[Any, Any] | None,

diff --git a/xvec/tests/conftest.py b/xvec/tests/conftest.py
@@ -69,7 +69,7 @@ def multi_dataset(geom_array, geom_array_z):
 
 @pytest.fixture(scope="session")
 def multi_geom_dataset(geom_array, geom_array_z):
-    return (
+    ds = (
         xr.Dataset(
             coords={
                 "geom": geom_array,
@@ -80,11 +80,32 @@ def multi_geom_dataset(geom_array, geom_array_z):
         .set_xindex("geom", GeometryIndex, crs=26915)
         .set_xindex("geom_z", GeometryIndex, crs=26915)
     )
+    ds["geom"].attrs["crs"] = ds.xindexes["geom"].crs
+    ds["geom_z"].attrs["crs"] = ds.xindexes["geom_z"].crs
+    return ds
+
+
+@pytest.fixture(scope="session")
+def multi_geom_multi_crs_dataset(geom_array, geom_array_z):
+    """Dataset with two indexed geometry coordinates that use different CRS."""
+    # NOTE(review): "EPSG:4362" might be a transposition of EPSG:4326 — confirm.
+    crs_by_coord = {"geom": 26915, "geom_z": "EPSG:4362"}
+
+    ds = xr.Dataset(coords={"geom": geom_array, "geom_z": geom_array_z})
+    # swap the existing indexes for GeometryIndex instances carrying a CRS
+    ds = ds.drop_indexes(list(crs_by_coord))
+    for name, crs in crs_by_coord.items():
+        ds = ds.set_xindex(name, GeometryIndex, crs=crs)
+
+    # copy the CRS held by each index onto the matching coordinate's attrs
+    for name in crs_by_coord:
+        ds[name].attrs["crs"] = ds.xindexes[name].crs
+    return ds
 
 
 @pytest.fixture(scope="session")
 def multi_geom_no_index_dataset(geom_array, geom_array_z):
-    return (
+    ds = (
         xr.Dataset(
             coords={
                 "geom": geom_array,
@@ -96,6 +117,9 @@ def multi_geom_no_index_dataset(geom_array, geom_array_z):
         .set_xindex("geom", GeometryIndex, crs=26915)
         .set_xindex("geom_z", GeometryIndex, crs=26915)
     )
+    ds["geom"].attrs["crs"] = ds.xindexes["geom"].crs
+    ds["geom_z"].attrs["crs"] = ds.xindexes["geom_z"].crs
+    return ds
 
 
 @pytest.fixture(scope="session")
@@ -157,3 +181,18 @@ def traffic_dataset(geom_array):
             "day": pd.date_range("2023-01-01", periods=10),
         },
     ).xvec.set_geom_indexes(["origin", "destination"], crs=26915)
+
+
+@pytest.fixture(
+    scope="session",
+    params=[
+        "first_geom_dataset",
+        "multi_dataset",
+        "multi_geom_dataset",
+        "multi_geom_no_index_dataset",
+        "multi_geom_multi_crs_dataset",
+        "traffic_dataset",
+    ],
+)
+def all_datasets(request):  # one run per dataset fixture named in ``params``
+    return request.getfixturevalue(request.param)
diff --git a/xvec/tests/test_accessor.py b/xvec/tests/test_accessor.py
@@ -674,3 +674,21 @@ def test_extract_points_array():
             geometry=4326
         ),
     )
+
+
+def test_cf_roundtrip(all_datasets):
+    """encode_cf followed by decode_cf must reproduce the dataset unchanged."""
+    ds = all_datasets
+    pristine = ds.copy(deep=True)
+    encoded = ds.xvec.encode_cf()
+
+    indexes = ds.xvec.geom_coords_indexed.xindexes.values()
+    expected_crs = {idx.crs for idx in indexes if idx.crs}
+    if expected_crs:
+        # one grid-mapping variable (carrying "crs_wkt") per distinct CRS
+        wkt_vars = [v for v in encoded._variables.values() if "crs_wkt" in v.attrs]
+        assert len(expected_crs) == len(wkt_vars)
+
+    xr.testing.assert_identical(ds, encoded.xvec.decode_cf())
+    # make sure we didn't modify the original dataset.
+    xr.testing.assert_identical(ds, pristine)