From de0c78b82204bacbf3ffc28f6596b0b57241e260 Mon Sep 17 00:00:00 2001 From: Tim Heap Date: Wed, 21 Jun 2023 11:14:45 +1000 Subject: [PATCH] Add Convention.geometry and Convention.bounds Convention.geometry is a single Polygon that represents the dataset geometry, while Convention.bounds is the minimum bounds of the dataset. Both have default implementations, but specific conventions can override this implementation if there is a more efficient way of computing it. --- docs/releases/development.rst | 1 + src/emsarray/conventions/_base.py | 23 +++++++- src/emsarray/conventions/grid.py | 19 +++++- src/emsarray/conventions/ugrid.py | 11 +++- src/emsarray/types.py | 3 +- tests/conventions/test_base.py | 31 +++++++++- tests/conventions/test_cfgrid1d.py | 94 +++++++++++++++++++----------- tests/conventions/test_cfgrid2d.py | 14 ++++- tests/conventions/test_ugrid.py | 10 ++++ tests/utils.py | 18 +++++- 10 files changed, 184 insertions(+), 40 deletions(-) diff --git a/docs/releases/development.rst b/docs/releases/development.rst index f58cb6b..c8587c7 100644 --- a/docs/releases/development.rst +++ b/docs/releases/development.rst @@ -9,3 +9,4 @@ Next release (in development) for significant speed improvements (:pr:`77`). * Added :func:`emsarray.utils.timed_func` for easily logging some performance metrics (:pr:`79`). +* Add :attr:`.Convention.bounds` and :attr:`.Convention.geometry` attributes (:pr:`83`). diff --git a/src/emsarray/conventions/_base.py b/src/emsarray/conventions/_base.py index 276b199..fe3a8aa 100644 --- a/src/emsarray/conventions/_base.py +++ b/src/emsarray/conventions/_base.py @@ -13,6 +13,7 @@ import numpy as np import xarray as xr +from shapely import unary_union from shapely.geometry import Point, Polygon from shapely.geometry.base import BaseGeometry @@ -23,7 +24,7 @@ _requires_plot, animate_on_figure, plot_on_figure, polygons_to_collection ) from emsarray.state import State -from emsarray.types import Pathish +from emsarray.types import Bounds, Pathish if TYPE_CHECKING: # Import these optional dependencies only during type checking @@ -941,6 +942,26 @@ def mask(self) -> np.ndarray: dtype=bool, count=self.polygons.size) return cast(np.ndarray, mask) + @cached_property + def geometry(self) -> Polygon: + """ + A shapely :class:`Polygon` that represents the geometry of the entire dataset. + + This is equivalent to the union of all polygons in the dataset, + although specific conventions may have a simpler way of constructing this. + """ + return unary_union(self.polygons[self.mask]) + + @cached_property + def bounds(self) -> Bounds: + """ + Returns minimum bounding region (minx, miny, maxx, maxy) of the entire dataset. + + This is equivalent to the bounds of the dataset :attr:`geometry`, + although specific conventons may have a simpler way of constructing this. + """ + return cast(Bounds, self.geometry.bounds) + @cached_property @utils.timed_func def spatial_index(self) -> SpatialIndex[SpatialIndexItem[Index]]: diff --git a/src/emsarray/conventions/grid.py b/src/emsarray/conventions/grid.py index 836f248..f928057 100644 --- a/src/emsarray/conventions/grid.py +++ b/src/emsarray/conventions/grid.py @@ -16,11 +16,12 @@ import numpy as np import xarray as xr +from shapely.geometry import Polygon, box from shapely.geometry.base import BaseGeometry from emsarray import masking, utils from emsarray.exceptions import ConventionViolationWarning -from emsarray.types import Pathish +from emsarray.types import Bounds, Pathish from ._base import Convention, Specificity @@ -223,6 +224,16 @@ def topology(self) -> Topology: """A :class:`CFGridTopology` helper.""" return self.topology_class(self.dataset) + @cached_property + def bounds(self) -> Bounds: + # This can be computed easily from the coordinate bounds + topology = self.topology + min_x = np.nanmin(topology.longitude_bounds) + max_x = np.nanmax(topology.longitude_bounds) + min_y = np.nanmin(topology.latitude_bounds) + max_y = np.nanmax(topology.latitude_bounds) + return (min_x, min_y, max_x, max_y) + def unravel_index( self, index: int, @@ -414,6 +425,12 @@ def face_centres(self) -> np.ndarray: centres = np.column_stack((xx.flatten(), yy.flatten())) return cast(np.ndarray, centres) + @cached_property + def geometry(self) -> Polygon: + # As CFGrid1D is axis aligned, + # the geometry can be constructed from the bounds. + return box(*self.bounds) + # 2D coordinate grids diff --git a/src/emsarray/conventions/ugrid.py b/src/emsarray/conventions/ugrid.py index af3a332..b1faf8f 100644 --- a/src/emsarray/conventions/ugrid.py +++ b/src/emsarray/conventions/ugrid.py @@ -29,7 +29,7 @@ from emsarray.exceptions import ( ConventionViolationError, ConventionViolationWarning ) -from emsarray.types import Pathish +from emsarray.types import Bounds, Pathish from ._base import Convention, Specificity @@ -1103,6 +1103,15 @@ def face_centres(self) -> np.ndarray: return cast(np.ndarray, face_centres) return super().face_centres + @cached_property + def bounds(self) -> Bounds: + topology = self.topology + min_x = np.nanmin(topology.node_x) + max_x = np.nanmax(topology.node_x) + min_y = np.nanmin(topology.node_y) + max_y = np.nanmax(topology.node_y) + return (min_x, min_y, max_x, max_y) + def selector_for_index(self, index: UGridIndex) -> Dict[Hashable, int]: kind, i = index if kind is UGridKind.face: diff --git a/src/emsarray/types.py b/src/emsarray/types.py index 1160df9..9464ec0 100644 --- a/src/emsarray/types.py +++ b/src/emsarray/types.py @@ -1,6 +1,7 @@ """Collection of type aliases used across the library.""" import os -from typing import Union +from typing import Tuple, Union Pathish = Union[os.PathLike, str] +Bounds = Tuple[float, float, float, float] diff --git a/tests/conventions/test_base.py b/tests/conventions/test_base.py index 4c109bd..fa37daf 100644 --- a/tests/conventions/test_base.py +++ b/tests/conventions/test_base.py @@ -9,7 +9,7 @@ import numpy as np import pytest import xarray as xr -from shapely.geometry import LineString, Point, box +from shapely.geometry import LineString, Point, Polygon, box from shapely.geometry.base import BaseGeometry from emsarray import masking, utils @@ -84,6 +84,8 @@ def drop_geometry(self) -> xr.Dataset: @cached_property def polygons(self) -> np.ndarray: height, width = self.shape + # Each polygon is a box from (x, y, x+1, y+1), + # however the polygons around the edge are masked out with None. return np.array([ box(x, y, x + 1, y + 1) if (0 < x < width - 1) and (0 < y < height - 1) @@ -126,6 +128,33 @@ def test_mask(): ) +def test_geometry(): + dataset = xr.Dataset({ + 'values': (['z', 'y', 'x'], np.random.standard_normal((5, 10, 20))), + 'botz': (['y', 'x'], np.random.standard_normal((10, 20)) - 10), + }) + convention = SimpleConvention(dataset) + + # The geometry will be the union of all the polygons, + # which results in some 'extra' vertices along the edge. + assert convention.geometry == Polygon( + [(1, x) for x in range(1, 10)] + + [(y, 9) for y in range(2, 20)] + + [(19, x) for x in reversed(range(1, 9))] + + [(y, 1) for y in reversed(range(1, 19))] + ) + + +def test_bounds(): + dataset = xr.Dataset({ + 'values': (['z', 'y', 'x'], np.random.standard_normal((5, 10, 20))), + 'botz': (['y', 'x'], np.random.standard_normal((10, 20)) - 10), + }) + convention = SimpleConvention(dataset) + + assert convention.bounds == (1, 1, 19, 9) + + def test_spatial_index(): dataset = xr.Dataset({ 'values': (['z', 'y', 'x'], np.random.standard_normal((5, 10, 20))), diff --git a/tests/conventions/test_cfgrid1d.py b/tests/conventions/test_cfgrid1d.py index 66786cc..79db895 100644 --- a/tests/conventions/test_cfgrid1d.py +++ b/tests/conventions/test_cfgrid1d.py @@ -15,7 +15,7 @@ from emsarray.conventions import get_dataset_convention from emsarray.conventions.grid import CFGrid1D, CFGridKind, CFGridTopology from emsarray.operations import geometry -from tests.utils import box, mask_from_strings +from tests.utils import assert_property_not_cached, box, mask_from_strings def make_dataset( @@ -24,6 +24,7 @@ def make_dataset( height: int, depth: int = 5, time_size: int = 4, + bounds: bool = False, ) -> xr.Dataset: longitude_name = 'lon' latitude_name = 'lat' @@ -116,18 +117,39 @@ def make_dataset( }, ) + data_vars = [ + time, lat, lon, depth_var, + botz, eta, temp, + ] + + if bounds: + lon_grid = np.concatenate([ + lon.values - 0.08, + [lon.values[-1] + 0.02] + ]) + lat_grid = np.concatenate([ + lat.values - 0.07, + [lat.values[-1] + 0.03] + ]) + lon_bounds = xr.DataArray( + np.c_[lon_grid[:-1], lon_grid[1:]], + dims=[longitude_name, 'bounds'], + name="lon_bounds", + ) + lat_bounds = xr.DataArray( + np.c_[lat_grid[:-1], lat_grid[1:]], + dims=[latitude_name, 'bounds'], + name="lat_bounds", + ) + lon.attrs['bounds'] = lon_bounds.name + lat.attrs['bounds'] = lat_bounds.name + data_vars += [lon_bounds, lat_bounds] + dataset = xr.Dataset( - data_vars={var.name: var for var in [ - time, lat, lon, depth_var, - botz, eta, temp - ]}, + data_vars={var.name: var for var in data_vars}, attrs={ - 'title': "COMPAS defalt version", - 'paramhead': "Example COMPAS grid", - 'paramfile': "in.prm", - 'version': "v1.0 rev(1234)", - 'Conventions': "UGRID-1.0", - 'start_index': 0, + 'title': "Example CFGrid1D", + 'Conventions': "CF-1.4", }, ) dataset.encoding['unlimited_dims'] = {'time'} @@ -157,7 +179,7 @@ def test_varnames(): def test_polygons_no_bounds(): - dataset = make_dataset(width=3, height=4) + dataset = make_dataset(width=3, height=4, bounds=False) polygons = dataset.ems.polygons # Should be one item for every face @@ -175,27 +197,7 @@ def test_polygons_no_bounds(): def test_polygons_bounds(): - dataset = make_dataset(width=3, height=4) - lon_grid = np.concatenate([ - dataset['lon'].values - 0.08, - [dataset['lon'].values[-1] + 0.02] - ]) - lat_grid = np.concatenate([ - dataset['lat'].values - 0.07, - [dataset['lat'].values[-1] + 0.03] - ]) - dataset = dataset.assign({ - 'lon_bounds': xr.DataArray( - np.c_[lon_grid[:-1], lon_grid[1:]], - dims=[dataset['lon'].dims[0], 'bounds'], - ), - 'lat_bounds': xr.DataArray( - np.c_[lat_grid[:-1], lat_grid[1:]], - dims=[dataset['lat'].dims[0], 'bounds'], - ), - }) - dataset['lon'].attrs['bounds'] = 'lon_bounds' - dataset['lat'].attrs['bounds'] = 'lat_bounds' + dataset = make_dataset(width=3, height=4, bounds=True) assert_allclose(dataset.ems.topology.longitude_bounds, dataset['lon_bounds']) assert_allclose(dataset.ems.topology.latitude_bounds, dataset['lat_bounds']) @@ -210,6 +212,32 @@ def test_polygons_bounds(): tolerance=1e-6) +def test_bounds_no_bounds(): + dataset = make_dataset(width=3, height=4, bounds=False) + assert_allclose(dataset.ems.bounds, (-0.05, -0.05, 0.25, 0.35)) + assert_property_not_cached(dataset.ems, 'geometry') + + +def test_bounds_with_bounds(): + dataset = make_dataset(width=3, height=4, bounds=True) + assert_allclose(dataset.ems.bounds, (-0.08, -0.07, 0.22, 0.33)) + assert_property_not_cached(dataset.ems, 'geometry') + + +def test_geometry(): + dataset = make_dataset(width=3, height=4, bounds=False) + assert_geometries_equal( + dataset.ems.geometry, + Polygon([ + (0.25, -0.05), + (0.25, 0.35), + (-0.05, 0.35), + (-0.05, -0.05), + (0.25, -0.05), + ]), + tolerance=1e-6) + + def test_selector_for_index(): dataset = make_dataset(width=11, height=7, depth=5) convention: CFGrid1D = dataset.ems diff --git a/tests/conventions/test_cfgrid2d.py b/tests/conventions/test_cfgrid2d.py index bb9781c..e4c8e5c 100644 --- a/tests/conventions/test_cfgrid2d.py +++ b/tests/conventions/test_cfgrid2d.py @@ -17,6 +17,7 @@ import pytest import xarray as xr from matplotlib.figure import Figure +from numpy.testing import assert_allclose from shapely.geometry import Polygon from shapely.testing import assert_geometries_equal @@ -24,7 +25,10 @@ from emsarray.conventions.grid import CFGridKind from emsarray.conventions.shoc import ShocSimple from emsarray.operations import geometry -from tests.utils import DiagonalShocGrid, ShocGridGenerator, ShocLayerGenerator +from tests.utils import ( + DiagonalShocGrid, ShocGridGenerator, ShocLayerGenerator, + assert_property_not_cached +) def make_dataset( @@ -234,6 +238,14 @@ def test_holes(): assert poly.equals_exact(Polygon([(0.3, 1.9), (0.4, 1.8), (0.5, 1.9), (0.4, 2.0), (0.3, 1.9)]), 1e-6) +def test_bounds(): + dataset = make_dataset( + j_size=10, i_size=20, corner_size=5, include_bounds=True) + # The corner cuts out some of the upper bounds + assert_allclose(dataset.ems.bounds, (0, 0, 3, 2.5)) + assert_property_not_cached(dataset.ems, 'geometry') + + def test_face_centres(): # SHOC simple face centres are taken directly from the coordinates, # not calculated from polygon centres. diff --git a/tests/conventions/test_ugrid.py b/tests/conventions/test_ugrid.py index 8198cb9..c0f4bcb 100644 --- a/tests/conventions/test_ugrid.py +++ b/tests/conventions/test_ugrid.py @@ -23,6 +23,7 @@ ConventionViolationError, ConventionViolationWarning ) from emsarray.operations import geometry +from tests.utils import assert_property_not_cached def make_faces(width: int, height, fill_value: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: @@ -401,6 +402,15 @@ def test_face_centres_from_centroids(): np.testing.assert_equal(face_centres[linear_index], [lon, lat]) +def test_bounds(datasets: pathlib.Path): + dataset = xr.open_dataset(datasets / 'ugrid_mesh2d.nc') + r = 3.6 + assert_allclose(dataset.ems.bounds, (-r, -r, r, r)) + + # We also want to check that we didn't make the geometry to calculate this + assert_property_not_cached(dataset.ems, 'geometry') + + @pytest.mark.parametrize( ['index', 'selector'], ( diff --git a/tests/utils.py b/tests/utils.py index 3331117..11c9051 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -3,7 +3,7 @@ import abc import itertools from functools import cached_property -from typing import Dict, Hashable, List, Optional, Tuple +from typing import Any, Dict, Hashable, List, Optional, Tuple import numpy as np import shapely @@ -350,3 +350,19 @@ def make_x_grid(self, j: np.ndarray, i: np.ndarray) -> np.ndarray: def make_y_grid(self, j: np.ndarray, i: np.ndarray) -> np.ndarray: return 0.1 * (5 + j) * np.sin(np.pi - i * np.pi / (self.i_size)) # type: ignore + + +def assert_property_not_cached( + instance: Any, + prop_name: str, + /, +) -> None: + __tracebackhide__ = True # noqa + cls = type(instance) + prop = getattr(cls, prop_name) + assert isinstance(prop, cached_property), \ + "{instance!r}.{prop_name} is not a cached_property" + + cache = instance.__dict__ + assert prop.attrname not in cache, \ + f"{instance!r}.{prop_name} was cached!"