From d186913629387cae41fa038db3af101ec1fdac3a Mon Sep 17 00:00:00 2001 From: Tim Heap Date: Wed, 28 Aug 2024 15:51:18 +1000 Subject: [PATCH] Check dataset polygons are valid when constructing them `Convention.polygons` is now defined on the base `Convention` class. It calls a new abstract method `Convention._make_polygons()`. This allows us to do some post processing of the polygons as part of generating them. Invalid polygons generate a warning and are dropped as part of generation. Non-simple polygons generate a warning but are kept. This means the triangulation code doesn't need to perform this step. It doesn't save any time overall, as the polygons still need to be tested either way. --- src/emsarray/conventions/_base.py | 28 +++++++++++++++++++++----- src/emsarray/conventions/arakawa_c.py | 4 +--- src/emsarray/conventions/grid.py | 8 ++------ src/emsarray/conventions/ugrid.py | 4 +--- src/emsarray/exceptions.py | 14 ++++++++++++- src/emsarray/operations/triangulate.py | 6 ------ tests/conventions/test_base.py | 3 +-- 7 files changed, 41 insertions(+), 26 deletions(-) diff --git a/src/emsarray/conventions/_base.py b/src/emsarray/conventions/_base.py index 287fddb..3002258 100644 --- a/src/emsarray/conventions/_base.py +++ b/src/emsarray/conventions/_base.py @@ -8,15 +8,15 @@ from typing import TYPE_CHECKING, Any, Generic, Literal, TypeVar, cast import numpy +import shapely import xarray -from shapely import unary_union from shapely.geometry import MultiPolygon, Point, Polygon from shapely.geometry.base import BaseGeometry from shapely.strtree import STRtree from emsarray import utils from emsarray.compat.shapely import SpatialIndex -from emsarray.exceptions import NoSuchCoordinateError +from emsarray.exceptions import InvalidPolygonWarning, NoSuchCoordinateError from emsarray.operations import depth, point_extraction from emsarray.plot import ( _requires_plot, animate_on_figure, make_plot_title, plot_on_figure, @@ -1264,8 +1264,8 @@ def make_quiver( return Quiver(axes, x, y, *values, **kwargs) - @property - @abc.abstractmethod + @cached_property + @utils.timed_func def polygons(self) -> numpy.ndarray: """A :class:`numpy.ndarray` of :class:`shapely.Polygon` instances representing the cells in this dataset. @@ -1286,6 +1286,24 @@ def polygons(self) -> numpy.ndarray: :meth:`ravel_index` :attr:`mask` """ + polygons = self._make_polygons() + + not_none = (polygons != None) # noqa: E711 + invalid_polygon_indices = numpy.flatnonzero(not_none & ~shapely.is_valid(polygons)) + if len(invalid_polygon_indices): + indices_str = numpy.array2string( + invalid_polygon_indices, max_line_width=None, threshold=5) + warnings.warn( + f"Dropping invalid polygons at indices {indices_str}", + category=InvalidPolygonWarning) + polygons[invalid_polygon_indices] = None + not_none[invalid_polygon_indices] = False + + polygons.flags.writeable = False + return polygons + + @abc.abstractmethod + def _make_polygons(self) -> numpy.ndarray: pass @cached_property @@ -1337,7 +1355,7 @@ def geometry(self) -> Polygon | MultiPolygon: This is equivalent to the union of all polygons in the dataset, although specific conventions may have a simpler way of constructing this. """ - return unary_union(self.polygons[self.mask]) + return shapely.unary_union(self.polygons[self.mask]) @cached_property def bounds(self) -> Bounds: diff --git a/src/emsarray/conventions/arakawa_c.py b/src/emsarray/conventions/arakawa_c.py index 05b881f..17e7742 100644 --- a/src/emsarray/conventions/arakawa_c.py +++ b/src/emsarray/conventions/arakawa_c.py @@ -260,9 +260,7 @@ def unpack_index(self, index: ArakawaCIndex) -> tuple[ArakawaCGridKind, Sequence def pack_index(self, grid_kind: ArakawaCGridKind, indexes: Sequence[int]) -> ArakawaCIndex: return cast(ArakawaCIndex, (grid_kind, *indexes)) - @cached_property - @utils.timed_func - def polygons(self) -> numpy.ndarray: + def _make_polygons(self) -> numpy.ndarray: # Make an array of shape (j, i, 2) of all the nodes grid = numpy.stack([self.node.longitude.values, self.node.latitude.values], axis=-1) diff --git a/src/emsarray/conventions/grid.py b/src/emsarray/conventions/grid.py index 78a0b65..4e278f8 100644 --- a/src/emsarray/conventions/grid.py +++ b/src/emsarray/conventions/grid.py @@ -408,9 +408,7 @@ def check_dataset(cls, dataset: xarray.Dataset) -> int | None: return Specificity.LOW - @cached_property - @utils.timed_func - def polygons(self) -> numpy.ndarray: + def _make_polygons(self) -> numpy.ndarray: lon_bounds = self.topology.longitude_bounds.values lat_bounds = self.topology.latitude_bounds.values @@ -576,9 +574,7 @@ def check_dataset(cls, dataset: xarray.Dataset) -> int | None: return Specificity.LOW - @cached_property - @utils.timed_func - def polygons(self) -> numpy.ndarray: + def _make_polygons(self) -> numpy.ndarray: # Construct polygons from the bounds of the cells lon_bounds = self.topology.longitude_bounds.values lat_bounds = self.topology.latitude_bounds.values diff --git a/src/emsarray/conventions/ugrid.py b/src/emsarray/conventions/ugrid.py index 33acf3c..7f22d4f 100644 --- a/src/emsarray/conventions/ugrid.py +++ b/src/emsarray/conventions/ugrid.py @@ -1076,9 +1076,7 @@ def grid_kinds(self) -> frozenset[UGridKind]: items.append(UGridKind.edge) return frozenset(items) - @cached_property - @utils.timed_func - def polygons(self) -> numpy.ndarray: + def _make_polygons(self) -> numpy.ndarray: """Generate list of Polygons""" # X,Y coords of each node topology = self.topology diff --git a/src/emsarray/exceptions.py b/src/emsarray/exceptions.py index 1ae9f14..7a40cd5 100644 --- a/src/emsarray/exceptions.py +++ b/src/emsarray/exceptions.py @@ -10,13 +10,19 @@ class EmsarrayError(Exception): """ +class EmsarrayWarning(Warning): + """ + Base class for all emsarray-specific warning classes. + """ + + class ConventionViolationError(EmsarrayError): """ A dataset violates its conventions in a way that is not recoverable. """ -class ConventionViolationWarning(UserWarning): +class ConventionViolationWarning(EmsarrayWarning): """ A dataset violates its conventions in a way that we can handle. For example, an attribute has an invalid type, @@ -30,3 +36,9 @@ class NoSuchCoordinateError(KeyError, EmsarrayError): such as in :attr:`.Convention.time_coordinate` and :attr:`.Convention.depth_coordinate`. """ + + +class InvalidPolygonWarning(EmsarrayWarning): + """ + A polygon in a dataset was invalid or not simple. + """ diff --git a/src/emsarray/operations/triangulate.py b/src/emsarray/operations/triangulate.py index aeeced0..335d002 100644 --- a/src/emsarray/operations/triangulate.py +++ b/src/emsarray/operations/triangulate.py @@ -147,9 +147,6 @@ def _triangulate_polygon(polygon: Polygon) -> list[tuple[Vertex, Vertex, Vertex] :func:`triangulate_dataset`, `Polygon triangulation `_ """ - if not polygon.is_simple: - raise ValueError("_triangulate_polygon only supports simple polygons") - # The 'ear clipping' method used below is correct for all polygons, but not # performant. If the polygon is convex we can use a shortcut method. if polygon.equals(polygon.convex_hull): @@ -174,9 +171,6 @@ def _triangulate_polygon(polygon: Polygon) -> list[tuple[Vertex, Vertex, Vertex] # Most polygons will be either squares, convex quadrilaterals, or convex # polygons. - # Maintain a consistent winding order - polygon = polygon.normalize() - triangles: list[tuple[Vertex, Vertex, Vertex]] = [] # Note that shapely polygons with n vertices will be closed, and thus have # n+1 coordinates. We trim that superfluous coordinate off in the next line diff --git a/tests/conventions/test_base.py b/tests/conventions/test_base.py index 0aabd45..28200dd 100644 --- a/tests/conventions/test_base.py +++ b/tests/conventions/test_base.py @@ -118,8 +118,7 @@ def wind( def drop_geometry(self) -> xarray.Dataset: return self.dataset - @cached_property - def polygons(self) -> numpy.ndarray: + def _make_polygons(self) -> numpy.ndarray: height, width = self.shape # Each polygon is a box from (x, y, x+1, y+1), # however the polygons around the edge are masked out with None.