From 7fca2517ac9db7cad11b4e1729fadd10d3bbf35e Mon Sep 17 00:00:00 2001 From: Tim Heap Date: Tue, 10 Jan 2023 10:51:23 +1100 Subject: [PATCH 1/4] Bump minimum Shapely version to 2.0.0 Shapely 2.0.0 gains the `shapely.polygons` function which can create a large number of polygons at speed. This is perfect for making dataset geometries. --- continuous-integration/requirements.txt | 5 +++-- setup.cfg | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/continuous-integration/requirements.txt b/continuous-integration/requirements.txt index 64342cf..28f8e0e 100644 --- a/continuous-integration/requirements.txt +++ b/continuous-integration/requirements.txt @@ -18,7 +18,7 @@ bokeh==2.4.3 # via dask bottleneck==1.3.5 # via emsarray (setup.cfg) -cartopy==0.21.0 +cartopy==0.21.1 # via emsarray (setup.cfg) certifi==2022.9.24 # via @@ -119,6 +119,7 @@ numpy==1.23.3 # netcdf4 # pandas # pykdtree + # shapely # xarray packaging==21.3 # via @@ -203,7 +204,7 @@ requests==2.28.1 # via # pooch # sphinx -shapely==1.8.4 +shapely==2.0.0 # via # cartopy # emsarray (setup.cfg) diff --git a/setup.cfg b/setup.cfg index 639a337..22f2a51 100644 --- a/setup.cfg +++ b/setup.cfg @@ -28,7 +28,7 @@ install_requires = netcdf4 >=1.5.3 numpy >=1.18.0 packaging >=21.3 - shapely >=1.8.0 + shapely >=2.0.0 pyshp >=2.3.0 xarray[parallel] >=0.18.2 @@ -38,7 +38,7 @@ include = emsarray* [options.extras_require] plot = - cartopy >=0.20.0 + cartopy >=0.21.1 matplotlib >=3.4.3 pykdtree >=1.2.2 From 310832c838990915d0ea157db2b8b4f2f0c6e103 Mon Sep 17 00:00:00 2001 From: Tim Heap Date: Tue, 10 Jan 2023 10:55:47 +1100 Subject: [PATCH 2/4] Arguments to `box` must be numbers, not Decimal Changes to Shapely 2.0.0 mean the arguments to `shapely.geometry.box` can no longer be `decimal.Decimal`. They must be numpy-compatible scalar number types. --- src/emsarray/cli/utils.py | 6 +++--- tests/cli/test_utils.py | 5 ++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/emsarray/cli/utils.py b/src/emsarray/cli/utils.py index c6e6858..4c297c5 100644 --- a/src/emsarray/cli/utils.py +++ b/src/emsarray/cli/utils.py @@ -287,8 +287,8 @@ def geometry_argument(argument_string: str) -> BaseGeometry: bounds_match = bounds_re.match(argument_string) if bounds_match is not None: try: - return box(*map(decimal.Decimal, bounds_match.groups())) - except (decimal.DecimalException, ValueError): + return box(*map(float, bounds_match.groups())) + except ValueError: pass try: @@ -340,7 +340,7 @@ def bounds_argument(bounds_string: str) -> BaseGeometry: match = bounds_re.match(bounds_string) if match is not None: try: - return box(*map(decimal.Decimal, match.groups())) + return box(*map(float, match.groups())) except decimal.DecimalException: pass raise argparse.ArgumentTypeError("Expecting four comma separated numbers for bounds") diff --git a/tests/cli/test_utils.py b/tests/cli/test_utils.py index b6ceaf8..bdc2f21 100644 --- a/tests/cli/test_utils.py +++ b/tests/cli/test_utils.py @@ -2,7 +2,6 @@ import argparse import json -from decimal import Decimal from pathlib import Path from typing import Any, List @@ -79,7 +78,7 @@ def test_add_verbosity_group(args: List[str], expected: int) -> None: def test_bounds_argument() -> None: - expected = box(Decimal('1.5'), Decimal('.2'), Decimal('3.'), Decimal('4')) + expected = box(1.5, .2, 3., 4) actual = utils.bounds_argument("1.5 , .2 , 3.,4") assert actual.equals(expected) @@ -91,7 +90,7 @@ def test_bounds_argument_invalid() -> None: def test_geometry_argument_bounds() -> None: - expected = box(Decimal('1.5'), Decimal('.2'), Decimal('3.'), Decimal('4')) + expected = box(1.5, .2, 3., 4) actual = utils.geometry_argument("1.5 , .2 , 3.,4") assert actual.equals(expected) From 6cb9d80915db4abfe401847b52787ab277ed4651 Mon Sep 17 00:00:00 2001 From: Tim Heap Date: Tue, 10 Jan 2023 10:58:57 +1100 Subject: [PATCH 3/4] Use `shapely.polygons` when constructing polygons This interface is drastically quicker than constructing polygons one-by-one. This has reduced the time taken to construct the `gbr4` polygons from ~6 seconds to ~0.1 seconds. Similar reductions apply to other datasets also. --- src/emsarray/conventions/arakawa_c.py | 46 ++++++-------- src/emsarray/conventions/grid.py | 88 +++++++++++++++------------ src/emsarray/conventions/ugrid.py | 18 ++++-- 3 files changed, 81 insertions(+), 71 deletions(-) diff --git a/src/emsarray/conventions/arakawa_c.py b/src/emsarray/conventions/arakawa_c.py index 3287cc6..379a141 100644 --- a/src/emsarray/conventions/arakawa_c.py +++ b/src/emsarray/conventions/arakawa_c.py @@ -14,9 +14,9 @@ from typing import Dict, Hashable, Optional, Tuple, cast import numpy as np +import shapely import xarray as xr from shapely.geometry.base import BaseGeometry -from shapely.geometry.polygon import Polygon, orient from xarray.core.dataset import DatasetCoordinates from emsarray import masking, utils @@ -285,33 +285,23 @@ def polygons(self) -> np.ndarray: x_node = self.node.longitude.values y_node = self.node.latitude.values - def cell(index: int) -> Polygon: - """ - Construct a single Polygon cell based on the 4 node points - surrounding given cell centre - """ - # 1D plus unravel index seems to be 10x faster than 2D indices - # or can also try and use the mask approach further below - but this - # seems to be fast enough - (_face, j, i) = self.unravel_index(index) - v1 = x_node[j, i], y_node[j, i] - v2 = x_node[j + 1, i], y_node[j + 1, i] - v3 = x_node[j + 1, i + 1], y_node[j + 1, i + 1] - v4 = x_node[j, i + 1], y_node[j, i + 1] - - points = [v1, v2, v3, v4, v1] - # Can't construct polygons if we don't have all the points - if np.isnan(points).any(): - return None - # There is no guarantee that the x or y dimensions are oriented in - # any particular direction, so the winding order of the polygon is - # not guaranteed. `orient` will fix this for us so we don't have to - # think about it. - return orient(Polygon(points)) - - # Make a polygon for each wet cell - polygons = list(map(cell, range(self.face.size))) - return np.array(polygons, dtype=object) + # Transform these in to point pairs + grid = np.stack([x_node, y_node], axis=-1) + # Make a new array of shape (topology.size, 4, 2) + rows = np.stack([ + grid[:-1, :-1], + grid[:-1, +1:], + grid[+1:, +1:], + grid[+1:, :-1], + ], axis=2).reshape((-1, 4, 2)) + + polygons = np.full(self.face.size, None, dtype=np.object_) + complete_row_indices = np.flatnonzero(np.isfinite(rows).all(axis=(1, 2))) + shapely.polygons( + rows[complete_row_indices], + indices=complete_row_indices, + out=polygons) + return polygons @cached_property def face_centres(self) -> np.ndarray: diff --git a/src/emsarray/conventions/grid.py b/src/emsarray/conventions/grid.py index 868f23d..bbc6a8a 100644 --- a/src/emsarray/conventions/grid.py +++ b/src/emsarray/conventions/grid.py @@ -7,7 +7,6 @@ import abc import enum import itertools -import logging import warnings from contextlib import suppress from functools import cached_property @@ -16,18 +15,15 @@ ) import numpy as np +import shapely import xarray as xr -from shapely.geometry import box from shapely.geometry.base import BaseGeometry -from shapely.geometry.polygon import Polygon, orient from emsarray import masking, utils from emsarray.types import Pathish from ._base import Convention, Specificity -logger = logging.getLogger(__name__) - class CFGridKind(str, enum.Enum): """""" @@ -360,18 +356,38 @@ def check_dataset(cls, dataset: xr.Dataset) -> Optional[int]: @cached_property def polygons(self) -> np.ndarray: - # Keep these as 2D so that we can easily map centre->grid indices - longitude_bounds = self.topology.longitude_bounds.values.tolist() - latitude_bounds = self.topology.latitude_bounds.values.tolist() - - def cell(index: int) -> Polygon: - y, x = self.unravel_index(index) - lon_min, lon_max = sorted(longitude_bounds[x]) - lat_min, lat_max = sorted(latitude_bounds[y]) - return box(lon_min, lat_min, lon_max, lat_max) - - # Make a polygon for each wet cell - return np.array([cell(index) for index in range(self.topology.size)], dtype=object) + lon_bounds = self.topology.longitude_bounds.values + lat_bounds = self.topology.latitude_bounds.values + if lon_bounds[0, 0] > lon_bounds[0, 1]: + lon_bounds = lon_bounds[:, ::-1] + if lat_bounds[0, 0] > lat_bounds[0, 1]: + lat_bounds = lat_bounds[:, ::-1] + + # For bounds arrays, + # b[i, 1] == b[i + 1, 0]) + # we can get the corners of each cell using this + lons = np.concatenate([lon_bounds[:, 0], lon_bounds[-1:, 1]]) + lats = np.concatenate([lat_bounds[:, 0], lat_bounds[-1:, 1]]) + # Transform these in to point pairs + grid = np.stack(np.meshgrid(lons, lats), axis=-1) + # Make a new array of shape (topology.size, 4, 2) + rows = np.stack([ + grid[:-1, +1:], + grid[+1:, +1:], + grid[+1:, :-1], + grid[:-1, :-1], + ], axis=2).reshape((-1, 4, 2)) + + polygons = np.full(self.topology.size, None, dtype=np.object_) + # There might be missing polygons, represented as nans. + # Shapely will throw an error on these. + # Find all the complete rows and only construct polygons for them. + complete_row_indices = np.flatnonzero(np.isfinite(rows).all(axis=(1, 2))) + shapely.polygons( + rows[complete_row_indices], + indices=complete_row_indices, + out=polygons) + return polygons @cached_property def face_centres(self) -> np.ndarray: @@ -466,27 +482,23 @@ def polygons(self) -> np.ndarray: for pad in itertools.product([(1, 0), (0, 1)], [(1, 0), (0, 1)]) ], axis=0) - shape = self.topology.shape - - def cell(index: int) -> Optional[Polygon]: - (j, i) = np.unravel_index(index, shape) - v1 = x_grid[j, i], y_grid[j, i] - v2 = x_grid[j, i + 1], y_grid[j, i + 1] - v3 = x_grid[j + 1, i + 1], y_grid[j + 1, i + 1] - v4 = x_grid[j + 1, i], y_grid[j + 1, i] - points = [v1, v2, v3, v4, v1] - # Can't construct polygons if we don't have all the points - if np.isnan(points).any(): - return None - # There is no guarantee that the x or y dimensions are oriented in - # any particular direction, so the winding order of the polygon is - # not guaranteed. `orient` will fix this for us so we don't have to - # think about it. - return orient(Polygon(points)) - - # Make a polygon for each wet cell - polygons = list(map(cell, range(self.topology.size))) - return np.array(polygons, dtype=object) + # Transform these in to point pairs + grid = np.stack([x_grid, y_grid], axis=-1) + # Make a new array of shape (topology.size, 4, 2) + rows = np.stack([ + grid[:-1, :-1], + grid[:-1, +1:], + grid[+1:, +1:], + grid[+1:, :-1], + ], axis=2).reshape((-1, 4, 2)) + + polygons = np.full(self.topology.size, None, dtype=np.object_) + complete_row_indices = np.flatnonzero(np.isfinite(rows).all(axis=(1, 2))) + shapely.polygons( + rows[complete_row_indices], + indices=complete_row_indices, + out=polygons) + return polygons @cached_property def face_centres(self) -> np.ndarray: diff --git a/src/emsarray/conventions/ugrid.py b/src/emsarray/conventions/ugrid.py index 9fcf48e..7d392a6 100644 --- a/src/emsarray/conventions/ugrid.py +++ b/src/emsarray/conventions/ugrid.py @@ -16,12 +16,13 @@ from dataclasses import dataclass from functools import cached_property from typing import ( - Any, Dict, FrozenSet, Hashable, Iterable, List, Optional, Set, Tuple, cast + Any, Dict, FrozenSet, Hashable, Iterable, List, Mapping, Optional, Set, + Tuple, cast ) import numpy as np +import shapely import xarray as xr -from shapely.geometry import Polygon from shapely.geometry.base import BaseGeometry from emsarray import utils @@ -1089,11 +1090,18 @@ def polygons(self) -> np.ndarray: node_y = topology.node_y.values face_node = topology.face_node_array - def create_poly(row: np.ma.MaskedArray) -> Polygon: + polygons = np.full(topology.face_count, None, dtype=np.object_) + polygons_of_size: Mapping[int, Dict[int, np.ndarray]] = defaultdict(dict) + + for index, row in enumerate(face_node): vertices = row.compressed() - return Polygon(np.c_[node_x[vertices], node_y[vertices]]) + polygons_of_size[vertices.size][index] = np.c_[node_x[vertices], node_y[vertices]] + + for size, size_polygons in polygons_of_size.items(): + coords = np.stack(list(size_polygons.values())) + shapely.polygons(coords, indices=list(size_polygons.keys()), out=polygons) - return np.array([create_poly(row) for row in face_node], dtype=object) + return polygons @cached_property def face_centres(self) -> np.ndarray: From c877caec28ddc0a1f2429a8aa24fc5ee69ce29c5 Mon Sep 17 00:00:00 2001 From: Tim Heap Date: Tue, 10 Jan 2023 11:01:16 +1100 Subject: [PATCH 4/4] Misc numpy array construction improvements --- src/emsarray/conventions/_base.py | 4 +++- tests/conventions/test_base.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/emsarray/conventions/_base.py b/src/emsarray/conventions/_base.py index 45ae10b..91a01c0 100644 --- a/src/emsarray/conventions/_base.py +++ b/src/emsarray/conventions/_base.py @@ -923,7 +923,9 @@ def mask(self) -> np.ndarray: -------- :meth:`Convention.make_linear` """ - mask = np.fromiter((p is not None for p in self.polygons), dtype=bool) + mask = np.fromiter( + (p is not None for p in self.polygons), + dtype=bool, count=self.polygons.size) return cast(np.ndarray, mask) @cached_property diff --git a/tests/conventions/test_base.py b/tests/conventions/test_base.py index d3cd70b..7699bc0 100644 --- a/tests/conventions/test_base.py +++ b/tests/conventions/test_base.py @@ -89,7 +89,7 @@ def polygons(self) -> np.ndarray: else None for y in range(self.shape[0]) for x in range(self.shape[1]) - ], dtype=object) + ], dtype=np.object_) def make_clip_mask( self,