Skip to content

Commit

Permalink
Merge pull request #47 from csiro-coasts/shapely-2
Browse files Browse the repository at this point in the history
Faster polygon construction using Shapely 2.0.0
  • Loading branch information
mx-moth authored Jan 10, 2023
2 parents 118be3f + c877cae commit 3d4c767
Show file tree
Hide file tree
Showing 9 changed files with 95 additions and 83 deletions.
5 changes: 3 additions & 2 deletions continuous-integration/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ bokeh==2.4.3
# via dask
bottleneck==1.3.5
# via emsarray (setup.cfg)
cartopy==0.21.0
cartopy==0.21.1
# via emsarray (setup.cfg)
certifi==2022.9.24
# via
Expand Down Expand Up @@ -119,6 +119,7 @@ numpy==1.23.3
# netcdf4
# pandas
# pykdtree
# shapely
# xarray
packaging==21.3
# via
Expand Down Expand Up @@ -203,7 +204,7 @@ requests==2.28.1
# via
# pooch
# sphinx
shapely==1.8.4
shapely==2.0.0
# via
# cartopy
# emsarray (setup.cfg)
Expand Down
4 changes: 2 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ install_requires =
netcdf4 >=1.5.3
numpy >=1.18.0
packaging >=21.3
shapely >=1.8.0
shapely >=2.0.0
pyshp >=2.3.0
xarray[parallel] >=0.18.2

Expand All @@ -38,7 +38,7 @@ include = emsarray*

[options.extras_require]
plot =
cartopy >=0.20.0
cartopy >=0.21.1
matplotlib >=3.4.3
pykdtree >=1.2.2

Expand Down
6 changes: 3 additions & 3 deletions src/emsarray/cli/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,8 +287,8 @@ def geometry_argument(argument_string: str) -> BaseGeometry:
bounds_match = bounds_re.match(argument_string)
if bounds_match is not None:
try:
return box(*map(decimal.Decimal, bounds_match.groups()))
except (decimal.DecimalException, ValueError):
return box(*map(float, bounds_match.groups()))
except ValueError:
pass

try:
Expand Down Expand Up @@ -340,7 +340,7 @@ def bounds_argument(bounds_string: str) -> BaseGeometry:
match = bounds_re.match(bounds_string)
if match is not None:
try:
return box(*map(decimal.Decimal, match.groups()))
return box(*map(float, match.groups()))
except decimal.DecimalException:
pass
raise argparse.ArgumentTypeError("Expecting four comma separated numbers for bounds")
4 changes: 3 additions & 1 deletion src/emsarray/conventions/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -923,7 +923,9 @@ def mask(self) -> np.ndarray:
--------
:meth:`Convention.make_linear`
"""
mask = np.fromiter((p is not None for p in self.polygons), dtype=bool)
mask = np.fromiter(
(p is not None for p in self.polygons),
dtype=bool, count=self.polygons.size)
return cast(np.ndarray, mask)

@cached_property
Expand Down
46 changes: 18 additions & 28 deletions src/emsarray/conventions/arakawa_c.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@
from typing import Dict, Hashable, Optional, Tuple, cast

import numpy as np
import shapely
import xarray as xr
from shapely.geometry.base import BaseGeometry
from shapely.geometry.polygon import Polygon, orient
from xarray.core.dataset import DatasetCoordinates

from emsarray import masking, utils
Expand Down Expand Up @@ -285,33 +285,23 @@ def polygons(self) -> np.ndarray:
x_node = self.node.longitude.values
y_node = self.node.latitude.values

def cell(index: int) -> Polygon:
"""
Construct a single Polygon cell based on the 4 node points
surrounding given cell centre
"""
# 1D plus unravel index seems to be 10x faster than 2D indices
# or can also try and use the mask approach further below - but this
# seems to be fast enough
(_face, j, i) = self.unravel_index(index)
v1 = x_node[j, i], y_node[j, i]
v2 = x_node[j + 1, i], y_node[j + 1, i]
v3 = x_node[j + 1, i + 1], y_node[j + 1, i + 1]
v4 = x_node[j, i + 1], y_node[j, i + 1]

points = [v1, v2, v3, v4, v1]
# Can't construct polygons if we don't have all the points
if np.isnan(points).any():
return None
# There is no guarantee that the x or y dimensions are oriented in
# any particular direction, so the winding order of the polygon is
# not guaranteed. `orient` will fix this for us so we don't have to
# think about it.
return orient(Polygon(points))

# Make a polygon for each wet cell
polygons = list(map(cell, range(self.face.size)))
return np.array(polygons, dtype=object)
# Transform these in to point pairs
grid = np.stack([x_node, y_node], axis=-1)
# Make a new array of shape (topology.size, 4, 2)
rows = np.stack([
grid[:-1, :-1],
grid[:-1, +1:],
grid[+1:, +1:],
grid[+1:, :-1],
], axis=2).reshape((-1, 4, 2))

polygons = np.full(self.face.size, None, dtype=np.object_)
complete_row_indices = np.flatnonzero(np.isfinite(rows).all(axis=(1, 2)))
shapely.polygons(
rows[complete_row_indices],
indices=complete_row_indices,
out=polygons)
return polygons

@cached_property
def face_centres(self) -> np.ndarray:
Expand Down
88 changes: 50 additions & 38 deletions src/emsarray/conventions/grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import abc
import enum
import itertools
import logging
import warnings
from contextlib import suppress
from functools import cached_property
Expand All @@ -16,18 +15,15 @@
)

import numpy as np
import shapely
import xarray as xr
from shapely.geometry import box
from shapely.geometry.base import BaseGeometry
from shapely.geometry.polygon import Polygon, orient

from emsarray import masking, utils
from emsarray.types import Pathish

from ._base import Convention, Specificity

logger = logging.getLogger(__name__)


class CFGridKind(str, enum.Enum):
""""""
Expand Down Expand Up @@ -360,18 +356,38 @@ def check_dataset(cls, dataset: xr.Dataset) -> Optional[int]:

@cached_property
def polygons(self) -> np.ndarray:
# Keep these as 2D so that we can easily map centre->grid indices
longitude_bounds = self.topology.longitude_bounds.values.tolist()
latitude_bounds = self.topology.latitude_bounds.values.tolist()

def cell(index: int) -> Polygon:
y, x = self.unravel_index(index)
lon_min, lon_max = sorted(longitude_bounds[x])
lat_min, lat_max = sorted(latitude_bounds[y])
return box(lon_min, lat_min, lon_max, lat_max)

# Make a polygon for each wet cell
return np.array([cell(index) for index in range(self.topology.size)], dtype=object)
lon_bounds = self.topology.longitude_bounds.values
lat_bounds = self.topology.latitude_bounds.values
if lon_bounds[0, 0] > lon_bounds[0, 1]:
lon_bounds = lon_bounds[:, ::-1]
if lat_bounds[0, 0] > lat_bounds[0, 1]:
lat_bounds = lat_bounds[:, ::-1]

# For bounds arrays,
# b[i, 1] == b[i + 1, 0])
# we can get the corners of each cell using this
lons = np.concatenate([lon_bounds[:, 0], lon_bounds[-1:, 1]])
lats = np.concatenate([lat_bounds[:, 0], lat_bounds[-1:, 1]])
# Transform these in to point pairs
grid = np.stack(np.meshgrid(lons, lats), axis=-1)
# Make a new array of shape (topology.size, 4, 2)
rows = np.stack([
grid[:-1, +1:],
grid[+1:, +1:],
grid[+1:, :-1],
grid[:-1, :-1],
], axis=2).reshape((-1, 4, 2))

polygons = np.full(self.topology.size, None, dtype=np.object_)
# There might be missing polygons, represented as nans.
# Shapely will throw an error on these.
# Find all the complete rows and only construct polygons for them.
complete_row_indices = np.flatnonzero(np.isfinite(rows).all(axis=(1, 2)))
shapely.polygons(
rows[complete_row_indices],
indices=complete_row_indices,
out=polygons)
return polygons

@cached_property
def face_centres(self) -> np.ndarray:
Expand Down Expand Up @@ -466,27 +482,23 @@ def polygons(self) -> np.ndarray:
for pad in itertools.product([(1, 0), (0, 1)], [(1, 0), (0, 1)])
], axis=0)

shape = self.topology.shape

def cell(index: int) -> Optional[Polygon]:
(j, i) = np.unravel_index(index, shape)
v1 = x_grid[j, i], y_grid[j, i]
v2 = x_grid[j, i + 1], y_grid[j, i + 1]
v3 = x_grid[j + 1, i + 1], y_grid[j + 1, i + 1]
v4 = x_grid[j + 1, i], y_grid[j + 1, i]
points = [v1, v2, v3, v4, v1]
# Can't construct polygons if we don't have all the points
if np.isnan(points).any():
return None
# There is no guarantee that the x or y dimensions are oriented in
# any particular direction, so the winding order of the polygon is
# not guaranteed. `orient` will fix this for us so we don't have to
# think about it.
return orient(Polygon(points))

# Make a polygon for each wet cell
polygons = list(map(cell, range(self.topology.size)))
return np.array(polygons, dtype=object)
# Transform these in to point pairs
grid = np.stack([x_grid, y_grid], axis=-1)
# Make a new array of shape (topology.size, 4, 2)
rows = np.stack([
grid[:-1, :-1],
grid[:-1, +1:],
grid[+1:, +1:],
grid[+1:, :-1],
], axis=2).reshape((-1, 4, 2))

polygons = np.full(self.topology.size, None, dtype=np.object_)
complete_row_indices = np.flatnonzero(np.isfinite(rows).all(axis=(1, 2)))
shapely.polygons(
rows[complete_row_indices],
indices=complete_row_indices,
out=polygons)
return polygons

@cached_property
def face_centres(self) -> np.ndarray:
Expand Down
18 changes: 13 additions & 5 deletions src/emsarray/conventions/ugrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,13 @@
from dataclasses import dataclass
from functools import cached_property
from typing import (
Any, Dict, FrozenSet, Hashable, Iterable, List, Optional, Set, Tuple, cast
Any, Dict, FrozenSet, Hashable, Iterable, List, Mapping, Optional, Set,
Tuple, cast
)

import numpy as np
import shapely
import xarray as xr
from shapely.geometry import Polygon
from shapely.geometry.base import BaseGeometry

from emsarray import utils
Expand Down Expand Up @@ -1089,11 +1090,18 @@ def polygons(self) -> np.ndarray:
node_y = topology.node_y.values
face_node = topology.face_node_array

def create_poly(row: np.ma.MaskedArray) -> Polygon:
polygons = np.full(topology.face_count, None, dtype=np.object_)
polygons_of_size: Mapping[int, Dict[int, np.ndarray]] = defaultdict(dict)

for index, row in enumerate(face_node):
vertices = row.compressed()
return Polygon(np.c_[node_x[vertices], node_y[vertices]])
polygons_of_size[vertices.size][index] = np.c_[node_x[vertices], node_y[vertices]]

for size, size_polygons in polygons_of_size.items():
coords = np.stack(list(size_polygons.values()))
shapely.polygons(coords, indices=list(size_polygons.keys()), out=polygons)

return np.array([create_poly(row) for row in face_node], dtype=object)
return polygons

@cached_property
def face_centres(self) -> np.ndarray:
Expand Down
5 changes: 2 additions & 3 deletions tests/cli/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import argparse
import json
from decimal import Decimal
from pathlib import Path
from typing import Any, List

Expand Down Expand Up @@ -79,7 +78,7 @@ def test_add_verbosity_group(args: List[str], expected: int) -> None:


def test_bounds_argument() -> None:
expected = box(Decimal('1.5'), Decimal('.2'), Decimal('3.'), Decimal('4'))
expected = box(1.5, .2, 3., 4)
actual = utils.bounds_argument("1.5 , .2 , 3.,4")

assert actual.equals(expected)
Expand All @@ -91,7 +90,7 @@ def test_bounds_argument_invalid() -> None:


def test_geometry_argument_bounds() -> None:
expected = box(Decimal('1.5'), Decimal('.2'), Decimal('3.'), Decimal('4'))
expected = box(1.5, .2, 3., 4)
actual = utils.geometry_argument("1.5 , .2 , 3.,4")

assert actual.equals(expected)
Expand Down
2 changes: 1 addition & 1 deletion tests/conventions/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def polygons(self) -> np.ndarray:
else None
for y in range(self.shape[0])
for x in range(self.shape[1])
], dtype=object)
], dtype=np.object_)

def make_clip_mask(
self,
Expand Down

0 comments on commit 3d4c767

Please sign in to comment.