Skip to content

Commit

Permalink
Merge pull request #79 from csiro-coasts/timed_func
Browse files Browse the repository at this point in the history
Add `emsarray.utils.timed_func`, set up loggers for plugins
  • Loading branch information
mx-moth authored Jun 26, 2023
2 parents 59c2cc6 + d359fac commit d4b1bbf
Show file tree
Hide file tree
Showing 8 changed files with 126 additions and 89 deletions.
1 change: 1 addition & 0 deletions docs/releases/development.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ Next release (in development)
rather than :class:`~matplotlib.collections.PatchCollection`
for significant speed improvements
(:pr:`77`).
* Added :func:`emsarray.utils.timed_func` for easily logging some performance metrics (:pr:`79`).
6 changes: 2 additions & 4 deletions src/emsarray/cli/commands/export_geometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from emsarray.cli import BaseCommand, CommandException
from emsarray.operations import geometry
from emsarray.types import Pathish
from emsarray.utils import PerfTimer

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -70,9 +69,8 @@ def handle(self, options: argparse.Namespace) -> None:
output_format = self.guess_format(output_path)
logger.debug("Guessed output format as %r", output_format)

with PerfTimer() as t:
count = dataset.ems.polygons.size
logger.debug("Generated %d polygons in %f seconds", count, t.elapsed)
count = dataset.ems.polygons[dataset.ems.mask].size
logger.debug("Dataset contains %d polygons", count)

try:
writer = format_writers[output_format]
Expand Down
13 changes: 13 additions & 0 deletions src/emsarray/cli/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
from shapely.geometry import box, shape
from shapely.geometry.base import BaseGeometry

from emsarray.conventions._registry import entry_point_conventions

from .exceptions import CommandException

cli_logger = logging.getLogger('emsarray.cli')
Expand Down Expand Up @@ -210,6 +212,13 @@ def set_verbosity(level: int) -> None:
elif level >= 3:
level_str = 'DEBUG'

# Include logging handlers for all plugins
entry_point_convention_modules = sorted({
convention.__module__
for convention in entry_point_conventions()
if not convention.__module__.startswith('emsarray.')
})

logging.captureWarnings(True)
logging.config.dictConfig({
'version': 1,
Expand Down Expand Up @@ -239,6 +248,10 @@ def set_verbosity(level: int) -> None:
'emsarray.cli.errors': {
'handlers': ['error'], 'level': level_str, 'propagate': False
},
**{
module: {'handlers': ['console'], 'level': level_str}
for module in entry_point_convention_modules
},
'py.warnings': {'handlers': ['console'], 'level': 'WARNING'},
'__main__': {'handlers': ['console'], 'level': level_str},
'shapely': {'handlers': [], 'level': 60},
Expand Down
18 changes: 8 additions & 10 deletions src/emsarray/conventions/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -746,6 +746,7 @@ def animate_on_figure(
return animate_on_figure(figure, self, coordinate=coordinate, **kwargs)

@_requires_plot
@utils.timed_func
def make_poly_collection(
self,
data_array: Optional[DataArrayOrName] = None,
Expand Down Expand Up @@ -941,6 +942,7 @@ def mask(self) -> np.ndarray:
return cast(np.ndarray, mask)

@cached_property
@utils.timed_func
def spatial_index(self) -> SpatialIndex[SpatialIndexItem[Index]]:
"""
A shapely :class:`strtree.STRtree` spatial index of all cells in this dataset.
Expand Down Expand Up @@ -970,16 +972,12 @@ def spatial_index(self) -> SpatialIndex[SpatialIndexItem[Index]]:
--------
:class:`.SpatialIndexItem`
"""
logger.info("Building spatial index...")
with utils.PerfTimer() as timer:
items = [
(poly, SpatialIndexItem(index, self.unravel_index(index), poly))
for index, poly in enumerate(self.polygons)
if poly is not None
]
spatial_index = SpatialIndex(items)
logger.debug("Build spatial index in %f seconds", timer.elapsed)
return spatial_index
items = [
(poly, SpatialIndexItem(index, self.unravel_index(index), poly))
for index, poly in enumerate(self.polygons)
if poly is not None
]
return SpatialIndex(items)

def get_index_for_point(
self,
Expand Down
1 change: 1 addition & 0 deletions src/emsarray/conventions/arakawa_c.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,7 @@ def get_grid_kind_and_size(self, data_array: xr.DataArray) -> Tuple[ArakawaCGrid
raise ValueError("Data array did not match any known grids")

@cached_property
@utils.timed_func
def polygons(self) -> np.ndarray:
# Make an array of shape (j, i, 2) of all the nodes
grid = np.stack([self.node.longitude.values, self.node.latitude.values], axis=-1)
Expand Down
2 changes: 2 additions & 0 deletions src/emsarray/conventions/grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,7 @@ def check_dataset(cls, dataset: xr.Dataset) -> Optional[int]:
return Specificity.LOW

@cached_property
@utils.timed_func
def polygons(self) -> np.ndarray:
lon_bounds = self.topology.longitude_bounds.values
lat_bounds = self.topology.latitude_bounds.values
Expand Down Expand Up @@ -536,6 +537,7 @@ def check_dataset(cls, dataset: xr.Dataset) -> Optional[int]:
return Specificity.LOW

@cached_property
@utils.timed_func
def polygons(self) -> np.ndarray:
# Construct polygons from the bounds of the cells
lon_bounds = self.topology.longitude_bounds.values
Expand Down
139 changes: 64 additions & 75 deletions src/emsarray/conventions/ugrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -579,26 +579,25 @@ def edge_node_array(self) -> np.ndarray:
return self._to_index_array(
self.edge_node_connectivity, self.edge_dimension)

logger.info("Building edge_node_array")
with utils.PerfTimer() as timer:
# Each edge is composed of two nodes. Each edge may be named twice,
# once for each face. To de-duplicate this, edges are built up using
# this dict-of-sets, where the dict index is the node with the
# lower index, and the set is the node indices of the other end.
low_highs: Dict[int, Set[int]] = defaultdict(set)

for face_index, node_pairs in self._face_and_node_pair_iter():
for pair in node_pairs:
low, high = sorted(pair)
low_highs[low].add(high)
edge_node = np.array([
[low, high]
for low, highs in low_highs.items()
for high in highs
], dtype=self.sensible_dtype)
logger.debug("Built edge_node_array in %f seconds", timer.elapsed)

return edge_node
return self.make_edge_node_array()

@utils.timed_func
def make_edge_node_array(self) -> np.ndarray:
# Each edge is composed of two nodes. Each edge may be named twice,
# once for each face. To de-duplicate this, edges are built up using
# this dict-of-sets, where the dict index is the node with the
# lower index, and the set is the node indices of the other end.
low_highs: Dict[int, Set[int]] = defaultdict(set)

for face_index, node_pairs in self._face_and_node_pair_iter():
for pair in node_pairs:
low, high = sorted(pair)
low_highs[low].add(high)
return np.array([
[low, high]
for low, highs in low_highs.items()
for high in highs
], dtype=self.sensible_dtype)

@cached_property
def has_valid_edge_face_connectivity(self) -> bool:
Expand Down Expand Up @@ -650,26 +649,22 @@ def edge_face_array(self) -> np.ndarray:
return self._to_index_array(
self.edge_face_connectivity, self.edge_dimension)

# Access these outside of the timer below
fill_value = self.sensible_fill_value
face_edge = self.face_edge_array
return self.make_edge_face_array()

# Build an edge_face_connectivity matrix
logger.info("Building edge_face_array")
with utils.PerfTimer() as timer:
# The edge_face connectivity matrix
shape = (self.edge_count, 2)
filled = np.full(shape, fill_value, dtype=self.sensible_dtype)
edge_face: np.ndarray = np.ma.masked_array(filled, mask=True)
@utils.timed_func
def make_edge_face_array(self) -> np.ndarray:
# The edge_face connectivity matrix
shape = (self.edge_count, 2)
filled = np.full(shape, self.sensible_fill_value, dtype=self.sensible_dtype)
edge_face: np.ndarray = np.ma.masked_array(filled, mask=True)

# The number of faces already seen for this edge
edge_face_count = np.zeros(self.edge_count, dtype=self.sensible_dtype)
# The number of faces already seen for this edge
edge_face_count = np.zeros(self.edge_count, dtype=self.sensible_dtype)

for face_index, edge_indices in enumerate(face_edge):
for edge_index in edge_indices.compressed():
edge_face[edge_index, edge_face_count[edge_index]] = face_index
edge_face_count[edge_index] += 1
logger.debug("Built edge_face_array in %f seconds", timer.elapsed)
for face_index, edge_indices in enumerate(self.face_edge_array):
for edge_index in edge_indices.compressed():
edge_face[edge_index, edge_face_count[edge_index]] = face_index
edge_face_count[edge_index] += 1

return edge_face

Expand Down Expand Up @@ -763,27 +758,24 @@ def face_edge_array(self) -> np.ndarray:
return self._to_index_array(
self.face_edge_connectivity, self.face_dimension)

# Access these outside of the timer below
fill_value = self.sensible_fill_value
edge_node = self.edge_node_array
return self.make_face_edge_array()

@utils.timed_func
def make_face_edge_array(self) -> np.ndarray:
# Build a face_edge_connectivity matrix
logger.info("Building face_edge_array")
with utils.PerfTimer() as timer:
shape = (self.face_count, self.max_node_count)
filled = np.full(shape, fill_value, dtype=self.sensible_dtype)
face_edge: np.ndarray = np.ma.masked_array(filled, mask=True)

node_pair_to_edge_index = {
frozenset(edge): edge_index
for edge_index, edge in enumerate(edge_node)
}

for face_index, node_pairs in self._face_and_node_pair_iter():
for column, node_pair in enumerate(node_pairs):
edge_index = node_pair_to_edge_index[frozenset(node_pair)]
face_edge[face_index, column] = edge_index
logger.debug("Built face_edge_array in %f seconds", timer.elapsed)
shape = (self.face_count, self.max_node_count)
filled = np.full(shape, self.sensible_fill_value, dtype=self.sensible_dtype)
face_edge: np.ndarray = np.ma.masked_array(filled, mask=True)

node_pair_to_edge_index = {
frozenset(edge): edge_index
for edge_index, edge in enumerate(self.edge_node_array)
}

for face_index, node_pairs in self._face_and_node_pair_iter():
for column, node_pair in enumerate(node_pairs):
edge_index = node_pair_to_edge_index[frozenset(node_pair)]
face_edge[face_index, column] = edge_index

return face_edge

Expand Down Expand Up @@ -834,27 +826,23 @@ def face_face_array(self) -> np.ndarray:
return self._to_index_array(
self.face_face_connectivity, self.face_dimension)

# Access these outside of the timer below
fill_value = self.sensible_fill_value
edge_face = self.edge_face_array
return self.make_face_face_array()

def make_face_face_array(self) -> np.ndarray:
# Build a face_face_connectivity matrix
logger.info("Building face_face_array")
with utils.PerfTimer() as timer:
face_count = np.zeros(self.face_count, dtype=self.sensible_dtype)
shape = (self.face_count, self.max_node_count)
filled = np.full(shape, fill_value, dtype=self.sensible_dtype)
face_face: np.ndarray = np.ma.masked_array(filled, mask=True)

for edge_index, face_indices in enumerate(edge_face):
if np.any(np.ma.getmask(face_indices)):
continue
left, right = face_indices
face_face[left, face_count[left]] = right
face_face[right, face_count[right]] = left
face_count[left] += 1
face_count[right] += 1
logger.debug("Built face_face_array in %f seconds", timer.elapsed)
face_count = np.zeros(self.face_count, dtype=self.sensible_dtype)
shape = (self.face_count, self.max_node_count)
filled = np.full(shape, self.sensible_fill_value, dtype=self.sensible_dtype)
face_face: np.ndarray = np.ma.masked_array(filled, mask=True)

for edge_index, face_indices in enumerate(self.edge_face_array):
if np.any(np.ma.getmask(face_indices)):
continue
left, right = face_indices
face_face[left, face_count[left]] = right
face_face[right, face_count[right]] = left
face_count[left] += 1
face_count[right] += 1

return face_face

Expand Down Expand Up @@ -1083,6 +1071,7 @@ def get_grid_kind_and_size(
raise ValueError("Data array did not have any face, edge, or node dimension")

@cached_property
@utils.timed_func
def polygons(self) -> np.ndarray:
"""Generate list of Polygons"""
# X,Y coords of each node
Expand Down
35 changes: 35 additions & 0 deletions src/emsarray/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,41 @@ def elapsed(self) -> float:
return self._stop - self._start


def timed_func(fn: Callable[..., _T]) -> Callable[..., _T]:
"""
Log the execution time of the decorated function.
Logs "Calling ``<func.__qualname__>``" before the wrapped function is called,
and "Completed ``<func.__qualname__>`` in ``<time>``s" after.
The name of the logger is taken from ``func.__module__``.
Example
-------
.. code-block:: python
class Grass(Convention):
@cached_property
@timed_func
def polygons(self):
return ...
When called, this will log something like::
DEBUG Calling Grass.polygons
DEBUG Completed Grass.polygons in 3.14s
"""
fn_logger = logging.getLogger(fn.__module__)

@functools.wraps(fn)
def wrapper(*args: Any, **kwargs: Any) -> _T:
fn_logger.debug("Calling %s", fn.__qualname__)
with PerfTimer() as timer:
value = fn(*args, **kwargs)
fn_logger.debug("Completed %s in %fs", fn.__qualname__, timer.elapsed)
return value
return wrapper


def to_netcdf_with_fixes(
dataset: xr.Dataset,
path: Pathish,
Expand Down

0 comments on commit d4b1bbf

Please sign in to comment.