Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add emsarray.utils.timed_func, set up loggers for plugins #79

Merged
merged 3 commits into from
Jun 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/releases/development.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ Next release (in development)
rather than :class:`~matplotlib.collections.PatchCollection`
for significant speed improvements
(:pr:`77`).
* Added :func:`emsarray.utils.timed_func` for easily logging some performance metrics (:pr:`79`).
6 changes: 2 additions & 4 deletions src/emsarray/cli/commands/export_geometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from emsarray.cli import BaseCommand, CommandException
from emsarray.operations import geometry
from emsarray.types import Pathish
from emsarray.utils import PerfTimer

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -70,9 +69,8 @@ def handle(self, options: argparse.Namespace) -> None:
output_format = self.guess_format(output_path)
logger.debug("Guessed output format as %r", output_format)

with PerfTimer() as t:
count = dataset.ems.polygons.size
logger.debug("Generated %d polygons in %f seconds", count, t.elapsed)
count = dataset.ems.polygons[dataset.ems.mask].size
logger.debug("Dataset contains %d polygons", count)

try:
writer = format_writers[output_format]
Expand Down
13 changes: 13 additions & 0 deletions src/emsarray/cli/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
from shapely.geometry import box, shape
from shapely.geometry.base import BaseGeometry

from emsarray.conventions._registry import entry_point_conventions

from .exceptions import CommandException

cli_logger = logging.getLogger('emsarray.cli')
Expand Down Expand Up @@ -210,6 +212,13 @@ def set_verbosity(level: int) -> None:
elif level >= 3:
level_str = 'DEBUG'

# Include logging handlers for all plugins
entry_point_convention_modules = sorted({
convention.__module__
for convention in entry_point_conventions()
if not convention.__module__.startswith('emsarray.')
})

logging.captureWarnings(True)
logging.config.dictConfig({
'version': 1,
Expand Down Expand Up @@ -239,6 +248,10 @@ def set_verbosity(level: int) -> None:
'emsarray.cli.errors': {
'handlers': ['error'], 'level': level_str, 'propagate': False
},
**{
module: {'handlers': ['console'], 'level': level_str}
for module in entry_point_convention_modules
},
'py.warnings': {'handlers': ['console'], 'level': 'WARNING'},
'__main__': {'handlers': ['console'], 'level': level_str},
'shapely': {'handlers': [], 'level': 60},
Expand Down
18 changes: 8 additions & 10 deletions src/emsarray/conventions/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -746,6 +746,7 @@ def animate_on_figure(
return animate_on_figure(figure, self, coordinate=coordinate, **kwargs)

@_requires_plot
@utils.timed_func
def make_poly_collection(
self,
data_array: Optional[DataArrayOrName] = None,
Expand Down Expand Up @@ -941,6 +942,7 @@ def mask(self) -> np.ndarray:
return cast(np.ndarray, mask)

@cached_property
@utils.timed_func
def spatial_index(self) -> SpatialIndex[SpatialIndexItem[Index]]:
"""
A shapely :class:`strtree.STRtree` spatial index of all cells in this dataset.
Expand Down Expand Up @@ -970,16 +972,12 @@ def spatial_index(self) -> SpatialIndex[SpatialIndexItem[Index]]:
--------
:class:`.SpatialIndexItem`
"""
logger.info("Building spatial index...")
with utils.PerfTimer() as timer:
items = [
(poly, SpatialIndexItem(index, self.unravel_index(index), poly))
for index, poly in enumerate(self.polygons)
if poly is not None
]
spatial_index = SpatialIndex(items)
logger.debug("Build spatial index in %f seconds", timer.elapsed)
return spatial_index
items = [
(poly, SpatialIndexItem(index, self.unravel_index(index), poly))
for index, poly in enumerate(self.polygons)
if poly is not None
]
return SpatialIndex(items)

def get_index_for_point(
self,
Expand Down
1 change: 1 addition & 0 deletions src/emsarray/conventions/arakawa_c.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,7 @@ def get_grid_kind_and_size(self, data_array: xr.DataArray) -> Tuple[ArakawaCGrid
raise ValueError("Data array did not match any known grids")

@cached_property
@utils.timed_func
def polygons(self) -> np.ndarray:
# Make an array of shape (j, i, 2) of all the nodes
grid = np.stack([self.node.longitude.values, self.node.latitude.values], axis=-1)
Expand Down
2 changes: 2 additions & 0 deletions src/emsarray/conventions/grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,7 @@ def check_dataset(cls, dataset: xr.Dataset) -> Optional[int]:
return Specificity.LOW

@cached_property
@utils.timed_func
def polygons(self) -> np.ndarray:
lon_bounds = self.topology.longitude_bounds.values
lat_bounds = self.topology.latitude_bounds.values
Expand Down Expand Up @@ -536,6 +537,7 @@ def check_dataset(cls, dataset: xr.Dataset) -> Optional[int]:
return Specificity.LOW

@cached_property
@utils.timed_func
def polygons(self) -> np.ndarray:
# Construct polygons from the bounds of the cells
lon_bounds = self.topology.longitude_bounds.values
Expand Down
139 changes: 64 additions & 75 deletions src/emsarray/conventions/ugrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -579,26 +579,25 @@ def edge_node_array(self) -> np.ndarray:
return self._to_index_array(
self.edge_node_connectivity, self.edge_dimension)

logger.info("Building edge_node_array")
with utils.PerfTimer() as timer:
# Each edge is composed of two nodes. Each edge may be named twice,
# once for each face. To de-duplicate this, edges are built up using
# this dict-of-sets, where the dict index is the node with the
# lower index, and the set is the node indices of the other end.
low_highs: Dict[int, Set[int]] = defaultdict(set)

for face_index, node_pairs in self._face_and_node_pair_iter():
for pair in node_pairs:
low, high = sorted(pair)
low_highs[low].add(high)
edge_node = np.array([
[low, high]
for low, highs in low_highs.items()
for high in highs
], dtype=self.sensible_dtype)
logger.debug("Built edge_node_array in %f seconds", timer.elapsed)

return edge_node
return self.make_edge_node_array()

@utils.timed_func
def make_edge_node_array(self) -> np.ndarray:
# Each edge is composed of two nodes. Each edge may be named twice,
# once for each face. To de-duplicate this, edges are built up using
# this dict-of-sets, where the dict index is the node with the
# lower index, and the set is the node indices of the other end.
low_highs: Dict[int, Set[int]] = defaultdict(set)

for face_index, node_pairs in self._face_and_node_pair_iter():
for pair in node_pairs:
low, high = sorted(pair)
low_highs[low].add(high)
return np.array([
[low, high]
for low, highs in low_highs.items()
for high in highs
], dtype=self.sensible_dtype)

@cached_property
def has_valid_edge_face_connectivity(self) -> bool:
Expand Down Expand Up @@ -650,26 +649,22 @@ def edge_face_array(self) -> np.ndarray:
return self._to_index_array(
self.edge_face_connectivity, self.edge_dimension)

# Access these outside of the timer below
fill_value = self.sensible_fill_value
face_edge = self.face_edge_array
return self.make_edge_face_array()

# Build an edge_face_connectivity matrix
logger.info("Building edge_face_array")
with utils.PerfTimer() as timer:
# The edge_face connectivity matrix
shape = (self.edge_count, 2)
filled = np.full(shape, fill_value, dtype=self.sensible_dtype)
edge_face: np.ndarray = np.ma.masked_array(filled, mask=True)
@utils.timed_func
def make_edge_face_array(self) -> np.ndarray:
# The edge_face connectivity matrix
shape = (self.edge_count, 2)
filled = np.full(shape, self.sensible_fill_value, dtype=self.sensible_dtype)
edge_face: np.ndarray = np.ma.masked_array(filled, mask=True)

# The number of faces already seen for this edge
edge_face_count = np.zeros(self.edge_count, dtype=self.sensible_dtype)
# The number of faces already seen for this edge
edge_face_count = np.zeros(self.edge_count, dtype=self.sensible_dtype)

for face_index, edge_indices in enumerate(face_edge):
for edge_index in edge_indices.compressed():
edge_face[edge_index, edge_face_count[edge_index]] = face_index
edge_face_count[edge_index] += 1
logger.debug("Built edge_face_array in %f seconds", timer.elapsed)
for face_index, edge_indices in enumerate(self.face_edge_array):
for edge_index in edge_indices.compressed():
edge_face[edge_index, edge_face_count[edge_index]] = face_index
edge_face_count[edge_index] += 1

return edge_face

Expand Down Expand Up @@ -763,27 +758,24 @@ def face_edge_array(self) -> np.ndarray:
return self._to_index_array(
self.face_edge_connectivity, self.face_dimension)

# Access these outside of the timer below
fill_value = self.sensible_fill_value
edge_node = self.edge_node_array
return self.make_face_edge_array()

@utils.timed_func
def make_face_edge_array(self) -> np.ndarray:
# Build a face_edge_connectivity matrix
logger.info("Building face_edge_array")
with utils.PerfTimer() as timer:
shape = (self.face_count, self.max_node_count)
filled = np.full(shape, fill_value, dtype=self.sensible_dtype)
face_edge: np.ndarray = np.ma.masked_array(filled, mask=True)

node_pair_to_edge_index = {
frozenset(edge): edge_index
for edge_index, edge in enumerate(edge_node)
}

for face_index, node_pairs in self._face_and_node_pair_iter():
for column, node_pair in enumerate(node_pairs):
edge_index = node_pair_to_edge_index[frozenset(node_pair)]
face_edge[face_index, column] = edge_index
logger.debug("Built face_edge_array in %f seconds", timer.elapsed)
shape = (self.face_count, self.max_node_count)
filled = np.full(shape, self.sensible_fill_value, dtype=self.sensible_dtype)
face_edge: np.ndarray = np.ma.masked_array(filled, mask=True)

node_pair_to_edge_index = {
frozenset(edge): edge_index
for edge_index, edge in enumerate(self.edge_node_array)
}

for face_index, node_pairs in self._face_and_node_pair_iter():
for column, node_pair in enumerate(node_pairs):
edge_index = node_pair_to_edge_index[frozenset(node_pair)]
face_edge[face_index, column] = edge_index

return face_edge

Expand Down Expand Up @@ -834,27 +826,23 @@ def face_face_array(self) -> np.ndarray:
return self._to_index_array(
self.face_face_connectivity, self.face_dimension)

# Access these outside of the timer below
fill_value = self.sensible_fill_value
edge_face = self.edge_face_array
return self.make_face_face_array()

def make_face_face_array(self) -> np.ndarray:
# Build a face_face_connectivity matrix
logger.info("Building face_face_array")
with utils.PerfTimer() as timer:
face_count = np.zeros(self.face_count, dtype=self.sensible_dtype)
shape = (self.face_count, self.max_node_count)
filled = np.full(shape, fill_value, dtype=self.sensible_dtype)
face_face: np.ndarray = np.ma.masked_array(filled, mask=True)

for edge_index, face_indices in enumerate(edge_face):
if np.any(np.ma.getmask(face_indices)):
continue
left, right = face_indices
face_face[left, face_count[left]] = right
face_face[right, face_count[right]] = left
face_count[left] += 1
face_count[right] += 1
logger.debug("Built face_face_array in %f seconds", timer.elapsed)
face_count = np.zeros(self.face_count, dtype=self.sensible_dtype)
shape = (self.face_count, self.max_node_count)
filled = np.full(shape, self.sensible_fill_value, dtype=self.sensible_dtype)
face_face: np.ndarray = np.ma.masked_array(filled, mask=True)

for edge_index, face_indices in enumerate(self.edge_face_array):
if np.any(np.ma.getmask(face_indices)):
continue
left, right = face_indices
face_face[left, face_count[left]] = right
face_face[right, face_count[right]] = left
face_count[left] += 1
face_count[right] += 1

return face_face

Expand Down Expand Up @@ -1083,6 +1071,7 @@ def get_grid_kind_and_size(
raise ValueError("Data array did not have any face, edge, or node dimension")

@cached_property
@utils.timed_func
def polygons(self) -> np.ndarray:
"""Generate list of Polygons"""
# X,Y coords of each node
Expand Down
35 changes: 35 additions & 0 deletions src/emsarray/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,41 @@ def elapsed(self) -> float:
return self._stop - self._start


def timed_func(fn: Callable[..., _T]) -> Callable[..., _T]:
"""
Log the execution time of the decorated function.
Logs "Calling ``<func.__qualname__>``" before the wrapped function is called,
and "Completed ``<func.__qualname__>`` in ``<time>``s" after.
The name of the logger is taken from ``func.__module__``.

Example
-------

.. code-block:: python

class Grass(Convention):
@cached_property
@timed_func
def polygons(self):
return ...

When called, this will log something like::

DEBUG Calling Grass.polygons
DEBUG Completed Grass.polygons in 3.14s
"""
fn_logger = logging.getLogger(fn.__module__)

@functools.wraps(fn)
def wrapper(*args: Any, **kwargs: Any) -> _T:
fn_logger.debug("Calling %s", fn.__qualname__)
with PerfTimer() as timer:
value = fn(*args, **kwargs)
fn_logger.debug("Completed %s in %fs", fn.__qualname__, timer.elapsed)
return value
return wrapper


def to_netcdf_with_fixes(
dataset: xr.Dataset,
path: Pathish,
Expand Down