From 79bee464c687c6bae975f602ea793e982042c3c8 Mon Sep 17 00:00:00 2001 From: Tim Heap Date: Mon, 1 May 2023 12:32:33 +1000 Subject: [PATCH 1/3] Add `emsarray.utils.timed_func` Used as a decorator, it will log the execution time of the wrapped function. --- src/emsarray/cli/commands/export_geometry.py | 6 +- src/emsarray/conventions/_base.py | 18 ++- src/emsarray/conventions/arakawa_c.py | 1 + src/emsarray/conventions/grid.py | 2 + src/emsarray/conventions/ugrid.py | 139 +++++++++---------- src/emsarray/utils.py | 19 +++ 6 files changed, 96 insertions(+), 89 deletions(-) diff --git a/src/emsarray/cli/commands/export_geometry.py b/src/emsarray/cli/commands/export_geometry.py index 831d001..d03e0f8 100644 --- a/src/emsarray/cli/commands/export_geometry.py +++ b/src/emsarray/cli/commands/export_geometry.py @@ -9,7 +9,6 @@ from emsarray.cli import BaseCommand, CommandException from emsarray.operations import geometry from emsarray.types import Pathish -from emsarray.utils import PerfTimer logger = logging.getLogger(__name__) @@ -70,9 +69,8 @@ def handle(self, options: argparse.Namespace) -> None: output_format = self.guess_format(output_path) logger.debug("Guessed output format as %r", output_format) - with PerfTimer() as t: - count = dataset.ems.polygons.size - logger.debug("Generated %d polygons in %f seconds", count, t.elapsed) + count = dataset.ems.polygons[dataset.ems.mask].size + logger.debug("Dataset contains %d polygons", count) try: writer = format_writers[output_format] diff --git a/src/emsarray/conventions/_base.py b/src/emsarray/conventions/_base.py index 0429055..276b199 100644 --- a/src/emsarray/conventions/_base.py +++ b/src/emsarray/conventions/_base.py @@ -746,6 +746,7 @@ def animate_on_figure( return animate_on_figure(figure, self, coordinate=coordinate, **kwargs) @_requires_plot + @utils.timed_func def make_poly_collection( self, data_array: Optional[DataArrayOrName] = None, @@ -941,6 +942,7 @@ def mask(self) -> np.ndarray: return cast(np.ndarray, mask) @cached_property + @utils.timed_func def spatial_index(self) -> SpatialIndex[SpatialIndexItem[Index]]: """ A shapely :class:`strtree.STRtree` spatial index of all cells in this dataset. @@ -970,16 +972,12 @@ def spatial_index(self) -> SpatialIndex[SpatialIndexItem[Index]]: -------- :class:`.SpatialIndexItem` """ - logger.info("Building spatial index...") - with utils.PerfTimer() as timer: - items = [ - (poly, SpatialIndexItem(index, self.unravel_index(index), poly)) - for index, poly in enumerate(self.polygons) - if poly is not None - ] - spatial_index = SpatialIndex(items) - logger.debug("Build spatial index in %f seconds", timer.elapsed) - return spatial_index + items = [ + (poly, SpatialIndexItem(index, self.unravel_index(index), poly)) + for index, poly in enumerate(self.polygons) + if poly is not None + ] + return SpatialIndex(items) def get_index_for_point( self, diff --git a/src/emsarray/conventions/arakawa_c.py b/src/emsarray/conventions/arakawa_c.py index b23801b..fb9fa28 100644 --- a/src/emsarray/conventions/arakawa_c.py +++ b/src/emsarray/conventions/arakawa_c.py @@ -279,6 +279,7 @@ def get_grid_kind_and_size(self, data_array: xr.DataArray) -> Tuple[ArakawaCGrid raise ValueError("Data array did not match any known grids") @cached_property + @utils.timed_func def polygons(self) -> np.ndarray: # Make an array of shape (j, i, 2) of all the nodes grid = np.stack([self.node.longitude.values, self.node.latitude.values], axis=-1) diff --git a/src/emsarray/conventions/grid.py b/src/emsarray/conventions/grid.py index 19e40b7..836f248 100644 --- a/src/emsarray/conventions/grid.py +++ b/src/emsarray/conventions/grid.py @@ -387,6 +387,7 @@ def check_dataset(cls, dataset: xr.Dataset) -> Optional[int]: return Specificity.LOW @cached_property + @utils.timed_func def polygons(self) -> np.ndarray: lon_bounds = self.topology.longitude_bounds.values lat_bounds = self.topology.latitude_bounds.values @@ -536,6 +537,7 @@ def check_dataset(cls, dataset: xr.Dataset) -> Optional[int]: return Specificity.LOW @cached_property + @utils.timed_func def polygons(self) -> np.ndarray: # Construct polygons from the bounds of the cells lon_bounds = self.topology.longitude_bounds.values diff --git a/src/emsarray/conventions/ugrid.py b/src/emsarray/conventions/ugrid.py index e1ed512..af3a332 100644 --- a/src/emsarray/conventions/ugrid.py +++ b/src/emsarray/conventions/ugrid.py @@ -579,26 +579,25 @@ def edge_node_array(self) -> np.ndarray: return self._to_index_array( self.edge_node_connectivity, self.edge_dimension) - logger.info("Building edge_node_array") - with utils.PerfTimer() as timer: - # Each edge is composed of two nodes. Each edge may be named twice, - # once for each face. To de-duplicate this, edges are built up using - # this dict-of-sets, where the dict index is the node with the - # lower index, and the set is the node indices of the other end. - low_highs: Dict[int, Set[int]] = defaultdict(set) - - for face_index, node_pairs in self._face_and_node_pair_iter(): - for pair in node_pairs: - low, high = sorted(pair) - low_highs[low].add(high) - edge_node = np.array([ - [low, high] - for low, highs in low_highs.items() - for high in highs - ], dtype=self.sensible_dtype) - logger.debug("Built edge_node_array in %f seconds", timer.elapsed) - - return edge_node + return self.make_edge_node_array() + + @utils.timed_func + def make_edge_node_array(self) -> np.ndarray: + # Each edge is composed of two nodes. Each edge may be named twice, + # once for each face. To de-duplicate this, edges are built up using + # this dict-of-sets, where the dict index is the node with the + # lower index, and the set is the node indices of the other end. + low_highs: Dict[int, Set[int]] = defaultdict(set) + + for face_index, node_pairs in self._face_and_node_pair_iter(): + for pair in node_pairs: + low, high = sorted(pair) + low_highs[low].add(high) + return np.array([ + [low, high] + for low, highs in low_highs.items() + for high in highs + ], dtype=self.sensible_dtype) @cached_property def has_valid_edge_face_connectivity(self) -> bool: @@ -650,26 +649,22 @@ def edge_face_array(self) -> np.ndarray: return self._to_index_array( self.edge_face_connectivity, self.edge_dimension) - # Access these outside of the timer below - fill_value = self.sensible_fill_value - face_edge = self.face_edge_array + return self.make_edge_face_array() - # Build an edge_face_connectivity matrix - logger.info("Building edge_face_array") - with utils.PerfTimer() as timer: - # The edge_face connectivity matrix - shape = (self.edge_count, 2) - filled = np.full(shape, fill_value, dtype=self.sensible_dtype) - edge_face: np.ndarray = np.ma.masked_array(filled, mask=True) + @utils.timed_func + def make_edge_face_array(self) -> np.ndarray: + # The edge_face connectivity matrix + shape = (self.edge_count, 2) + filled = np.full(shape, self.sensible_fill_value, dtype=self.sensible_dtype) + edge_face: np.ndarray = np.ma.masked_array(filled, mask=True) - # The number of faces already seen for this edge - edge_face_count = np.zeros(self.edge_count, dtype=self.sensible_dtype) + # The number of faces already seen for this edge + edge_face_count = np.zeros(self.edge_count, dtype=self.sensible_dtype) - for face_index, edge_indices in enumerate(face_edge): - for edge_index in edge_indices.compressed(): - edge_face[edge_index, edge_face_count[edge_index]] = face_index - edge_face_count[edge_index] += 1 - logger.debug("Built edge_face_array in %f seconds", timer.elapsed) + for face_index, edge_indices in enumerate(self.face_edge_array): + for edge_index in edge_indices.compressed(): + edge_face[edge_index, edge_face_count[edge_index]] = face_index + edge_face_count[edge_index] += 1 return edge_face @@ -763,27 +758,24 @@ def face_edge_array(self) -> np.ndarray: return self._to_index_array( self.face_edge_connectivity, self.face_dimension) - # Access these outside of the timer below - fill_value = self.sensible_fill_value - edge_node = self.edge_node_array + return self.make_face_edge_array() + @utils.timed_func + def make_face_edge_array(self) -> np.ndarray: # Build a face_edge_connectivity matrix - logger.info("Building face_edge_array") - with utils.PerfTimer() as timer: - shape = (self.face_count, self.max_node_count) - filled = np.full(shape, fill_value, dtype=self.sensible_dtype) - face_edge: np.ndarray = np.ma.masked_array(filled, mask=True) - - node_pair_to_edge_index = { - frozenset(edge): edge_index - for edge_index, edge in enumerate(edge_node) - } - - for face_index, node_pairs in self._face_and_node_pair_iter(): - for column, node_pair in enumerate(node_pairs): - edge_index = node_pair_to_edge_index[frozenset(node_pair)] - face_edge[face_index, column] = edge_index - logger.debug("Built face_edge_array in %f seconds", timer.elapsed) + shape = (self.face_count, self.max_node_count) + filled = np.full(shape, self.sensible_fill_value, dtype=self.sensible_dtype) + face_edge: np.ndarray = np.ma.masked_array(filled, mask=True) + + node_pair_to_edge_index = { + frozenset(edge): edge_index + for edge_index, edge in enumerate(self.edge_node_array) + } + + for face_index, node_pairs in self._face_and_node_pair_iter(): + for column, node_pair in enumerate(node_pairs): + edge_index = node_pair_to_edge_index[frozenset(node_pair)] + face_edge[face_index, column] = edge_index return face_edge @@ -834,27 +826,23 @@ def face_face_array(self) -> np.ndarray: return self._to_index_array( self.face_face_connectivity, self.face_dimension) - # Access these outside of the timer below - fill_value = self.sensible_fill_value - edge_face = self.edge_face_array + return self.make_face_face_array() + def make_face_face_array(self) -> np.ndarray: # Build a face_face_connectivity matrix - logger.info("Building face_face_array") - with utils.PerfTimer() as timer: - face_count = np.zeros(self.face_count, dtype=self.sensible_dtype) - shape = (self.face_count, self.max_node_count) - filled = np.full(shape, fill_value, dtype=self.sensible_dtype) - face_face: np.ndarray = np.ma.masked_array(filled, mask=True) - - for edge_index, face_indices in enumerate(edge_face): - if np.any(np.ma.getmask(face_indices)): - continue - left, right = face_indices - face_face[left, face_count[left]] = right - face_face[right, face_count[right]] = left - face_count[left] += 1 - face_count[right] += 1 - logger.debug("Built face_face_array in %f seconds", timer.elapsed) + face_count = np.zeros(self.face_count, dtype=self.sensible_dtype) + shape = (self.face_count, self.max_node_count) + filled = np.full(shape, self.sensible_fill_value, dtype=self.sensible_dtype) + face_face: np.ndarray = np.ma.masked_array(filled, mask=True) + + for edge_index, face_indices in enumerate(self.edge_face_array): + if np.any(np.ma.getmask(face_indices)): + continue + left, right = face_indices + face_face[left, face_count[left]] = right + face_face[right, face_count[right]] = left + face_count[left] += 1 + face_count[right] += 1 return face_face @@ -1083,6 +1071,7 @@ def get_grid_kind_and_size( raise ValueError("Data array did not have any face, edge, or node dimension") @cached_property + @utils.timed_func def polygons(self) -> np.ndarray: """Generate list of Polygons""" # X,Y coords of each node diff --git a/src/emsarray/utils.py b/src/emsarray/utils.py index 62c5472..117e01a 100644 --- a/src/emsarray/utils.py +++ b/src/emsarray/utils.py @@ -80,6 +80,25 @@ def elapsed(self) -> float: return self._stop - self._start +def timed_func(fn: Callable[..., _T]) -> Callable[..., _T]: + """ + Log the execution time of the decorated function. + Logs "Calling ````" before the wrapped function is called, + and "Completed ```` in ``