From 7c118140f3be64ed3d555d8ec0ef2d69db224a39 Mon Sep 17 00:00:00 2001 From: William Jones Date: Fri, 8 Dec 2023 21:08:31 +0000 Subject: [PATCH 1/9] Update index.rst to allow maxdepth 3 for bulk statistics --- doc/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/index.rst b/doc/index.rst index 7cfc5888..8474cae3 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -68,7 +68,7 @@ The project is currently being extended by several contributors to include addit .. toctree:: :caption: Compute bulk statistics online or in postprocessing - :maxdepth: 2 + :maxdepth: 3 bulk_statistics/index From 054240e2914c871b95bda4f2c5065090149a95cd Mon Sep 17 00:00:00 2001 From: William Jones Date: Fri, 8 Dec 2023 21:13:29 +0000 Subject: [PATCH 2/9] Update index.rst to allow maxdepth 3 for bulk statistics --- doc/bulk_statistics/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/bulk_statistics/index.rst b/doc/bulk_statistics/index.rst index bb06de32..6292f5f2 100644 --- a/doc/bulk_statistics/index.rst +++ b/doc/bulk_statistics/index.rst @@ -3,7 +3,7 @@ ########################## .. toctree:: - :maxdepth: 2 + :maxdepth: 3 notebooks/compute_statistics_during_feature_detection_example notebooks/compute_statistics_during_segmentation_example From 753dc9c6f6f07bc39170d1f7855441297929bb97 Mon Sep 17 00:00:00 2001 From: William Jones Date: Fri, 8 Dec 2023 21:16:55 +0000 Subject: [PATCH 3/9] Fix naming of bulk statistics notebooks in index --- doc/bulk_statistics/index.rst | 6 +++--- doc/index.rst | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/bulk_statistics/index.rst b/doc/bulk_statistics/index.rst index 6292f5f2..141ea945 100644 --- a/doc/bulk_statistics/index.rst +++ b/doc/bulk_statistics/index.rst @@ -3,8 +3,8 @@ ########################## .. 
toctree:: - :maxdepth: 3 + :maxdepth: 2 - notebooks/compute_statistics_during_feature_detection_example - notebooks/compute_statistics_during_segmentation_example + notebooks/compute_statistics_during_feature_detection + notebooks/compute_statistics_during_segmentation notebooks/compute_statistics_postprocessing_example diff --git a/doc/index.rst b/doc/index.rst index 8474cae3..7cfc5888 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -68,7 +68,7 @@ The project is currently being extended by several contributors to include addit .. toctree:: :caption: Compute bulk statistics online or in postprocessing - :maxdepth: 3 + :maxdepth: 2 bulk_statistics/index From 302db233a18f3e9ec43f505e59156621cf45bf48 Mon Sep 17 00:00:00 2001 From: William Jones Date: Fri, 8 Dec 2023 21:26:31 +0000 Subject: [PATCH 4/9] Add an overview of bulk statistics --- doc/bulk_statistics/index.rst | 7 ++++++- doc/index.rst | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/doc/bulk_statistics/index.rst b/doc/bulk_statistics/index.rst index 141ea945..8bc04639 100644 --- a/doc/bulk_statistics/index.rst +++ b/doc/bulk_statistics/index.rst @@ -2,8 +2,13 @@ Compute bulk statistics ########################## +Bulk statistics allow for a wide range of properties of detected objects to be calculated during feature detection and segmentation or as a postprocessing step. +Th get_statistics_from_mask function applies one or more functions over one or more data fields for each detected object. +For example, one could calculate the convective mass flux for each detected feature by providing fields of vertical velocity, cloud water content and area. +Numpy-like broadcasting is supported, allowing 2D and 3D data to be combined. + .. 
toctree:: - :maxdepth: 2 + :maxdepth: 1 notebooks/compute_statistics_during_feature_detection notebooks/compute_statistics_during_segmentation diff --git a/doc/index.rst b/doc/index.rst index 7cfc5888..d008e946 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -67,7 +67,7 @@ The project is currently being extended by several contributors to include addit .. toctree:: - :caption: Compute bulk statistics online or in postprocessing + :caption: Compute bulk statistics :maxdepth: 2 bulk_statistics/index From 958e9b08ee0b428d1c2d0f7a655e835363635550 Mon Sep 17 00:00:00 2001 From: William Jones Date: Fri, 8 Dec 2023 21:32:22 +0000 Subject: [PATCH 5/9] Add bulk_statistics to API ref --- doc/bulk_statistics/index.rst | 2 +- doc/tobac.rst | 12 ++++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/doc/bulk_statistics/index.rst b/doc/bulk_statistics/index.rst index 8bc04639..2560e3bb 100644 --- a/doc/bulk_statistics/index.rst +++ b/doc/bulk_statistics/index.rst @@ -3,7 +3,7 @@ ########################## Bulk statistics allow for a wide range of properties of detected objects to be calculated during feature detection and segmentation or as a postprocessing step. -Th get_statistics_from_mask function applies one or more functions over one or more data fields for each detected object. +The :py:meth:`tobac.utils.bulk_statistics.get_statistics_from_mask` function applies one or more functions over one or more data fields for each detected object. For example, one could calculate the convective mass flux for each detected feature by providing fields of vertical velocity, cloud water content and area. Numpy-like broadcasting is supported, allowing 2D and 3D data to be combined. diff --git a/doc/tobac.rst b/doc/tobac.rst index a3b011d4..87cd45ab 100644 --- a/doc/tobac.rst +++ b/doc/tobac.rst @@ -71,7 +71,7 @@ tobac.tracking module tobac.utils modules ------------------ -tobac.utils.general modules +tobac.utils.general module ------------------ .. 
automodule:: tobac.utils.general @@ -79,7 +79,15 @@ tobac.utils.general modules :undoc-members: :show-inheritance: -tobac.utils.mask modules +tobac.utils.bulk_statistics module +------------------ + +.. automodule:: tobac.utils.bulk_statistics + :members: + :undoc-members: + :show-inheritance: + +tobac.utils.mask module ------------------ .. automodule:: tobac.utils.mask From c3dc07f6d2ada4ac4958762bcda8e5a52a5689a6 Mon Sep 17 00:00:00 2001 From: William Jones Date: Fri, 8 Dec 2023 21:39:39 +0000 Subject: [PATCH 6/9] Update docstrings in bulk_statistics for changed argument order, and describe broadcasting of fields --- tobac/utils/bulk_statistics.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/tobac/utils/bulk_statistics.py b/tobac/utils/bulk_statistics.py index f27f9fd5..160b5473 100644 --- a/tobac/utils/bulk_statistics.py +++ b/tobac/utils/bulk_statistics.py @@ -34,13 +34,13 @@ def get_statistics( Parameters ---------- - labels : np.ndarray[int] - Mask with labels of each regions to apply function to (e.g. output of segmentation for a specific timestep) - *fields : tuple[np.ndarray] - Fields to give as arguments to each function call. Must have the same shape as labels. features: pd.DataFrame Dataframe with features or segmented features (output from feature detection or segmentation) can be for the specific timestep or for the whole dataset + labels : np.ndarray[int] + Mask with labels of each regions to apply function to (e.g. output of segmentation for a specific timestep) + *fields : tuple[np.ndarray] + Fields to give as arguments to each function call. If the shape does not match that of labels, numpy-style broadcasting will be applied. 
statistic: dict[str, Callable], optional (default: {'ncells':np.count_nonzero}) Dictionary with function(s) to apply over each region as values and the name of the respective statistics as keys default is to just count the number of cells associated with each feature and write it to the feature dataframe @@ -163,13 +163,16 @@ def get_statistics_from_mask( Parameters: ----------- - segmentation_mask : xr.DataArray - Segmentation mask output - *fields : xr.DataArray[np.ndarray] - Field(s) with input data. Needs to have the same dimensions as the segmentation mask. features: pd.DataFrame Dataframe with segmented features (output from feature detection or segmentation). Timesteps must not be exactly the same as in segmentation mask but all labels in the mask need to be present in the feature dataframe. + segmentation_mask : xr.DataArray + Segmentation mask output + *fields : xr.DataArray[np.ndarray] + Field(s) with input data. If field does not have a time dimension it + will be considered time invariant, and the entire field will be passed + for each time step in segmentation_mask. If the shape does not match + that of labels, numpy-style broadcasting will be applied. 
statistic: dict[str, Callable], optional (default: {'ncells':np.count_nonzero}) Dictionary with function(s) to apply over each region as values and the name of the respective statistics as keys default is to just count the number of cells associated with each feature and write it to the feature dataframe From ad2dd4f4a74ded999f6b0363150e6d7c17359a46 Mon Sep 17 00:00:00 2001 From: William Jones Date: Fri, 8 Dec 2023 21:46:20 +0000 Subject: [PATCH 7/9] Reformat docstrings in bulk_statistics --- tobac/utils/bulk_statistics.py | 68 ++++++++++++++++++++++------------ 1 file changed, 44 insertions(+), 24 deletions(-) diff --git a/tobac/utils/bulk_statistics.py b/tobac/utils/bulk_statistics.py index 160b5473..5109beb2 100644 --- a/tobac/utils/bulk_statistics.py +++ b/tobac/utils/bulk_statistics.py @@ -35,27 +35,40 @@ def get_statistics( Parameters ---------- features: pd.DataFrame - Dataframe with features or segmented features (output from feature detection or segmentation) - can be for the specific timestep or for the whole dataset + Dataframe with features or segmented features (output from feature + detection or segmentation), which can be for the specific timestep or + for the whole dataset + labels : np.ndarray[int] - Mask with labels of each regions to apply function to (e.g. output of segmentation for a specific timestep) + Mask with labels of each regions to apply function to (e.g. output of + segmentation for a specific timestep) + *fields : tuple[np.ndarray] - Fields to give as arguments to each function call. If the shape does not match that of labels, numpy-style broadcasting will be applied. + Fields to give as arguments to each function call. If the shape does not + match that of labels, numpy-style broadcasting will be applied. 
+ statistic: dict[str, Callable], optional (default: {'ncells':np.count_nonzero}) - Dictionary with function(s) to apply over each region as values and the name of the respective statistics as keys - default is to just count the number of cells associated with each feature and write it to the feature dataframe + Dictionary with function(s) to apply over each region as values and the + name of the respective statistics as keys. Default is to just count the + number of cells associated with each feature and write it to the feature + dataframe. + index: None | list[int], optional (default: None) list of indices of regions in labels to apply function to. If None, will - default to all integer feature labels in labels + default to all integer feature labels in labels. + default: None | float, optional (default: None) - default value to return in a region that has no values + default value to return in a region that has no values. + id_column: str, optional (default: "feature") - Name of the column in feature dataframe that contains IDs that match with the labels in mask. The default is the column "feature". + Name of the column in feature dataframe that contains IDs that match with + the labels in mask. The default is the column "feature". - Returns: - ------- - features: pd.DataFrame - Updated feature dataframe with bulk statistics for each feature saved in a new column + Returns: + ------- + features: pd.DataFrame + Updated feature dataframe with bulk statistics for each feature saved + in a new column. """ # if mask and input data dimensions do not match we can broadcast using numpy broadcasting rules for field in fields: @@ -160,35 +173,42 @@ def get_statistics_from_mask( """ Derives bulk statistics for each object in the segmentation mask. - Parameters: ----------- features: pd.DataFrame - Dataframe with segmented features (output from feature detection or segmentation). 
- Timesteps must not be exactly the same as in segmentation mask but all labels in the mask need to be present in the feature dataframe. + Dataframe with segmented features (output from feature detection or + segmentation). Timesteps must not be exactly the same as in segmentation + mask but all labels in the mask need to be present in the feature + dataframe. + segmentation_mask : xr.DataArray Segmentation mask output + *fields : xr.DataArray[np.ndarray] Field(s) with input data. If field does not have a time dimension it will be considered time invariant, and the entire field will be passed for each time step in segmentation_mask. If the shape does not match that of labels, numpy-style broadcasting will be applied. + statistic: dict[str, Callable], optional (default: {'ncells':np.count_nonzero}) - Dictionary with function(s) to apply over each region as values and the name of the respective statistics as keys - default is to just count the number of cells associated with each feature and write it to the feature dataframe + Dictionary with function(s) to apply over each region as values and the + name of the respective statistics as keys. Default is to count the + number of cells associated with each feature. + index: None | list[int], optional (default: None) list of indexes of regions in labels to apply function to. If None, will - default to all integers between 1 and the maximum value in labels + default to all integers between 1 and the maximum value in labels + default: None | float, optional (default: None) default value to return in a region that has no values + id_column: str, optional (default: "feature") Name of the column in feature dataframe that contains IDs that match with the labels in mask. The default is the column "feature".
- - Returns: - ------- - features: pd.DataFrame - Updated feature dataframe with bulk statistics for each feature saved in a new column + Returns: + ------- + features: pd.DataFrame + Updated feature dataframe with bulk statistics for each feature saved in a new column """ # check that mask and input data have the same dimensions for field in fields: From 91a2e129c802f497bd4bd6ecafb32443b9f5c4c7 Mon Sep 17 00:00:00 2001 From: William Jones Date: Fri, 8 Dec 2023 21:49:33 +0000 Subject: [PATCH 8/9] Reformat docstrings in bulk_statistics --- tobac/utils/bulk_statistics.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tobac/utils/bulk_statistics.py b/tobac/utils/bulk_statistics.py index 5109beb2..ecea42e3 100644 --- a/tobac/utils/bulk_statistics.py +++ b/tobac/utils/bulk_statistics.py @@ -55,14 +55,14 @@ def get_statistics( index: None | list[int], optional (default: None) list of indices of regions in labels to apply function to. If None, will - default to all integer feature labels in labels. + default to all integer feature labels in labels. default: None | float, optional (default: None) default value to return in a region that has no values. id_column: str, optional (default: "feature") - Name of the column in feature dataframe that contains IDs that match with - the labels in mask. The default is the column "feature". + Name of the column in feature dataframe that contains IDs that match with + the labels in mask. The default is the column "feature". Returns: ------- @@ -203,7 +203,8 @@ def get_statistics_from_mask( default value to return in a region that has no values id_column: str, optional (default: "feature") - Name of the column in feature dataframe that contains IDs that match with the labels in mask. The default is the column "feature". + Name of the column in feature dataframe that contains IDs that match + with the labels in mask. The default is the column "feature". 
Returns: ------- From fbd7b19ea8149a67f8e7a397969a101920cb2f87 Mon Sep 17 00:00:00 2001 From: William Jones Date: Fri, 8 Dec 2023 21:55:08 +0000 Subject: [PATCH 9/9] Reformat docstrings in bulk_statistics --- tobac/utils/bulk_statistics.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/tobac/utils/bulk_statistics.py b/tobac/utils/bulk_statistics.py index ecea42e3..adc782cc 100644 --- a/tobac/utils/bulk_statistics.py +++ b/tobac/utils/bulk_statistics.py @@ -25,12 +25,14 @@ def get_statistics( default: Union[None, float] = None, id_column: str = "feature", ) -> pd.DataFrame: - """ - Get bulk statistics for objects (e.g. features or segmented features) given a labelled mask of the objects - and any input field with the same dimensions. + """Get bulk statistics for objects (e.g. features or segmented features) + given a labelled mask of the objects and any input field with the same + dimensions or that can be broadcast with labels according to numpy-like + broadcasting rules. - The statistics are added as a new column to the existing feature dataframe. Users can specify which statistics are computed by - providing a dictionary with the column name of the metric and the respective function. + The statistics are added as a new column to the existing feature dataframe. + Users can specify which statistics are computed by providing a dictionary + with the column name of the metric and the respective function. Parameters ---------- @@ -64,12 +66,13 @@ def get_statistics( Name of the column in feature dataframe that contains IDs that match with the labels in mask. The default is the column "feature". - Returns: + Returns ------- features: pd.DataFrame Updated feature dataframe with bulk statistics for each feature saved in a new column. 
""" + # if mask and input data dimensions do not match we can broadcast using numpy broadcasting rules for field in fields: if labels.shape != field.shape: @@ -170,11 +173,11 @@ def get_statistics_from_mask( default: Union[None, float] = None, id_column: str = "feature", ) -> pd.DataFrame: - """ - Derives bulk statistics for each object in the segmentation mask. + """Derives bulk statistics for each object in the segmentation mask, and + returns a features Dataframe with these properties for each feature. - Parameters: - ----------- + Parameters + ---------- features: pd.DataFrame Dataframe with segmented features (output from feature detection or segmentation). Timesteps must not be exactly the same as in segmentation @@ -203,14 +206,15 @@ def get_statistics_from_mask( default value to return in a region that has no values id_column: str, optional (default: "feature") - Name of the column in feature dataframe that contains IDs that match + Name of the column in feature dataframe that contains IDs that match with the labels in mask. The default is the column "feature". - Returns: + Returns ------- features: pd.DataFrame Updated feature dataframe with bulk statistics for each feature saved in a new column """ + # check that mask and input data have the same dimensions for field in fields: if segmentation_mask.shape != field.shape: