
SEEPS #809 (Open)
Wants to merge 14 commits into base: develop
README.md — 2 changes: 1 addition & 1 deletion

@@ -20,7 +20,7 @@ Below is a **curated selection** of the metrics, tools and statistical tests inc
|----------------------- |----------------- |-------------- |
| **[Continuous](https://scores.readthedocs.io/en/stable/included.html#continuous)** |Scores for evaluating single-valued continuous forecasts. |MAE, MSE, RMSE, Additive Bias, Multiplicative Bias, Percent Bias, Pearson's Correlation Coefficient, Kling-Gupta Efficiency, Flip-Flop Index, Quantile Loss, Quantile Interval Score, Interval Score, Murphy Score, and threshold weighted scores for expectiles, quantiles and Huber Loss. |
| **[Probability](https://scores.readthedocs.io/en/stable/included.html#probability)** |Scores for evaluating forecasts that are expressed as predictive distributions, ensembles, and probabilities of binary events. |Brier Score, Continuous Ranked Probability Score (CRPS) for Cumulative Density Functions (CDF) and ensembles (including threshold weighted versions), Receiver Operating Characteristic (ROC), Isotonic Regression (reliability diagrams). |
- | **[Categorical](https://scores.readthedocs.io/en/stable/included.html#categorical)** |Scores for evaluating forecasts of categories. |18 binary contingency table (confusion matrix) metrics and the FIxed Risk Multicategorical (FIRM) Score. |
+ | **[Categorical](https://scores.readthedocs.io/en/stable/included.html#categorical)** |Scores for evaluating forecasts of categories. |18 binary contingency table (confusion matrix) metrics, the FIxed Risk Multicategorical (FIRM) Score, and the SEEPS score. |
| **[Spatial](https://scores.readthedocs.io/en/stable/included.html#spatial)** |Scores that take into account spatial structure. |Fractions Skill Score. |
| **[Statistical Tests](https://scores.readthedocs.io/en/stable/included.html#statistical-tests)** |Tools to conduct statistical tests and generate confidence intervals. |Diebold Mariano. |
| **[Processing Tools](https://scores.readthedocs.io/en/stable/included.html#processing-tools-for-preparing-data)** |Tools to pre-process data. |Data matching, Discretisation, Cumulative Density Function Manipulation. |
docs/api.md — 1 change: 1 addition & 0 deletions

@@ -68,6 +68,7 @@
:members:
.. autoclass:: scores.categorical.EventOperator
:members:
.. autofunction:: scores.categorical.seeps
```

## scores.spatial
docs/included.md — 4 changes: 4 additions & 0 deletions

@@ -559,6 +559,10 @@
- [API](api.md#scores.categorical.probability_of_false_detection)
- [Tutorial](project:./tutorials/ROC.md)
- [Probability of false detection (WWRP/WGNE Joint Working Group on Forecast Verification Research)](https://www.cawcr.gov.au/projects/verification/#POFD)
* - Stable Equitable Error in Probability Space (SEEPS)
- [API](api.md#scores.categorical.seeps)
- [Tutorial](project:./tutorials/SEEPS.md)
- [Rodwell et al. (2010)](https://doi.org/10.1002/qj.656)
* - Threshold Event Operator
- [API](api.md#scores.categorical.ThresholdEventOperator)
- [Tutorial](project:./tutorials/Binary_Contingency_Scores.md)
src/scores/categorical/__init__.py — 3 changes: 2 additions & 1 deletion

@@ -12,12 +12,13 @@
EventOperator,
ThresholdEventOperator,
)
- from scores.categorical.multicategorical_impl import firm
+ from scores.categorical.multicategorical_impl import firm, seeps

__all__ = [
"probability_of_detection",
"probability_of_false_detection",
"firm",
"seeps",
"BasicContingencyManager",
"BinaryContingencyManager",
"ThresholdEventOperator",
src/scores/categorical/multicategorical_impl.py — 178 changes: 176 additions & 2 deletions

@@ -3,12 +3,13 @@
"""

from collections.abc import Sequence
- from typing import Iterable, Optional, Union
+ from typing import Optional, Union

import numpy as np
import xarray as xr

from scores.functions import apply_weights
from scores.processing import broadcast_and_match_nan
from scores.typing import FlexibleDimensionTypes
from scores.utils import check_dims, gather_dimensions

@@ -132,7 +133,7 @@ def firm(  # pylint: disable=too-many-arguments

def _check_firm_inputs(
obs, risk_parameter, categorical_thresholds, threshold_weights, discount_distance, threshold_assignment
- ):  # pylint: disable=too-many-positional-arguments
+ ):
"""
Checks that the FIRM inputs are suitable
"""
@@ -244,3 +245,176 @@ def _single_category_score(
)
score = score.transpose(*fcst.dims)
return score


> **Collaborator:** General comment about this implementation. It looks like you are fixing the ratio of the climatological probability of light rain to the climatological probability of heavy rain to be 2. I'm fairly sure that is the choice made by ECMWF in their use of SEEPS. But with this choice, if p1 is given then p2 and p3 are completely determined using p1 + p2 + p3 = 1 and p2 = 2 * p3, so you don't need to have p3 as an argument if p1 is already an argument. I'd recommend one of the following: either
>
> 1. remove p3 as an argument, or
> 2. allow flexibility in the ratio of light to heavy climatological probabilities, in which case p3 could be specified or (my preference) the ratio could be specified (with a default value of 2).

> **Collaborator:** Also, if you select option 1, you have p3 = (1 - p1) / 3.

def seeps(  # pylint: disable=too-many-arguments, too-many-locals

fcst: xr.DataArray,
obs: xr.DataArray,
p1: xr.DataArray,
p3: xr.DataArray,
light_heavy_threshold: xr.DataArray,
*,  # Force remaining arguments to be keyword-only
dry_light_threshold: Optional[float] = 0.2,
mask_clim_extremes: Optional[bool] = True,
min_masked_value: Optional[float] = 0.1,
max_masked_value: Optional[float] = 0.85,
reduce_dims: Optional[FlexibleDimensionTypes] = None,
preserve_dims: Optional[FlexibleDimensionTypes] = None,
weights: Optional[xr.DataArray] = None,
) -> xr.DataArray:
r"""
Calculates the stable equitable error in probability space (SEEPS) score.

When used to evaluate precipitation forecasts, the SEEPS score calculates the
performance of a forecast across three categories:

- Dry weather (e.g., less than or equal to 0.2mm),
- Light precipitation (the climatological lower two-thirds of rainfall above
the dry threshold),
- Heavy precipitation (the climatological upper one-third of rainfall above
the dry threshold).

> **Collaborator:** This category is defined using conditional probabilities, so it may be better to use that kind of language, e.g. "the climatological lower two-thirds of rainfall, conditioned on it raining". Similarly for the heavy precipitation description.
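As a hedged illustration (not part of the PR), the three-way split described above can be sketched with NumPy; `categorise` is a hypothetical helper name, and the 5.0 mm light/heavy threshold is a placeholder, since in `seeps` that threshold comes from the climatology at each point:

```python
import numpy as np

def categorise(precip, dry_light_threshold=0.2, light_heavy_threshold=5.0):
    """Assign category 1 (dry), 2 (light) or 3 (heavy) to each rainfall value.

    Dry weather is <= dry_light_threshold; light precipitation is inclusive
    of light_heavy_threshold, matching the function's docstring.
    """
    precip = np.asarray(precip, dtype=float)
    return np.where(
        precip <= dry_light_threshold,
        1,
        np.where(precip <= light_heavy_threshold, 2, 3),
    )

# 0.0 mm is dry, 3.0 mm is light (<= 5.0 mm), 12.0 mm is heavy
print(categorise([0.0, 3.0, 12.0]))  # [1 2 3]
```

Note that both thresholds are inclusive on the lower category, which matches the `<=` comparisons used later in the implementation.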

The SEEPS penalty matrix is defined as


.. math::
s = \frac{1}{2} \left(
\begin{matrix}
0 & \frac{1}{1-p_1} & \frac{1}{p_3}+\frac{1}{1-p_1} \\
\frac{1}{p_1} & 0 & \frac{1}{p_3} \\
\frac{1}{p_1}+\frac{1}{1-p_3} & \frac{1}{1-p_3} & 0
\end{matrix}
\right)


where
- :math:`p_1` is the climatological probability of the dry weather category
- :math:`p_3` is the climatological probability of the heavy precipitation category.
- The rows correspond to the forecast category (dry, light, heavy).
- The columns correspond to the observation category (dry, light, heavy).
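The matrix above can be sanity-checked with a small NumPy sketch for scalar climatological probabilities (`seeps_penalty_matrix` is an illustrative name, not the PR's xarray implementation):

```python
import numpy as np

def seeps_penalty_matrix(p1: float, p3: float) -> np.ndarray:
    """SEEPS penalty matrix: row i is the forecast category and column j
    the observed category, ordered (dry, light, heavy)."""
    return 0.5 * np.array(
        [
            [0.0, 1 / (1 - p1), 1 / p3 + 1 / (1 - p1)],
            [1 / p1, 0.0, 1 / p3],
            [1 / p1 + 1 / (1 - p3), 1 / (1 - p3), 0.0],
        ]
    )

s = seeps_penalty_matrix(0.55, 0.15)  # p3 = (1 - 0.55) / 3 under the 2:1 ratio
# Correct forecasts (the diagonal) incur no penalty
assert np.allclose(np.diag(s), 0.0)
# A two-category miss costs the sum of the corresponding one-category misses
assert np.isclose(s[0, 2], s[0, 1] + s[1, 2])
```

The last assertion illustrates why boundary values of p1 or p3 are problematic: as p1 approaches 1 or p3 approaches 0, individual penalties diverge, which is the reviewer's point about requiring strict inequalities.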

> **Collaborator:** Consider adding a reference to the formula for the penalty matrix. The reference would be Rodwell et al. (2010), Eq. (15).

Note that although :math:`p_2` does not appear in the penalty matrix, it is defined as
:math:`p_2 = 2p_3` with :math:`p_1 + p_2 + p_3 = 1`, which means that the light
precipitation category is twice as likely to occur climatologically as the
heavy precipitation category.

> **Collaborator:** I'm not sure that this paragraph is required. However, if you want to discuss p2, I would define it as the climatological probability of light rain, and then mention that p2 = 2 * p3. I'm not sure what p_1 + p_2 + p_3 = 1 adds to this, other than to confirm that the three categories are mutually exclusive and cover the entire outcome space.
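The reviewer's observation that, under the fixed 2:1 ratio, p1 alone determines the other two probabilities can be checked in a few lines (`probs_from_p1` is a hypothetical helper, not part of the PR):

```python
def probs_from_p1(p1: float) -> tuple[float, float, float]:
    """Given p1 and the constraints p1 + p2 + p3 = 1 and p2 = 2 * p3,
    solve for the remaining probabilities: p3 = (1 - p1) / 3."""
    p3 = (1 - p1) / 3
    p2 = 2 * p3
    return p1, p2, p3

p1, p2, p3 = probs_from_p1(0.55)
print(round(p2, 4), round(p3, 4))  # 0.3 0.15
```

This is why the reviewer suggests that passing both `p1` and `p3` is redundant once the 2:1 ratio is fixed.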

This implementation of the score is negatively oriented, meaning that lower scores are better.
Sometimes in the literature, a SEEPS skill score is used, which is defined as 1 - SEEPS.

> **Collaborator:** Is it called a SEEPS skill score in the literature? I'm inclined to reserve the term "skill score" for the usual meaning (i.e., skill_score = 1 - mean_score_fcst / mean_score_ref) in the scores package where possible. Consider rewording to: "Sometimes in the literature, a positively oriented version of SEEPS is used, which is defined as 1 - SEEPS."
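To make the reviewer's distinction concrete, here is a hedged numerical sketch contrasting the two notions (the mean scores are made up):

```python
# Hypothetical mean SEEPS values (lower is better for this negatively
# oriented score)
mean_seeps_fcst = 0.4
mean_seeps_ref = 0.8  # e.g. from a climatological reference forecast

# Conventional skill score, relative to a reference forecast
conventional_skill = 1 - mean_seeps_fcst / mean_seeps_ref

# "1 - SEEPS": a positively oriented rescaling of the score itself,
# with no reference forecast involved
positively_oriented = 1 - mean_seeps_fcst

print(conventional_skill, positively_oriented)
```

The two quantities differ in general, which is the reviewer's argument for not calling 1 - SEEPS a "skill score".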

By default, the scores are only calculated for points where :math:`p_1 \in [0.1, 0.85]`
as per Rodwell et al. (2010). This can be changed by setting ``mask_clim_extremes`` to ``False`` or
by changing the ``min_masked_value`` and ``max_masked_value`` parameters.
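A minimal illustration of that masking with xarray's `where` (illustrative values; not the PR's exact code):

```python
import xarray as xr

p1 = xr.DataArray([0.05, 0.3, 0.9], dims=["station"])
score = xr.DataArray([0.7, 0.4, 0.2], dims=["station"])

# Keep scores only where 0.1 <= p1 <= 0.85, per Rodwell et al. (2010);
# points outside this range become NaN
masked = score.where((p1 >= 0.1) & (p1 <= 0.85))
print(masked.values)  # [nan 0.4 nan]
```

Points with a very high or very low climatological probability of dry weather are excluded because the penalty matrix entries become extreme there.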

For further details on generating the p1 and p3 arrays, see Rodwell et al. (2010).

Args:
fcst: An array of real-valued forecasts.

> **Collaborator:** Consider adding "(e.g. precipitation forecasts in mm)" for clarity.

obs: An array of real-valued observations.

> **Collaborator:** Consider adding "(e.g. precipitation observations in mm)" for clarity.

p1: The climatological probability of the dry weather category.
p3: The climatological probability of the heavy precipitation category.

> **Collaborator:** Consider removing `p3`, as discussed above.

light_heavy_threshold: An array of the rainfall thresholds that separates
light and heavy precipitation. Light precipitation is inclusive of this
threshold.

> **Collaborator:** Consider: "An array of the rainfall thresholds (e.g. in mm) that separates light and heavy precipitation."

> **Collaborator:** Consider: "The threshold itself is included in the light precipitation category."

dry_light_threshold: The threshold that separates dry weather from light precipitation.
Defaults to 0.2. Dry weather is defined as less than or equal to this threshold.

> **Collaborator:** Consider: "The threshold (in mm) that separates dry weather from light precipitation."

> **Collaborator:** I'm wondering why it should default to 0.2. Is 0.2 mm commonly used in evaluation of global NWP in the literature?

mask_clim_extremes: If True, mask out the climatological extremes.

> **Collaborator:** I'd prefer a more explicit explanation, e.g. "If True, does not calculate the SEEPS score for cases where p1 < min_masked_value or p1 > max_masked_value. NaN is returned instead."

min_masked_value: Points with climatological probabilities of dry weather
less than this value are masked. Defaults to 0.1.

> **Collaborator:** I'm not sure how to think about the mask. Is the mask the area where results are computed, or the area where the results are hidden? If the latter, I'd prefer lower_mask_threshold and upper_mask_threshold instead of min_masked_value and max_masked_value. The reason is, if (say) min_masked_value=0.1, then values lower than 0.1 are hidden, which means that 0.1 is a minimum mask value.

max_masked_value: Points with climatological probabilities of dry weather
greater than this value are masked. Defaults to 0.85.
reduce_dims: Optionally specify which dimensions to reduce when
calculating the SEEPS score. All other dimensions will be preserved. As a
special case, 'all' will allow all dimensions to be reduced. Only one
of `reduce_dims` and `preserve_dims` can be supplied. The default behaviour
if neither are supplied is to reduce all dims.
preserve_dims: Optionally specify which dimensions to preserve
when calculating SEEPS. All other dimensions will be reduced.
As a special case, 'all' will allow all dimensions to be
preserved. In this case, the result will be in the same
shape/dimensionality as the forecast, and the errors will be
the SEEPS score at each point (i.e. single-value comparison
against observed), and the forecast and observed dimensions
must match precisely. Only one of `reduce_dims` and `preserve_dims` can be
supplied. The default behaviour if neither are supplied is to reduce all dims.

> **Collaborator:** "when calculating the SEEPS score" (to be consistent with the reduce_dims descriptor).
weights: Optionally provide an array for weighted averaging (e.g. by area).

Returns:
An xarray DataArray containing the SEEPS score.

Raises:
ValueError: if any values in `p1` are outside the range [0, 1].
ValueError: if any values in `p3` are outside the range [0, 1].

> **Collaborator:** You really require 0 < p1 < 1 (strict inequalities), otherwise some entries in the SEEPS scoring matrix will be infinity.

> **Collaborator:** The `p3` check could be removed if `p3` is removed as an argument.


References:
Rodwell, M. J., Richardson, D. S., Hewson, T. D., & Haiden, T. (2010).
A new equitable score suitable for verifying precipitation in numerical
weather prediction. Quarterly Journal of the Royal Meteorological Society,
136(650), 1344–1363. https://doi.org/10.1002/qj.656

Examples:
>>> import numpy as np
>>> import xarray as xr
>>> from scores.categorical import seeps
>>> fcst = xr.DataArray(np.random.rand(4, 6, 8), dims=['time', 'lat', 'lon'])
>>> obs = xr.DataArray(np.random.rand(4, 6, 8), dims=['time', 'lat', 'lon'])
>>> p1 = xr.DataArray(np.random.rand(6, 8), dims=['lat', 'lon'])
>>> p3 = (1 - p1) / 3
>>> light_heavy_threshold = 2 * xr.DataArray(np.random.rand(4, 6, 8), dims=['time', 'lat', 'lon'])
>>> seeps(fcst, obs, p1, p3, light_heavy_threshold=light_heavy_threshold)
"""
if p1.min() < 0 or p1.max() > 1:
raise ValueError("`p1` must have values between 0 and 1 inclusive")
if p3.min() < 0 or p3.max() > 1:
raise ValueError("`p3` must have values between 0 and 1 inclusive")

> **Collaborator:** This should be `p1.min() <= 0 or p1.max() >= 1`.

> **Collaborator:** The message should then read "`p1` must have values strictly between 0 and 1".

reduce_dims = gather_dimensions(fcst.dims, obs.dims, reduce_dims=reduce_dims, preserve_dims=preserve_dims)
fcst, obs = broadcast_and_match_nan(fcst, obs)

# Penalties for index i, j in the penalty matrix. Row i corresponds to the
# forecast category while row j corresponds to the observation category
# row 1 of the penalty matrix
index_12 = 1 / (1 - p1)
> **Collaborator:** I'm wondering whether penalty_12 is a better label. Not requesting you change anything though; the code is readable as it is.

index_13 = (1 / p3) + (1 / (1 - p1))
# row 2 of the penalty matrix
index_21 = 1 / p1
index_23 = 1 / p3
# row 3 of the penalty matrix
index_31 = (1 / p1) + (1 / (1 - p3))
index_32 = 1 / (1 - p3)

# Get conditions for each category
fcst1_condition = fcst <= dry_light_threshold
fcst2_condition = (fcst > dry_light_threshold) & (fcst <= light_heavy_threshold)
fcst3_condition = fcst > light_heavy_threshold

obs1_condition = obs <= dry_light_threshold
obs2_condition = (obs > dry_light_threshold) & (obs <= light_heavy_threshold)
obs3_condition = obs > light_heavy_threshold

# Calculate the penalties
> **Collaborator:** Below doesn't feel very pythonesque, but I'm sure I've done much worse ;)

> **Collaborator:** Though it probably uses less memory than converting fcst and obs to categories, determining a contingency table and multiplying by the penalty matrix.

result = fcst.copy() * 0
result = result.where(~(fcst1_condition & obs2_condition), index_12.broadcast_like(result))
result = result.where(~(fcst1_condition & obs3_condition), index_13.broadcast_like(result))
result = result.where(~(fcst2_condition & obs1_condition), index_21.broadcast_like(result))
result = result.where(~(fcst2_condition & obs3_condition), index_23.broadcast_like(result))
result = result.where(~(fcst3_condition & obs1_condition), index_31.broadcast_like(result))
result = result.where(~(fcst3_condition & obs2_condition), index_32.broadcast_like(result))

result = result / 2
# propagate NaNs from any of the inputs
result = result.where(
~np.isnan(fcst)
& ~np.isnan(obs)
& ~np.isnan(p1)
& ~np.isnan(p3)
& ~np.isnan(light_heavy_threshold)
& ~np.isnan(dry_light_threshold)
)

if mask_clim_extremes:
result = result.where(np.logical_and(p1 <= max_masked_value, p1 >= min_masked_value))
> **Collaborator:** OK, I see here that the mask is the area where you want to calculate values.


result = apply_weights(result, weights=weights)
result = result.mean(dim=reduce_dims)

return result