parent 41fa0e5

author Liam Bluett <[email protected]> 1732590092 +1000 committer Liam Bluett <[email protected]> 1734391465 +1000 Implemented spearman's correlationship Modified notebook to remove noise and add an explanation and reference. Add Spearman's to gallery Change notebook metadata to use 'Python 3 (ipykernel)' and 'python3' rather than custom 'ml' kernel. Testing for spearman implemented Maintainer notes followed, notebook fixed... again Modified notebook to remove noise and add an explanation and reference. cleanup more Add Spearman's to gallery Testing for spearman implemented Maintainer notes followed, notebook fixed... again Notebook kernel changed for testing Update src/scores/continuous/correlation/correlation_impl.py add pyfunc for hyperlink Co-authored-by: Stephanie Chong <[email protected]> Signed-off-by: Liam Bluett <[email protected]> reorder alphabetically
nci · Dec 17, 2024 · e37eb61 · e37eb61
1 parent 84ff284
commit e37eb61
Show file tree

Hide file tree

Showing 7 changed files with 554 additions and 4 deletions.
diff --git a/docs/api.md b/docs/api.md
@@ -21,6 +21,7 @@
 .. autofunction:: scores.continuous.flip_flop_index
 .. autofunction:: scores.continuous.flip_flop_index_proportion_exceeding
 .. autofunction:: scores.continuous.correlation.pearsonr
+.. autofunction:: scores.continuous.correlation.spearmanr
 .. autofunction:: scores.continuous.multiplicative_bias
 .. autofunction:: scores.continuous.pbias
 .. autofunction:: scores.continuous.kge

diff --git a/docs/included.md b/docs/included.md
@@ -133,6 +133,10 @@
   - [API](api.md#scores.continuous.rmse)
   - [Tutorial](project:./tutorials/Root_Mean_Squared_Error.md)
   - [Wikipedia](https://en.wikipedia.org/wiki/Root-mean-square_deviation)
+* - Spearman's Correlation Coefficient
+  - [API](api.md#scores.continuous.correlation.spearmanr)
+  - [Tutorial](project:./tutorials/Spearmans_Correlation.md)
+  - [Wikipedia](https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient)
 * - Threshold Weighted Absolute Error
   - [API](api.md#scores.continuous.tw_absolute_error)
   - [Tutorial](project:./tutorials/Threshold_Weighted_Scores.md)

diff --git a/src/scores/continuous/correlation/__init__.py b/src/scores/continuous/correlation/__init__.py
@@ -2,6 +2,6 @@
 Import the functions from the implementations into the public API
 """
 
-from scores.continuous.correlation.correlation_impl import pearsonr
+from scores.continuous.correlation.correlation_impl import pearsonr, spearmanr
 
-__all__ = ["pearsonr"]
+__all__ = ["pearsonr", "spearmanr"]
diff --git a/src/scores/continuous/correlation/correlation_impl.py b/src/scores/continuous/correlation/correlation_impl.py
@@ -58,3 +58,65 @@ def pearsonr(
     )
 
     return xr.corr(fcst, obs, reduce_dims)
+
+
+def spearmanr(
+    fcst: xr.DataArray,
+    obs: xr.DataArray,
+    *,  # Force keywords arguments to be keyword-only
+    reduce_dims: Optional[FlexibleDimensionTypes] = None,
+    preserve_dims: Optional[FlexibleDimensionTypes] = None,
+) -> xr.DataArray:
+    """
+    Calculates the Spearman's rank correlation coefficient between two xarray DataArrays.
+
+    Spearman's correlation is the Pearson correlation of the rank-transformed data.
+    It assesses monotonic relationships (whether linear or not), whereas Pearson's
+    correlation assesses strictly linear relationships. The two agree when the
+    relationship is perfectly linear and diverge when it is monotonic but non-linear.
+
+    .. math::
+        \\rho = 1 - \\frac{6\\sum_{i=1}^{n}{d_i^2}}{n(n^2-1)}
+
+    where:
+        - :math:`\\rho` = Spearman's rank correlation coefficient
+        - :math:`d_i` = the difference between the ranks of x and y in a sample
+        - :math:`n` = the number of samples
+
+    The formula above is the shortcut that applies when there are no tied values.
+    This function computes the Pearson correlation of the ranks instead, which gives
+    the same result without ties and remains well defined when ties are present.
+
+    Args:
+        fcst: Forecast or predicted variables
+        obs: Observed variables.
+        reduce_dims: Optionally specify which dimensions to reduce when
+            calculating the Spearman's rank correlation coefficient.
+            All other dimensions will be preserved. When more than one dimension
+            is reduced, the values along those dimensions are pooled into a single
+            set of samples before ranking.
+        preserve_dims: Optionally specify which dimensions to preserve when
+            calculating the Spearman's rank correlation coefficient. All other
+            dimensions will be reduced. As a special case, 'all' will allow all
+            dimensions to be preserved; note that a correlation needs at least one
+            reduced dimension to supply samples, so this is rarely meaningful here.
+
+    Returns:
+        xr.DataArray: An xarray object with Spearman's rank correlation coefficient values
+
+    Note:
+        This function isn't set up to take weights.
+
+        Only samples where both ``fcst`` and ``obs`` are present (shared coordinates
+        and non-missing values) contribute to the ranks and to the coefficient.
+
+    See also:
+        :py:func:`scores.continuous.correlation.pearsonr`
+
+    Reference:
+        https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient
+    """
+    reduce_dims = scores.utils.gather_dimensions(
+        fcst.dims, obs.dims, reduce_dims=reduce_dims, preserve_dims=preserve_dims
+    )
+
+    rank_dims = list(reduce_dims)
+    if not rank_dims:
+        # Nothing to rank over: hand the data straight to xr.corr, as before.
+        return xr.corr(fcst, obs, reduce_dims)
+
+    # Ranks must be computed over exactly the samples that enter the correlation.
+    # Pair the inputs up first: keep shared coordinates only, give both arrays the
+    # same dimensions, and blank out any sample missing from either side (rank
+    # leaves NaNs as NaNs, so they do not shift the ranks of the remaining values).
+    fcst, obs = xr.align(fcst, obs, join="inner", copy=False)
+    fcst, obs = xr.broadcast(fcst, obs)
+    paired = fcst.notnull() & obs.notnull()
+    fcst = fcst.where(paired)
+    obs = obs.where(paired)
+
+    if len(rank_dims) == 1:
+        sample_dim = rank_dims[0]
+    else:
+        # Ranking one dimension after another ranks the ranks, which is not the
+        # rank over the pooled samples. Flatten the reduced dimensions into one
+        # sample dimension so that a single ranking covers all of them.
+        sample_dim = "_spearmanr_samples"
+        fcst = fcst.stack({sample_dim: rank_dims})
+        obs = obs.stack({sample_dim: rank_dims})
+
+    fcst_ranks = fcst.rank(dim=sample_dim)
+    obs_ranks = obs.rank(dim=sample_dim)
+
+    return xr.corr(fcst_ranks, obs_ranks, sample_dim)
diff --git a/tests/continuous/test_correlation.py b/tests/continuous/test_correlation.py
@@ -6,7 +6,7 @@
 import pytest
 import xarray as xr
 
-from scores.continuous.correlation import pearsonr
+from scores.continuous.correlation import pearsonr, spearmanr
 
 try:
     import dask
@@ -68,6 +68,19 @@
     coords=[("time", [1, 2, 3])],
 )
 
+# Fixtures for testing the divergence between Pearson and Spearman correlation
+
+# Generate non-linear monotonic data using a logistic function
+# NOTE(review): no random draws are visible in this hunk, so this seed looks
+# unnecessary - confirm nothing else in the module relies on it before removing.
+np.random.seed(42)
+X = np.linspace(0, 10, 100)
+Y = 1 / (1 + np.exp(-X))  # Logistic relationship
+
+# Convert to xarray.DataArray
+X_DA = xr.DataArray(X, dims="sample", name="x")
+Y_DA = xr.DataArray(Y, dims="sample", name="y")
+# Expected Pearson coefficient for X vs Y, quoted to two decimal places
+PEARSON_OUTPUT = 0.76
+# Expected Spearman coefficient for X vs Y: Y increases monotonically with X
+SPEARMAN_OUTPUT = 1.0
+
+
 
 @pytest.mark.parametrize(
     ("da1", "da2", "reduce_dims", "preserve_dims", "expected"),
@@ -82,7 +95,7 @@
         (DA4_CORR, DA5_CORR, "space", None, EXP_CORR_DIFF_SIZE),
     ],
 )
-def test_correlation(da1, da2, reduce_dims, preserve_dims, expected):
+def test_pearson_correlation(da1, da2, reduce_dims, preserve_dims, expected):
     """
     Tests continuous.correlation
     """
@@ -103,3 +116,56 @@ def test_correlation_dask():
     result = result.compute()
     assert isinstance(result.data, (np.ndarray, np.generic))
     xr.testing.assert_allclose(result, EXP_CORR_REDUCE_ALL)
+
+
+@pytest.mark.parametrize(
+    ("da1", "da2", "reduce_dims", "preserve_dims", "expected"),
+    [
+        # Check preserve dim arg ("space" is kept)
+        (DA1_CORR, DA2_CORR, None, "space", EXP_CORR_KEEP_SPACE_DIM),
+        # Check reduce dim arg ("time" is reduced)
+        (DA1_CORR, DA2_CORR, "time", None, EXP_CORR_KEEP_SPACE_DIM),
+        # Check reduce all (neither argument supplied)
+        (DA3_CORR, DA2_CORR, None, None, EXP_CORR_REDUCE_ALL),
+        # Check different size arrays as input
+        (DA4_CORR, DA5_CORR, "space", None, EXP_CORR_DIFF_SIZE),
+    ],
+)
+def test_spearman_correlation(da1, da2, reduce_dims, preserve_dims, expected):
+    """
+    Tests continuous.correlation.spearmanr against the expected output for
+    each combination of dimension arguments
+    """
+    dim_kwargs = {"reduce_dims": reduce_dims, "preserve_dims": preserve_dims}
+    actual = spearmanr(da1, da2, **dim_kwargs)
+    xr.testing.assert_allclose(actual, expected)
+
+
+def test_spearman_correlation_dask():
+    """
+    Tests continuous.correlation.spearmanr works with Dask: the result is a
+    Dask array until computed, and computes to the expected values
+    """
+    if dask == "Unavailable":  # pragma: no cover
+        pytest.skip("Dask unavailable, could not run test")  # pragma: no cover
+
+    fcst_chunked = DA3_CORR.chunk()
+    obs_chunked = DA2_CORR.chunk()
+
+    lazy_result = spearmanr(fcst_chunked, obs_chunked)
+    assert isinstance(lazy_result.data, dask.array.Array)
+
+    computed = lazy_result.compute()
+    assert isinstance(computed.data, (np.ndarray, np.generic))
+    xr.testing.assert_allclose(computed, EXP_CORR_REDUCE_ALL)
+
+
+@pytest.mark.parametrize(
+    ("da1", "da2", "reduce_dims", "preserve_dims", "expected", "corr"),
+    [
+        # Check non-linear monotonic relationship
+        (X_DA, Y_DA, None, None, PEARSON_OUTPUT, "pearson"),
+        (X_DA, Y_DA, None, None, SPEARMAN_OUTPUT, "spearman"),
+    ],
+)
+def test_divergence(da1, da2, reduce_dims, preserve_dims, expected, corr):
+    """
+    Tests that pearsonr and spearmanr give different results on data that is
+    monotonic but not linear
+    """
+    if corr == "spearman":
+        result = spearmanr(da1, da2, preserve_dims=preserve_dims, reduce_dims=reduce_dims)
+        # The coefficient is a floating point computation, so compare with a
+        # tolerance rather than requiring bit-for-bit equality.
+        assert result.item() == pytest.approx(expected)
+    else:
+        result = pearsonr(da1, da2, preserve_dims=preserve_dims, reduce_dims=reduce_dims)
+        # The expected Pearson value is only quoted to two decimal places.
+        assert result.item() == pytest.approx(expected, abs=5e-3)
diff --git a/tutorials/Spearmans_Correlation.ipynb b/tutorials/Spearmans_Correlation.ipynb
diff --git a/tutorials/Tutorial_Gallery.ipynb b/tutorials/Tutorial_Gallery.ipynb
@@ -54,6 +54,7 @@
     "- [RMSE](./Root_Mean_Squared_Error.ipynb)\n",
     "- [MSE](./Mean_Squared_Error.ipynb)\n",
     "- [Pearson's Correlation](./Pearsons_Correlation.ipynb)\n",
+    "- [Spearman's Correlation](./Spearmans_Correlation.ipynb)\n",
     "- [Kling-Gupta Efficiency](./Kling_Gupta_Efficiency.ipynb)\n",
     "- [Quantile Loss](./Quantile_Loss.ipynb)\n",
     "- [Murphy Diagrams](./Murphy_Diagrams.ipynb)\n",