Skip to content

Commit

Permalink
parent 41fa0e5
Browse files Browse the repository at this point in the history
author Liam Bluett <[email protected]> 1732590092 +1000
committer Liam Bluett <[email protected]> 1734391465 +1000

Implemented Spearman's correlation

Modified notebook to remove noise and add an explanation and reference.

Add Spearman's to gallery

Change notebook metadata to use 'Python 3 (ipykernel)' and 'python3' rather than custom 'ml' kernel.

Testing for spearman implemented

Maintainer notes followed, notebook fixed... again

Modified notebook to remove noise and add an explanation and reference.

cleanup more

Add Spearman's to gallery

Testing for spearman implemented

Maintainer notes followed, notebook fixed... again

Notebook kernel changed for testing

Update src/scores/continuous/correlation/correlation_impl.py

add pyfunc for hyperlink

Co-authored-by: Stephanie Chong <[email protected]>
Signed-off-by: Liam Bluett <[email protected]>

reorder alphabetically
  • Loading branch information
lbluett committed Dec 17, 2024
1 parent 84ff284 commit e37eb61
Show file tree
Hide file tree
Showing 7 changed files with 554 additions and 4 deletions.
1 change: 1 addition & 0 deletions docs/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
.. autofunction:: scores.continuous.flip_flop_index
.. autofunction:: scores.continuous.flip_flop_index_proportion_exceeding
.. autofunction:: scores.continuous.correlation.pearsonr
.. autofunction:: scores.continuous.correlation.spearmanr
.. autofunction:: scores.continuous.multiplicative_bias
.. autofunction:: scores.continuous.pbias
.. autofunction:: scores.continuous.kge
Expand Down
4 changes: 4 additions & 0 deletions docs/included.md
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,10 @@
- [API](api.md#scores.continuous.rmse)
- [Tutorial](project:./tutorials/Root_Mean_Squared_Error.md)
- [Wikipedia](https://en.wikipedia.org/wiki/Root-mean-square_deviation)
* - Spearman's Correlation Coefficient
- [API](api.md#scores.continuous.correlation.spearmanr)
- [Tutorial](project:./tutorials/Spearmans_Correlation.md)
- [Wikipedia](https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient)
* - Threshold Weighted Absolute Error
- [API](api.md#scores.continuous.tw_absolute_error)
- [Tutorial](project:./tutorials/Threshold_Weighted_Scores.md)
Expand Down
4 changes: 2 additions & 2 deletions src/scores/continuous/correlation/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@
Import the functions from the implementations into the public API
"""

from scores.continuous.correlation.correlation_impl import pearsonr
from scores.continuous.correlation.correlation_impl import pearsonr, spearmanr

__all__ = ["pearsonr"]
__all__ = ["pearsonr", "spearmanr"]
62 changes: 62 additions & 0 deletions src/scores/continuous/correlation/correlation_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,65 @@ def pearsonr(
)

return xr.corr(fcst, obs, reduce_dims)


def _rank_jointly(da, dims):
    """Rank ``da`` over all of ``dims`` pooled together.

    Returns a ``(ranks, sample_dim)`` tuple, where ``sample_dim`` is the name of
    the single dimension the ranks run along: the original dimension when only
    one was given, otherwise a temporary stacked dimension.
    """
    if len(dims) == 1:
        sample_dim = dims[0]
    else:
        # Pool every reduced dimension into one so each value is ranked against
        # all samples, not only those sharing its row/column.
        sample_dim = "_spearmanr_sample"
        da = da.stack({sample_dim: dims})
    if da.chunks is not None:
        # Ranking needs the whole sample dimension available at once, so
        # dask-backed data is collapsed to a single chunk along it.
        da = da.chunk({sample_dim: -1})
    return da.rank(sample_dim), sample_dim


def spearmanr(
    fcst: xr.DataArray,
    obs: xr.DataArray,
    *,  # Force the remaining arguments to be keyword-only
    reduce_dims: Optional[FlexibleDimensionTypes] = None,
    preserve_dims: Optional[FlexibleDimensionTypes] = None,
) -> xr.DataArray:
    """
    Calculates Spearman's rank correlation coefficient between two xarray DataArrays.

    Spearman's correlation is the Pearson correlation of the *ranks* of the data.
    It therefore assesses monotonic relationships, whereas Pearson's correlation
    assesses strictly linear ones. The two agree for a perfectly linear
    relationship and diverge when the relationship is monotonic but not linear.

    When there are no tied values this is equivalent to

    .. math::
        \\rho = 1 - \\frac{6\\sum_{i=1}^{n}{d_i^2}}{n(n^2-1)}

    where:
        - :math:`\\rho` = Spearman's rank correlation coefficient
        - :math:`d_i` = the difference between the ranks of x and y in a sample
        - :math:`n` = the number of samples

    Args:
        fcst: Forecast or predicted variables.
        obs: Observed variables.
        reduce_dims: Optionally specify which dimensions to reduce when
            calculating the Spearman's rank correlation coefficient.
            All other dimensions will be preserved.
        preserve_dims: Optionally specify which dimensions to preserve when
            calculating the Spearman's rank correlation coefficient. All other
            dimensions will be reduced. As a special case, 'all' will allow all
            dimensions to be preserved; no dimension is then left to rank and
            correlate over, so the inputs are passed to :py:func:`xarray.corr`
            unranked.

    Returns:
        xr.DataArray: An xarray object with Spearman's rank correlation coefficient values

    Note:
        This function isn't set up to take weights.

        Samples are paired before ranking: the inputs are aligned on their
        shared coordinates and any sample that is missing in either input is
        ignored in both. When several dimensions are reduced, the ranks are
        computed jointly over all of them.

    See also:
        :py:func:`scores.continuous.correlation.pearsonr`

    Reference:
        https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient
    """
    reduce_dims = scores.utils.gather_dimensions(
        fcst.dims, obs.dims, reduce_dims=reduce_dims, preserve_dims=preserve_dims
    )

    # Pair the samples up *before* ranking. xr.corr would otherwise do this
    # alignment and NaN masking after the ranks were assigned, so the ranks
    # would be taken over samples that never enter the correlation.
    fcst, obs = xr.align(fcst, obs, join="inner", copy=False)
    # Broadcasting also ensures every reduced dimension exists on both inputs.
    fcst, obs = xr.broadcast(fcst, obs)
    paired = fcst.notnull() & obs.notnull()
    fcst = fcst.where(paired)
    obs = obs.where(paired)

    sample_dims = [dim for dim in fcst.dims if dim in reduce_dims]
    if not sample_dims:
        # Nothing to rank over (e.g. preserve_dims="all"): defer to xr.corr.
        return xr.corr(fcst, obs, reduce_dims)

    fcst_ranks, sample_dim = _rank_jointly(fcst, sample_dims)
    obs_ranks, _ = _rank_jointly(obs, sample_dims)

    return xr.corr(fcst_ranks, obs_ranks, sample_dim)
70 changes: 68 additions & 2 deletions tests/continuous/test_correlation.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import pytest
import xarray as xr

from scores.continuous.correlation import pearsonr
from scores.continuous.correlation import pearsonr, spearmanr

try:
import dask
Expand Down Expand Up @@ -68,6 +68,19 @@
coords=[("time", [1, 2, 3])],
)

# Fixtures for testing the divergence between Pearson and Spearman

# Generate non-linear monotonic data using a logistic function
# NOTE(review): nothing visible in this fixture draws random numbers
# (linspace and exp are deterministic), so the seed below looks unnecessary;
# it does, however, reset NumPy's global RNG at import time - confirm nothing
# else relies on that before removing it.
np.random.seed(42)
X = np.linspace(0, 10, 100)
Y = 1 / (1 + np.exp(-X))  # Logistic relationship

# Convert to xarray.DataArray
X_DA = xr.DataArray(X, dims="sample", name="x")
Y_DA = xr.DataArray(Y, dims="sample", name="y")
# Expected Pearson coefficient for X vs Y, quoted to two decimal places
PEARSON_OUTPUT = 0.76
# Expected Spearman coefficient: Y increases with X, so the ranks agree exactly
SPEARMAN_OUTPUT = 1.0


@pytest.mark.parametrize(
("da1", "da2", "reduce_dims", "preserve_dims", "expected"),
Expand All @@ -82,7 +95,7 @@
(DA4_CORR, DA5_CORR, "space", None, EXP_CORR_DIFF_SIZE),
],
)
def test_correlation(da1, da2, reduce_dims, preserve_dims, expected):
def test_pearson_correlation(da1, da2, reduce_dims, preserve_dims, expected):
"""
Tests continuous.correlation
"""
Expand All @@ -103,3 +116,56 @@ def test_correlation_dask():
result = result.compute()
assert isinstance(result.data, (np.ndarray, np.generic))
xr.testing.assert_allclose(result, EXP_CORR_REDUCE_ALL)


@pytest.mark.parametrize(
    ("da1", "da2", "reduce_dims", "preserve_dims", "expected"),
    [
        # preserve_dims supplied: keep "space"
        (DA1_CORR, DA2_CORR, None, "space", EXP_CORR_KEEP_SPACE_DIM),
        # reduce_dims supplied: reduce "time" (same expected result as above)
        (DA1_CORR, DA2_CORR, "time", None, EXP_CORR_KEEP_SPACE_DIM),
        # Neither supplied: reduce over everything
        (DA3_CORR, DA2_CORR, None, None, EXP_CORR_REDUCE_ALL),
        # Inputs of different sizes
        (DA4_CORR, DA5_CORR, "space", None, EXP_CORR_DIFF_SIZE),
    ],
)
def test_spearman_correlation(da1, da2, reduce_dims, preserve_dims, expected):
    """
    Tests continuous.correlation.spearmanr against the expected fixtures.

    NOTE(review): the expected values are the same fixtures used by the Pearson
    tests in this module, which presumably relies on rank and linear correlation
    coinciding for this data - confirm.
    """
    dim_kwargs = {"reduce_dims": reduce_dims, "preserve_dims": preserve_dims}
    actual = spearmanr(da1, da2, **dim_kwargs)
    xr.testing.assert_allclose(actual, expected)


def test_spearman_correlation_dask():
    """
    Tests that continuous.correlation.spearmanr stays lazy on Dask-backed input
    and yields the expected values once computed.
    """
    if dask == "Unavailable":  # pragma: no cover
        pytest.skip("Dask unavailable, could not run test")  # pragma: no cover

    # Chunked inputs should produce a lazy, Dask-backed result
    lazy = spearmanr(DA3_CORR.chunk(), DA2_CORR.chunk())
    assert isinstance(lazy.data, dask.array.Array)

    # Computing must give plain NumPy data with the expected values
    computed = lazy.compute()
    assert isinstance(computed.data, (np.ndarray, np.generic))
    xr.testing.assert_allclose(computed, EXP_CORR_REDUCE_ALL)


@pytest.mark.parametrize(
    ("da1", "da2", "reduce_dims", "preserve_dims", "expected", "corr"),
    [
        # Check non-linear monotonic relationship
        (X_DA, Y_DA, None, None, PEARSON_OUTPUT, "pearson"),
        (X_DA, Y_DA, None, None, SPEARMAN_OUTPUT, "spearman"),
    ],
)
def test_divergence(da1, da2, reduce_dims, preserve_dims, expected, corr):
    """
    Tests that Pearson and Spearman diverge on monotonic, non-linear data.

    Both coefficients are compared with a tolerance rather than exact float
    equality, since they are the result of floating-point arithmetic.
    """
    corr_func = spearmanr if corr == "spearman" else pearsonr
    result = corr_func(da1, da2, preserve_dims=preserve_dims, reduce_dims=reduce_dims)

    # The Pearson expectation is only quoted to two decimal places, so it gets a
    # half-unit-in-the-last-place tolerance; Spearman should be 1 up to round-off.
    tolerance = 1e-9 if corr == "spearman" else 5e-3
    assert result.item() == pytest.approx(expected, abs=tolerance)
416 changes: 416 additions & 0 deletions tutorials/Spearmans_Correlation.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions tutorials/Tutorial_Gallery.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
"- [RMSE](./Root_Mean_Squared_Error.ipynb)\n",
"- [MSE](./Mean_Squared_Error.ipynb)\n",
"- [Pearson's Correlation](./Pearsons_Correlation.ipynb)\n",
"- [Spearman's Correlation](./Spearmans_Correlation.ipynb)\n",
"- [Kling-Gupta Efficiency](./Kling_Gupta_Efficiency.ipynb)\n",
"- [Quantile Loss](./Quantile_Loss.ipynb)\n",
"- [Murphy Diagrams](./Murphy_Diagrams.ipynb)\n",
Expand Down

0 comments on commit e37eb61

Please sign in to comment.