Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support complex arrays in xr.corr #7392

Merged
merged 11 commits into from
Feb 14, 2023
2 changes: 2 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ Bug fixes
- add a ``keep_attrs`` parameter to :py:meth:`Dataset.pad`, :py:meth:`DataArray.pad`,
and :py:meth:`Variable.pad` (:pull:`7267`).
By `Justus Magin <https://github.com/keewis>`_.
- Fix :py:meth:`xr.cov` and :py:meth:`xr.corr` for complex valued arrays (:issue:`7340`, :pull:`7392`).
By `Michael Niklas <https://github.com/headtr1ck>`_.

Documentation
~~~~~~~~~~~~~
Expand Down
33 changes: 21 additions & 12 deletions xarray/core/computation.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
Callable,
Hashable,
Iterable,
Literal,
Mapping,
Sequence,
TypeVar,
Expand Down Expand Up @@ -1217,7 +1218,9 @@ def apply_ufunc(
return apply_array_ufunc(func, *args, dask=dask)


def cov(da_a, da_b, dim=None, ddof=1):
def cov(
da_a: T_DataArray, da_b: T_DataArray, dim: Hashable | None = None, ddof: int = 1
) -> T_DataArray:
"""
Compute covariance between two DataArray objects along a shared dimension.

Expand All @@ -1227,9 +1230,9 @@ def cov(da_a, da_b, dim=None, ddof=1):
Array to compute.
da_b : DataArray
Array to compute.
dim : str, optional
dim : Hashable, optional
The dimension along which the covariance will be computed
ddof : int, optional
ddof : int, default: 1
If ddof=1, covariance is normalized by N-1, giving an unbiased estimate,
else normalization is by N.

Expand Down Expand Up @@ -1297,7 +1300,9 @@ def cov(da_a, da_b, dim=None, ddof=1):
return _cov_corr(da_a, da_b, dim=dim, ddof=ddof, method="cov")


def corr(da_a, da_b, dim=None):
def corr(
da_a: T_DataArray, da_b: T_DataArray, dim: Hashable | None = None
) -> T_DataArray:
"""
Compute the Pearson correlation coefficient between
two DataArray objects along a shared dimension.
Expand All @@ -1308,7 +1313,7 @@ def corr(da_a, da_b, dim=None):
Array to compute.
da_b : DataArray
Array to compute.
dim : str, optional
dim : Hashable, optional
The dimension along which the correlation will be computed

Returns
Expand Down Expand Up @@ -1376,12 +1381,17 @@ def corr(da_a, da_b, dim=None):


def _cov_corr(
da_a: T_DataArray, da_b: T_DataArray, dim=None, ddof=0, method=None
da_a: T_DataArray,
da_b: T_DataArray,
dim: Hashable | None = None,
ddof: int = 0,
method: Literal["cov", "corr", None] = None,
) -> T_DataArray:
"""
Internal method for xr.cov() and xr.corr() so only have to
sanitize the input arrays once and we don't repeat code.
"""
dim = None if dim is None else (dim,)
# 1. Broadcast the two arrays
da_a, da_b = align(da_a, da_b, join="inner", copy=False)

Expand All @@ -1396,22 +1406,21 @@ def _cov_corr(
demeaned_da_b = da_b - da_b.mean(dim=dim)

# 4. Compute covariance along the given dim
#
# N.B. `skipna=True` is required or auto-covariance is computed incorrectly. E.g.
# Try xr.cov(da,da) for da = xr.DataArray([[1, 2], [1, np.nan]], dims=["x", "time"])
cov = (demeaned_da_a * demeaned_da_b).sum(dim=dim, skipna=True, min_count=1) / (
valid_count
)
cov = (demeaned_da_a.conjugate() * demeaned_da_b).sum(
dcherian marked this conversation as resolved.
Show resolved Hide resolved
dim=dim, skipna=True, min_count=1
) / (valid_count)

if method == "cov":
return cov
return cov # type: ignore[return-value]

else:
# compute std + corr
da_a_std = da_a.std(dim=dim)
da_b_std = da_b.std(dim=dim)
corr = cov / (da_a_std * da_b_std)
return corr
return corr # type: ignore[return-value]


def cross(
Expand Down
8 changes: 7 additions & 1 deletion xarray/tests/test_computation.py
Original file line number Diff line number Diff line change
Expand Up @@ -1378,7 +1378,7 @@ def test_vectorize_exclude_dims_dask() -> None:

def test_corr_only_dataarray() -> None:
with pytest.raises(TypeError, match="Only xr.DataArray is supported"):
xr.corr(xr.Dataset(), xr.Dataset())
xr.corr(xr.Dataset(), xr.Dataset()) # type: ignore[type-var]


def arrays_w_tuples():
Expand Down Expand Up @@ -1598,6 +1598,12 @@ def test_autocov(da_a, dim) -> None:
assert_allclose(actual, expected)


def test_complex_cov() -> None:
da = xr.DataArray([1j, -1j])
actual = xr.cov(da, da)
assert abs(actual.item()) == 2


@requires_dask
def test_vectorize_dask_new_output_dims() -> None:
# regression test for GH3574
Expand Down