diff --git a/doc/api.rst b/doc/api.rst index 8ec6843d24a..c9f24e8c3f1 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -29,6 +29,8 @@ Top-level functions full_like zeros_like ones_like + cov + corr dot polyval map_blocks diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 447aaf5b0bf..e9f2920b0f7 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -36,6 +36,8 @@ Breaking changes New Features ~~~~~~~~~~~~ +- Added :py:func:`xarray.cov` and :py:func:`xarray.corr` (:issue:`3784`, :pull:`3550`, :pull:`4089`). + By `Andrew Williams `_ and `Robin Beer `_. - Added :py:meth:`DataArray.polyfit` and :py:func:`xarray.polyval` for fitting polynomials. (:issue:`3349`) By `Pascal Bourgault `_. - Control over attributes of result in :py:func:`merge`, :py:func:`concat`, diff --git a/xarray/__init__.py b/xarray/__init__.py index 0fead57e5fb..e8274d13ffe 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -17,7 +17,7 @@ from .core.alignment import align, broadcast from .core.combine import auto_combine, combine_by_coords, combine_nested from .core.common import ALL_DIMS, full_like, ones_like, zeros_like -from .core.computation import apply_ufunc, dot, polyval, where +from .core.computation import apply_ufunc, corr, cov, dot, polyval, where from .core.concat import concat from .core.dataarray import DataArray from .core.dataset import Dataset @@ -54,6 +54,8 @@ "concat", "decode_cf", "dot", + "cov", + "corr", "full_like", "load_dataarray", "load_dataset", diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 28bf818e4a3..6ac4f74c3a6 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -24,7 +24,7 @@ import numpy as np from . import dtypes, duck_array_ops, utils -from .alignment import deep_align +from .alignment import align, deep_align from .merge import merge_coordinates_without_align from .options import OPTIONS from .pycompat import dask_array_type @@ -1069,6 +1069,184 @@ def earth_mover_distance(first_samples, return apply_array_ufunc(func, *args, dask=dask) +def cov(da_a, da_b, dim=None, ddof=1): + """ + Compute covariance between two DataArray objects along a shared dimension. + + Parameters + ---------- + da_a: DataArray object + Array to compute. + da_b: DataArray object + Array to compute. + dim : str, optional + The dimension along which the covariance will be computed + ddof: int, optional + If ddof=1, covariance is normalized by N-1, giving an unbiased estimate, + else normalization is by N. + + Returns + ------- + covariance: DataArray + + See also + -------- + pandas.Series.cov: corresponding pandas function + xr.corr: respective function to calculate correlation + + Examples + -------- + >>> da_a = DataArray(np.array([[1, 2, 3], [0.1, 0.2, 0.3], [3.2, 0.6, 1.8]]), + ... dims=("space", "time"), + ... coords=[('space', ['IA', 'IL', 'IN']), + ... ('time', pd.date_range("2000-01-01", freq="1D", periods=3))]) + >>> da_a + + array([[1. , 2. , 3. ], + [0.1, 0.2, 0.3], + [3.2, 0.6, 1.8]]) + Coordinates: + * space (space) >> da_a = DataArray(np.array([[0.2, 0.4, 0.6], [15, 10, 5], [3.2, 0.6, 1.8]]), + ... dims=("space", "time"), + ... coords=[('space', ['IA', 'IL', 'IN']), + ... ('time', pd.date_range("2000-01-01", freq="1D", periods=3))]) + >>> da_b + + array([[ 0.2, 0.4, 0.6], + [15. , 10. , 5. ], + [ 3.2, 0.6, 1.8]]) + Coordinates: + * space (space) >> xr.cov(da_a, da_b) + + array(-3.53055556) + >>> xr.cov(da_a, da_b, dim='time') + + array([ 0.2, -0.5, 1.69333333]) + Coordinates: + * space (space) >> da_a = DataArray(np.array([[1, 2, 3], [0.1, 0.2, 0.3], [3.2, 0.6, 1.8]]), + ... dims=("space", "time"), + ... coords=[('space', ['IA', 'IL', 'IN']), + ... ('time', pd.date_range("2000-01-01", freq="1D", periods=3))]) + >>> da_a + + array([[1. , 2. , 3. ], + [0.1, 0.2, 0.3], + [3.2, 0.6, 1.8]]) + Coordinates: + * space (space) >> da_a = DataArray(np.array([[0.2, 0.4, 0.6], [15, 10, 5], [3.2, 0.6, 1.8]]), + ... dims=("space", "time"), + ... coords=[('space', ['IA', 'IL', 'IN']), + ... ('time', pd.date_range("2000-01-01", freq="1D", periods=3))]) + >>> da_b + + array([[ 0.2, 0.4, 0.6], + [15. , 10. , 5. ], + [ 3.2, 0.6, 1.8]]) + Coordinates: + * space (space) >> xr.corr(da_a, da_b) + + array(-0.57087777) + >>> xr.corr(da_a, da_b, dim='time') + + array([ 1., -1., 1.]) + Coordinates: + * space (space)