From a5633c2045b2946b5ea8b83b89f3af6ab6d0fcfa Mon Sep 17 00:00:00 2001
From: Mayank Anand <36782063+mayankanand007@users.noreply.github.com>
Date: Tue, 7 Dec 2021 15:29:27 -0500
Subject: [PATCH] Adding support for `Series.autocorr` (#9833)

Fixes: #9635

TODO:
- [x] add implementation
- [x] tests
- [x] add to `.rst` files for documentation

Authors:
  - Mayank Anand (https://github.com/mayankanand007)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Michael Wang (https://github.com/isVoid)

URL: https://github.com/rapidsai/cudf/pull/9833
---
Reviewer notes (free-text region between "---" and the first "diff --git";
not part of the commit message or of the diff payload):

* The line structure of this patch was rebuilt from a copy in which every
  newline and run of spaces had been collapsed. The diff payload itself is
  unchanged; hunk header counts and the diffstat were re-checked against
  the rebuilt lines. Indentation is inferred from Python / reST syntax, and
  the column spacing of the `round` docstring table in the frame.py context
  lines could not be recovered -- TODO confirm against the upstream commit
  before applying (context lines must match byte-for-byte).
* series.py: `autocorr` is a thin wrapper, `self.corr(self.shift(lag))`.
  Its docstring runs two sentences together in the opening paragraph;
  a one-sentence summary followed by a blank line would read better.
  Suggested as a follow-up rather than edited here, so that the blob ids
  and hunk counts below stay valid.
* test_series.py: the `cudf.Series` objects are built inside the
  `parametrize` decorator, i.e. when the test module is imported, not when
  the test runs. Parametrizing on the raw lists and constructing the Series
  inside `test_autocorr` would defer that work -- suggestion only.
* frame.py: the single removed blank line looks unrelated to `autocorr`.

 docs/cudf/source/api_docs/series.rst  |  1 +
 python/cudf/cudf/core/frame.py        |  1 -
 python/cudf/cudf/core/series.py       | 25 +++++++++++++++++++++++++
 python/cudf/cudf/tests/test_series.py | 17 +++++++++++++++++
 4 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/docs/cudf/source/api_docs/series.rst b/docs/cudf/source/api_docs/series.rst
index a3b621a9f7d..d234dfc4bcb 100644
--- a/docs/cudf/source/api_docs/series.rst
+++ b/docs/cudf/source/api_docs/series.rst
@@ -118,6 +118,7 @@ Computations / descriptive stats
    Series.abs
    Series.all
    Series.any
+   Series.autocorr
    Series.ceil
    Series.clip
    Series.corr
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index d7a75cb9f40..9969b9ac0fa 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1919,7 +1919,6 @@ def round(self, decimals=0, how="half_even"):
         2 0.7 0.0
         3 0.2 0.0
         """
-
         if isinstance(decimals, cudf.Series):
             decimals = decimals.to_pandas()
 
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 0f0ebe55043..3aae79af4e8 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -2824,6 +2824,31 @@ def corr(self, other, method="pearson", min_periods=None):
 
         return lhs._column.corr(rhs._column)
 
+    def autocorr(self, lag=1):
+        """Compute the lag-N autocorrelation. This method computes the Pearson
+        correlation between the Series and its shifted self.
+
+        Parameters
+        ----------
+        lag : int, default 1
+            Number of lags to apply before performing autocorrelation.
+
+        Returns
+        -------
+        result : float
+            The Pearson correlation between self and self.shift(lag).
+
+        Examples
+        --------
+        >>> import cudf
+        >>> s = cudf.Series([0.25, 0.5, 0.2, -0.05])
+        >>> s.autocorr()
+        0.10355263309024071
+        >>> s.autocorr(lag=2)
+        -0.9999999999999999
+        """
+        return self.corr(self.shift(lag))
+
     def isin(self, values):
         """Check whether values are contained in Series.
 
diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py
index 20f5f3a19e4..d59e3ba7571 100644
--- a/python/cudf/cudf/tests/test_series.py
+++ b/python/cudf/cudf/tests/test_series.py
@@ -1348,6 +1348,23 @@ def test_nullable_bool_dtype_series(data, bool_dtype):
     assert_eq(psr, gsr.to_pandas(nullable=True))
 
 
+@pytest.mark.parametrize(
+    "cudf_series",
+    [
+        cudf.Series([0.25, 0.5, 0.2, -0.05]),
+        cudf.Series([0, 1, 2, np.nan, 4, cudf.NA, 6]),
+    ],
+)
+@pytest.mark.parametrize("lag", [1, 2, 3, 4])
+def test_autocorr(cudf_series, lag):
+    psr = cudf_series.to_pandas()
+
+    cudf_corr = cudf_series.autocorr(lag=lag)
+    pd_corr = psr.autocorr(lag=lag)
+
+    assert_eq(pd_corr, cudf_corr)
+
+
 @pytest.mark.parametrize(
     "data",
     [