diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 436363aadae..4982b75f753 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -5664,12 +5664,20 @@ def cov(self, **kwargs):
         df._set_column_names_like(self)
         return df
 
-    def corr(self, method="pearson"):
+    def corr(self, method="pearson", min_periods=None):
         """Compute the correlation matrix of a DataFrame.
+
         Parameters
         ----------
         method : {'pearson', 'spearman'}, default 'pearson'
-            The correlation method to use, one of 'pearson' or 'spearman'.
+            Method used to compute correlation:
+
+            - pearson : Standard correlation coefficient
+            - spearman : Spearman rank correlation
+
+        min_periods : int, optional
+            Minimum number of observations required per pair of columns to
+            have a valid result.
 
         Returns
         -------
@@ -5682,6 +5690,10 @@ def corr(self, method="pearson"):
             values = self.rank().values
         else:
             raise ValueError("method must be either 'pearson', 'spearman'")
+
+        if min_periods is not None:
+            raise NotImplementedError("Unsupported argument 'min_periods'")
+
         corr = cupy.corrcoef(values, rowvar=False)
         cols = self._data.to_pandas_index()
         df = DataFrame(cupy.asfortranarray(corr)).set_index(cols)
@@ -5692,6 +5704,7 @@ def corr(self, method="pearson"):
     def to_struct(self, name=None):
         """
         Return a struct Series composed of the columns of the DataFrame.
+
         Parameters
         ----------
         name: optional
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 3244497933e..b1ee9e99dfb 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -2413,6 +2413,19 @@ def corr(self, other, method="pearson", min_periods=None):
         """Calculates the sample correlation between two Series,
         excluding missing values.
 
+        Parameters
+        ----------
+        other : Series
+            Series with which to compute the correlation.
+        method : {'pearson', 'spearman'}, default 'pearson'
+            Method used to compute correlation:
+
+            - pearson : Standard correlation coefficient
+            - spearman : Spearman rank correlation
+
+        min_periods : int, optional
+            Minimum number of observations needed to have a valid result.
+
         Examples
         --------
         >>> import cudf
@@ -2427,7 +2440,7 @@ def corr(self, other, method="pearson", min_periods=None):
         if method not in {"pearson", "spearman"}:
             raise ValueError(f"Unknown method {method}")
 
-        if min_periods not in (None,):
+        if min_periods is not None:
             raise NotImplementedError("Unsupported argument 'min_periods'")
 
         if self.empty or other.empty: