Skip to content

Commit

Permalink
Fix documentation for DataFrame.corr and Series.corr. (#10493)
Browse files: browse the repository at this point in the history.
Follow-up work to fix documentation from #7141 before the 22.04 release.

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - https://github.com/brandon-b-miller

URL: #10493
  • Loading branch information
bdice authored Mar 23, 2022
1 parent 7b9646b commit 9edcbd4
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 3 deletions.
17 changes: 15 additions & 2 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -5664,12 +5664,20 @@ def cov(self, **kwargs):
df._set_column_names_like(self)
return df

def corr(self, method="pearson"):
def corr(self, method="pearson", min_periods=None):
"""Compute the correlation matrix of a DataFrame.
Parameters
----------
method : {'pearson', 'spearman'}, default 'pearson'
The correlation method to use, one of 'pearson' or 'spearman'.
Method used to compute correlation:
- pearson : Standard correlation coefficient
- spearman : Spearman rank correlation
min_periods : int, optional
Minimum number of observations required per pair of columns to
have a valid result.
Returns
-------
Expand All @@ -5682,6 +5690,10 @@ def corr(self, method="pearson"):
values = self.rank().values
else:
raise ValueError("method must be either 'pearson', 'spearman'")

if min_periods is not None:
raise NotImplementedError("Unsupported argument 'min_periods'")

corr = cupy.corrcoef(values, rowvar=False)
cols = self._data.to_pandas_index()
df = DataFrame(cupy.asfortranarray(corr)).set_index(cols)
Expand All @@ -5692,6 +5704,7 @@ def corr(self, method="pearson"):
def to_struct(self, name=None):
"""
Return a struct Series composed of the columns of the DataFrame.
Parameters
----------
name: optional
Expand Down
15 changes: 14 additions & 1 deletion python/cudf/cudf/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2413,6 +2413,19 @@ def corr(self, other, method="pearson", min_periods=None):
"""Calculates the sample correlation between two Series,
excluding missing values.
Parameters
----------
other : Series
Series with which to compute the correlation.
method : {'pearson', 'spearman'}, default 'pearson'
Method used to compute correlation:
- pearson : Standard correlation coefficient
- spearman : Spearman rank correlation
min_periods : int, optional
Minimum number of observations needed to have a valid result.
Examples
--------
>>> import cudf
Expand All @@ -2427,7 +2440,7 @@ def corr(self, other, method="pearson", min_periods=None):
if method not in {"pearson", "spearman"}:
raise ValueError(f"Unknown method {method}")

if min_periods not in (None,):
if min_periods is not None:
raise NotImplementedError("Unsupported argument 'min_periods'")

if self.empty or other.empty:
Expand Down

0 comments on commit 9edcbd4

Please sign in to comment.