Skip to content

Commit

Permalink
Remove remaining "support" methods from DataFrame (#9068)
Browse files Browse the repository at this point in the history
This PR rewrites DataFrame's `kurtosis` and `skew` to use the `_reduce` method introduced in #8944, and it inlines the logic for `count` so that it bypasses the `_apply_support_method` machinery. This allows us to remove most of that machinery entirely; the only part that remains is the code for row-wise reductions and scans, which dispatches to `cupy`.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Charles Blackmon-Luca (https://github.com/charlesbluca)

URL: #9068
  • Loading branch information
vyasr authored Aug 20, 2021
1 parent ca58c1e commit 5869264
Show file tree
Hide file tree
Showing 3 changed files with 143 additions and 220 deletions.
140 changes: 17 additions & 123 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -6305,12 +6305,9 @@ def count(self, axis=0, level=None, numeric_only=False, **kwargs):
if axis != 0:
raise NotImplementedError("Only axis=0 is currently supported.")

return self._apply_support_method(
"count",
axis=axis,
level=level,
numeric_only=numeric_only,
**kwargs,
return Series._from_data(
{None: [self._data[col].valid_count for col in self._data.names]},
as_index(self._data.names),
)

_SUPPORT_AXIS_LOOKUP = {
Expand Down Expand Up @@ -6343,7 +6340,7 @@ def _reduce(
{None: result}, as_index(self._data.names)
)
elif axis == 1:
return self._apply_support_method_axis_1(op, **kwargs)
return self._apply_cupy_method_axis_1(op, **kwargs)

def _scan(
self, op, axis=None, *args, **kwargs,
Expand All @@ -6353,7 +6350,7 @@ def _scan(
if axis == 0:
return super()._scan(op, axis=axis, *args, **kwargs)
elif axis == 1:
return self._apply_support_method_axis_1(f"cum{op}", **kwargs)
return self._apply_cupy_method_axis_1(f"cum{op}", **kwargs)

def mode(self, axis=0, numeric_only=False, dropna=True):
"""
Expand Down Expand Up @@ -6458,100 +6455,17 @@ def mode(self, axis=0, numeric_only=False, dropna=True):
def kurtosis(
self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs
):
"""
Return Fisher's unbiased kurtosis of a sample.
Kurtosis obtained using Fisher’s definition of
kurtosis (kurtosis of normal == 0.0). Normalized by N-1.
Parameters
----------
skipna: bool, default True
Exclude NA/null values when computing the result.
Returns
-------
Series
Notes
-----
Parameters currently not supported are `axis`, `level` and
`numeric_only`
Examples
--------
>>> import cudf
>>> df = cudf.DataFrame({'a': [1, 2, 3, 4], 'b': [7, 8, 9, 10]})
>>> df.kurt()
a -1.2
b -1.2
dtype: float64
"""
if axis not in (0, "index", None):
raise NotImplementedError("Only axis=0 is currently supported.")

if numeric_only not in (None, True):
msg = "Kurtosis only supports int, float, and bool dtypes."
raise NotImplementedError(msg)

filtered = self.select_dtypes(include=[np.number, np.bool_])
return filtered._apply_support_method(
"kurtosis",
axis=axis,
skipna=skipna,
level=level,
numeric_only=numeric_only,
**kwargs,
obj = self.select_dtypes(include=[np.number, np.bool_])
return super(DataFrame, obj).kurtosis(
axis, skipna, level, numeric_only, **kwargs
)

# Alias for kurtosis.
kurt = kurtosis

def skew(
self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs
):
"""
Return unbiased Fisher-Pearson skew of a sample.
Parameters
----------
skipna: bool, default True
Exclude NA/null values when computing the result.
Returns
-------
Series
Notes
-----
Parameters currently not supported are `axis`, `level` and
`numeric_only`
Examples
--------
>>> import cudf
>>> df = cudf.DataFrame({'a': [3, 2, 3, 4], 'b': [7, 8, 10, 10]})
>>> df.skew()
a 0.00000
b -0.37037
dtype: float64
"""
if axis not in (0, "index", None):
raise NotImplementedError("Only axis=0 is currently supported.")

if numeric_only not in (None, True):
msg = "Skew only supports int, float, and bool dtypes."
raise NotImplementedError(msg)

filtered = self.select_dtypes(include=[np.number, np.bool_])
return filtered._apply_support_method(
"skew",
axis=axis,
skipna=skipna,
level=level,
numeric_only=numeric_only,
**kwargs,
obj = self.select_dtypes(include=[np.number, np.bool_])
return super(DataFrame, obj).skew(
axis, skipna, level, numeric_only, **kwargs
)

def all(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs):
Expand All @@ -6562,23 +6476,11 @@ def any(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs):
obj = self.select_dtypes(include="bool") if bool_only else self
return super(DataFrame, obj).any(axis, skipna, level, **kwargs)

def _apply_support_method_axis_0(self, method, *args, **kwargs):
result = [
getattr(self[col], method)(*args, **kwargs)
for col in self._data.names
]
def _apply_cupy_method_axis_1(self, method, *args, **kwargs):
# This method uses cupy to perform scans and reductions along rows of a
# DataFrame. Since cuDF is designed around columnar storage and
# operations, we convert DataFrames to 2D cupy arrays for these ops.

if isinstance(result[0], Series):
support_result = result
result = DataFrame(index=support_result[0].index)
for idx, col in enumerate(self._data.names):
result[col] = support_result[idx]
else:
result = Series(result)
result = result.set_index(self._data.names)
return result

def _apply_support_method_axis_1(self, method, *args, **kwargs):
# for dask metadata compatibility
skipna = kwargs.pop("skipna", None)
skipna = True if skipna is None else skipna
Expand Down Expand Up @@ -6608,13 +6510,13 @@ def _apply_support_method_axis_1(self, method, *args, **kwargs):
min_count = kwargs.pop("min_count", None)
if min_count not in (None, 0):
raise NotImplementedError(
"Row-wise operations currently do not " "support `min_count`."
"Row-wise operations currently do not support `min_count`."
)

bool_only = kwargs.pop("bool_only", None)
if bool_only not in (None, True):
raise NotImplementedError(
"Row-wise operations currently do not " "support `bool_only`."
"Row-wise operations currently do not support `bool_only`."
)

# This parameter is only necessary for axis 0 reductions that cuDF
Expand Down Expand Up @@ -6674,14 +6576,6 @@ def _apply_support_method_axis_1(self, method, *args, **kwargs):
result_df.columns = prepared.columns
return result_df

def _apply_support_method(self, method, axis=0, *args, **kwargs):
axis = self._get_axis_from_axis_arg(axis)

if axis == 0:
return self._apply_support_method_axis_0(method, *args, **kwargs)
elif axis == 1:
return self._apply_support_method_axis_1(method, *args, **kwargs)

def _columns_view(self, columns):
"""
Return a subset of the DataFrame's columns as a view.
Expand Down
126 changes: 126 additions & 0 deletions python/cudf/cudf/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
)
from cudf.core.column_accessor import ColumnAccessor
from cudf.core.join import merge
from cudf.utils.docutils import copy_docstring
from cudf.utils.dtypes import (
_is_non_decimal_numeric_dtype,
_is_scalar_or_zero_d_array,
Expand Down Expand Up @@ -4056,6 +4057,131 @@ def var(
**kwargs,
)

def kurtosis(
    self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs
):
    """
    Return Fisher's unbiased kurtosis of a sample.

    Kurtosis obtained using Fisher's definition of
    kurtosis (kurtosis of normal == 0.0). Normalized by N-1.

    Parameters
    ----------
    axis: {0 or 'index', None}, default None
        Axis for the function to be applied on. Only the index axis
        is accepted; any other value (including 1 / 'columns') raises
        ``NotImplementedError``.
    skipna: bool, default True
        Exclude NA/null values when computing the result.

    Returns
    -------
    Series or scalar

    Raises
    ------
    NotImplementedError
        If ``axis`` is not one of ``0``, ``"index"`` or ``None``.

    Notes
    -----
    Parameters currently not supported are `level` and `numeric_only`

    Examples
    --------
    **Series**

    >>> import cudf
    >>> series = cudf.Series([1, 2, 3, 4])
    >>> series.kurtosis()
    -1.1999999999999904

    **DataFrame**

    >>> import cudf
    >>> df = cudf.DataFrame({'a': [1, 2, 3, 4], 'b': [7, 8, 9, 10]})
    >>> df.kurt()
    a   -1.2
    b   -1.2
    dtype: float64
    """
    # Reject unsupported axes up front, before delegating to `_reduce`.
    if axis not in (0, "index", None):
        raise NotImplementedError("Only axis=0 is currently supported.")

    # All arguments are forwarded unchanged to the `_reduce` hook under
    # the operation name "kurtosis".
    return self._reduce(
        "kurtosis",
        axis=axis,
        skipna=skipna,
        level=level,
        numeric_only=numeric_only,
        **kwargs,
    )

# Alias for kurtosis.
@copy_docstring(kurtosis)
def kurt(
    self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs
):
    # Written as a forwarding wrapper (not `kurt = kurtosis`): the call
    # goes through `self.kurtosis`, so it resolves to whichever
    # `kurtosis` implementation the instance's class provides.
    forwarded = {
        "axis": axis,
        "skipna": skipna,
        "level": level,
        "numeric_only": numeric_only,
    }
    # `kwargs` cannot contain any of the four named parameters above, so
    # merging never overwrites an entry.
    forwarded.update(kwargs)
    return self.kurtosis(**forwarded)

def skew(
    self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs
):
    """
    Return unbiased Fisher-Pearson skew of a sample.

    Parameters
    ----------
    axis: {0 or 'index', None}, default None
        Axis for the function to be applied on. Only the index axis
        is accepted; any other value (including 1 / 'columns') raises
        ``NotImplementedError``.
    skipna: bool, default True
        Exclude NA/null values when computing the result.

    Returns
    -------
    Series or scalar

    Raises
    ------
    NotImplementedError
        If ``axis`` is not one of ``0``, ``"index"`` or ``None``.

    Notes
    -----
    Parameters currently not supported are `level` and `numeric_only`

    Examples
    --------
    **Series**

    >>> import cudf
    >>> series = cudf.Series([1, 2, 3, 4, 5, 6, 6])
    >>> series
    0    1
    1    2
    2    3
    3    4
    4    5
    5    6
    6    6
    dtype: int64
    >>> series.skew()
    -0.288195490292614

    **DataFrame**

    >>> import cudf
    >>> df = cudf.DataFrame({'a': [3, 2, 3, 4], 'b': [7, 8, 10, 10]})
    >>> df.skew()
    a    0.00000
    b   -0.37037
    dtype: float64
    """
    # Reject unsupported axes up front, before delegating to `_reduce`.
    if axis not in (0, "index", None):
        raise NotImplementedError("Only axis=0 is currently supported.")

    # All arguments are forwarded unchanged to the `_reduce` hook under
    # the operation name "skew".
    return self._reduce(
        "skew",
        axis=axis,
        skipna=skipna,
        level=level,
        numeric_only=numeric_only,
        **kwargs,
    )

def all(self, axis=0, skipna=True, level=None, **kwargs):
"""
Return whether all elements are True in DataFrame.
Expand Down
Loading

0 comments on commit 5869264

Please sign in to comment.