Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove remaining "support" methods from DataFrame #9068

Merged
merged 4 commits into from
Aug 20, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 17 additions & 123 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -6302,12 +6302,9 @@ def count(self, axis=0, level=None, numeric_only=False, **kwargs):
if axis != 0:
raise NotImplementedError("Only axis=0 is currently supported.")

return self._apply_support_method(
"count",
axis=axis,
level=level,
numeric_only=numeric_only,
**kwargs,
return Series._from_data(
{None: [self._data[col].valid_count for col in self._data.names]},
as_index(self._data.names),
)

_SUPPORT_AXIS_LOOKUP = {
Expand Down Expand Up @@ -6340,7 +6337,7 @@ def _reduce(
{None: result}, as_index(self._data.names)
)
elif axis == 1:
return self._apply_support_method_axis_1(op, **kwargs)
return self._apply_cupy_method_axis_1(op, **kwargs)

def _scan(
self, op, axis=None, *args, **kwargs,
Expand All @@ -6350,7 +6347,7 @@ def _scan(
if axis == 0:
return super()._scan(op, axis=axis, *args, **kwargs)
elif axis == 1:
return self._apply_support_method_axis_1(f"cum{op}", **kwargs)
return self._apply_cupy_method_axis_1(f"cum{op}", **kwargs)

def mode(self, axis=0, numeric_only=False, dropna=True):
"""
Expand Down Expand Up @@ -6455,100 +6452,17 @@ def mode(self, axis=0, numeric_only=False, dropna=True):
def kurtosis(
self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs
):
"""
Return Fisher's unbiased kurtosis of a sample.

Kurtosis obtained using Fisher’s definition of
kurtosis (kurtosis of normal == 0.0). Normalized by N-1.

Parameters
----------

skipna: bool, default True
Exclude NA/null values when computing the result.

Returns
-------
Series

Notes
-----
Parameters currently not supported are `axis`, `level` and
`numeric_only`

Examples
--------
>>> import cudf
>>> df = cudf.DataFrame({'a': [1, 2, 3, 4], 'b': [7, 8, 9, 10]})
>>> df.kurt()
a -1.2
b -1.2
dtype: float64
"""
if axis not in (0, "index", None):
raise NotImplementedError("Only axis=0 is currently supported.")

if numeric_only not in (None, True):
msg = "Kurtosis only supports int, float, and bool dtypes."
raise NotImplementedError(msg)

filtered = self.select_dtypes(include=[np.number, np.bool_])
return filtered._apply_support_method(
"kurtosis",
axis=axis,
skipna=skipna,
level=level,
numeric_only=numeric_only,
**kwargs,
obj = self.select_dtypes(include=[np.number, np.bool_])
return super(DataFrame, obj).kurtosis(
axis, skipna, level, numeric_only, **kwargs
)

# Alias for kurtosis.
kurt = kurtosis

def skew(
self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs
):
"""
Return unbiased Fisher-Pearson skew of a sample.

Parameters
----------
skipna: bool, default True
Exclude NA/null values when computing the result.

Returns
-------
Series

Notes
-----
Parameters currently not supported are `axis`, `level` and
`numeric_only`

Examples
--------
>>> import cudf
>>> df = cudf.DataFrame({'a': [3, 2, 3, 4], 'b': [7, 8, 10, 10]})
>>> df.skew()
a 0.00000
b -0.37037
dtype: float64
"""
if axis not in (0, "index", None):
raise NotImplementedError("Only axis=0 is currently supported.")

if numeric_only not in (None, True):
msg = "Skew only supports int, float, and bool dtypes."
raise NotImplementedError(msg)

filtered = self.select_dtypes(include=[np.number, np.bool_])
return filtered._apply_support_method(
"skew",
axis=axis,
skipna=skipna,
level=level,
numeric_only=numeric_only,
**kwargs,
obj = self.select_dtypes(include=[np.number, np.bool_])
return super(DataFrame, obj).skew(
axis, skipna, level, numeric_only, **kwargs
)

def all(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs):
Expand All @@ -6559,23 +6473,11 @@ def any(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs):
obj = self.select_dtypes(include="bool") if bool_only else self
return super(DataFrame, obj).any(axis, skipna, level, **kwargs)

def _apply_support_method_axis_0(self, method, *args, **kwargs):
result = [
getattr(self[col], method)(*args, **kwargs)
for col in self._data.names
]
def _apply_cupy_method_axis_1(self, method, *args, **kwargs):
# This method uses cupy to perform scans and reductions along rows of a
# DataFrame. Since cuDF is designed around columnar storage and
# operations, we convert DataFrames to 2D cupy arrays for these ops.

if isinstance(result[0], Series):
support_result = result
result = DataFrame(index=support_result[0].index)
for idx, col in enumerate(self._data.names):
result[col] = support_result[idx]
else:
result = Series(result)
result = result.set_index(self._data.names)
return result

def _apply_support_method_axis_1(self, method, *args, **kwargs):
# for dask metadata compatibility
skipna = kwargs.pop("skipna", None)
skipna = True if skipna is None else skipna
Expand Down Expand Up @@ -6605,13 +6507,13 @@ def _apply_support_method_axis_1(self, method, *args, **kwargs):
min_count = kwargs.pop("min_count", None)
if min_count not in (None, 0):
raise NotImplementedError(
"Row-wise operations currently do not " "support `min_count`."
"Row-wise operations currently do not support `min_count`."
)

bool_only = kwargs.pop("bool_only", None)
if bool_only not in (None, True):
raise NotImplementedError(
"Row-wise operations currently do not " "support `bool_only`."
"Row-wise operations currently do not support `bool_only`."
)

# This parameter is only necessary for axis 0 reductions that cuDF
Expand Down Expand Up @@ -6671,14 +6573,6 @@ def _apply_support_method_axis_1(self, method, *args, **kwargs):
result_df.columns = prepared.columns
return result_df

def _apply_support_method(self, method, axis=0, *args, **kwargs):
axis = self._get_axis_from_axis_arg(axis)

if axis == 0:
return self._apply_support_method_axis_0(method, *args, **kwargs)
elif axis == 1:
return self._apply_support_method_axis_1(method, *args, **kwargs)

def _columns_view(self, columns):
"""
Return a subset of the DataFrame's columns as a view.
Expand Down
126 changes: 126 additions & 0 deletions python/cudf/cudf/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
)
from cudf.core.column_accessor import ColumnAccessor
from cudf.core.join import merge
from cudf.utils.docutils import copy_docstring
from cudf.utils.dtypes import (
_is_non_decimal_numeric_dtype,
_is_scalar_or_zero_d_array,
Expand Down Expand Up @@ -4056,6 +4057,131 @@ def var(
**kwargs,
)

def kurtosis(
    self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs
):
    """
    Compute the unbiased kurtosis of a sample (Fisher's definition).

    With Fisher's definition the kurtosis of a normal distribution
    is 0.0. The result is normalized by N-1.

    Parameters
    ----------
    axis: {0, "index", None}, default None
        Axis for the function to be applied on. Any other value
        raises ``NotImplementedError``.
    skipna: bool, default True
        Exclude NA/null values when computing the result.

    Returns
    -------
    Series or scalar

    Raises
    ------
    NotImplementedError
        If `axis` is not one of 0, "index" or None.

    Notes
    -----
    Parameters currently not supported are `level` and `numeric_only`

    Examples
    --------
    **Series**

    >>> import cudf
    >>> series = cudf.Series([1, 2, 3, 4])
    >>> series.kurtosis()
    -1.1999999999999904

    **DataFrame**

    >>> import cudf
    >>> df = cudf.DataFrame({'a': [1, 2, 3, 4], 'b': [7, 8, 9, 10]})
    >>> df.kurt()
    a   -1.2
    b   -1.2
    dtype: float64
    """
    if axis in (0, "index", None):
        # All named arguments are handed to the generic reduction
        # untouched; extra keyword arguments ride along after them.
        reduce_args = {
            "axis": axis,
            "skipna": skipna,
            "level": level,
            "numeric_only": numeric_only,
        }
        return self._reduce("kurtosis", **reduce_args, **kwargs)

    raise NotImplementedError("Only axis=0 is currently supported.")

# Short-name alias: `kurt` takes the same arguments as `kurtosis`,
# reuses its docstring via the decorator, and simply delegates to it.
@copy_docstring(kurtosis)
def kurt(
    self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs
):
    forwarded = {
        "axis": axis,
        "skipna": skipna,
        "level": level,
        "numeric_only": numeric_only,
    }
    return self.kurtosis(**forwarded, **kwargs)

def skew(
    self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs
):
    """
    Return unbiased Fisher-Pearson skew of a sample.

    Parameters
    ----------
    axis: {0, "index", None}, default None
        Axis for the function to be applied on. Any other value
        raises ``NotImplementedError``.
    skipna: bool, default True
        Exclude NA/null values when computing the result.

    Returns
    -------
    Series or scalar

    Raises
    ------
    NotImplementedError
        If `axis` is not one of 0, "index" or None.

    Notes
    -----
    Parameters currently not supported are `level` and `numeric_only`

    Examples
    --------
    **Series**

    >>> import cudf
    >>> series = cudf.Series([1, 2, 3, 4, 5, 6, 6])
    >>> series
    0    1
    1    2
    2    3
    3    4
    4    5
    5    6
    6    6
    dtype: int64
    >>> series.skew()  # doctest: +ELLIPSIS
    -0.288195...

    **DataFrame**

    >>> import cudf
    >>> df = cudf.DataFrame({'a': [3, 2, 3, 4], 'b': [7, 8, 10, 10]})
    >>> df.skew()
    a    0.00000
    b   -0.37037
    dtype: float64
    """
    # Reject unsupported axes up front, before delegating to the
    # generic reduction.
    if axis not in (0, "index", None):
        raise NotImplementedError("Only axis=0 is currently supported.")

    return self._reduce(
        "skew",
        axis=axis,
        skipna=skipna,
        level=level,
        numeric_only=numeric_only,
        **kwargs,
    )

def all(self, axis=0, skipna=True, level=None, **kwargs):
"""
Return whether all elements are True in DataFrame.
Expand Down
Loading