Skip to content

Commit

Permalink
Fix any, all reduction behavior for axis=None and warn for othe…
Browse files Browse the repository at this point in the history
…r reductions (#13831)

Fixes: #13827 

This PR:

- [x] Fixes `axis=None` behavior for `any` & `all` reductions.
- [x] Introduces `FutureWarning` for upcoming change in behavior for the rest of the reductions since some of the reductions are only updated in pandas-2.0 and the rest would be updated in pandas-3.0.
- [x] Fixed numpy array function inconsistency because of mismatching default for `axis` parameter.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: #13831
  • Loading branch information
galipremsagar authored Aug 9, 2023
1 parent ba6ff60 commit edb25a8
Show file tree
Hide file tree
Showing 6 changed files with 156 additions and 29 deletions.
48 changes: 39 additions & 9 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1333,6 +1333,11 @@ def __array_function__(self, func, types, args, kwargs):
return NotImplemented

try:
if func.__name__ in {"any", "all"}:
# NumPy default for `axis` is
# different from `cudf`/`pandas`
# hence need this special handling.
kwargs.setdefault("axis", None)
if cudf_func := getattr(self.__class__, func.__name__, None):
out = cudf_func(*args, **kwargs)
# The dot product of two DataFrames returns an array in pandas.
Expand Down Expand Up @@ -2557,7 +2562,7 @@ def reindex(
"Cannot specify both 'axis' and any of 'index' or 'columns'."
)

axis = self._get_axis_from_axis_arg(axis)
axis = 0 if axis is None else self._get_axis_from_axis_arg(axis)
if axis == 0:
if index is None:
index = labels
Expand Down Expand Up @@ -5798,7 +5803,6 @@ def count(self, axis=0, level=None, numeric_only=False, **kwargs):
_SUPPORT_AXIS_LOOKUP = {
0: 0,
1: 1,
None: 0,
"index": 0,
"columns": 1,
}
Expand Down Expand Up @@ -5826,9 +5830,26 @@ def _reduce(
if source.empty:
return Series(index=cudf.Index([], dtype="str"))

axis = source._get_axis_from_axis_arg(axis)
if axis is None:
if op in {"any", "all"}:
axis = 2
else:
# Do not remove until pandas 2.0 support is added.
warnings.warn(
f"In a future version, {type(self).__name__}"
f".{op}(axis=None) will return a scalar {op} over "
"the entire DataFrame. To retain the old behavior, "
f"use '{type(self).__name__}.{op}(axis=0)' or "
f"just '{type(self)}.{op}()'",
FutureWarning,
)
axis = 0
elif axis is no_default:
axis = 0
else:
axis = source._get_axis_from_axis_arg(axis)

if axis == 0:
if axis in {0, 2}:
try:
result = [
getattr(source._data[col], op)(**kwargs)
Expand Down Expand Up @@ -5867,7 +5888,10 @@ def _reduce(
)
source = self._get_columns_by_label(numeric_cols)
if source.empty:
return Series(index=cudf.Index([], dtype="str"))
if axis == 2:
return getattr(as_column([]), op)(**kwargs)
else:
return Series(index=cudf.Index([], dtype="str"))
try:
result = [
getattr(source._data[col], op)(**kwargs)
Expand All @@ -5879,12 +5903,16 @@ def _reduce(
)
else:
raise

return Series._from_data(
{None: result}, as_index(source._data.names)
)
if axis == 2:
return getattr(as_column(result), op)(**kwargs)
else:
return Series._from_data(
{None: result}, as_index(source._data.names)
)
elif axis == 1:
return source._apply_cupy_method_axis_1(op, **kwargs)
else:
raise ValueError(f"Invalid value of {axis=} received for {op}")

@_cudf_nvtx_annotate
def _scan(
Expand All @@ -5894,6 +5922,8 @@ def _scan(
*args,
**kwargs,
):
if axis is None:
axis = 0
axis = self._get_axis_from_axis_arg(axis)

if axis == 0:
Expand Down
64 changes: 49 additions & 15 deletions python/cudf/cudf/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import cudf
from cudf import _lib as libcudf
from cudf._typing import Dtype
from cudf.api.extensions import no_default
from cudf.api.types import is_bool_dtype, is_dtype_equal, is_scalar
from cudf.core.buffer import acquire_spill_lock
from cudf.core.column import (
Expand Down Expand Up @@ -1885,7 +1886,7 @@ def _reduce(self, *args, **kwargs):
@_cudf_nvtx_annotate
def min(
self,
axis=None,
axis=no_default,
skipna=True,
level=None,
numeric_only=None,
Expand Down Expand Up @@ -1936,7 +1937,7 @@ def min(
@_cudf_nvtx_annotate
def max(
self,
axis=None,
axis=no_default,
skipna=True,
level=None,
numeric_only=None,
Expand Down Expand Up @@ -1987,7 +1988,7 @@ def max(
@_cudf_nvtx_annotate
def sum(
self,
axis=None,
axis=no_default,
skipna=True,
dtype=None,
level=None,
Expand Down Expand Up @@ -2045,7 +2046,7 @@ def sum(
@_cudf_nvtx_annotate
def product(
self,
axis=None,
axis=no_default,
skipna=True,
dtype=None,
level=None,
Expand Down Expand Up @@ -2089,11 +2090,11 @@ def product(
b 5040
dtype: int64
"""
axis = self._get_axis_from_axis_arg(axis)

return self._reduce(
# cuDF columns use "product" as the op name, but cupy uses "prod"
# and we need cupy if axis == 1.
"product" if axis == 0 else "prod",
"prod" if axis in {1, "columns"} else "product",
axis=axis,
skipna=skipna,
dtype=dtype,
Expand All @@ -2108,7 +2109,12 @@ def product(

@_cudf_nvtx_annotate
def mean(
self, axis=None, skipna=True, level=None, numeric_only=None, **kwargs
self,
axis=no_default,
skipna=True,
level=None,
numeric_only=None,
**kwargs,
):
"""
Return the mean of the values for the requested axis.
Expand Down Expand Up @@ -2154,7 +2160,7 @@ def mean(
@_cudf_nvtx_annotate
def std(
self,
axis=None,
axis=no_default,
skipna=True,
level=None,
ddof=1,
Expand Down Expand Up @@ -2210,7 +2216,7 @@ def std(
@_cudf_nvtx_annotate
def var(
self,
axis=None,
axis=no_default,
skipna=True,
level=None,
ddof=1,
Expand Down Expand Up @@ -2264,7 +2270,12 @@ def var(

@_cudf_nvtx_annotate
def kurtosis(
self, axis=None, skipna=True, level=None, numeric_only=None, **kwargs
self,
axis=no_default,
skipna=True,
level=None,
numeric_only=None,
**kwargs,
):
"""
Return Fisher's unbiased kurtosis of a sample.
Expand Down Expand Up @@ -2305,7 +2316,7 @@ def kurtosis(
b -1.2
dtype: float64
"""
if axis not in (0, "index", None):
if axis not in (0, "index", None, no_default):
raise NotImplementedError("Only axis=0 is currently supported.")

return self._reduce(
Expand All @@ -2322,7 +2333,12 @@ def kurtosis(

@_cudf_nvtx_annotate
def skew(
self, axis=None, skipna=True, level=None, numeric_only=None, **kwargs
self,
axis=no_default,
skipna=True,
level=None,
numeric_only=None,
**kwargs,
):
"""
Return unbiased Fisher-Pearson skew of a sample.
Expand Down Expand Up @@ -2366,7 +2382,7 @@ def skew(
b -0.37037
dtype: float64
"""
if axis not in (0, "index", None):
if axis not in (0, "index", None, no_default):
raise NotImplementedError("Only axis=0 is currently supported.")

return self._reduce(
Expand All @@ -2385,6 +2401,15 @@ def all(self, axis=0, skipna=True, level=None, **kwargs):
Parameters
----------
axis : {0 or 'index', 1 or 'columns', None}, default 0
Indicate which axis or axes should be reduced. For `Series`
this parameter is unused and defaults to `0`.
- 0 or 'index' : reduce the index, return a Series
whose index is the original column labels.
- 1 or 'columns' : reduce the columns, return a Series
whose index is the original index.
- None : reduce all axes, return a scalar.
skipna: bool, default True
Exclude NA/null values. If the entire row/column is NA and
skipna is True, then the result will be True, as for an
Expand All @@ -2398,7 +2423,7 @@ def all(self, axis=0, skipna=True, level=None, **kwargs):
Notes
-----
Parameters currently not supported are `axis`, `bool_only`, `level`.
Parameters currently not supported are `bool_only`, `level`.
Examples
--------
Expand All @@ -2424,6 +2449,15 @@ def any(self, axis=0, skipna=True, level=None, **kwargs):
Parameters
----------
axis : {0 or 'index', 1 or 'columns', None}, default 0
Indicate which axis or axes should be reduced. For `Series`
this parameter is unused and defaults to `0`.
- 0 or 'index' : reduce the index, return a Series
whose index is the original column labels.
- 1 or 'columns' : reduce the columns, return a Series
whose index is the original index.
- None : reduce all axes, return a scalar.
skipna: bool, default True
Exclude NA/null values. If the entire row/column is NA and
skipna is True, then the result will be False, as for an
Expand All @@ -2437,7 +2471,7 @@ def any(self, axis=0, skipna=True, level=None, **kwargs):
Notes
-----
Parameters currently not supported are `axis`, `bool_only`, `level`.
Parameters currently not supported are `bool_only`, `level`.
Examples
--------
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/indexed_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3421,7 +3421,7 @@ def sample(
0 1 3
1 2 4
"""
axis = self._get_axis_from_axis_arg(axis)
axis = 0 if axis is None else self._get_axis_from_axis_arg(axis)
size = self.shape[axis]

# Compute `n` from parameter `frac`.
Expand Down
6 changes: 3 additions & 3 deletions python/cudf/cudf/core/single_column_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

import cudf
from cudf._typing import Dtype, NotImplementedType, ScalarLike
from cudf.api.extensions import no_default
from cudf.api.types import (
_is_scalar_or_zero_d_array,
is_bool_dtype,
Expand All @@ -30,20 +31,19 @@ class SingleColumnFrame(Frame, NotIterable):

_SUPPORT_AXIS_LOOKUP = {
0: 0,
None: 0,
"index": 0,
}

@_cudf_nvtx_annotate
def _reduce(
self,
op,
axis=None,
axis=no_default,
level=None,
numeric_only=None,
**kwargs,
):
if axis not in (None, 0):
if axis not in (None, 0, no_default):
raise NotImplementedError("axis parameter is not implemented yet")

if level is not None:
Expand Down
5 changes: 5 additions & 0 deletions python/cudf/cudf/tests/test_array_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,11 @@ def test_array_func_cudf_series(np_ar, func):
lambda x: np.sum(x, axis=0),
lambda x: np.var(x, ddof=1),
lambda x: np.dot(x, x.transpose()),
lambda x: np.all(x),
lambda x: np.any(x),
lambda x: np.product(x),
lambda x: np.product(x, axis=0),
lambda x: np.product(x, axis=1),
],
)
def test_array_func_cudf_dataframe(pd_df, func):
Expand Down
Loading

0 comments on commit edb25a8

Please sign in to comment.