Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor Frame scans #9021

Merged
merged 13 commits into from
Aug 17, 2021
168 changes: 18 additions & 150 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
"max": "nanmax",
"sum": "nansum",
"prod": "nanprod",
"product": "nanprod",
"mean": "nanmean",
"std": "nanstd",
"var": "nanvar",
Expand Down Expand Up @@ -5851,7 +5852,7 @@ def _from_arrays(cls, data, index=None, columns=None, nan_as_null=False):
)

if data.ndim == 2:
num_cols = len(data[0])
num_cols = data.shape[1]
else:
# Since we validate ndim to be either 1 or 2 above,
# this case can be assumed to be ndim == 1.
Expand Down Expand Up @@ -6225,7 +6226,7 @@ def _prepare_for_rowwise_op(self, method, skipna):
col.nullable for col in self._columns
):
msg = (
f"Row-wise operations to calculate '{method}' is not "
f"Row-wise operations to calculate '{method}' do not "
f"currently support columns with null values. "
f"Consider removing them with .dropna() "
f"or using .fillna()."
Expand Down Expand Up @@ -6340,154 +6341,15 @@ def _reduce(
elif axis == 1:
return self._apply_support_method_axis_1(op, **kwargs)

def cummin(self, axis=None, skipna=True, *args, **kwargs):
    """
    Return cumulative minimum of the DataFrame.

    Parameters
    ----------
    axis : {0, 'index'} or None, default None
        Axis to accumulate along. Only ``0``, ``'index'`` and ``None``
        are accepted; any other value raises ``NotImplementedError``.
    skipna : bool, default True
        Exclude NA/null values. If an entire row/column is NA,
        the result will be NA.
    *args, **kwargs
        Forwarded unchanged to ``_apply_support_method``.

    Returns
    -------
    DataFrame

    Raises
    ------
    NotImplementedError
        If ``axis`` is anything other than ``0``, ``'index'`` or ``None``.

    Notes
    -----
    Accumulating along ``axis=1`` is currently not supported.

    Examples
    --------
    >>> import cudf
    >>> df = cudf.DataFrame({'a': [1, 2, 3, 4], 'b': [7, 8, 9, 10]})
    >>> df.cummin()
       a  b
    0  1  7
    1  1  7
    2  1  7
    3  1  7
    """
    # Reject unsupported axes up front so the caller gets a clear error
    # instead of a failure from deeper in the support-method machinery.
    if axis not in (0, "index", None):
        raise NotImplementedError("Only axis=0 is currently supported.")

    return self._apply_support_method(
        "cummin", *args, axis=axis, skipna=skipna, **kwargs
    )

def cummax(self, axis=None, skipna=True, *args, **kwargs):
    """
    Return cumulative maximum of the DataFrame.

    Parameters
    ----------
    axis : {0, 'index'} or None, default None
        Axis to accumulate along. Only ``0``, ``'index'`` and ``None``
        are accepted; any other value raises ``NotImplementedError``.
    skipna : bool, default True
        Exclude NA/null values. If an entire row/column is NA,
        the result will be NA.
    *args, **kwargs
        Forwarded unchanged to ``_apply_support_method``.

    Returns
    -------
    DataFrame

    Raises
    ------
    NotImplementedError
        If ``axis`` is anything other than ``0``, ``'index'`` or ``None``.

    Notes
    -----
    Accumulating along ``axis=1`` is currently not supported.

    Examples
    --------
    >>> import cudf
    >>> df = cudf.DataFrame({'a': [1, 2, 3, 4], 'b': [7, 8, 9, 10]})
    >>> df.cummax()
       a   b
    0  1   7
    1  2   8
    2  3   9
    3  4  10
    """
    # Reject unsupported axes up front so the caller gets a clear error
    # instead of a failure from deeper in the support-method machinery.
    if axis not in (0, "index", None):
        raise NotImplementedError("Only axis=0 is currently supported.")

    return self._apply_support_method(
        "cummax", *args, axis=axis, skipna=skipna, **kwargs
    )

def cumsum(self, axis=None, skipna=True, *args, **kwargs):
    """
    Return cumulative sum of the DataFrame.

    Parameters
    ----------
    axis : {0, 'index'} or None, default None
        Axis to accumulate along. Only ``0``, ``'index'`` and ``None``
        are accepted; any other value raises ``NotImplementedError``.
    skipna : bool, default True
        Exclude NA/null values. If an entire row/column is NA,
        the result will be NA.
    *args, **kwargs
        Forwarded unchanged to ``_apply_support_method``.

    Returns
    -------
    DataFrame

    Raises
    ------
    NotImplementedError
        If ``axis`` is anything other than ``0``, ``'index'`` or ``None``.

    Notes
    -----
    Accumulating along ``axis=1`` is currently not supported.

    Examples
    --------
    >>> import cudf
    >>> df = cudf.DataFrame({'a': [1, 2, 3, 4], 'b': [7, 8, 9, 10]})
    >>> df.cumsum()
        a   b
    0   1   7
    1   3  15
    2   6  24
    3  10  34
    """
    # Reject unsupported axes up front so the caller gets a clear error
    # instead of a failure from deeper in the support-method machinery.
    if axis not in (0, "index", None):
        raise NotImplementedError("Only axis=0 is currently supported.")

    return self._apply_support_method(
        "cumsum", *args, axis=axis, skipna=skipna, **kwargs
    )

def cumprod(self, axis=None, skipna=True, *args, **kwargs):
"""
Return cumulative product of the DataFrame.

Parameters
----------

skipna: bool, default True
Exclude NA/null values. If an entire row/column is NA,
the result will be NA.

Returns
-------
DataFrame

Notes
-----
Parameters currently not supported is `axis`

Examples
--------
>>> import cudf
>>> df = cudf.DataFrame({'a': [1, 2, 3, 4], 'b': [7, 8, 9, 10]})
>>> s.cumprod()
a b
0 1 7
1 2 56
2 6 504
3 24 5040
"""
if axis not in (0, "index", None):
raise NotImplementedError("Only axis=0 is currently supported.")
def _scan(
self, op, axis=None, *args, **kwargs,
):
axis = self._get_axis_from_axis_arg(axis)

return self._apply_support_method(
"cumprod", axis=axis, skipna=skipna, *args, **kwargs
)
if axis == 0:
return super()._scan(op, axis=axis, *args, **kwargs)
elif axis == 1:
return self._apply_support_method_axis_1(f"cum{op}", **kwargs)

def mode(self, axis=0, numeric_only=False, dropna=True):
"""
Expand Down Expand Up @@ -6715,13 +6577,14 @@ def _apply_support_method_axis_0(self, method, *args, **kwargs):
def _apply_support_method_axis_1(self, method, *args, **kwargs):
# for dask metadata compatibility
skipna = kwargs.pop("skipna", None)
skipna = True if skipna is None else skipna
if method not in _cupy_nan_methods_map and skipna not in (
None,
True,
1,
):
raise NotImplementedError(
f"Row-wise operation to calculate '{method}'"
f"Row-wise operations to calculate '{method}'"
f" currently do not support `skipna=False`."
)

Expand Down Expand Up @@ -6750,6 +6613,11 @@ def _apply_support_method_axis_1(self, method, *args, **kwargs):
"Row-wise operations currently do not " "support `bool_only`."
)

# This parameter is only necessary for axis 0 reductions that cuDF
# performs internally. cupy already upcasts smaller integer/bool types
# to int64 when accumulating.
kwargs.pop("cast_to_int", None)

prepared, mask, common_dtype = self._prepare_for_rowwise_op(
method, skipna
)
Expand Down
Loading