Skip to content

Commit

Permalink
Align Series APIs with pandas 2.x (#16333)
Browse files Browse the repository at this point in the history
Similar to #16310, the following APIs have been modified to adjust/add parameters:

* `reindex`
* `reset_index`
* `add_suffix`
* `searchsorted`
* `clip`
* `mask`
* `shift`
* `dropna`
* `rename`
* `cov`
* `apply`
* `replace`

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: #16333
  • Loading branch information
mroeschke authored Jul 22, 2024
1 parent 852b151 commit 135c995
Show file tree
Hide file tree
Showing 4 changed files with 240 additions and 39 deletions.
19 changes: 17 additions & 2 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2844,6 +2844,10 @@ def reindex(
index=index,
inplace=False,
fill_value=fill_value,
level=level,
method=method,
limit=limit,
tolerance=tolerance,
)

@_performance_tracking
Expand Down Expand Up @@ -3187,7 +3191,14 @@ class speed type
)
)
def reset_index(
self, level=None, drop=False, inplace=False, col_level=0, col_fill=""
self,
level=None,
drop=False,
inplace=False,
col_level=0,
col_fill="",
allow_duplicates: bool = False,
names: abc.Hashable | abc.Sequence[abc.Hashable] | None = None,
):
return self._mimic_inplace(
DataFrame._from_data(
Expand All @@ -3196,6 +3207,8 @@ def reset_index(
drop=drop,
col_level=col_level,
col_fill=col_fill,
allow_duplicates=allow_duplicates,
names=names,
)
),
inplace=inplace,
Expand Down Expand Up @@ -3666,7 +3679,9 @@ def add_prefix(self, prefix, axis=None):
return out

@_performance_tracking
def add_suffix(self, suffix):
def add_suffix(self, suffix, axis=None):
if axis is not None:
raise NotImplementedError("axis is currently not implemented.")
# TODO: Change to deep=False when copy-on-write is default
out = self.copy(deep=True)
out.columns = [
Expand Down
9 changes: 8 additions & 1 deletion python/cudf/cudf/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1187,6 +1187,7 @@ def searchsorted(
self,
values,
side: Literal["left", "right"] = "left",
sorter=None,
ascending: bool = True,
na_position: Literal["first", "last"] = "last",
) -> ScalarLike | cupy.ndarray:
Expand All @@ -1199,6 +1200,10 @@ def searchsorted(
side : str {'left', 'right'} optional, default 'left'
If 'left', the index of the first suitable location found is given
If 'right', return the last such index
sorter : 1-D array-like, optional
Optional array of integer indices that sort `self` into ascending
order. They are typically the result of ``np.argsort``.
Currently not supported.
ascending : bool optional, default True
Sorted Frame is in ascending order (otherwise descending)
na_position : str {'last', 'first'} optional, default 'last'
Expand Down Expand Up @@ -1245,10 +1250,12 @@ def searchsorted(
>>> df.searchsorted(values_df, ascending=False)
array([4, 4, 4, 0], dtype=int32)
"""
# Call libcudf search_sorted primitive
# Note: pandas.DataFrame does not support searchsorted

if na_position not in {"first", "last"}:
raise ValueError(f"invalid na_position: {na_position}")
elif sorter is not None:
raise NotImplementedError("sorter is currently not supported.")

scalar_flag = None
if is_scalar(values):
Expand Down
87 changes: 78 additions & 9 deletions python/cudf/cudf/core/indexed_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@
{argument}
inplace : bool, default False
Modify the DataFrame in place (do not create a new object).
allow_duplicates : bool, default False
Allow duplicate column labels to be created.
Currently not supported.
Returns
-------
Expand Down Expand Up @@ -902,7 +905,7 @@ def replace(
return self._mimic_inplace(result, inplace=inplace)

@_performance_tracking
def clip(self, lower=None, upper=None, inplace=False, axis=1):
def clip(self, lower=None, upper=None, axis=1, inplace=False):
"""
Trim values at input threshold(s).
Expand Down Expand Up @@ -1779,7 +1782,14 @@ def skew(self, axis=0, skipna=True, numeric_only=False, **kwargs):
)

@_performance_tracking
def mask(self, cond, other=None, inplace: bool = False) -> Self | None:
def mask(
self,
cond,
other=None,
inplace: bool = False,
axis=None,
level=None,
) -> Self | None:
"""
Replace values where the condition is True.
Expand Down Expand Up @@ -1831,6 +1841,10 @@ def mask(self, cond, other=None, inplace: bool = False) -> Self | None:
4 0
dtype: int64
"""
if axis is not None:
raise NotImplementedError("axis is not supported.")
elif level is not None:
raise NotImplementedError("level is not supported.")

if not hasattr(cond, "__invert__"):
# We Invert `cond` below and call `where`, so
Expand Down Expand Up @@ -2042,13 +2056,26 @@ def interpolate(
)

@_performance_tracking
def shift(self, periods=1, freq=None, axis=0, fill_value=None):
def shift(
self,
periods=1,
freq=None,
axis=0,
fill_value=None,
suffix: str | None = None,
):
"""Shift values by `periods` positions."""
axis = self._get_axis_from_axis_arg(axis)
if axis != 0:
raise ValueError("Only axis=0 is supported.")
raise NotImplementedError("Only axis=0 is supported.")
if freq is not None:
raise ValueError("The freq argument is not yet supported.")
raise NotImplementedError(
"The freq argument is not yet supported."
)
if suffix is not None:
raise NotImplementedError(
"The suffix argument is not yet supported."
)

data_columns = (
col.shift(periods, fill_value) for col in self._columns
Expand Down Expand Up @@ -3225,14 +3252,19 @@ def _split(self, splits, keep_index=True):
]

@_performance_tracking
def bfill(self, value=None, axis=None, inplace=None, limit=None):
def bfill(
self, value=None, axis=None, inplace=None, limit=None, limit_area=None
):
"""
Synonym for :meth:`Series.fillna` with ``method='bfill'``.
Returns
-------
Object with missing values filled or None if ``inplace=True``.
"""
if limit_area is not None:
raise NotImplementedError("limit_area is currently not supported.")

with warnings.catch_warnings():
warnings.simplefilter("ignore", FutureWarning)
return self.fillna(
Expand Down Expand Up @@ -3264,14 +3296,24 @@ def backfill(self, value=None, axis=None, inplace=None, limit=None):
return self.bfill(value=value, axis=axis, inplace=inplace, limit=limit)

@_performance_tracking
def ffill(self, value=None, axis=None, inplace=None, limit=None):
def ffill(
self,
value=None,
axis=None,
inplace=None,
limit=None,
limit_area: Literal["inside", "outside", None] = None,
):
"""
Synonym for :meth:`Series.fillna` with ``method='ffill'``.
Returns
-------
Object with missing values filled or None if ``inplace=True``.
"""
if limit_area is not None:
raise NotImplementedError("limit_area is currently not supported.")

with warnings.catch_warnings():
warnings.simplefilter("ignore", FutureWarning)
return self.fillna(
Expand Down Expand Up @@ -3363,7 +3405,7 @@ def add_prefix(self, prefix, axis=None):
Use `Series.add_prefix` or `DataFrame.add_prefix`"
)

def add_suffix(self, suffix):
def add_suffix(self, suffix, axis=None):
"""
Suffix labels with string `suffix`.
Expand Down Expand Up @@ -3653,6 +3695,10 @@ def _reindex(
index=None,
inplace=False,
fill_value=NA,
level=None,
method=None,
limit=None,
tolerance=None,
):
"""
Helper for `.reindex`
Expand All @@ -3677,6 +3723,15 @@ def _reindex(
-------
Series or DataFrame
"""
if method is not None:
raise NotImplementedError("method is not currently supported.")
if level is not None:
raise NotImplementedError("level is not currently supported.")
if limit is not None:
raise NotImplementedError("limit is not currently supported.")
if tolerance is not None:
raise NotImplementedError("tolerance is not currently supported.")

if dtypes is None:
dtypes = {}

Expand Down Expand Up @@ -4303,8 +4358,22 @@ def take(self, indices, axis=0):

return self._gather(GatherMap(indices, len(self), nullify=False))

def _reset_index(self, level, drop, col_level=0, col_fill=""):
def _reset_index(
self,
level,
drop,
col_level=0,
col_fill="",
allow_duplicates: bool = False,
names: abc.Hashable | abc.Sequence[abc.Hashable] | None = None,
):
"""Shared path for DataFrame.reset_index and Series.reset_index."""
if allow_duplicates is not False:
raise NotImplementedError(
"allow_duplicates is not currently supported."
)
elif names is not None:
raise NotImplementedError("names is not currently supported.")
if level is not None:
if (
isinstance(level, int)
Expand Down
Loading

0 comments on commit 135c995

Please sign in to comment.