Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Align Series APIs with pandas 2.x #16333

Merged
merged 5 commits into from
Jul 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 17 additions & 2 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2841,6 +2841,10 @@ def reindex(
index=index,
inplace=False,
fill_value=fill_value,
level=level,
method=method,
limit=limit,
tolerance=tolerance,
)

@_performance_tracking
Expand Down Expand Up @@ -3184,7 +3188,14 @@ class speed type
)
)
def reset_index(
self, level=None, drop=False, inplace=False, col_level=0, col_fill=""
self,
level=None,
drop=False,
inplace=False,
col_level=0,
col_fill="",
allow_duplicates: bool = False,
names: abc.Hashable | abc.Sequence[abc.Hashable] | None = None,
):
return self._mimic_inplace(
DataFrame._from_data(
Expand All @@ -3193,6 +3204,8 @@ def reset_index(
drop=drop,
col_level=col_level,
col_fill=col_fill,
allow_duplicates=allow_duplicates,
names=names,
)
),
inplace=inplace,
Expand Down Expand Up @@ -3663,7 +3676,9 @@ def add_prefix(self, prefix, axis=None):
return out

@_performance_tracking
def add_suffix(self, suffix):
def add_suffix(self, suffix, axis=None):
if axis is not None:
raise NotImplementedError("axis is currently not implemented.")
# TODO: Change to deep=False when copy-on-write is default
out = self.copy(deep=True)
out.columns = [
Expand Down
9 changes: 8 additions & 1 deletion python/cudf/cudf/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1187,6 +1187,7 @@ def searchsorted(
self,
values,
side: Literal["left", "right"] = "left",
sorter=None,
ascending: bool = True,
na_position: Literal["first", "last"] = "last",
) -> ScalarLike | cupy.ndarray:
Expand All @@ -1199,6 +1200,10 @@ def searchsorted(
side : str {'left', 'right'} optional, default 'left'
If 'left', return the index of the first suitable location found.
If 'right', return the last such index.
sorter : 1-D array-like, optional
Optional array of integer indices that sort `self` into ascending
order. They are typically the result of ``np.argsort``.
Currently not supported.
ascending : bool optional, default True
Sorted Frame is in ascending order (otherwise descending)
na_position : str {'last', 'first'} optional, default 'last'
Expand Down Expand Up @@ -1245,10 +1250,12 @@ def searchsorted(
>>> df.searchsorted(values_df, ascending=False)
array([4, 4, 4, 0], dtype=int32)
"""
# Call libcudf search_sorted primitive
# Note: pandas.DataFrame does not support searchsorted

if na_position not in {"first", "last"}:
raise ValueError(f"invalid na_position: {na_position}")
elif sorter is not None:
raise NotImplementedError("sorter is currently not supported.")

scalar_flag = None
if is_scalar(values):
Expand Down
87 changes: 78 additions & 9 deletions python/cudf/cudf/core/indexed_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@
{argument}
inplace : bool, default False
Modify the DataFrame in place (do not create a new object).
allow_duplicates : bool, default False
Allow duplicate column labels to be created.
Currently not supported.

Returns
-------
Expand Down Expand Up @@ -902,7 +905,7 @@ def replace(
return self._mimic_inplace(result, inplace=inplace)

@_performance_tracking
def clip(self, lower=None, upper=None, inplace=False, axis=1):
def clip(self, lower=None, upper=None, axis=1, inplace=False):
"""
Trim values at input threshold(s).

Expand Down Expand Up @@ -1779,7 +1782,14 @@ def skew(self, axis=0, skipna=True, numeric_only=False, **kwargs):
)

@_performance_tracking
def mask(self, cond, other=None, inplace: bool = False) -> Self | None:
def mask(
self,
cond,
other=None,
inplace: bool = False,
axis=None,
level=None,
) -> Self | None:
"""
Replace values where the condition is True.

Expand Down Expand Up @@ -1831,6 +1841,10 @@ def mask(self, cond, other=None, inplace: bool = False) -> Self | None:
4 0
dtype: int64
"""
if axis is not None:
raise NotImplementedError("axis is not supported.")
elif level is not None:
raise NotImplementedError("level is not supported.")

if not hasattr(cond, "__invert__"):
# We invert `cond` below and call `where`, so
Expand Down Expand Up @@ -2042,13 +2056,26 @@ def interpolate(
)

@_performance_tracking
def shift(self, periods=1, freq=None, axis=0, fill_value=None):
def shift(
self,
periods=1,
freq=None,
axis=0,
fill_value=None,
suffix: str | None = None,
):
"""Shift values by `periods` positions."""
axis = self._get_axis_from_axis_arg(axis)
if axis != 0:
raise ValueError("Only axis=0 is supported.")
raise NotImplementedError("Only axis=0 is supported.")
if freq is not None:
raise ValueError("The freq argument is not yet supported.")
raise NotImplementedError(
"The freq argument is not yet supported."
)
if suffix is not None:
raise NotImplementedError(
"The suffix argument is not yet supported."
)

data_columns = (
col.shift(periods, fill_value) for col in self._columns
Expand Down Expand Up @@ -3225,14 +3252,19 @@ def _split(self, splits, keep_index=True):
]

@_performance_tracking
def bfill(self, value=None, axis=None, inplace=None, limit=None):
def bfill(
self, value=None, axis=None, inplace=None, limit=None, limit_area=None
):
"""
Synonym for :meth:`Series.fillna` with ``method='bfill'``.

Returns
-------
Object with missing values filled or None if ``inplace=True``.
"""
if limit_area is not None:
raise NotImplementedError("limit_area is currently not supported.")

with warnings.catch_warnings():
warnings.simplefilter("ignore", FutureWarning)
return self.fillna(
Expand Down Expand Up @@ -3264,14 +3296,24 @@ def backfill(self, value=None, axis=None, inplace=None, limit=None):
return self.bfill(value=value, axis=axis, inplace=inplace, limit=limit)

@_performance_tracking
def ffill(self, value=None, axis=None, inplace=None, limit=None):
def ffill(
self,
value=None,
axis=None,
inplace=None,
limit=None,
limit_area: Literal["inside", "outside", None] = None,
):
"""
Synonym for :meth:`Series.fillna` with ``method='ffill'``.

Returns
-------
Object with missing values filled or None if ``inplace=True``.
"""
if limit_area is not None:
raise NotImplementedError("limit_area is currently not supported.")

with warnings.catch_warnings():
warnings.simplefilter("ignore", FutureWarning)
return self.fillna(
Expand Down Expand Up @@ -3363,7 +3405,7 @@ def add_prefix(self, prefix, axis=None):
Use `Series.add_prefix` or `DataFrame.add_prefix`"
)

def add_suffix(self, suffix):
def add_suffix(self, suffix, axis=None):
"""
Suffix labels with string `suffix`.

Expand Down Expand Up @@ -3653,6 +3695,10 @@ def _reindex(
index=None,
inplace=False,
fill_value=NA,
level=None,
method=None,
limit=None,
tolerance=None,
):
"""
Helper for `.reindex`
Expand All @@ -3677,6 +3723,15 @@ def _reindex(
-------
Series or DataFrame
"""
if method is not None:
raise NotImplementedError("method is not currently supported.")
if level is not None:
raise NotImplementedError("level is not currently supported.")
if limit is not None:
raise NotImplementedError("limit is not currently supported.")
if tolerance is not None:
raise NotImplementedError("tolerance is not currently supported.")

if dtypes is None:
dtypes = {}

Expand Down Expand Up @@ -4303,8 +4358,22 @@ def take(self, indices, axis=0):

return self._gather(GatherMap(indices, len(self), nullify=False))

def _reset_index(self, level, drop, col_level=0, col_fill=""):
def _reset_index(
self,
level,
drop,
col_level=0,
col_fill="",
allow_duplicates: bool = False,
names: abc.Hashable | abc.Sequence[abc.Hashable] | None = None,
):
"""Shared path for DataFrame.reset_index and Series.reset_index."""
if allow_duplicates is not False:
raise NotImplementedError(
"allow_duplicates is not currently supported."
)
elif names is not None:
raise NotImplementedError("names is not currently supported.")
if level is not None:
if (
isinstance(level, int)
Expand Down
Loading
Loading