Skip to content

Commit

Permalink
Add_suffix and add_prefix for DataFrames and Series (#9846)
Browse files Browse the repository at this point in the history
This PR fixes #9590, by adding `add_suffix` and `add_prefix` for `cudf.DataFrame` and `cudf.Series`.

To make things concise, we unify the docstrings of these methods in both `Series` and `DataFrame` by defining them within `IndexedFrame` (with a unified docstring and raising `NotImplementedError`, asking the user to refer to the implementations in `Series` or `DataFrame`)

It's preferred to raise `NotImplementedError` so that if someone later creates another class by inheriting from `IndexedFrame`, it is clear that they must reimplement `add_suffix` and `add_prefix`.

Authors:
  - Mayank Anand (https://github.com/mayankanand007)

Approvers:
  - Charles Blackmon-Luca (https://github.com/charlesbluca)
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - https://github.com/brandon-b-miller

URL: #9846
  • Loading branch information
mayankanand007 authored Dec 8, 2021
1 parent ffc6241 commit 4579d23
Show file tree
Hide file tree
Showing 7 changed files with 186 additions and 0 deletions.
2 changes: 2 additions & 0 deletions docs/cudf/source/api_docs/dataframe.rst
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,8 @@ Reindexing / selection / label manipulation
.. autosummary::
:toctree: api/

DataFrame.add_prefix
DataFrame.add_suffix
DataFrame.drop
DataFrame.drop_duplicates
DataFrame.equals
Expand Down
2 changes: 2 additions & 0 deletions docs/cudf/source/api_docs/series.rst
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,8 @@ Reindexing / selection / label manipulation
.. autosummary::
:toctree: api/

Series.add_prefix
Series.add_suffix
Series.drop
Series.drop_duplicates
Series.equals
Expand Down
14 changes: 14 additions & 0 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -3040,6 +3040,20 @@ def rename(
else:
return out.copy(deep=copy)

def add_prefix(self, prefix):
    # No docstring on purpose: the user-facing documentation for this
    # method is the shared docstring on ``IndexedFrame.add_prefix``.
    #
    # Work on a deep copy so the caller's frame keeps its original labels.
    out = self.copy(deep=True)
    # Format each label instead of using ``prefix + label`` so that
    # non-string column names (e.g. integers) are stringified, as pandas
    # does, rather than raising ``TypeError``.
    out.columns = [f"{prefix}{col_name}" for col_name in self._data.keys()]
    return out

def add_suffix(self, suffix):
    # No docstring on purpose: the user-facing documentation for this
    # method is the shared docstring on ``IndexedFrame.add_suffix``.
    #
    # Work on a deep copy so the caller's frame keeps its original labels.
    out = self.copy(deep=True)
    # Format each label instead of using ``label + suffix`` so that
    # non-string column names (e.g. integers) are stringified, as pandas
    # does, rather than raising ``TypeError``.
    out.columns = [f"{col_name}{suffix}" for col_name in self._data.keys()]
    return out

def as_gpu_matrix(self, columns=None, order="F"):
warnings.warn(
"The as_gpu_matrix method will be removed in a future cuDF "
Expand Down
118 changes: 118 additions & 0 deletions python/cudf/cudf/core/indexed_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -544,6 +544,124 @@ def drop_duplicates(
result._copy_type_metadata(self)
return result

def add_prefix(self, prefix):
    """
    Prefix labels with string `prefix`.

    For Series, the row labels are prefixed.
    For DataFrame, the column labels are prefixed.

    Parameters
    ----------
    prefix : str
        The string to add before each label.

    Returns
    -------
    Series or DataFrame
        New Series with updated labels or DataFrame with updated labels.

    Raises
    ------
    NotImplementedError
        Always, when called on this base implementation; subclasses
        are expected to override it.

    See Also
    --------
    Series.add_suffix: Suffix row labels with string 'suffix'.
    DataFrame.add_suffix: Suffix column labels with string 'suffix'.

    Examples
    --------
    **Series**

    >>> s = cudf.Series([1, 2, 3, 4])
    >>> s
    0    1
    1    2
    2    3
    3    4
    dtype: int64
    >>> s.add_prefix('item_')
    item_0    1
    item_1    2
    item_2    3
    item_3    4
    dtype: int64

    **DataFrame**

    >>> df = cudf.DataFrame({'A': [1, 2, 3, 4], 'B': [3, 4, 5, 6]})
    >>> df
       A  B
    0  1  3
    1  2  4
    2  3  5
    3  4  6
    >>> df.add_prefix('col_')
       col_A  col_B
    0      1      3
    1      2      4
    2      3      5
    3      4      6
    """
    # Adjacent string literals are used instead of a backslash continuation
    # inside one literal, so the message never picks up the source file's
    # leading indentation.
    raise NotImplementedError(
        "`IndexedFrame.add_prefix` not currently implemented. "
        "Use `Series.add_prefix` or `DataFrame.add_prefix`"
    )

def add_suffix(self, suffix):
    """
    Suffix labels with string `suffix`.

    For Series, the row labels are suffixed.
    For DataFrame, the column labels are suffixed.

    Parameters
    ----------
    suffix : str
        The string to add after each label.

    Returns
    -------
    Series or DataFrame
        New Series with updated labels or DataFrame with updated labels.

    Raises
    ------
    NotImplementedError
        Always, when called on this base implementation; subclasses
        are expected to override it.

    See Also
    --------
    Series.add_prefix: Prefix row labels with string 'prefix'.
    DataFrame.add_prefix: Prefix column labels with string 'prefix'.

    Examples
    --------
    **Series**

    >>> s = cudf.Series([1, 2, 3, 4])
    >>> s
    0    1
    1    2
    2    3
    3    4
    dtype: int64
    >>> s.add_suffix('_item')
    0_item    1
    1_item    2
    2_item    3
    3_item    4
    dtype: int64

    **DataFrame**

    >>> df = cudf.DataFrame({'A': [1, 2, 3, 4], 'B': [3, 4, 5, 6]})
    >>> df
       A  B
    0  1  3
    1  2  4
    2  3  5
    3  4  6
    >>> df.add_suffix('_col')
       A_col  B_col
    0      1      3
    1      2      4
    2      3      5
    3      4      6
    """
    # Adjacent string literals are used instead of a backslash continuation
    # inside one literal, so the message never picks up the source file's
    # leading indentation.
    raise NotImplementedError(
        "`IndexedFrame.add_suffix` not currently implemented. "
        "Use `Series.add_suffix` or `DataFrame.add_suffix`"
    )

def sort_values(
self,
by,
Expand Down
10 changes: 10 additions & 0 deletions python/cudf/cudf/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3694,6 +3694,16 @@ def merge(

return result

def add_prefix(self, prefix):
    # No docstring on purpose: the user-facing documentation for this
    # method is the shared docstring on ``IndexedFrame.add_prefix``.
    #
    # Row labels are cast to strings before the prefix is prepended.
    prefixed_labels = prefix + self.index.astype(str)
    # The relabelled result is a deep copy; ``self`` keeps its own index.
    out = self.copy(deep=True)
    out.index = prefixed_labels
    return out

def add_suffix(self, suffix):
    # No docstring on purpose: the user-facing documentation for this
    # method is the shared docstring on ``IndexedFrame.add_suffix``.
    #
    # Row labels are cast to strings before the suffix is appended.
    suffixed_labels = self.index.astype(str) + suffix
    # The relabelled result is a deep copy; ``self`` keeps its own index.
    out = self.copy(deep=True)
    out.index = suffixed_labels
    return out

def keys(self):
"""
Return alias for index.
Expand Down
20 changes: 20 additions & 0 deletions python/cudf/cudf/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -9039,3 +9039,23 @@ def test_pearson_corr_multiindex_dataframe():
expected = gdf.to_pandas().groupby(level="a").corr("pearson")

assert_eq(expected, actual)


def test_dataframe_add_prefix():
    """Check ``DataFrame.add_prefix`` against pandas for string columns."""
    gdf = cudf.DataFrame({"A": [1, 2, 3, 4], "B": [3, 4, 5, 6]})
    # Derive the pandas frame from the cudf one so both hold the same data.
    pdf = gdf.to_pandas()

    assert_eq(gdf.add_prefix("item_"), pdf.add_prefix("item_"))


def test_dataframe_add_suffix():
    """Check ``DataFrame.add_suffix`` against pandas for string columns."""
    gdf = cudf.DataFrame({"A": [1, 2, 3, 4], "B": [3, 4, 5, 6]})
    # Derive the pandas frame from the cudf one so both hold the same data.
    pdf = gdf.to_pandas()

    assert_eq(gdf.add_suffix("_item"), pdf.add_suffix("_item"))
20 changes: 20 additions & 0 deletions python/cudf/cudf/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1348,6 +1348,26 @@ def test_nullable_bool_dtype_series(data, bool_dtype):
assert_eq(psr, gsr.to_pandas(nullable=True))


def test_series_add_prefix():
    """Check ``Series.add_prefix`` against pandas for a default index."""
    gsr = cudf.Series([1, 2, 3, 4])
    # Derive the pandas series from the cudf one so both hold the same data.
    psr = gsr.to_pandas()

    assert_eq(gsr.add_prefix("item_"), psr.add_prefix("item_"))


def test_series_add_suffix():
    """Check ``Series.add_suffix`` against pandas for a default index."""
    gsr = cudf.Series([1, 2, 3, 4])
    # Derive the pandas series from the cudf one so both hold the same data.
    psr = gsr.to_pandas()

    assert_eq(gsr.add_suffix("_item"), psr.add_suffix("_item"))


@pytest.mark.parametrize(
"cudf_series",
[
Expand Down

0 comments on commit 4579d23

Please sign in to comment.