Add_suffix and add_prefix for DataFrames and Series (#9846)

This PR fixes #9590, by adding `add_suffix` and `add_prefix` for `cudf.DataFrame` and `cudf.Series`. To make things concise, we unify the docstrings of these methods in both `Series` and `DataFrame` by defining them within `IndexedFrame` (with a unified docstring and raising `NotImplementedError`, asking the user to refer to the implementations in `Series` or `DataFrame`) Its preferred to raise `NotImplementedError` so that if someone later creates another class by inheriting from `IndexedFrame`, it clarifies that they must reimplement `add_suffix` and `add_prefix` Authors: - Mayank Anand (https://github.com/mayankanand007) Approvers: - Charles Blackmon-Luca (https://github.com/charlesbluca) - GALI PREM SAGAR (https://github.com/galipremsagar) - https://github.com/brandon-b-miller URL: #9846
rapidsai · Dec 8, 2021 · 4579d23 · 4579d23
1 parent ffc6241
commit 4579d23
Show file tree

Hide file tree

Showing 7 changed files with 186 additions and 0 deletions.
diff --git a/docs/cudf/source/api_docs/dataframe.rst b/docs/cudf/source/api_docs/dataframe.rst
@@ -154,6 +154,8 @@ Reindexing / selection / label manipulation
 .. autosummary::
    :toctree: api/
 
+   DataFrame.add_prefix
+   DataFrame.add_suffix
    DataFrame.drop
    DataFrame.drop_duplicates
    DataFrame.equals

diff --git a/docs/cudf/source/api_docs/series.rst b/docs/cudf/source/api_docs/series.rst
@@ -162,6 +162,8 @@ Reindexing / selection / label manipulation
 .. autosummary::
    :toctree: api/
 
+   Series.add_prefix
+   Series.add_suffix
    Series.drop
    Series.drop_duplicates
    Series.equals

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
@@ -3040,6 +3040,20 @@ def rename(
         else:
             return out.copy(deep=copy)
 
+    def add_prefix(self, prefix):
+        out = self.copy(deep=True)
+        out.columns = [
+            prefix + col_name for col_name in list(self._data.keys())
+        ]
+        return out
+
+    def add_suffix(self, suffix):
+        out = self.copy(deep=True)
+        out.columns = [
+            col_name + suffix for col_name in list(self._data.keys())
+        ]
+        return out
+
     def as_gpu_matrix(self, columns=None, order="F"):
         warnings.warn(
             "The as_gpu_matrix method will be removed in a future cuDF "

diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
@@ -544,6 +544,124 @@ def drop_duplicates(
         result._copy_type_metadata(self)
         return result
 
+    def add_prefix(self, prefix):
+        """
+        Prefix labels with string `prefix`.
+
+        For Series, the row labels are prefixed.
+        For DataFrame, the column labels are prefixed.
+
+        Parameters
+        ----------
+        prefix : str
+            The string to add before each label.
+
+        Returns
+        -------
+        Series or DataFrame
+            New Series with updated labels or DataFrame with updated labels.
+
+        See Also
+        --------
+        Series.add_suffix: Suffix row labels with string 'suffix'.
+        DataFrame.add_suffix: Suffix column labels with string 'suffix'.
+
+        Examples
+        --------
+        **Series**
+        >>> s = cudf.Series([1, 2, 3, 4])
+        >>> s
+        0    1
+        1    2
+        2    3
+        3    4
+        dtype: int64
+        >>> s.add_prefix('item_')
+        item_0    1
+        item_1    2
+        item_2    3
+        item_3    4
+        dtype: int64
+
+        **DataFrame**
+        >>> df = cudf.DataFrame({'A': [1, 2, 3, 4], 'B': [3, 4, 5, 6]})
+        >>> df
+           A  B
+        0  1  3
+        1  2  4
+        2  3  5
+        3  4  6
+        >>> df.add_prefix('col_')
+             col_A  col_B
+        0       1       3
+        1       2       4
+        2       3       5
+        3       4       6
+        """
+        raise NotImplementedError(
+            "`IndexedFrame.add_prefix` not currently implemented. \
+                Use `Series.add_prefix` or `DataFrame.add_prefix`"
+        )
+
+    def add_suffix(self, suffix):
+        """
+        Suffix labels with string `suffix`.
+
+        For Series, the row labels are suffixed.
+        For DataFrame, the column labels are suffixed.
+
+        Parameters
+        ----------
+        prefix : str
+            The string to add after each label.
+
+        Returns
+        -------
+        Series or DataFrame
+            New Series with updated labels or DataFrame with updated labels.
+
+        See Also
+        --------
+        Series.add_prefix: prefix row labels with string 'prefix'.
+        DataFrame.add_prefix: Prefix column labels with string 'prefix'.
+
+        Examples
+        --------
+        **Series**
+        >>> s = cudf.Series([1, 2, 3, 4])
+        >>> s
+        0    1
+        1    2
+        2    3
+        3    4
+        dtype: int64
+        >>> s.add_suffix('_item')
+        0_item    1
+        1_item    2
+        2_item    3
+        3_item    4
+        dtype: int64
+
+        **DataFrame**
+        >>> df = cudf.DataFrame({'A': [1, 2, 3, 4], 'B': [3, 4, 5, 6]})
+        >>> df
+           A  B
+        0  1  3
+        1  2  4
+        2  3  5
+        3  4  6
+        >>> df.add_suffix('_col')
+             A_col  B_col
+        0       1       3
+        1       2       4
+        2       3       5
+        3       4       6
+        """
+        raise NotImplementedError(
+            "`IndexedFrame.add_suffix` not currently implemented. \
+                Use `Series.add_suffix` or `DataFrame.add_suffix`"
+        )
+
     def sort_values(
         self,
         by,

diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
@@ -3694,6 +3694,16 @@ def merge(
 
         return result
 
+    def add_prefix(self, prefix):
+        result = self.copy(deep=True)
+        result.index = prefix + self.index.astype(str)
+        return result
+
+    def add_suffix(self, suffix):
+        result = self.copy(deep=True)
+        result.index = self.index.astype(str) + suffix
+        return result
+
     def keys(self):
         """
         Return alias for index.

diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
@@ -9039,3 +9039,23 @@ def test_pearson_corr_multiindex_dataframe():
     expected = gdf.to_pandas().groupby(level="a").corr("pearson")
 
     assert_eq(expected, actual)
+
+
+def test_dataframe_add_prefix():
+    cdf = cudf.DataFrame({"A": [1, 2, 3, 4], "B": [3, 4, 5, 6]})
+    pdf = cdf.to_pandas()
+
+    got = cdf.add_prefix("item_")
+    expected = pdf.add_prefix("item_")
+
+    assert_eq(got, expected)
+
+
+def test_dataframe_add_suffix():
+    cdf = cudf.DataFrame({"A": [1, 2, 3, 4], "B": [3, 4, 5, 6]})
+    pdf = cdf.to_pandas()
+
+    got = cdf.add_suffix("_item")
+    expected = pdf.add_suffix("_item")
+
+    assert_eq(got, expected)
diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py
@@ -1348,6 +1348,26 @@ def test_nullable_bool_dtype_series(data, bool_dtype):
     assert_eq(psr, gsr.to_pandas(nullable=True))
 
 
+def test_series_add_prefix():
+    cd_s = cudf.Series([1, 2, 3, 4])
+    pd_s = cd_s.to_pandas()
+
+    got = cd_s.add_prefix("item_")
+    expected = pd_s.add_prefix("item_")
+
+    assert_eq(got, expected)
+
+
+def test_series_add_suffix():
+    cd_s = cudf.Series([1, 2, 3, 4])
+    pd_s = cd_s.to_pandas()
+
+    got = cd_s.add_suffix("_item")
+    expected = pd_s.add_suffix("_item")
+
+    assert_eq(got, expected)
+
+
 @pytest.mark.parametrize(
     "cudf_series",
     [