From 4579d237ad93d2416a1441b7661b029cfb9a5c10 Mon Sep 17 00:00:00 2001
From: Mayank Anand <36782063+mayankanand007@users.noreply.github.com>
Date: Wed, 8 Dec 2021 14:06:19 -0500
Subject: [PATCH] Add_suffix and add_prefix for DataFrames and Series (#9846)

This PR fixes #9590, by adding `add_suffix` and `add_prefix` for `cudf.DataFrame` and `cudf.Series`.

To make things concise, we unify the docstrings of these methods in both `Series` and `DataFrame` by defining them within `IndexedFrame` (with a unified docstring and raising `NotImplementedError`, asking the user to refer to the implementations in `Series` or `DataFrame`).

It's preferred to raise `NotImplementedError` so that if someone later creates another class by inheriting from `IndexedFrame`, it clarifies that they must reimplement `add_suffix` and `add_prefix`.

Authors:
  - Mayank Anand (https://github.com/mayankanand007)

Approvers:
  - Charles Blackmon-Luca (https://github.com/charlesbluca)
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - https://github.com/brandon-b-miller

URL: https://github.com/rapidsai/cudf/pull/9846
---
 docs/cudf/source/api_docs/dataframe.rst  |   2 +
 docs/cudf/source/api_docs/series.rst     |   2 +
 python/cudf/cudf/core/dataframe.py       |  14 +++
 python/cudf/cudf/core/indexed_frame.py   | 118 +++++++++++++++++++++++
 python/cudf/cudf/core/series.py          |  10 ++
 python/cudf/cudf/tests/test_dataframe.py |  20 ++++
 python/cudf/cudf/tests/test_series.py    |  20 ++++
 7 files changed, 186 insertions(+)

diff --git a/docs/cudf/source/api_docs/dataframe.rst b/docs/cudf/source/api_docs/dataframe.rst
index 76026c23d50..94f88a40ea5 100644
--- a/docs/cudf/source/api_docs/dataframe.rst
+++ b/docs/cudf/source/api_docs/dataframe.rst
@@ -154,6 +154,8 @@ Reindexing / selection / label manipulation
 .. autosummary::
    :toctree: api/
 
+   DataFrame.add_prefix
+   DataFrame.add_suffix
    DataFrame.drop
    DataFrame.drop_duplicates
    DataFrame.equals
diff --git a/docs/cudf/source/api_docs/series.rst b/docs/cudf/source/api_docs/series.rst
index d234dfc4bcb..a3b17926bdd 100644
--- a/docs/cudf/source/api_docs/series.rst
+++ b/docs/cudf/source/api_docs/series.rst
@@ -162,6 +162,8 @@ Reindexing / selection / label manipulation
 .. autosummary::
    :toctree: api/
 
+   Series.add_prefix
+   Series.add_suffix
    Series.drop
    Series.drop_duplicates
    Series.equals
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 279b1f44961..bbe691595e7 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -3040,6 +3040,20 @@ def rename(
         else:
             return out.copy(deep=copy)
 
+    def add_prefix(self, prefix):
+        out = self.copy(deep=True)
+        out.columns = [
+            prefix + col_name for col_name in list(self._data.keys())
+        ]
+        return out
+
+    def add_suffix(self, suffix):
+        out = self.copy(deep=True)
+        out.columns = [
+            col_name + suffix for col_name in list(self._data.keys())
+        ]
+        return out
+
     def as_gpu_matrix(self, columns=None, order="F"):
         warnings.warn(
             "The as_gpu_matrix method will be removed in a future cuDF "
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index 2044bad9675..9625231a6ef 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -544,6 +544,124 @@ def drop_duplicates(
         result._copy_type_metadata(self)
         return result
 
+    def add_prefix(self, prefix):
+        """
+        Prefix labels with string `prefix`.
+
+        For Series, the row labels are prefixed.
+        For DataFrame, the column labels are prefixed.
+
+        Parameters
+        ----------
+        prefix : str
+            The string to add before each label.
+
+        Returns
+        -------
+        Series or DataFrame
+            New Series with updated labels or DataFrame with updated labels.
+
+        See Also
+        --------
+        Series.add_suffix: Suffix row labels with string 'suffix'.
+        DataFrame.add_suffix: Suffix column labels with string 'suffix'.
+
+        Examples
+        --------
+        **Series**
+        >>> s = cudf.Series([1, 2, 3, 4])
+        >>> s
+        0    1
+        1    2
+        2    3
+        3    4
+        dtype: int64
+        >>> s.add_prefix('item_')
+        item_0    1
+        item_1    2
+        item_2    3
+        item_3    4
+        dtype: int64
+
+        **DataFrame**
+        >>> df = cudf.DataFrame({'A': [1, 2, 3, 4], 'B': [3, 4, 5, 6]})
+        >>> df
+           A  B
+        0  1  3
+        1  2  4
+        2  3  5
+        3  4  6
+        >>> df.add_prefix('col_')
+           col_A  col_B
+        0      1      3
+        1      2      4
+        2      3      5
+        3      4      6
+        """
+        raise NotImplementedError(
+            "`IndexedFrame.add_prefix` not currently implemented. "
+            "Use `Series.add_prefix` or `DataFrame.add_prefix`"
+        )
+
+    def add_suffix(self, suffix):
+        """
+        Suffix labels with string `suffix`.
+
+        For Series, the row labels are suffixed.
+        For DataFrame, the column labels are suffixed.
+
+        Parameters
+        ----------
+        suffix : str
+            The string to add after each label.
+
+        Returns
+        -------
+        Series or DataFrame
+            New Series with updated labels or DataFrame with updated labels.
+
+        See Also
+        --------
+        Series.add_prefix: Prefix row labels with string 'prefix'.
+        DataFrame.add_prefix: Prefix column labels with string 'prefix'.
+
+        Examples
+        --------
+        **Series**
+        >>> s = cudf.Series([1, 2, 3, 4])
+        >>> s
+        0    1
+        1    2
+        2    3
+        3    4
+        dtype: int64
+        >>> s.add_suffix('_item')
+        0_item    1
+        1_item    2
+        2_item    3
+        3_item    4
+        dtype: int64
+
+        **DataFrame**
+        >>> df = cudf.DataFrame({'A': [1, 2, 3, 4], 'B': [3, 4, 5, 6]})
+        >>> df
+           A  B
+        0  1  3
+        1  2  4
+        2  3  5
+        3  4  6
+        >>> df.add_suffix('_col')
+           A_col  B_col
+        0      1      3
+        1      2      4
+        2      3      5
+        3      4      6
+        """
+        raise NotImplementedError(
+            "`IndexedFrame.add_suffix` not currently implemented. "
+            "Use `Series.add_suffix` or `DataFrame.add_suffix`"
+        )
+
     def sort_values(
         self,
         by,
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 3aae79af4e8..140c68d4ce0 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -3694,6 +3694,16 @@ def merge(
 
         return result
 
+    def add_prefix(self, prefix):
+        result = self.copy(deep=True)
+        result.index = prefix + self.index.astype(str)
+        return result
+
+    def add_suffix(self, suffix):
+        result = self.copy(deep=True)
+        result.index = self.index.astype(str) + suffix
+        return result
+
     def keys(self):
         """
         Return alias for index.
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index d555b5c4033..c40f9f0b0a5 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -9039,3 +9039,23 @@ def test_pearson_corr_multiindex_dataframe():
     expected = gdf.to_pandas().groupby(level="a").corr("pearson")
 
     assert_eq(expected, actual)
+
+
+def test_dataframe_add_prefix():
+    cdf = cudf.DataFrame({"A": [1, 2, 3, 4], "B": [3, 4, 5, 6]})
+    pdf = cdf.to_pandas()
+
+    got = cdf.add_prefix("item_")
+    expected = pdf.add_prefix("item_")
+
+    assert_eq(got, expected)
+
+
+def test_dataframe_add_suffix():
+    cdf = cudf.DataFrame({"A": [1, 2, 3, 4], "B": [3, 4, 5, 6]})
+    pdf = cdf.to_pandas()
+
+    got = cdf.add_suffix("_item")
+    expected = pdf.add_suffix("_item")
+
+    assert_eq(got, expected)
diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py
index d59e3ba7571..f214e54c57e 100644
--- a/python/cudf/cudf/tests/test_series.py
+++ b/python/cudf/cudf/tests/test_series.py
@@ -1348,6 +1348,26 @@ def test_nullable_bool_dtype_series(data, bool_dtype):
     assert_eq(psr, gsr.to_pandas(nullable=True))
 
 
+def test_series_add_prefix():
+    cd_s = cudf.Series([1, 2, 3, 4])
+    pd_s = cd_s.to_pandas()
+
+    got = cd_s.add_prefix("item_")
+    expected = pd_s.add_prefix("item_")
+
+    assert_eq(got, expected)
+
+
+def test_series_add_suffix():
+    cd_s = cudf.Series([1, 2, 3, 4])
+    pd_s = cd_s.to_pandas()
+
+    got = cd_s.add_suffix("_item")
+    expected = pd_s.add_suffix("_item")
+
+    assert_eq(got, expected)
+
+
 @pytest.mark.parametrize(
     "cudf_series",
     [