From 2b2058de941289ca343cb1d3a3eb143a84998dfd Mon Sep 17 00:00:00 2001
From: Matthew Murray <41342305+Matt711@users.noreply.github.com>
Date: Thu, 11 Jul 2024 07:19:12 -0400
Subject: [PATCH] Add custom name setter and getter for proxy objects in
 `cudf.pandas` (#16234)

Closes #14524

Authors:
  - Matthew Murray (https://github.com/Matt711)

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/16234
---
Review note: in the MultiIndex hunk the `names` getter (which reads
`_fsproxy_wrapped._names`) is registered under the key "names" instead of
"name", so that `.name` on a MultiIndex proxy is not answered with the
level-names list. `Index__setattr__` writes `_name`/`_names` on the wrapped
object and then still forwards the assignment through the regular
fast/slow `__setattr__`; underscore-prefixed attributes are stored on the
proxy itself. Hunk sizes are unchanged by this correction.

 python/cudf/cudf/pandas/_wrappers/pandas.py   | 50 +++++++++++++++++--
 .../cudf_pandas_tests/test_cudf_pandas.py     | 40 +++++++++++++++
 2 files changed, 87 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/pandas/_wrappers/pandas.py b/python/cudf/cudf/pandas/_wrappers/pandas.py
index 3f94fc18980..d3a3488081a 100644
--- a/python/cudf/cudf/pandas/_wrappers/pandas.py
+++ b/python/cudf/cudf/pandas/_wrappers/pandas.py
@@ -260,6 +260,23 @@ def Index__new__(cls, *args, **kwargs):
     return self
 
 
+def name(self):
+    return self._fsproxy_wrapped._name
+
+
+def Index__setattr__(self, name, value):
+    if name.startswith("_"):
+        object.__setattr__(self, name, value)
+        return
+    if name == "name":
+        setattr(self._fsproxy_wrapped, "_name", value)
+    if name == "names":
+        setattr(self._fsproxy_wrapped, "_names", value)
+    return _FastSlowAttribute("__setattr__").__get__(self, type(self))(
+        name, value
+    )
+
+
 Index = make_final_proxy_type(
     "Index",
     cudf.Index,
@@ -277,11 +294,13 @@ def Index__new__(cls, *args, **kwargs):
         "__iter__": custom_iter,
         "__init__": _DELETE,
         "__new__": Index__new__,
+        "__setattr__": Index__setattr__,
         "_constructor": _FastSlowAttribute("_constructor"),
         "__array_ufunc__": _FastSlowAttribute("__array_ufunc__"),
         "_accessors": set(),
         "_data": _FastSlowAttribute("_data", private=True),
         "_mask": _FastSlowAttribute("_mask", private=True),
+        "name": property(name),
     },
 )
 
@@ -292,7 +311,11 @@ def Index__new__(cls, *args, **kwargs):
     fast_to_slow=lambda fast: fast.to_pandas(),
     slow_to_fast=cudf.from_pandas,
     bases=(Index,),
-    additional_attributes={"__init__": _DELETE},
+    additional_attributes={
+        "__init__": _DELETE,
+        "__setattr__": Index__setattr__,
+        "name": property(name),
+    },
 )
 
 SparseDtype = make_final_proxy_type(
@@ -319,7 +342,11 @@ def Index__new__(cls, *args, **kwargs):
     fast_to_slow=lambda fast: fast.to_pandas(),
     slow_to_fast=cudf.from_pandas,
     bases=(Index,),
-    additional_attributes={"__init__": _DELETE},
+    additional_attributes={
+        "__init__": _DELETE,
+        "__setattr__": Index__setattr__,
+        "name": property(name),
+    },
 )
 
 Categorical = make_final_proxy_type(
@@ -350,6 +377,8 @@ def Index__new__(cls, *args, **kwargs):
         "__init__": _DELETE,
         "_data": _FastSlowAttribute("_data", private=True),
         "_mask": _FastSlowAttribute("_mask", private=True),
+        "__setattr__": Index__setattr__,
+        "name": property(name),
     },
 )
 
@@ -385,6 +414,8 @@ def Index__new__(cls, *args, **kwargs):
         "__init__": _DELETE,
         "_data": _FastSlowAttribute("_data", private=True),
         "_mask": _FastSlowAttribute("_mask", private=True),
+        "__setattr__": Index__setattr__,
+        "name": property(name),
     },
 )
 
@@ -441,6 +472,8 @@ def Index__new__(cls, *args, **kwargs):
         "__init__": _DELETE,
         "_data": _FastSlowAttribute("_data", private=True),
         "_mask": _FastSlowAttribute("_mask", private=True),
+        "__setattr__": Index__setattr__,
+        "name": property(name),
     },
 )
 
@@ -474,6 +507,11 @@ def Index__new__(cls, *args, **kwargs):
     additional_attributes={"__hash__": _FastSlowAttribute("__hash__")},
 )
 
+
+def names(self):
+    return self._fsproxy_wrapped._names
+
+
 MultiIndex = make_final_proxy_type(
     "MultiIndex",
     cudf.MultiIndex,
@@ -481,7 +519,11 @@ def Index__new__(cls, *args, **kwargs):
     fast_to_slow=lambda fast: fast.to_pandas(),
     slow_to_fast=cudf.from_pandas,
     bases=(Index,),
-    additional_attributes={"__init__": _DELETE},
+    additional_attributes={
+        "__init__": _DELETE,
+        "__setattr__": Index__setattr__,
+        "names": property(names),
+    },
 )
 
 TimeGrouper = make_intermediate_proxy_type(
@@ -669,6 +711,8 @@ def Index__new__(cls, *args, **kwargs):
         "__init__": _DELETE,
         "_data": _FastSlowAttribute("_data", private=True),
         "_mask": _FastSlowAttribute("_mask", private=True),
+        "__setattr__": Index__setattr__,
+        "name": property(name),
     },
 )
 
diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py
index bc864a48e9d..6292022d8e4 100644
--- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py
+++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py
@@ -1592,3 +1592,43 @@ def test_at_setitem_empty():
     df.at[0, "new"] = 2.0
     expected = pd.DataFrame({"name": [1.0], "new": [2.0]})
     tm.assert_frame_equal(df, expected)
+
+
+@pytest.mark.parametrize(
+    "index",
+    [
+        xpd.Index([1, 2, 3], name="foo"),
+        xpd.Index(["a", "b", "c"], name="foo"),
+        xpd.RangeIndex(start=0, stop=3, step=1, name="foo"),
+        xpd.CategoricalIndex(["a", "b", "a"], name="foo"),
+        xpd.DatetimeIndex(
+            ["2024-04-24", "2025-04-24", "2026-04-24"], name="foo"
+        ),
+        xpd.TimedeltaIndex(["1 days", "2 days", "3 days"], name="foo"),
+        xpd.PeriodIndex(
+            ["2024-06", "2023-06", "2022-06"], freq="M", name="foo"
+        ),
+        xpd.IntervalIndex.from_breaks([0, 1, 2, 3], name="foo"),
+        xpd.MultiIndex.from_tuples(
+            [(1, "a"), (2, "b"), (3, "c")], names=["foo1", "bar1"]
+        ),
+    ],
+)
+def test_change_index_name(index):
+    s = xpd.Series([1, 2, object()], index=index)
+    df = xpd.DataFrame({"values": [1, 2, object()]}, index=index)
+
+    if isinstance(index, xpd.MultiIndex):
+        names = ["foo2", "bar2"]
+        s.index.names = names
+        df.index.names = names
+
+        assert s.index.names == names
+        assert df.index.names == names
+    else:
+        name = "bar"
+        s.index.name = name
+        df.index.name = name
+
+        assert s.index.name == name
+        assert df.index.name == name