From 7edab792af9524f039d9638331e6850d1e4ee501 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 2 Jul 2024 10:01:30 -0700 Subject: [PATCH 1/5] Support at/iat indexers in cudf.pandas --- python/cudf/cudf/pandas/_wrappers/pandas.py | 18 ++++++++++++++++++ .../cudf/cudf_pandas_tests/test_cudf_pandas.py | 11 +++++++++++ 2 files changed, 29 insertions(+) diff --git a/python/cudf/cudf/pandas/_wrappers/pandas.py b/python/cudf/cudf/pandas/_wrappers/pandas.py index a64bf7772fe..1d8c13bbd2b 100644 --- a/python/cudf/cudf/pandas/_wrappers/pandas.py +++ b/python/cudf/cudf/pandas/_wrappers/pandas.py @@ -775,6 +775,24 @@ def Index__new__(cls, *args, **kwargs): pd.core.indexing._LocIndexer, ) +_SeriesLocIndexer = make_intermediate_proxy_type( + "_SeriesLocIndexer", + cudf.core.series._SeriesLocIndexer, + pd.core.indexing._LocIndexer, +) + +_AtIndexer = make_intermediate_proxy_type( + "_AtIndexer", + _Unusable(), + pd.core.indexing._AtIndexer, +) + +_iAtIndexer = make_intermediate_proxy_type( + "_iAtIndexer", + _Unusable(), + pd.core.indexing._iAtIndexer, +) + FixedForwardWindowIndexer = make_final_proxy_type( "FixedForwardWindowIndexer", _Unusable, diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index f51ce103677..6eca2ad4d6b 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -1566,3 +1566,14 @@ def test_arrow_string_arrays(): ) tm.assert_equal(cu_arr, pd_arr) + + +@pytest.mark.parametrize("indexer", ["at", "iat"]) +def test_at_iat(indexer): + df = xpd.DataFrame(range(3)) + result = getattr(df, indexer)[0, 0] + assert result == 0 + + getattr(df, indexer)[0, 0] = 1 + expected = pd.DataFrame([1, 1, 2]) + tm.assert_frame_equal(df, expected) From 41be22e1b7c31bbd2d73fab238b17624758a192f Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 2 Jul 2024 10:02:46 -0700 Subject: [PATCH 2/5] Remove redundant --- python/cudf/cudf/pandas/_wrappers/pandas.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/python/cudf/cudf/pandas/_wrappers/pandas.py b/python/cudf/cudf/pandas/_wrappers/pandas.py index 1d8c13bbd2b..5d75d3b8306 100644 --- a/python/cudf/cudf/pandas/_wrappers/pandas.py +++ b/python/cudf/cudf/pandas/_wrappers/pandas.py @@ -775,12 +775,6 @@ def Index__new__(cls, *args, **kwargs): pd.core.indexing._LocIndexer, ) -_SeriesLocIndexer = make_intermediate_proxy_type( - "_SeriesLocIndexer", - cudf.core.series._SeriesLocIndexer, - pd.core.indexing._LocIndexer, -) - _AtIndexer = make_intermediate_proxy_type( "_AtIndexer", _Unusable(), From a43454cf6fa157ef1ca5779cdee587334e5b5f82 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 2 Jul 2024 17:49:56 -0700 Subject: [PATCH 3/5] Add test for at.__setitem__ with empty --- python/cudf/cudf_pandas_tests/test_cudf_pandas.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index 6eca2ad4d6b..7ca00f9f004 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -1577,3 +1577,11 @@ def test_at_iat(indexer): getattr(df, indexer)[0, 0] = 1 expected = pd.DataFrame([1, 1, 2]) tm.assert_frame_equal(df, expected) + + +def test_at_setitem_empty(): + df = xpd.DataFrame({"name": []}) + df.at[0, "name"] = 1.0 + df.at[0, "new"] = 2.0 + expected = pd.DataFrame({"name": [1.0], "new": [2.0]}) + tm.assert_frame_equal(df, expected) From 97a283c27d75785c9cc4d9a29ddfa868042bdeb3 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 5 Jul 2024 12:40:36 -0700 Subject: [PATCH 4/5] Make alias for at/iat --- python/cudf/cudf/core/dataframe.py | 10 ++++++++-- python/cudf/cudf/pandas/_wrappers/pandas.py | 4 ++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index b249410c2e4..f21979771b7 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -462,6 +462,9 @@ def _setitem_tuple_arg(self, key, value): self._frame[col].loc[key[0]] = value[i] +_DataFrameAtIndexer = _DataFrameLocIndexer + + class _DataFrameIlocIndexer(_DataFrameIndexer): """ For selection by index. @@ -584,6 +587,9 @@ def _setitem_tuple_arg(self, key, value): self._frame[col].iloc[key[0]] = value[i] +_DataFrameiAtIndexer = _DataFrameIlocIndexer + + class DataFrame(IndexedFrame, Serializable, GetAttrGetItemMixin): """ A GPU Dataframe object. @@ -2581,14 +2587,14 @@ def iat(self): """ Alias for ``DataFrame.iloc``; provided for compatibility with Pandas. """ - return self.iloc + return _DataFrameiAtIndexer(self) @property def at(self): """ Alias for ``DataFrame.loc``; provided for compatibility with Pandas. """ - return self.loc + return _DataFrameAtIndexer(self) @property # type: ignore @_external_only_api( diff --git a/python/cudf/cudf/pandas/_wrappers/pandas.py b/python/cudf/cudf/pandas/_wrappers/pandas.py index 5d75d3b8306..dd6f6fe76ba 100644 --- a/python/cudf/cudf/pandas/_wrappers/pandas.py +++ b/python/cudf/cudf/pandas/_wrappers/pandas.py @@ -777,13 +777,13 @@ def Index__new__(cls, *args, **kwargs): _AtIndexer = make_intermediate_proxy_type( "_AtIndexer", - _Unusable(), + cudf.core.dataframe._DataFrameAtIndexer, pd.core.indexing._AtIndexer, ) _iAtIndexer = make_intermediate_proxy_type( "_iAtIndexer", - _Unusable(), + cudf.core.dataframe._DataFrameiAtIndexer, pd.core.indexing._iAtIndexer, ) From 420294a038ad5b9eed66a226cc477b287864e4ac Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 5 Jul 2024 15:44:15 -0700 Subject: [PATCH 5/5] Use inheritance and be explicit with types --- python/cudf/cudf/core/dataframe.py | 6 ++++-- python/cudf/cudf_pandas_tests/test_cudf_pandas.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index f21979771b7..3e5ff9c18b5 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -462,7 +462,8 @@ def _setitem_tuple_arg(self, key, value): self._frame[col].loc[key[0]] = value[i] -_DataFrameAtIndexer = _DataFrameLocIndexer +class _DataFrameAtIndexer(_DataFrameLocIndexer): + pass class _DataFrameIlocIndexer(_DataFrameIndexer): @@ -587,7 +588,8 @@ def _setitem_tuple_arg(self, key, value): self._frame[col].iloc[key[0]] = value[i] -_DataFrameiAtIndexer = _DataFrameIlocIndexer +class _DataFrameiAtIndexer(_DataFrameIlocIndexer): + pass class DataFrame(IndexedFrame, Serializable, GetAttrGetItemMixin): diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index 7ca00f9f004..b0aeaba3916 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -1580,7 +1580,7 @@ def test_at_iat(indexer): def test_at_setitem_empty(): - df = xpd.DataFrame({"name": []}) + df = xpd.DataFrame({"name": []}, dtype="float64") df.at[0, "name"] = 1.0 df.at[0, "new"] = 2.0 expected = pd.DataFrame({"name": [1.0], "new": [2.0]})