From eaf555616ff83a75b3c3b11ce18e1c393604ccf4 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Wed, 8 May 2024 13:30:14 -0500 Subject: [PATCH] Properly implement binaryops for proxy types (#15684) Fixes #15675 This PR makes changes to `cudf.pandas` machinery by not calling `operator.op` functions inside the re-direct calls. Forked from #14534 Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/15684 --- python/cudf/cudf/pandas/fast_slow_proxy.py | 170 ++++++------------ .../cudf_pandas_tests/test_cudf_pandas.py | 13 ++ .../cudf_pandas_tests/test_fast_slow_proxy.py | 4 - .../cudf/cudf_pandas_tests/test_profiler.py | 1 + 4 files changed, 71 insertions(+), 117 deletions(-) diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index 835cfa89133..c66458077fa 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -597,90 +597,6 @@ def __setattr__(self, name, value): return return _FastSlowAttribute("__setattr__").__get__(self)(name, value) - def __add__(self, other): - return _fast_slow_function_call(operator.add, self, other)[0] - - def __radd__(self, other): - return _fast_slow_function_call(operator.add, other, self)[0] - - def __sub__(self, other): - return _fast_slow_function_call(operator.sub, self, other)[0] - - def __rsub__(self, other): - return _fast_slow_function_call(operator.sub, other, self)[0] - - def __mul__(self, other): - return _fast_slow_function_call(operator.mul, self, other)[0] - - def __rmul__(self, other): - return _fast_slow_function_call(operator.mul, other, self)[0] - - def __truediv__(self, other): - return _fast_slow_function_call(operator.truediv, self, other)[0] - - def __rtruediv__(self, other): - return _fast_slow_function_call(operator.truediv, other, self)[0] - - def __floordiv__(self, other): - return 
_fast_slow_function_call(operator.floordiv, self, other)[0] - - def __rfloordiv__(self, other): - return _fast_slow_function_call(operator.floordiv, other, self)[0] - - def __mod__(self, other): - return _fast_slow_function_call(operator.mod, self, other)[0] - - def __rmod__(self, other): - return _fast_slow_function_call(operator.mod, other, self)[0] - - def __divmod__(self, other): - return _fast_slow_function_call(divmod, self, other)[0] - - def __rdivmod__(self, other): - return _fast_slow_function_call(divmod, other, self)[0] - - def __pow__(self, other): - return _fast_slow_function_call(operator.pow, self, other)[0] - - def __rpow__(self, other): - return _fast_slow_function_call(operator.pow, other, self)[0] - - def __lshift__(self, other): - return _fast_slow_function_call(operator.lshift, self, other)[0] - - def __rlshift__(self, other): - return _fast_slow_function_call(operator.lshift, other, self)[0] - - def __rshift__(self, other): - return _fast_slow_function_call(operator.rshift, self, other)[0] - - def __rrshift__(self, other): - return _fast_slow_function_call(operator.rshift, other, self)[0] - - def __and__(self, other): - return _fast_slow_function_call(operator.and_, self, other)[0] - - def __rand__(self, other): - return _fast_slow_function_call(operator.and_, other, self)[0] - - def __xor__(self, other): - return _fast_slow_function_call(operator.xor, self, other)[0] - - def __rxor__(self, other): - return _fast_slow_function_call(operator.xor, other, self)[0] - - def __or__(self, other): - return _fast_slow_function_call(operator.or_, self, other)[0] - - def __ror__(self, other): - return _fast_slow_function_call(operator.or_, other, self)[0] - - def __matmul__(self, other): - return _fast_slow_function_call(operator.matmul, self, other)[0] - - def __rmatmul__(self, other): - return _fast_slow_function_call(operator.matmul, other, self)[0] - class _FinalProxy(_FastSlowProxy): """ @@ -1141,41 +1057,69 @@ def _replace_closurevars( 
_SPECIAL_METHODS: Set[str] = { - "__repr__", - "__str__", - "__len__", - "__contains__", - "__getitem__", - "__setitem__", - "__delitem__", - "__getslice__", - "__setslice__", - "__delslice__", - "__iter__", - "__lt__", - "__le__", - "__eq__", - "__ne__", - "__gt__", - "__ge__", - "__pos__", - "__neg__", - "__invert__", "__abs__", - "__round__", - "__format__", + "__add__", + "__and__", "__bool__", - "__float__", - "__int__", + "__call__", "__complex__", - "__enter__", - "__exit__", - "__next__", + "__contains__", "__copy__", - "__deepcopy__", "__dataframe__", - "__call__", + "__deepcopy__", + "__delitem__", + "__delslice__", + "__divmod__", + "__enter__", + "__eq__", + "__exit__", + "__float__", + "__floordiv__", + "__format__", + "__ge__", + "__getitem__", + "__getslice__", + "__gt__", # Added on a per-proxy basis # https://github.com/rapidsai/xdf/pull/306#pullrequestreview-1636155428 # "__hash__", + "__int__", + "__invert__", + "__iter__", + "__le__", + "__len__", + "__lshift__", + "__lt__", + "__matmul__", + "__mod__", + "__mul__", + "__ne__", + "__neg__", + "__next__", + "__or__", + "__pos__", + "__pow__", + "__radd__", + "__rand__", + "__rdivmod__", + "__repr__", + "__rfloordiv__", + "__rlshift__", + "__rmatmul__", + "__rmod__", + "__rmul__", + "__ror__", + "__round__", + "__rpow__", + "__rrshift__", + "__rshift__", + "__rsub__", + "__rtruediv__", + "__rxor__", + "__setitem__", + "__setslice__", + "__str__", + "__sub__", + "__truediv__", + "__xor__", } diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index 8d319cfe640..aa937d3ed4f 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -1218,6 +1218,19 @@ def test_isinstance_base_offset(): assert isinstance(offset, xpd.tseries.offsets.BaseOffset) +def test_floordiv_array_vs_df(): + xarray = xpd.Series([1, 2, 3], dtype="datetime64[ns]").array + parray = pd.Series([1, 2, 3], 
dtype="datetime64[ns]").array + + xdf = xpd.DataFrame(xarray) + pdf = pd.DataFrame(parray) + + actual = xarray.__floordiv__(xdf) + expected = parray.__floordiv__(pdf) + + tm.assert_equal(actual, expected) + + def test_apply_slow_path_udf_references_global_module(): def my_apply(df, unused): # `datetime` Raised `KeyError: __import__` diff --git a/python/cudf/cudf_pandas_tests/test_fast_slow_proxy.py b/python/cudf/cudf_pandas_tests/test_fast_slow_proxy.py index 631ad2f37b2..39bf07c49de 100644 --- a/python/cudf/cudf_pandas_tests/test_fast_slow_proxy.py +++ b/python/cudf/cudf_pandas_tests/test_fast_slow_proxy.py @@ -439,10 +439,6 @@ def __radd__(self, other): assert Bar() + Foo() == "sum" assert FooProxy() + BarProxy() == "sum" assert BarProxy() + FooProxy() == "sum" - assert FooProxy() + Bar() == "sum" - assert Bar() + FooProxy() == "sum" - assert Foo() + BarProxy() == "sum" - assert BarProxy() + Foo() == "sum" def test_slow_attr_still_proxy(): diff --git a/python/cudf/cudf_pandas_tests/test_profiler.py b/python/cudf/cudf_pandas_tests/test_profiler.py index dd8d9287972..359a2a2c515 100644 --- a/python/cudf/cudf_pandas_tests/test_profiler.py +++ b/python/cudf/cudf_pandas_tests/test_profiler.py @@ -37,6 +37,7 @@ def test_profiler(): "DataFrame.sum", "Series.__getitem__", "Timedelta", + "Timestamp.__add__", } for name, func in per_function_stats.items(): assert (