From 529760849f2294929673f0382ea79c8252d18bb1 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 7 May 2024 11:54:12 +0000 Subject: [PATCH 1/4] Properly implement binaryops --- python/cudf/cudf/pandas/fast_slow_proxy.py | 224 +++++++++++++++--- .../cudf_pandas_tests/test_cudf_pandas.py | 13 + 2 files changed, 209 insertions(+), 28 deletions(-) diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index 9d8c174b297..97cb1963995 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -598,88 +598,256 @@ def __setattr__(self, name, value): return _FastSlowAttribute("__setattr__").__get__(self)(name, value) def __add__(self, other): - return _fast_slow_function_call(operator.add, self, other)[0] + return _fast_slow_function_call( + lambda x, y: x.__add__(y) + if hasattr(x, "__add__") + else NotImplemented, + self, + other, + )[0] def __radd__(self, other): - return _fast_slow_function_call(operator.add, other, self)[0] + return _fast_slow_function_call( + lambda x, y: x.__radd__(y) + if hasattr(x, "__radd__") + else NotImplemented, + self, + other, + )[0] def __sub__(self, other): - return _fast_slow_function_call(operator.sub, self, other)[0] + return _fast_slow_function_call( + lambda x, y: x.__sub__(y) + if hasattr(x, "__sub__") + else NotImplemented, + self, + other, + )[0] def __rsub__(self, other): - return _fast_slow_function_call(operator.sub, other, self)[0] + return _fast_slow_function_call( + lambda x, y: x.__rsub__(y) + if hasattr(x, "__rsub__") + else NotImplemented, + self, + other, + )[0] def __mul__(self, other): - return _fast_slow_function_call(operator.mul, self, other)[0] + return _fast_slow_function_call( + lambda x, y: x.__mul__(y) + if hasattr(x, "__mul__") + else NotImplemented, + self, + other, + )[0] def __rmul__(self, other): - return _fast_slow_function_call(operator.mul, other, self)[0] + return _fast_slow_function_call( + lambda x, y: x.__rmul__(y) + if hasattr(x, "__rmul__") + else NotImplemented, + self, + other, + )[0] def __truediv__(self, other): - return _fast_slow_function_call(operator.truediv, self, other)[0] + return _fast_slow_function_call( + lambda x, y: x.__truediv__(y) + if hasattr(x, "__truediv__") + else NotImplemented, + self, + other, + )[0] def __rtruediv__(self, other): - return _fast_slow_function_call(operator.truediv, other, self)[0] + return _fast_slow_function_call( + lambda x, y: x.__rtruediv__(y) + if hasattr(x, "__rtruediv__") + else NotImplemented, + self, + other, + )[0] def __floordiv__(self, other): - return _fast_slow_function_call(operator.floordiv, self, other)[0] + return _fast_slow_function_call( + lambda x, y: x.__floordiv__(y) + if hasattr(x, "__floordiv__") + else NotImplemented, + self, + other, + )[0] def __rfloordiv__(self, other): - return _fast_slow_function_call(operator.floordiv, other, self)[0] + return _fast_slow_function_call( + lambda x, y: x.__rfloordiv__(y) + if hasattr(x, "__rfloordiv__") + else NotImplemented, + self, + other, + )[0] def __mod__(self, other): - return _fast_slow_function_call(operator.mod, self, other)[0] + return _fast_slow_function_call( + lambda x, y: x.__mod__(y) + if hasattr(x, "__mod__") + else NotImplemented, + self, + other, + )[0] def __rmod__(self, other): - return _fast_slow_function_call(operator.mod, other, self)[0] + return _fast_slow_function_call( + lambda x, y: x.__rmod__(y) + if hasattr(x, "__rmod__") + else NotImplemented, + self, + other, + )[0] def __divmod__(self, other): - return _fast_slow_function_call(divmod, self, other)[0] + return _fast_slow_function_call( + lambda x, y: x.__divmod__(y) + if hasattr(x, "__divmod__") + else NotImplemented, + self, + other, + )[0] def __rdivmod__(self, other): - return _fast_slow_function_call(divmod, other, self)[0] + return _fast_slow_function_call( + lambda x, y: x.__rdivmod__(y) + if hasattr(x, "__rdivmod__") + else NotImplemented, + self, + other, + )[0] def __pow__(self, other): - return _fast_slow_function_call(operator.pow, self, other)[0] + return _fast_slow_function_call( + lambda x, y: x.__pow__(y) + if hasattr(x, "__pow__") + else NotImplemented, + self, + other, + )[0] def __rpow__(self, other): - return _fast_slow_function_call(operator.pow, other, self)[0] + return _fast_slow_function_call( + lambda x, y: x.__rpow__(y) + if hasattr(x, "__rpow__") + else NotImplemented, + self, + other, + )[0] def __lshift__(self, other): - return _fast_slow_function_call(operator.lshift, self, other)[0] + return _fast_slow_function_call( + lambda x, y: x.__lshift__(y) + if hasattr(x, "__lshift__") + else NotImplemented, + self, + other, + )[0] def __rlshift__(self, other): - return _fast_slow_function_call(operator.lshift, other, self)[0] + return _fast_slow_function_call( + lambda x, y: x.__rlshift__(y) + if hasattr(x, "__rlshift__") + else NotImplemented, + self, + other, + )[0] def __rshift__(self, other): - return _fast_slow_function_call(operator.rshift, self, other)[0] + return _fast_slow_function_call( + lambda x, y: x.__rshift__(y) + if hasattr(x, "__rshift__") + else NotImplemented, + self, + other, + )[0] def __rrshift__(self, other): - return _fast_slow_function_call(operator.rshift, other, self)[0] + return _fast_slow_function_call( + lambda x, y: x.__rrshift__(y) + if hasattr(x, "__rrshift__") + else NotImplemented, + self, + other, + )[0] def __and__(self, other): - return _fast_slow_function_call(operator.and_, self, other)[0] + return _fast_slow_function_call( + lambda x, y: x.__and__(y) + if hasattr(x, "__and__") + else NotImplemented, + self, + other, + )[0] def __rand__(self, other): - return _fast_slow_function_call(operator.and_, other, self)[0] + return _fast_slow_function_call( + lambda x, y: x.__rand__(y) + if hasattr(x, "__rand__") + else NotImplemented, + self, + other, + )[0] def __xor__(self, other): - return _fast_slow_function_call(operator.xor, self, other)[0] + return _fast_slow_function_call( + lambda x, y: x.__xor__(y) + if hasattr(x, "__xor__") + else NotImplemented, + self, + other, + )[0] def __rxor__(self, other): - return _fast_slow_function_call(operator.xor, other, self)[0] + return _fast_slow_function_call( + lambda x, y: x.__rxor__(y) + if hasattr(x, "__rxor__") + else NotImplemented, + self, + other, + )[0] def __or__(self, other): - return _fast_slow_function_call(operator.or_, self, other)[0] + return _fast_slow_function_call( + lambda x, y: x.__or__(y) + if hasattr(x, "__or__") + else NotImplemented, + self, + other, + )[0] def __ror__(self, other): - return _fast_slow_function_call(operator.or_, other, self)[0] + return _fast_slow_function_call( + lambda x, y: x.__ror__(y) + if hasattr(x, "__ror__") + else NotImplemented, + self, + other, + )[0] def __matmul__(self, other): - return _fast_slow_function_call(operator.matmul, self, other)[0] + return _fast_slow_function_call( + lambda x, y: x.__matmul__(y) + if hasattr(x, "__matmul__") + else NotImplemented, + self, + other, + )[0] def __rmatmul__(self, other): - return _fast_slow_function_call(operator.matmul, other, self)[0] + return _fast_slow_function_call( + lambda x, y: x.__rmatmul__(y) + if hasattr(x, "__rmatmul__") + else NotImplemented, + self, + other, + )[0] class _FinalProxy(_FastSlowProxy): diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index 90356a01404..3fba959b6da 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -1210,6 +1210,19 @@ def test_isinstance_base_offset(): assert isinstance(offset, xpd.tseries.offsets.BaseOffset) +def test_floordiv_array_vs_df(): + xarray = xpd.Series([1, 2, 3], dtype="datetime64[ns]").array + parray = pd.Series([1, 2, 3], dtype="datetime64[ns]").array + + xdf = xpd.DataFrame(xarray) + pdf = pd.DataFrame(parray) + + actual = xarray.__floordiv__(xdf) + expected = parray.__floordiv__(pdf) + + tm.assert_equal(actual, expected) + + def test_apply_slow_path_udf_references_global_module(): def my_apply(df, unused): # `datetime` Raised `KeyError: __import__` From 4943688d7737f19ef02c829d513ccffbe106cd36 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 7 May 2024 16:23:36 +0000 Subject: [PATCH 2/4] fix test --- python/cudf/cudf_pandas_tests/test_fast_slow_proxy.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/python/cudf/cudf_pandas_tests/test_fast_slow_proxy.py b/python/cudf/cudf_pandas_tests/test_fast_slow_proxy.py index 631ad2f37b2..39bf07c49de 100644 --- a/python/cudf/cudf_pandas_tests/test_fast_slow_proxy.py +++ b/python/cudf/cudf_pandas_tests/test_fast_slow_proxy.py @@ -439,10 +439,6 @@ def __radd__(self, other): assert Bar() + Foo() == "sum" assert FooProxy() + BarProxy() == "sum" assert BarProxy() + FooProxy() == "sum" - assert FooProxy() + Bar() == "sum" - assert Bar() + FooProxy() == "sum" - assert Foo() + BarProxy() == "sum" - assert BarProxy() + Foo() == "sum" def test_slow_attr_still_proxy(): From a91f76cc4ba4ddd8eb546aae06fcf86eca897d72 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Wed, 8 May 2024 14:32:22 +0000 Subject: [PATCH 3/4] separate from special methods --- python/cudf/cudf/pandas/fast_slow_proxy.py | 285 ++++----------------- 1 file changed, 43 insertions(+), 242 deletions(-) diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index 3bcde9cdc1f..06411013af2 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -597,257 +597,58 @@ def __setattr__(self, name, value): return return _FastSlowAttribute("__setattr__").__get__(self)(name, value) - def __add__(self, other): - return _fast_slow_function_call( - lambda x, y: x.__add__(y) - if hasattr(x, "__add__") - else NotImplemented, - self, - other, - )[0] - - def __radd__(self, other): - return _fast_slow_function_call( - lambda x, y: x.__radd__(y) - if hasattr(x, "__radd__") - else NotImplemented, - self, - other, - )[0] - - def __sub__(self, other): - return _fast_slow_function_call( - lambda x, y: x.__sub__(y) - if hasattr(x, "__sub__") - else NotImplemented, - self, - other, - )[0] - - def __rsub__(self, other): - return _fast_slow_function_call( - lambda x, y: x.__rsub__(y) - if hasattr(x, "__rsub__") - else NotImplemented, - self, - other, - )[0] - - def __mul__(self, other): - return _fast_slow_function_call( - lambda x, y: x.__mul__(y) - if hasattr(x, "__mul__") - else NotImplemented, - self, - other, - )[0] - - def __rmul__(self, other): - return _fast_slow_function_call( - lambda x, y: x.__rmul__(y) - if hasattr(x, "__rmul__") - else NotImplemented, - self, - other, - )[0] - - def __truediv__(self, other): - return _fast_slow_function_call( - lambda x, y: x.__truediv__(y) - if hasattr(x, "__truediv__") - else NotImplemented, - self, - other, - )[0] - - def __rtruediv__(self, other): - return _fast_slow_function_call( - lambda x, y: x.__rtruediv__(y) - if hasattr(x, "__rtruediv__") - else NotImplemented, - self, - other, - )[0] - - def __floordiv__(self, other): - return _fast_slow_function_call( - lambda x, y: x.__floordiv__(y) - if hasattr(x, "__floordiv__") - else NotImplemented, - self, - other, - )[0] - - def __rfloordiv__(self, other): - return _fast_slow_function_call( - lambda x, y: x.__rfloordiv__(y) - if hasattr(x, "__rfloordiv__") - else NotImplemented, - self, - other, - )[0] - - def __mod__(self, other): - return _fast_slow_function_call( - lambda x, y: x.__mod__(y) - if hasattr(x, "__mod__") - else NotImplemented, - self, - other, - )[0] - def __rmod__(self, other): +binary_ops = [ + "__add__", + "__and__", + "__divmod__", + "__floordiv__", + "__lshift__", + "__matmul__", + "__mod__", + "__mul__", + "__or__", + "__pow__", + "__radd__", + "__rand__", + "__rdivmod__", + "__rfloordiv__", + "__rlshift__", + "__rmatmul__", + "__rmod__", + "__rmul__", + "__ror__", + "__rpow__", + "__rrshift__", + "__rshift__", + "__rsub__", + "__rtruediv__", + "__rxor__", + "__sub__", + "__truediv__", + "__xor__", +] + + +def create_special_method_impl(special_method): + def special_method_impl(self, other): return _fast_slow_function_call( - lambda x, y: x.__rmod__(y) - if hasattr(x, "__rmod__") + lambda x, y: getattr(x, special_method)(y) + if hasattr(x, special_method) else NotImplemented, self, other, )[0] - def __divmod__(self, other): - return _fast_slow_function_call( - lambda x, y: x.__divmod__(y) - if hasattr(x, "__divmod__") - else NotImplemented, - self, - other, - )[0] + return special_method_impl - def __rdivmod__(self, other): - return _fast_slow_function_call( - lambda x, y: x.__rdivmod__(y) - if hasattr(x, "__rdivmod__") - else NotImplemented, - self, - other, - )[0] - def __pow__(self, other): - return _fast_slow_function_call( - lambda x, y: x.__pow__(y) - if hasattr(x, "__pow__") - else NotImplemented, - self, - other, - )[0] - - def __rpow__(self, other): - return _fast_slow_function_call( - lambda x, y: x.__rpow__(y) - if hasattr(x, "__rpow__") - else NotImplemented, - self, - other, - )[0] - - def __lshift__(self, other): - return _fast_slow_function_call( - lambda x, y: x.__lshift__(y) - if hasattr(x, "__lshift__") - else NotImplemented, - self, - other, - )[0] - - def __rlshift__(self, other): - return _fast_slow_function_call( - lambda x, y: x.__rlshift__(y) - if hasattr(x, "__rlshift__") - else NotImplemented, - self, - other, - )[0] - - def __rshift__(self, other): - return _fast_slow_function_call( - lambda x, y: x.__rshift__(y) - if hasattr(x, "__rshift__") - else NotImplemented, - self, - other, - )[0] - - def __rrshift__(self, other): - return _fast_slow_function_call( - lambda x, y: x.__rrshift__(y) - if hasattr(x, "__rrshift__") - else NotImplemented, - self, - other, - )[0] - - def __and__(self, other): - return _fast_slow_function_call( - lambda x, y: x.__and__(y) - if hasattr(x, "__and__") - else NotImplemented, - self, - other, - )[0] - - def __rand__(self, other): - return _fast_slow_function_call( - lambda x, y: x.__rand__(y) - if hasattr(x, "__rand__") - else NotImplemented, - self, - other, - )[0] - - def __xor__(self, other): - return _fast_slow_function_call( - lambda x, y: x.__xor__(y) - if hasattr(x, "__xor__") - else NotImplemented, - self, - other, - )[0] - - def __rxor__(self, other): - return _fast_slow_function_call( - lambda x, y: x.__rxor__(y) - if hasattr(x, "__rxor__") - else NotImplemented, - self, - other, - )[0] - - def __or__(self, other): - return _fast_slow_function_call( - lambda x, y: x.__or__(y) - if hasattr(x, "__or__") - else NotImplemented, - self, - other, - )[0] - - def __ror__(self, other): - return _fast_slow_function_call( - lambda x, y: x.__ror__(y) - if hasattr(x, "__ror__") - else NotImplemented, - self, - other, - )[0] - - def __matmul__(self, other): - return _fast_slow_function_call( - lambda x, y: x.__matmul__(y) - if hasattr(x, "__matmul__") - else NotImplemented, - self, - other, - )[0] - - def __rmatmul__(self, other): - return _fast_slow_function_call( - lambda x, y: x.__rmatmul__(y) - if hasattr(x, "__rmatmul__") - else NotImplemented, - self, - other, - )[0] +for special_method in binary_ops: + setattr( + _FastSlowProxy, + special_method, + create_special_method_impl(special_method), + ) class _FinalProxy(_FastSlowProxy): From c9b1c5e311c1d7877812372d2bf258aa9e4c1758 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Wed, 8 May 2024 16:18:39 +0000 Subject: [PATCH 4/4] Modify such that attributes are assigned only if they exist in the true types --- python/cudf/cudf/pandas/fast_slow_proxy.py | 139 +++++++----------- .../cudf/cudf_pandas_tests/test_profiler.py | 1 + 2 files changed, 58 insertions(+), 82 deletions(-) diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index 06411013af2..c66458077fa 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -598,59 +598,6 @@ def __setattr__(self, name, value): return _FastSlowAttribute("__setattr__").__get__(self)(name, value) -binary_ops = [ - "__add__", - "__and__", - "__divmod__", - "__floordiv__", - "__lshift__", - "__matmul__", - "__mod__", - "__mul__", - "__or__", - "__pow__", - "__radd__", - "__rand__", - "__rdivmod__", - "__rfloordiv__", - "__rlshift__", - "__rmatmul__", - "__rmod__", - "__rmul__", - "__ror__", - "__rpow__", - "__rrshift__", - "__rshift__", - "__rsub__", - "__rtruediv__", - "__rxor__", - "__sub__", - "__truediv__", - "__xor__", -] - - -def create_special_method_impl(special_method): - def special_method_impl(self, other): - return _fast_slow_function_call( - lambda x, y: getattr(x, special_method)(y) - if hasattr(x, special_method) - else NotImplemented, - self, - other, - )[0] - - return special_method_impl - - -for special_method in binary_ops: - setattr( - _FastSlowProxy, - special_method, - create_special_method_impl(special_method), - ) - - class _FinalProxy(_FastSlowProxy): """ Proxy type for a pair of fast and slow "final" types for which @@ -1110,41 +1057,69 @@ def _replace_closurevars( _SPECIAL_METHODS: Set[str] = { - "__repr__", - "__str__", - "__len__", - "__contains__", - "__getitem__", - "__setitem__", - "__delitem__", - "__getslice__", - "__setslice__", - "__delslice__", - "__iter__", - "__lt__", - "__le__", - "__eq__", - "__ne__", - "__gt__", - "__ge__", - "__pos__", - "__neg__", - "__invert__", "__abs__", - "__round__", - "__format__", + "__add__", + "__and__", "__bool__", - "__float__", - "__int__", + "__call__", "__complex__", - "__enter__", - "__exit__", - "__next__", + "__contains__", "__copy__", - "__deepcopy__", "__dataframe__", - "__call__", + "__deepcopy__", + "__delitem__", + "__delslice__", + "__divmod__", + "__enter__", + "__eq__", + "__exit__", + "__float__", + "__floordiv__", + "__format__", + "__ge__", + "__getitem__", + "__getslice__", + "__gt__", # Added on a per-proxy basis # https://github.com/rapidsai/xdf/pull/306#pullrequestreview-1636155428 # "__hash__", + "__int__", + "__invert__", + "__iter__", + "__le__", + "__len__", + "__lshift__", + "__lt__", + "__matmul__", + "__mod__", + "__mul__", + "__ne__", + "__neg__", + "__next__", + "__or__", + "__pos__", + "__pow__", + "__radd__", + "__rand__", + "__rdivmod__", + "__repr__", + "__rfloordiv__", + "__rlshift__", + "__rmatmul__", + "__rmod__", + "__rmul__", + "__ror__", + "__round__", + "__rpow__", + "__rrshift__", + "__rshift__", + "__rsub__", + "__rtruediv__", + "__rxor__", + "__setitem__", + "__setslice__", + "__str__", + "__sub__", + "__truediv__", + "__xor__", } diff --git a/python/cudf/cudf_pandas_tests/test_profiler.py b/python/cudf/cudf_pandas_tests/test_profiler.py index dd8d9287972..359a2a2c515 100644 --- a/python/cudf/cudf_pandas_tests/test_profiler.py +++ b/python/cudf/cudf_pandas_tests/test_profiler.py @@ -37,6 +37,7 @@ def test_profiler(): "DataFrame.sum", "Series.__getitem__", "Timedelta", + "Timestamp.__add__", } for name, func in per_function_stats.items(): assert (