From 13d1259764174ba7d8ea7acc4f89b7b4cb09216f Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 9 Feb 2022 16:02:48 -0800 Subject: [PATCH 1/5] Add tests of reflected ufuncs and fix behavior of logical reflected ufuncs. --- python/cudf/cudf/core/series.py | 8 ++++++- python/cudf/cudf/tests/test_array_ufunc.py | 27 ++++++++++++++++++---- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 41bf75fb48f..63b04ac4e9c 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -1000,7 +1000,13 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): if fname in binary_operations: not_reflect = self is inputs[0] other = inputs[not_reflect] - op = f"__{'' if not_reflect else 'r'}{binary_operations[fname]}__" + op = binary_operations[fname] + ops_without_reflection = ("gt", "gte", "lt", "lte", "ne", "eq") + if op in ops_without_reflection and not not_reflect: + # This replacement will ignore eq and ne, which don't need it. + op = op.replace(*(("g", "l") if "g" in op else ("l", "g"))) + not_reflect = True + op = f"__{'' if not_reflect else 'r'}{op}__" # pandas bitwise operations return bools if indexes are misaligned. # TODO: Generalize for other types of Frames diff --git a/python/cudf/cudf/tests/test_array_ufunc.py b/python/cudf/cudf/tests/test_array_ufunc.py index 2db0e8d29b8..f4df47f94cb 100644 --- a/python/cudf/cudf/tests/test_array_ufunc.py +++ b/python/cudf/cudf/tests/test_array_ufunc.py @@ -102,13 +102,20 @@ def test_ufunc_series(ufunc, has_nulls, indexed): @pytest.mark.parametrize("has_nulls", [True, False]) @pytest.mark.parametrize("indexed", [True, False]) @pytest.mark.parametrize("type_", ["cupy", "numpy", "list"]) -def test_binary_ufunc_series_array(ufunc, has_nulls, indexed, type_): +@pytest.mark.parametrize("reflect", [True, False]) +def test_binary_ufunc_series_array(ufunc, has_nulls, indexed, type_, reflect): fname = ufunc.__name__ if fname == "greater" and has_nulls: pytest.xfail( "The way cudf casts nans in arrays to nulls during binops with " "cudf objects is currently incompatible with pandas." ) + if reflect and has_nulls and type_ == "cupy": + pytest.skip( + "When cupy is the left operand there is no way for us to avoid " + "calling its binary operators, which cannot handle cudf objects " + "that contain nulls." + ) N = 100 # Avoid zeros in either array to skip division by 0 errors. Also limit the # scale to avoid issues with overflow, etc. We use ints because some @@ -136,18 +143,28 @@ def test_binary_ufunc_series_array(ufunc, has_nulls, indexed, type_): if type_ == "list": arg1 = arg1.tolist() - got = ufunc(args[0], arg1) - expect = ufunc(args[0].to_pandas(), args[1].to_numpy()) + if reflect: + got = ufunc(arg1, args[0]) + expect = ufunc(args[1].to_numpy(), args[0].to_pandas()) + else: + got = ufunc(args[0], arg1) + expect = ufunc(args[0].to_pandas(), args[1].to_numpy()) if ufunc.nout > 1: for g, e in zip(got, expect): if has_nulls: e[mask] = np.nan - assert_eq(g, e) + if type_ == "cupy" and reflect: + assert (cp.asnumpy(g) == e).all() + else: + assert_eq(g, e) else: if has_nulls: expect[mask] = np.nan - assert_eq(got, expect) + if type_ == "cupy" and reflect: + assert (cp.asnumpy(got) == expect).all() + else: + assert_eq(got, expect) @pytest.mark.parametrize( From 409bce87a89b4f2df70df0eb5abbf1305f8f865a Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 10 Feb 2022 08:52:47 -0800 Subject: [PATCH 2/5] Fix typo and add relevant test. --- python/cudf/cudf/core/series.py | 2 +- python/cudf/cudf/tests/test_array_ufunc.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 63b04ac4e9c..fc93d11d292 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -1001,7 +1001,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): not_reflect = self is inputs[0] other = inputs[not_reflect] op = binary_operations[fname] - ops_without_reflection = ("gt", "gte", "lt", "lte", "ne", "eq") + ops_without_reflection = ("gt", "ge", "lt", "le", "ne", "eq") if op in ops_without_reflection and not not_reflect: # This replacement will ignore eq and ne, which don't need it. op = op.replace(*(("g", "l") if "g" in op else ("l", "g"))) diff --git a/python/cudf/cudf/tests/test_array_ufunc.py b/python/cudf/cudf/tests/test_array_ufunc.py index f4df47f94cb..08e3a7281e6 100644 --- a/python/cudf/cudf/tests/test_array_ufunc.py +++ b/python/cudf/cudf/tests/test_array_ufunc.py @@ -98,14 +98,16 @@ def test_ufunc_series(ufunc, has_nulls, indexed): raise -@pytest.mark.parametrize("ufunc", [np.add, np.greater, np.logical_and]) +@pytest.mark.parametrize( + "ufunc", [np.add, np.greater, np.greater_equal, np.logical_and] +) @pytest.mark.parametrize("has_nulls", [True, False]) @pytest.mark.parametrize("indexed", [True, False]) @pytest.mark.parametrize("type_", ["cupy", "numpy", "list"]) @pytest.mark.parametrize("reflect", [True, False]) def test_binary_ufunc_series_array(ufunc, has_nulls, indexed, type_, reflect): fname = ufunc.__name__ - if fname == "greater" and has_nulls: + if fname in ("greater", "greater_equal") and has_nulls: pytest.xfail( "The way cudf casts nans in arrays to nulls during binops with " "cudf objects is currently incompatible with pandas." From 8e02f7407dab9b08f909991a8f92047bbeab8ae6 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 10 Feb 2022 09:04:59 -0800 Subject: [PATCH 3/5] Add missing copyright. --- python/cudf/cudf/tests/test_array_ufunc.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/cudf/cudf/tests/test_array_ufunc.py b/python/cudf/cudf/tests/test_array_ufunc.py index 08e3a7281e6..a384ddecca6 100644 --- a/python/cudf/cudf/tests/test_array_ufunc.py +++ b/python/cudf/cudf/tests/test_array_ufunc.py @@ -1,3 +1,5 @@ +# Copyright (c) 2020-2022, NVIDIA CORPORATION. + import operator from functools import reduce From c850aeeb9240f35ef4434ce04fcd57965f730a4d Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 10 Feb 2022 16:45:27 -0800 Subject: [PATCH 4/5] Use a dictionary to better document the operator swapping. --- python/cudf/cudf/core/series.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index fc93d11d292..135e8e559d2 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -1000,11 +1000,23 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): if fname in binary_operations: not_reflect = self is inputs[0] other = inputs[not_reflect] + + # These operators need to be mapped to their inverses when + # performing a reflected operation because no reflected version of + # the operators themselves exist. + ops_without_reflection = { + "gt": "lt", + "ge": "le", + "lt": "gt", + "le": "ge", + # ne and eq are symmetric + "ne": "ne", + "eq": "eq", + } + op = binary_operations[fname] - ops_without_reflection = ("gt", "ge", "lt", "le", "ne", "eq") if op in ops_without_reflection and not not_reflect: - # This replacement will ignore eq and ne, which don't need it. - op = op.replace(*(("g", "l") if "g" in op else ("l", "g"))) + op = ops_without_reflection[op] not_reflect = True op = f"__{'' if not_reflect else 'r'}{op}__" From f516dfa150e0524c357bff7357020f48221f6dc0 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 10 Feb 2022 17:21:33 -0800 Subject: [PATCH 5/5] Invert logic to use reflect instead of not_reflect boolean. --- python/cudf/cudf/core/series.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 135e8e559d2..ddaa358e9d9 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -998,8 +998,8 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): # First look for methods of the class. fname = ufunc.__name__ if fname in binary_operations: - not_reflect = self is inputs[0] - other = inputs[not_reflect] + reflect = self is not inputs[0] + other = inputs[0] if reflect else inputs[1] # These operators need to be mapped to their inverses when # performing a reflected operation because no reflected version of @@ -1009,16 +1009,16 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): "ge": "le", "lt": "gt", "le": "ge", - # ne and eq are symmetric + # ne and eq are symmetric, so they are their own inverse op "ne": "ne", "eq": "eq", } op = binary_operations[fname] - if op in ops_without_reflection and not not_reflect: + if reflect and op in ops_without_reflection: op = ops_without_reflection[op] - not_reflect = True - op = f"__{'' if not_reflect else 'r'}{op}__" + reflect = False + op = f"__{'r' if reflect else ''}{op}__" # pandas bitwise operations return bools if indexes are misaligned. # TODO: Generalize for other types of Frames