From f41c10c3b39240a3f7008e5e38faf61f933c552c Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Wed, 10 Mar 2021 17:18:16 -0600 Subject: [PATCH] Fix index mismatch issue in equality related APIs (#7555) Fixes: #7536 This PR enables re-indexing in some of the equality-related APIs. Note that we will still error when we call the dunder methods. Authors: - GALI PREM SAGAR (@galipremsagar) Approvers: - Keith Kraus (@kkraus14) URL: https://github.com/rapidsai/cudf/pull/7555 --- python/cudf/cudf/core/series.py | 252 ++++++++++++++++++++++++-- python/cudf/cudf/tests/test_binops.py | 19 ++ 2 files changed, 257 insertions(+), 14 deletions(-) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 9bffd28ced5..11e32e2285d 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -1490,7 +1490,9 @@ def __repr__(self): return "\n".join(lines) @annotate("BINARY_OP", color="orange", domain="cudf_python") - def _binaryop(self, other, fn, fill_value=None, reflect=False): + def _binaryop( + self, other, fn, fill_value=None, reflect=False, can_reindex=False + ): """ Internal util to call a binary operator *fn* on operands *self* and *other*. Return the output Series. The output dtype is @@ -1505,7 +1507,7 @@ def _binaryop(self, other, fn, fill_value=None, reflect=False): result_name = utils.get_result_name(self, other) if isinstance(other, Series): - if fn in cudf.utils.utils._EQUALITY_OPS: + if not can_reindex and fn in cudf.utils.utils._EQUALITY_OPS: if not self.index.equals(other.index): raise ValueError( "Can only compare identically-labeled " @@ -2318,10 +2320,47 @@ def eq(self, other, fill_value=None, axis=0): fill_value : None or value Value to fill nulls with before computation. If data in both corresponding Series locations is null the result will be null - """ + + Returns + ------- + Series + The result of the operation. 
+ + Examples + -------- + >>> import cudf + >>> a = cudf.Series([1, 2, 3, None, 10, 20], index=['a', 'c', 'd', 'e', 'f', 'g']) + >>> a + a 1 + c 2 + d 3 + e <NA> + f 10 + g 20 + dtype: int64 + >>> b = cudf.Series([-10, 23, -1, None, None], index=['a', 'b', 'c', 'd', 'e']) + >>> b + a -10 + b 23 + c -1 + d <NA> + e <NA> + dtype: int64 + >>> a.eq(b, fill_value=2) + a False + b False + c False + d False + e <NA> + f False + g False + dtype: bool + """ # noqa: E501 if axis != 0: raise NotImplementedError("Only axis=0 supported at this time.") - return self._binaryop(other, "eq", fill_value) + return self._binaryop( + other=other, fn="eq", fill_value=fill_value, can_reindex=True + ) def __eq__(self, other): return self._binaryop(other, "eq") @@ -2336,10 +2375,47 @@ def ne(self, other, fill_value=None, axis=0): fill_value : None or value Value to fill nulls with before computation. If data in both corresponding Series locations is null the result will be null - """ + + Returns + ------- + Series + The result of the operation. + + Examples + -------- + >>> import cudf + >>> a = cudf.Series([1, 2, 3, None, 10, 20], index=['a', 'c', 'd', 'e', 'f', 'g']) + >>> a + a 1 + c 2 + d 3 + e <NA> + f 10 + g 20 + dtype: int64 + >>> b = cudf.Series([-10, 23, -1, None, None], index=['a', 'b', 'c', 'd', 'e']) + >>> b + a -10 + b 23 + c -1 + d <NA> + e <NA> + dtype: int64 + >>> a.ne(b, fill_value=2) + a True + b True + c True + d True + e <NA> + f True + g True + dtype: bool + """ # noqa: E501 if axis != 0: raise NotImplementedError("Only axis=0 supported at this time.") - return self._binaryop(other, "ne", fill_value) + return self._binaryop( + other=other, fn="ne", fill_value=fill_value, can_reindex=True + ) def __ne__(self, other): return self._binaryop(other, "ne") @@ -2354,10 +2430,47 @@ def lt(self, other, fill_value=None, axis=0): fill_value : None or value Value to fill nulls with before computation. 
If data in both corresponding Series locations is null the result will be null - """ + + Returns + ------- + Series + The result of the operation. + + Examples + -------- + >>> import cudf + >>> a = cudf.Series([1, 2, 3, None, 10, 20], index=['a', 'c', 'd', 'e', 'f', 'g']) + >>> a + a 1 + c 2 + d 3 + e <NA> + f 10 + g 20 + dtype: int64 + >>> b = cudf.Series([-10, 23, -1, None, None], index=['a', 'b', 'c', 'd', 'e']) + >>> b + a -10 + b 23 + c -1 + d <NA> + e <NA> + dtype: int64 + >>> a.lt(b, fill_value=-10) + a False + b True + c False + d False + e <NA> + f False + g False + dtype: bool + """ # noqa: E501 if axis != 0: raise NotImplementedError("Only axis=0 supported at this time.") - return self._binaryop(other, "lt", fill_value) + return self._binaryop( + other=other, fn="lt", fill_value=fill_value, can_reindex=True + ) def __lt__(self, other): return self._binaryop(other, "lt") @@ -2372,10 +2485,47 @@ def le(self, other, fill_value=None, axis=0): fill_value : None or value Value to fill nulls with before computation. If data in both corresponding Series locations is null the result will be null - """ + + Returns + ------- + Series + The result of the operation. 
+ + Examples + -------- + >>> import cudf + >>> a = cudf.Series([1, 2, 3, None, 10, 20], index=['a', 'c', 'd', 'e', 'f', 'g']) + >>> a + a 1 + c 2 + d 3 + e <NA> + f 10 + g 20 + dtype: int64 + >>> b = cudf.Series([-10, 23, -1, None, None], index=['a', 'b', 'c', 'd', 'e']) + >>> b + a -10 + b 23 + c -1 + d <NA> + e <NA> + dtype: int64 + >>> a.le(b, fill_value=-10) + a False + b True + c False + d False + e <NA> + f False + g False + dtype: bool + """ # noqa: E501 if axis != 0: raise NotImplementedError("Only axis=0 supported at this time.") - return self._binaryop(other, "le", fill_value) + return self._binaryop( + other=other, fn="le", fill_value=fill_value, can_reindex=True + ) def __le__(self, other): return self._binaryop(other, "le") @@ -2390,10 +2540,47 @@ def gt(self, other, fill_value=None, axis=0): fill_value : None or value Value to fill nulls with before computation. If data in both corresponding Series locations is null the result will be null - """ + + Returns + ------- + Series + The result of the operation. + + Examples + -------- + >>> import cudf + >>> a = cudf.Series([1, 2, 3, None, 10, 20], index=['a', 'c', 'd', 'e', 'f', 'g']) + >>> a + a 1 + c 2 + d 3 + e <NA> + f 10 + g 20 + dtype: int64 + >>> b = cudf.Series([-10, 23, -1, None, None], index=['a', 'b', 'c', 'd', 'e']) + >>> b + a -10 + b 23 + c -1 + d <NA> + e <NA> + dtype: int64 + >>> a.gt(b) + a True + b False + c True + d False + e False + f False + g False + dtype: bool + """ # noqa: E501 if axis != 0: raise NotImplementedError("Only axis=0 supported at this time.") - return self._binaryop(other, "gt", fill_value) + return self._binaryop( + other=other, fn="gt", fill_value=fill_value, can_reindex=True + ) def __gt__(self, other): return self._binaryop(other, "gt") @@ -2408,10 +2595,47 @@ def ge(self, other, fill_value=None, axis=0): fill_value : None or value Value to fill nulls with before computation. 
If data in both corresponding Series locations is null the result will be null - """ + + Returns + ------- + Series + The result of the operation. + + Examples + -------- + >>> import cudf + >>> a = cudf.Series([1, 2, 3, None, 10, 20], index=['a', 'c', 'd', 'e', 'f', 'g']) + >>> a + a 1 + c 2 + d 3 + e <NA> + f 10 + g 20 + dtype: int64 + >>> b = cudf.Series([-10, 23, -1, None, None], index=['a', 'b', 'c', 'd', 'e']) + >>> b + a -10 + b 23 + c -1 + d <NA> + e <NA> + dtype: int64 + >>> a.ge(b) + a True + b False + c True + d False + e False + f False + g False + dtype: bool + """ # noqa: E501 if axis != 0: raise NotImplementedError("Only axis=0 supported at this time.") - return self._binaryop(other, "ge", fill_value) + return self._binaryop( + other=other, fn="ge", fill_value=fill_value, can_reindex=True + ) def __ge__(self, other): return self._binaryop(other, "ge") diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index 579716f8277..a0b65743180 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -1726,3 +1726,22 @@ def decimal_series(input, dtype): got = op(a, b) assert expect.dtype == got.dtype utils.assert_eq(expect, got) + + +@pytest.mark.parametrize("fn", ["eq", "ne", "lt", "gt", "le", "ge"]) +def test_equality_ops_index_mismatch(fn): + a = cudf.Series( + [1, 2, 3, None, None, 4], index=["a", "b", "c", "d", "e", "f"] + ) + b = cudf.Series( + [-5, 4, 3, 2, 1, 0, 19, 11], + index=["aa", "b", "c", "d", "e", "f", "y", "z"], + ) + + pa = a.to_pandas() + pb = b.to_pandas() + + expected = getattr(pa, fn)(pb) + actual = getattr(a, fn)(b) + + utils.assert_eq(expected, actual)