Skip to content

Commit

Permalink
Fix index mismatch issue in equality-related APIs (#7555)
Browse files Browse the repository at this point in the history
Fixes: #7536 

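This PR enables re-indexing in some of the equality-related APIs (the `eq`, `ne`, `lt`, `le`, `gt` and `ge` methods of `Series`). Note that the corresponding dunder methods (e.g. `__eq__`) will still raise an error when the indexes of the two operands do not match.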

Authors:
  - GALI PREM SAGAR (@galipremsagar)

Approvers:
  - Keith Kraus (@kkraus14)

URL: #7555
  • Loading branch information
galipremsagar authored Mar 10, 2021
1 parent 35f3f70 commit f41c10c
Show file tree
Hide file tree
Showing 2 changed files with 257 additions and 14 deletions.
252 changes: 238 additions & 14 deletions python/cudf/cudf/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1490,7 +1490,9 @@ def __repr__(self):
return "\n".join(lines)

@annotate("BINARY_OP", color="orange", domain="cudf_python")
def _binaryop(self, other, fn, fill_value=None, reflect=False):
def _binaryop(
self, other, fn, fill_value=None, reflect=False, can_reindex=False
):
"""
Internal util to call a binary operator *fn* on operands *self*
and *other*. Return the output Series. The output dtype is
Expand All @@ -1505,7 +1507,7 @@ def _binaryop(self, other, fn, fill_value=None, reflect=False):

result_name = utils.get_result_name(self, other)
if isinstance(other, Series):
if fn in cudf.utils.utils._EQUALITY_OPS:
if not can_reindex and fn in cudf.utils.utils._EQUALITY_OPS:
if not self.index.equals(other.index):
raise ValueError(
"Can only compare identically-labeled "
Expand Down Expand Up @@ -2318,10 +2320,47 @@ def eq(self, other, fill_value=None, axis=0):
fill_value : None or value
Value to fill nulls with before computation. If data in both
corresponding Series locations is null the result will be null
"""
Returns
-------
Series
The result of the operation.
Examples
--------
>>> import cudf
>>> a = cudf.Series([1, 2, 3, None, 10, 20], index=['a', 'c', 'd', 'e', 'f', 'g'])
>>> a
a 1
c 2
d 3
e <NA>
f 10
g 20
dtype: int64
>>> b = cudf.Series([-10, 23, -1, None, None], index=['a', 'b', 'c', 'd', 'e'])
>>> b
a -10
b 23
c -1
d <NA>
e <NA>
dtype: int64
>>> a.eq(b, fill_value=2)
a False
b False
c False
d False
e <NA>
f False
g False
dtype: bool
""" # noqa: E501
if axis != 0:
raise NotImplementedError("Only axis=0 supported at this time.")
return self._binaryop(other, "eq", fill_value)
return self._binaryop(
other=other, fn="eq", fill_value=fill_value, can_reindex=True
)

def __eq__(self, other):
    """Implement ``self == other`` by delegating to ``_binaryop``.

    Unlike ``eq``, the operator form does not opt in to re-indexing
    (``can_reindex`` keeps its default of ``False``), so ``_binaryop``
    may reject a Series operand whose index differs from this one.
    """
    return self._binaryop(other=other, fn="eq")
Expand All @@ -2336,10 +2375,47 @@ def ne(self, other, fill_value=None, axis=0):
fill_value : None or value
Value to fill nulls with before computation. If data in both
corresponding Series locations is null the result will be null
"""
Returns
-------
Series
The result of the operation.
Examples
--------
>>> import cudf
>>> a = cudf.Series([1, 2, 3, None, 10, 20], index=['a', 'c', 'd', 'e', 'f', 'g'])
>>> a
a 1
c 2
d 3
e <NA>
f 10
g 20
dtype: int64
>>> b = cudf.Series([-10, 23, -1, None, None], index=['a', 'b', 'c', 'd', 'e'])
>>> b
a -10
b 23
c -1
d <NA>
e <NA>
dtype: int64
>>> a.ne(b, fill_value=2)
a True
b True
c True
d True
e <NA>
f True
g True
dtype: bool
""" # noqa: E501
if axis != 0:
raise NotImplementedError("Only axis=0 supported at this time.")
return self._binaryop(other, "ne", fill_value)
return self._binaryop(
other=other, fn="ne", fill_value=fill_value, can_reindex=True
)

def __ne__(self, other):
    """Implement ``self != other`` by delegating to ``_binaryop``.

    Unlike ``ne``, the operator form does not opt in to re-indexing
    (``can_reindex`` keeps its default of ``False``), so ``_binaryop``
    may reject a Series operand whose index differs from this one.
    """
    return self._binaryop(other=other, fn="ne")
Expand All @@ -2354,10 +2430,47 @@ def lt(self, other, fill_value=None, axis=0):
fill_value : None or value
Value to fill nulls with before computation. If data in both
corresponding Series locations is null the result will be null
"""
Returns
-------
Series
The result of the operation.
Examples
--------
>>> import cudf
>>> a = cudf.Series([1, 2, 3, None, 10, 20], index=['a', 'c', 'd', 'e', 'f', 'g'])
>>> a
a 1
c 2
d 3
e <NA>
f 10
g 20
dtype: int64
>>> b = cudf.Series([-10, 23, -1, None, None], index=['a', 'b', 'c', 'd', 'e'])
>>> b
a -10
b 23
c -1
d <NA>
e <NA>
dtype: int64
>>> a.lt(b, fill_value=-10)
a False
b True
c False
d False
e <NA>
f False
g False
dtype: bool
""" # noqa: E501
if axis != 0:
raise NotImplementedError("Only axis=0 supported at this time.")
return self._binaryop(other, "lt", fill_value)
return self._binaryop(
other=other, fn="lt", fill_value=fill_value, can_reindex=True
)

def __lt__(self, other):
    """Implement ``self < other`` by delegating to ``_binaryop``.

    Unlike ``lt``, the operator form does not opt in to re-indexing
    (``can_reindex`` keeps its default of ``False``), so ``_binaryop``
    may reject a Series operand whose index differs from this one.
    """
    return self._binaryop(other=other, fn="lt")
Expand All @@ -2372,10 +2485,47 @@ def le(self, other, fill_value=None, axis=0):
fill_value : None or value
Value to fill nulls with before computation. If data in both
corresponding Series locations is null the result will be null
"""
Returns
-------
Series
The result of the operation.
Examples
--------
>>> import cudf
>>> a = cudf.Series([1, 2, 3, None, 10, 20], index=['a', 'c', 'd', 'e', 'f', 'g'])
>>> a
a 1
c 2
d 3
e <NA>
f 10
g 20
dtype: int64
>>> b = cudf.Series([-10, 23, -1, None, None], index=['a', 'b', 'c', 'd', 'e'])
>>> b
a -10
b 23
c -1
d <NA>
e <NA>
dtype: int64
>>> a.le(b, fill_value=-10)
a False
b True
c False
d False
e <NA>
f False
g False
dtype: bool
""" # noqa: E501
if axis != 0:
raise NotImplementedError("Only axis=0 supported at this time.")
return self._binaryop(other, "le", fill_value)
return self._binaryop(
other=other, fn="le", fill_value=fill_value, can_reindex=True
)

def __le__(self, other):
    """Implement ``self <= other`` by delegating to ``_binaryop``.

    Unlike ``le``, the operator form does not opt in to re-indexing
    (``can_reindex`` keeps its default of ``False``), so ``_binaryop``
    may reject a Series operand whose index differs from this one.
    """
    return self._binaryop(other=other, fn="le")
Expand All @@ -2390,10 +2540,47 @@ def gt(self, other, fill_value=None, axis=0):
fill_value : None or value
Value to fill nulls with before computation. If data in both
corresponding Series locations is null the result will be null
"""
Returns
-------
Series
The result of the operation.
Examples
--------
>>> import cudf
>>> a = cudf.Series([1, 2, 3, None, 10, 20], index=['a', 'c', 'd', 'e', 'f', 'g'])
>>> a
a 1
c 2
d 3
e <NA>
f 10
g 20
dtype: int64
>>> b = cudf.Series([-10, 23, -1, None, None], index=['a', 'b', 'c', 'd', 'e'])
>>> b
a -10
b 23
c -1
d <NA>
e <NA>
dtype: int64
>>> a.gt(b)
a True
b False
c True
d False
e False
f False
g False
dtype: bool
""" # noqa: E501
if axis != 0:
raise NotImplementedError("Only axis=0 supported at this time.")
return self._binaryop(other, "gt", fill_value)
return self._binaryop(
other=other, fn="gt", fill_value=fill_value, can_reindex=True
)

def __gt__(self, other):
    """Implement ``self > other`` by delegating to ``_binaryop``.

    Unlike ``gt``, the operator form does not opt in to re-indexing
    (``can_reindex`` keeps its default of ``False``), so ``_binaryop``
    may reject a Series operand whose index differs from this one.
    """
    return self._binaryop(other=other, fn="gt")
Expand All @@ -2408,10 +2595,47 @@ def ge(self, other, fill_value=None, axis=0):
fill_value : None or value
Value to fill nulls with before computation. If data in both
corresponding Series locations is null the result will be null
"""
Returns
-------
Series
The result of the operation.
Examples
--------
>>> import cudf
>>> a = cudf.Series([1, 2, 3, None, 10, 20], index=['a', 'c', 'd', 'e', 'f', 'g'])
>>> a
a 1
c 2
d 3
e <NA>
f 10
g 20
dtype: int64
>>> b = cudf.Series([-10, 23, -1, None, None], index=['a', 'b', 'c', 'd', 'e'])
>>> b
a -10
b 23
c -1
d <NA>
e <NA>
dtype: int64
>>> a.ge(b)
a True
b False
c True
d False
e False
f False
g False
dtype: bool
""" # noqa: E501
if axis != 0:
raise NotImplementedError("Only axis=0 supported at this time.")
return self._binaryop(other, "ge", fill_value)
return self._binaryop(
other=other, fn="ge", fill_value=fill_value, can_reindex=True
)

def __ge__(self, other):
    """Implement ``self >= other`` by delegating to ``_binaryop``.

    Unlike ``ge``, the operator form does not opt in to re-indexing
    (``can_reindex`` keeps its default of ``False``), so ``_binaryop``
    may reject a Series operand whose index differs from this one.
    """
    return self._binaryop(other=other, fn="ge")
Expand Down
19 changes: 19 additions & 0 deletions python/cudf/cudf/tests/test_binops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1726,3 +1726,22 @@ def decimal_series(input, dtype):
got = op(a, b)
assert expect.dtype == got.dtype
utils.assert_eq(expect, got)


@pytest.mark.parametrize("fn", ["eq", "ne", "lt", "gt", "le", "ge"])
def test_equality_ops_index_mismatch(fn):
    """Check a named comparison method on Series with differing indexes.

    The two operands have different lengths and only partially
    overlapping labels, and the left one contains nulls. The result of
    calling the method ``fn`` on the cudf Series is compared against the
    result of the same method on their pandas conversions.
    """
    lhs = cudf.Series([1, 2, 3, None, None, 4], index=list("abcdef"))
    rhs = cudf.Series(
        [-5, 4, 3, 2, 1, 0, 19, 11],
        index=["aa", "b", "c", "d", "e", "f", "y", "z"],
    )

    # pandas provides the reference result for the same operands.
    expected = getattr(lhs.to_pandas(), fn)(rhs.to_pandas())
    actual = getattr(lhs, fn)(rhs)

    utils.assert_eq(expected, actual)

0 comments on commit f41c10c

Please sign in to comment.