Skip to content

Commit

Permalink
Fix typo and 0-d numpy array handling in binary operation (#6887)
Browse files Browse the repository at this point in the history
Fixes a typo and the handling of 0-d numpy arrays. When a numpy scalar is used on the left-hand side of a binary operation, `__eq__` from numpy returns a 0-d array rather than a scalar.

closes #6778

Authors:
  - Ramakrishna Prabhu <[email protected]>
  - Ram (Ramakrishna Prabhu) <[email protected]>

Approvers:
  - Keith Kraus

URL: #6887
  • Loading branch information
rgsl888prabhu authored Dec 3, 2020
1 parent f137ed1 commit 5336301
Show file tree
Hide file tree
Showing 8 changed files with 56 additions and 3 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@
- PR #6861 Fix compile error in type_dispatch_benchmark.cu
- PR #6869 Avoid dependency resolution failure in latest version of pip by explicitly specifying versions for dask and distributed
- PR #6806 Force install of local conda artifacts
- PR #6887 Fix typo and `0-d` numpy array handling in binary operation

# cuDF 0.16.0 (21 Oct 2020)

Expand Down
3 changes: 3 additions & 0 deletions python/cudf/cudf/core/column/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -890,6 +890,9 @@ def binary_operator(self, op, rhs, reflect=False):

def normalize_binop_value(self, other):

if isinstance(other, np.ndarray) and other.ndim == 0:
other = other.item()

ary = cudf.utils.utils.scalar_broadcast_to(
self._encode(other), size=len(self), dtype=self.codes.dtype
)
Expand Down
6 changes: 5 additions & 1 deletion python/cudf/cudf/core/column/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,11 @@ def get_dt_field(self, field):
def normalize_binop_value(self, other):
if isinstance(other, cudf.Scalar):
return other
elif isinstance(other, dt.datetime):

if isinstance(other, np.ndarray) and other.ndim == 0:
other = other.item()

if isinstance(other, dt.datetime):
other = np.datetime64(other)
elif isinstance(other, dt.timedelta):
other = np.timedelta64(other)
Expand Down
2 changes: 2 additions & 0 deletions python/cudf/cudf/core/column/numerical.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ def normalize_binop_value(self, other):
# expensive device-host transfer just to
# adjust the dtype
other = other.value
elif isinstance(other, np.ndarray) and other.ndim == 0:
other = other.item()
other_dtype = np.min_scalar_type(other)
if other_dtype.kind in {"b", "i", "u", "f"}:
if isinstance(other, cudf.Scalar):
Expand Down
5 changes: 5 additions & 0 deletions python/cudf/cudf/core/column/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -5014,6 +5014,11 @@ def normalize_binop_value(self, other):
other, size=len(self), dtype="object"
)
return col
elif isinstance(other, np.ndarray) and other.ndim == 0:
col = utils.scalar_broadcast_to(
other.item(), size=len(self), dtype="object"
)
return col
else:
raise TypeError(f"cannot broadcast {type(other)}")

Expand Down
6 changes: 5 additions & 1 deletion python/cudf/cudf/core/column/timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,11 @@ def binary_operator(self, op, rhs, reflect=False):
def normalize_binop_value(self, other):
if isinstance(other, cudf.Scalar):
return other
elif isinstance(other, dt.timedelta):

if isinstance(other, np.ndarray) and other.ndim == 0:
other = other.item()

if isinstance(other, dt.timedelta):
other = np.timedelta64(other)
elif isinstance(other, pd.Timestamp):
other = other.to_datetime64()
Expand Down
4 changes: 3 additions & 1 deletion python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1371,7 +1371,9 @@ def fallback(col, fn):
l_opr = self[col]
result[col] = op(l_opr, r_opr)

elif isinstance(other, (numbers.Number, cudf.Scalar)):
elif isinstance(other, (numbers.Number, cudf.Scalar)) or (
isinstance(other, np.ndarray) and other.ndim == 0
):
for col in self._data:
result[col] = op(self[col], other)
else:
Expand Down
32 changes: 32 additions & 0 deletions python/cudf/cudf/tests/test_binops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1459,3 +1459,35 @@ def test_scalar_power_invalid(dtype_l, dtype_r):

with pytest.raises(TypeError):
lval_gpu ** rval_gpu


@pytest.mark.parametrize("frame", [cudf.Series, cudf.Index, cudf.DataFrame])
@pytest.mark.parametrize(
    "dtype", ["int", "str", "datetime64[s]", "timedelta64[s]", "category"]
)
def test_binops_with_lhs_numpy_scalar(frame, dtype):
    """Check ``numpy_scalar == cudf_object`` against the pandas result.

    Parameters
    ----------
    frame : type
        The cudf container class under test (``cudf.Series``,
        ``cudf.Index`` or ``cudf.DataFrame``).
    dtype : str
        The dtype used to build both the container and the numpy scalar
        placed on the left-hand side of ``==``.
    """
    data = [1, 2, 3, 4, 5]

    # ``frame`` is the class itself, not an instance, so it has to be
    # compared by identity: ``isinstance(frame, cudf.DataFrame)`` is always
    # False and would silently skip the dict-based DataFrame construction.
    data = (
        frame({"a": data}, dtype=dtype)
        if frame is cudf.DataFrame
        else frame(data, dtype=dtype)
    )

    if dtype in ("datetime64[s]", "timedelta64[s]"):
        # Both time-like scalar types take an explicit unit argument.
        val = np.dtype(dtype).type(4, "s")
    elif dtype == "category":
        # A plain integer numpy scalar is compared against the categories.
        val = np.int64(4)
    else:
        val = np.dtype(dtype).type(4)

    expected = val == data.to_pandas()
    got = val == data

    # In case of index, expected would be a numpy array
    if isinstance(data, cudf.Index):
        expected = pd.Index(expected)

    utils.assert_eq(expected, got)

0 comments on commit 5336301

Please sign in to comment.