Skip to content

Commit

Permalink
Fix failures when performing binary operations on DataFrames with empty columns (#8452)
Browse files Browse the repository at this point in the history

Closes #8434

Authors:
  - https://github.com/ChrisJar

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Charles Blackmon-Luca (https://github.com/charlesbluca)

URL: #8452
  • Loading branch information
ChrisJar authored Jul 20, 2021
1 parent 67b7aac commit 5e0e9a3
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 0 deletions.
24 changes: 24 additions & 0 deletions python/cudf/cudf/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3401,6 +3401,30 @@ def _colwise_binop(
col,
(left_column, right_column, reflect, fill_value),
) in operands.items():

# Handle object columns that are empty or
# all nulls when performing binary operations
if (
left_column.dtype == "object"
and left_column.null_count == len(left_column)
and fill_value is None
):
if fn in (
"add",
"sub",
"mul",
"mod",
"pow",
"truediv",
"floordiv",
):
output[col] = left_column
elif fn in ("eq", "lt", "le", "gt", "ge"):
output[col] = left_column.notnull()
elif fn == "ne":
output[col] = left_column.isnull()
continue

if right_column is cudf.NA:
right_column = cudf.Scalar(
right_column, dtype=left_column.dtype
Expand Down
25 changes: 25 additions & 0 deletions python/cudf/cudf/tests/test_binops.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,15 @@
operator.pow,
]

# Comparison operators from the ``operator`` module, in the order
# eq, ne, lt, le, gt, ge.
_binops_compare = [
    getattr(operator, op_name)
    for op_name in ("eq", "ne", "lt", "le", "gt", "ge")
]


@pytest.mark.parametrize("obj_class", ["Series", "Index"])
@pytest.mark.parametrize("binop", _binops)
Expand Down Expand Up @@ -2888,3 +2897,19 @@ def test_binops_non_cudf_types(obj_class, binop, other_type):
lhs = obj_class(data)
rhs = other_type(data)
assert cp.all((binop(lhs, rhs) == binop(lhs, lhs)).values)


@pytest.mark.parametrize("binop", _binops + _binops_compare)
@pytest.mark.parametrize("data", [None, [-9, 7], [5, -2], [12, 18]])
@pytest.mark.parametrize("scalar", [1, 3, 12, np.nan])
def test_empty_column(binop, data, scalar):
    """Compare a DataFrame-with-scalar binop between cudf and pandas.

    The cudf frame is created with columns ``"a"`` and ``"b"`` and no
    data. When ``data`` is not ``None`` it is assigned to column ``"a"``;
    column ``"b"`` is never assigned. The frame is converted to pandas,
    ``binop`` is applied with ``scalar`` to both frames, and the two
    results are checked with ``utils.assert_eq``.
    """
    cudf_frame = cudf.DataFrame(columns=["a", "b"])
    if data is not None:
        cudf_frame["a"] = data

    pandas_frame = cudf_frame.to_pandas()

    # Evaluate the cudf result first, then the pandas reference.
    cudf_result = binop(cudf_frame, scalar)
    pandas_result = binop(pandas_frame, scalar)

    utils.assert_eq(pandas_result, cudf_result)

0 comments on commit 5e0e9a3

Please sign in to comment.