-
Notifications
You must be signed in to change notification settings - Fork 915
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add tests for pylibcudf binaryops
#15470
Changes from 13 commits
1b482bf
0a46a0f
f5f33e6
61ab85b
ba03539
4911f33
9eba971
562c765
e533469
b81e017
e5f34e7
322a7de
ecbb895
076b83d
e06d5cd
473845f
6e0c816
f0a62a9
77c709f
4e653d6
6968f95
cc8afe9
12a748b
8c44149
959e9a7
367dfe9
849e586
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe we can make a mapping like: {
"ADD": operator.add,
"SUB": operator.sub,
...
"PYMOD": operator.mod,
...
} and use that for testing everything? |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,247 @@ | ||||||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||||||
|
||||||
|
||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
import numpy as np | ||||||
import pyarrow as pa | ||||||
import pytest | ||||||
from utils import assert_column_eq | ||||||
|
||||||
from cudf._lib import pylibcudf as plc | ||||||
|
||||||
|
||||||
def idfn(param):
    """Return a pytest id of the form ``"<lhs>-<rhs>-<out>"``.

    ``param`` must unpack into exactly three items (left type, right type,
    output type); anything else raises ``ValueError`` from the unpacking.
    """
    left, right, out = param
    return "{}-{}-{}".format(left, right, out)
brandon-b-miller marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
|
||||||
|
||||||
@pytest.fixture(params=[True, False], ids=["nulls", "no_nulls"])
def nulls(request):
    """Parametrized flag: yields ``True`` ("nulls") then ``False`` ("no_nulls")."""
    with_nulls = request.param
    return with_nulls
|
||||||
|
||||||
@pytest.fixture
def pa_data(request, nulls):
    """Build the pyarrow inputs for one binaryop test case.

    ``request.param`` is a ``(ltype, rtype, outtype)`` triple. The left and
    right columns are created with ``make_col``; ``outtype`` is passed
    through untouched as the third element of the returned tuple.
    """
    ltype, rtype, outtype = request.param
    lhs = make_col(ltype, nulls)
    rhs = make_col(rtype, nulls)
    return lhs, rhs, outtype
|
||||||
|
||||||
@pytest.fixture
def plc_data(pa_data):
    """Convert the ``pa_data`` triple into its pylibcudf counterparts.

    Both columns go through ``plc.interop.from_arrow``. The output type is
    first turned into a pyarrow type via its numpy dtype and then converted
    the same way.
    """
    lhs, rhs, outtype = pa_data
    plc_lhs = plc.interop.from_arrow(lhs)
    plc_rhs = plc.interop.from_arrow(rhs)
    pa_outtype = pa.from_numpy_dtype(np.dtype(outtype))
    return plc_lhs, plc_rhs, plc.interop.from_arrow(pa_outtype)
|
||||||
|
||||||
def make_col(dtype, nulls):
    """Build a five-element pyarrow array of the requested dtype.

    Parameters
    ----------
    dtype : str
        One of ``"int64"``, ``"uint64"``, ``"float64"``, ``"bool"``,
        ``"timestamp64[ns]"`` (the numpy spelling ``"datetime64[ns]"`` is
        accepted as an alias) or ``"timedelta64[ns]"``.
    nulls : bool
        When true, the element at index 3 is replaced by a null.

    Returns
    -------
    pyarrow.Array
        Array whose pyarrow type matches the requested ``dtype`` name.

    Raises
    ------
    ValueError
        If ``dtype`` is not one of the supported names.
    """
    # The pyarrow type must have the width the dtype name promises;
    # otherwise the column under test is silently 32-bit while the test id
    # and the requested output type say 64-bit.
    if dtype == "int64":
        data = [1, 2, 3, 4, 5]
        pa_type = pa.int64()
    elif dtype == "uint64":
        data = [1, 2, 3, 4, 5]
        pa_type = pa.uint64()
    elif dtype == "float64":
        data = [1.0, 2.0, 3.0, 4.0, 5.0]
        pa_type = pa.float64()
    elif dtype == "bool":
        data = [True, False, True, False, True]
        pa_type = pa.bool_()
    elif dtype in ("timestamp64[ns]", "datetime64[ns]"):
        # Output types in the parametrizations use the numpy name
        # "datetime64[ns]", so accept it here as well.
        data = [
            np.datetime64("2022-01-01"),
            np.datetime64("2022-01-02"),
            np.datetime64("2022-01-03"),
            np.datetime64("2022-01-04"),
            np.datetime64("2022-01-05"),
        ]
        pa_type = pa.timestamp("ns")
    elif dtype == "timedelta64[ns]":
        data = [
            np.timedelta64(1, "ns"),
            np.timedelta64(2, "ns"),
            np.timedelta64(3, "ns"),
            np.timedelta64(4, "ns"),
            np.timedelta64(5, "ns"),
        ]
        pa_type = pa.duration("ns")
    else:
        raise ValueError("Unsupported dtype")

    if nulls:
        data[3] = None

    return pa.array(data, type=pa_type)
|
||||||
|
||||||
def _test_binaryop_inner(pa_data, plc_data, pyop, plc_op):
    """Run one pylibcudf binary operation and compare it with a Python reference.

    Parameters
    ----------
    pa_data : tuple
        ``(lhs, rhs, outtype)`` where ``lhs``/``rhs`` are pyarrow arrays and
        ``outtype`` is the expected output type (a numpy-style dtype string
        or an already-built pyarrow type).
    plc_data : tuple
        The pylibcudf counterparts ``(lhs, rhs, outtype)`` of ``pa_data``.
    pyop : callable
        Element-wise reference ``pyop(x, y)`` applied to the Python values
        of ``lhs`` and ``rhs``.
    plc_op : plc.binaryop.BinaryOperator
        Operator passed to ``plc.binaryop.binary_operation``.

    If ``plc.binaryop.is_supported_operation`` reports the type combination
    as unsupported, the call is only required to raise ``TypeError``.
    """
    lhs_py, rhs_py, outty_py = pa_data
    lhs_plc, rhs_plc, outty_plc = plc_data

    def get_result():
        return plc.binaryop.binary_operation(
            lhs_plc,
            rhs_plc,
            plc_op,
            outty_plc,
        )

    if not plc.binaryop.is_supported_operation(
        outty_plc, lhs_plc.type(), rhs_plc.type(), plc_op
    ):
        with pytest.raises(TypeError):
            get_result()
        return

    # The output type arrives as a numpy-style string (e.g. "datetime64[ns]"),
    # which pyarrow does not necessarily accept as a type alias. Convert it
    # the same way the plc_data fixture does so both sides agree on the type.
    if isinstance(outty_py, str):
        outty_py = pa.from_numpy_dtype(np.dtype(outty_py))

    expect = pa.array(
        [pyop(x, y) for x, y in zip(lhs_py.to_pylist(), rhs_py.to_pylist())],
        type=outty_py,
    )
    got = get_result()
    assert_column_eq(expect, got)
|
||||||
|
||||||
@pytest.mark.parametrize(
    "pa_data",
    [
        ("int64", "int64", "int64"),
        ("int64", "float64", "float64"),
        ("int64", "int64", "datetime64[ns]"),
    ],
    indirect=True,
    ids=idfn,
)
def test_add(pa_data, plc_data):
    """Check ``BinaryOperator.ADD`` against null-propagating Python ``+``."""

    def py_add(x, y):
        # A null on either side yields a null result.
        return None if (x is None or y is None) else x + y

    plc_operator = plc.binaryop.BinaryOperator.ADD
    _test_binaryop_inner(pa_data, plc_data, py_add, plc_operator)
|
||||||
|
||||||
@pytest.mark.parametrize(
    "pa_data",
    [("int64", "int64", "int64"), ("int64", "float64", "float64")],
    indirect=True,
    ids=idfn,
)
def test_sub(pa_data, plc_data):
    """Check ``BinaryOperator.SUB`` against null-propagating Python ``-``."""

    def py_sub(x, y):
        # A null on either side yields a null result.
        return None if (x is None or y is None) else x - y

    plc_operator = plc.binaryop.BinaryOperator.SUB
    _test_binaryop_inner(pa_data, plc_data, py_sub, plc_operator)
|
||||||
|
||||||
@pytest.mark.parametrize(
    "pa_data",
    [("int64", "int64", "int64"), ("int64", "float64", "float64")],
    indirect=True,
    ids=idfn,
)
def test_mul(pa_data, plc_data):
    """Check ``BinaryOperator.MUL`` against null-propagating Python ``*``."""

    def py_mul(x, y):
        # A null on either side yields a null result.
        return None if (x is None or y is None) else x * y

    plc_operator = plc.binaryop.BinaryOperator.MUL
    _test_binaryop_inner(pa_data, plc_data, py_mul, plc_operator)
|
||||||
|
||||||
@pytest.mark.parametrize(
    "pa_data",
    [("int64", "int64", "int64"), ("int64", "float64", "float64")],
    indirect=True,
    ids=idfn,
)
def test_div(pa_data, plc_data):
    """Check ``BinaryOperator.DIV`` against null-propagating Python ``/``."""

    def py_div(x, y):
        # A null on either side yields a null result.
        return None if (x is None or y is None) else x / y

    plc_operator = plc.binaryop.BinaryOperator.DIV
    _test_binaryop_inner(pa_data, plc_data, py_div, plc_operator)
|
||||||
|
||||||
@pytest.mark.parametrize(
    "pa_data",
    [("int64", "int64", "int64"), ("int64", "float64", "float64")],
    indirect=True,
    ids=idfn,
)
def test_floordiv(pa_data, plc_data):
    """Check ``BinaryOperator.FLOOR_DIV`` against null-propagating Python ``//``."""

    def py_floordiv(x, y):
        # A null on either side yields a null result.
        return None if (x is None or y is None) else x // y

    plc_operator = plc.binaryop.BinaryOperator.FLOOR_DIV
    _test_binaryop_inner(pa_data, plc_data, py_floordiv, plc_operator)
|
||||||
|
||||||
@pytest.mark.parametrize(
    "pa_data",
    [("int64", "int64", "int64"), ("int64", "float64", "float64")],
    indirect=True,
    ids=idfn,
)
def test_truediv(pa_data, plc_data):
    """Check ``BinaryOperator.TRUE_DIV`` against null-propagating Python ``/``."""

    def py_truediv(x, y):
        # A null on either side yields a null result.
        return None if (x is None or y is None) else x / y

    plc_operator = plc.binaryop.BinaryOperator.TRUE_DIV
    _test_binaryop_inner(pa_data, plc_data, py_truediv, plc_operator)
|
||||||
|
||||||
@pytest.mark.parametrize(
    "pa_data",
    [("int64", "int64", "int64"), ("int64", "float64", "float64")],
    indirect=True,
    ids=idfn,
)
def test_mod(pa_data, plc_data):
    """Check ``BinaryOperator.MOD`` against null-propagating Python ``%``."""

    def py_mod(x, y):
        # A null on either side yields a null result.
        return None if (x is None or y is None) else x % y

    # NOTE(review): a reviewer pointed out that Python's modulus operator is
    # implemented as PYMOD, not MOD, and that more expansive inputs are
    # probably needed to tell the two apart. Presumably the current inputs
    # cannot differentiate them -- confirm. Reference:
    # https://stackoverflow.com/questions/1907565/c-and-python-different-behaviour-of-the-modulo-operation
    # The reply noted that pylibcudf tests mainly check that the wrappers are
    # implemented correctly and defer kernel correctness to libcudf.
    plc_operator = plc.binaryop.BinaryOperator.MOD
    _test_binaryop_inner(pa_data, plc_data, py_mod, plc_operator)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
See also #16239. That PR exposes reflection for casting.