Skip to content

Commit

Permalink
Add casting operators to masked UDFs (#11578)
Browse files Browse the repository at this point in the history
While working through #11031, we discovered that we were missing the ability to "cast" between the Python built-in types (`int`, `float`, and `bool`) within UDFs. This PR introduces the equivalent syntax into masked UDFs. These operations are interpreted as mapping to the `int64`, `float64`, and `bool` types, following NumPy's and Numba's existing handling of scalar types.

Authors:
  - https://github.com/brandon-b-miller

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Matthew Roeschke (https://github.com/mroeschke)

URL: #11578
  • Loading branch information
brandon-b-miller authored Aug 25, 2022
1 parent ae8e1df commit 096bbc4
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 7 deletions.
40 changes: 33 additions & 7 deletions python/cudf/cudf/core/udf/lowering.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,19 +289,45 @@ def pack_return_scalar_impl(context, builder, sig, args):


@cuda_lower(operator.truth, MaskedType)
def masked_scalar_truth_impl(context, builder, sig, args):
indata = cgutils.create_struct_proxy(MaskedType(types.boolean))(
@cuda_lower(bool, MaskedType)
def masked_scalar_bool_impl(context, builder, sig, args):
indata = cgutils.create_struct_proxy(sig.args[0])(
context, builder, value=args[0]
)
return indata.value
result = cgutils.alloca_once(builder, ir.IntType(1))
with builder.if_else(indata.valid) as (then, otherwise):
with then:
builder.store(
context.cast(
builder,
indata.value,
sig.args[0].value_type,
types.boolean,
),
result,
)
with otherwise:
builder.store(context.get_constant(types.boolean, 0), result)
return builder.load(result)


@cuda_lower(bool, MaskedType)
def masked_scalar_bool_impl(context, builder, sig, args):
indata = cgutils.create_struct_proxy(MaskedType(types.boolean))(
@cuda_lower(float, MaskedType)
@cuda_lower(int, MaskedType)
def masked_scalar_cast_impl(context, builder, sig, args):
input = cgutils.create_struct_proxy(sig.args[0])(
context, builder, value=args[0]
)
return indata.value
result = cgutils.create_struct_proxy(sig.return_type)(context, builder)

casted = context.cast(
builder,
input.value,
sig.args[0].value_type,
sig.return_type.value_type,
)
result.value = casted
result.valid = input.valid
return result._getvalue()


# To handle the unification, we need to support casting from any type to a
Expand Down
30 changes: 30 additions & 0 deletions python/cudf/cudf/core/udf/typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,36 @@ def generic(self, args, kws):
return nb_signature(types.boolean, MaskedType(types.boolean))


@cuda_decl_registry.register_global(float)
class MaskedScalarFloatCast(AbstractTemplate):
    """
    Typing for float(Masked)

    Resolves ``float(x)`` called with exactly one positional ``MaskedType``
    argument to a signature returning ``MaskedType(types.float64)``.
    Any other call shape falls through and returns ``None`` so that this
    template does not claim it.

    TODO: retains the validity of the input rather than
    raising as in float(pd.NA)
    """

    def generic(self, args, kws):
        # Check the arity before indexing: ``float()`` has no args[0], and a
        # call with extra positional or keyword arguments must not be typed
        # with a one-argument signature.
        if len(args) == 1 and not kws and isinstance(args[0], MaskedType):
            # following numpy convention np.dtype(float) -> dtype('float64')
            return nb_signature(MaskedType(types.float64), args[0])


@cuda_decl_registry.register_global(int)
class MaskedScalarIntCast(AbstractTemplate):
    """
    Typing for int(Masked)

    Resolves ``int(x)`` called with exactly one positional ``MaskedType``
    argument to a signature returning ``MaskedType(types.int64)``.
    Any other call shape (no arguments, an extra ``base`` argument,
    keyword arguments) falls through and returns ``None`` so that this
    template does not claim it.

    TODO: retains the validity of the input rather than
    raising as in int(pd.NA)
    """

    def generic(self, args, kws):
        # Check the arity before indexing: ``int()`` has no args[0], and
        # ``int(x, base)`` / ``int(x, base=...)`` must not be typed with a
        # one-argument signature.
        if len(args) == 1 and not kws and isinstance(args[0], MaskedType):
            # following numpy convention np.dtype(int) -> dtype('int64')
            return nb_signature(MaskedType(types.int64), args[0])


@cuda_decl_registry.register_global(api.pack_return)
class UnpackReturnToMasked(AbstractTemplate):
"""
Expand Down
13 changes: 13 additions & 0 deletions python/cudf/cudf/tests/test_udf_masked_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -673,3 +673,16 @@ def f(x):
data.apply(f)

assert precompiled.currsize == 1


@pytest.mark.parametrize(
    "data", [[1.0, 0.0, 1.5], [1, 0, 2], [True, False, True]]
)
@pytest.mark.parametrize("operator", [float, int, bool])
def test_masked_udf_casting(operator, data):
    """
    Run each of ``float``, ``int`` and ``bool`` as a masked UDF over
    float, integer and boolean input data via ``run_masked_udf_series``.
    """
    series = cudf.Series(data)

    # The UDF under test simply applies the parametrized builtin to its
    # argument; dtype checking is disabled in the helper call below.
    def cast_value(x):
        return operator(x)

    run_masked_udf_series(cast_value, series, check_dtype=False)

0 comments on commit 096bbc4

Please sign in to comment.