Skip to content

Commit

Permalink
Add scalar argtypes to udf cache keys (#13194)
Browse files: browse the repository at this point in the history
  • Loading branch information
brandon-b-miller authored Apr 26, 2023
1 parent 5234278 commit bd04975
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 3 deletions.
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/udf/groupby_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def jit_groupby_apply(offsets, grouped_values, function, *args):
ngroups = len(offsets) - 1

cache_key = _generate_cache_key(
grouped_values, function, suffix="__GROUPBY_APPLY_UDF"
grouped_values, function, args, suffix="__GROUPBY_APPLY_UDF"
)

if cache_key not in precompiled:
Expand Down
9 changes: 7 additions & 2 deletions python/cudf/cudf/core/udf/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
column_from_udf_string_array,
column_to_string_view_array,
)
from cudf.api.types import is_scalar
from cudf.core.column.column import as_column
from cudf.core.dtypes import dtype
from cudf.core.udf.masked_typing import MaskedType
Expand Down Expand Up @@ -245,20 +246,22 @@ def _mask_get(mask, pos):
return (mask[pos // MASK_BITSIZE] >> (pos % MASK_BITSIZE)) & 1


def _generate_cache_key(frame, func: Callable, suffix="__APPLY_UDF"):
def _generate_cache_key(frame, func: Callable, args, suffix="__APPLY_UDF"):
    """Build the tuple used to look up a previously compiled UDF kernel.

    The returned key is assembled, in order, from:

    - the entries produced by ``cudautils.make_cache_key`` for ``func``
      and the dtypes reported by ``_all_dtypes_from_frame(frame)``
    - one boolean per column of ``frame`` stating whether that column
      has no mask
    - the column names of ``frame``
    - a tuple holding ``typeof(arg)`` for every element of ``args``
    - ``suffix``, which lets different callers pass distinct values so
      their entries do not collide

    Two calls therefore produce the same key only when all of these
    components compare equal; a change in any one of them, including
    the type of an extra scalar argument, yields a different key.
    """
    # One type per extra argument, so that e.g. an int and a float
    # argument passed to the same UDF do not map to the same key.
    scalar_argtypes = tuple(map(typeof, args))

    column_dtypes = tuple(_all_dtypes_from_frame(frame).values())
    base_key = cudautils.make_cache_key(func, column_dtypes)

    # True where the column carries no mask.
    masks_absent = tuple(col.mask is None for col in frame._data.values())

    return (
        *base_key,
        *masks_absent,
        *frame._data.keys(),
        scalar_argtypes,
        suffix,
    )

Expand All @@ -285,9 +288,11 @@ def _compile_or_get(frame, func, args, kernel_getter=None):
we then obtain the return type from that separate compilation and
use it to allocate an output column of the right dtype.
"""
if not all(is_scalar(arg) for arg in args):
raise TypeError("only scalar valued args are supported by apply")

# check to see if we already compiled this function
cache_key = _generate_cache_key(frame, func)
cache_key = _generate_cache_key(frame, func, args)
if precompiled.get(cache_key) is not None:
kernel, masked_or_scalar = precompiled[cache_key]
return kernel, masked_or_scalar
Expand Down
13 changes: 13 additions & 0 deletions python/cudf/cudf/tests/test_udf_masked_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -775,6 +775,19 @@ def f(x):

assert precompiled.currsize == 1

# validate that changing the type of a scalar arg
# results in a miss
precompiled.clear()

def f(x, c):
return x + c

data.apply(f, args=(1,))
assert precompiled.currsize == 1

data.apply(f, args=(1.5,))
assert precompiled.currsize == 2


@pytest.mark.parametrize(
"data", [[1.0, 0.0, 1.5], [1, 0, 2], [True, False, True]]
Expand Down

0 comments on commit bd04975

Please sign in to comment.