From 1077daeaad8ff710de6f4fbb99f2e7371b4af8de Mon Sep 17 00:00:00 2001 From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com> Date: Thu, 2 Dec 2021 15:51:04 -0600 Subject: [PATCH] Fix caching in `Series.applymap` (#9821) The cache key we were generating for these functions didn't take into account the constants that could be different in the bytecode. Hence certain functions were causing cache hits when they actually differ by a constant value somewhere in the logic. Authors: - https://github.com/brandon-b-miller Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) - Bradley Dice (https://github.com/bdice) - Ashwin Srinath (https://github.com/shwina) URL: https://github.com/rapidsai/cudf/pull/9821 --- python/cudf/cudf/tests/test_udf_masked_ops.py | 19 +++++++++++++++++++ python/cudf/cudf/utils/cudautils.py | 4 +++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py index dc126546f15..c9c2c440632 100644 --- a/python/cudf/cudf/tests/test_udf_masked_ops.py +++ b/python/cudf/cudf/tests/test_udf_masked_ops.py @@ -593,3 +593,22 @@ def func(row, c, k): return y run_masked_udf_test(func, data, args=(1, 2), check_dtype=False) + + +def test_masked_udf_caching(): + # Make sure similar functions that differ + # by simple things like constants actually + # recompile + + data = cudf.Series([1, 2, 3]) + expect = data ** 2 + got = data.applymap(lambda x: x ** 2) + + assert_eq(expect, got, check_dtype=False) + + # update the constant value being used and make sure + # it does not result in a cache hit + + expect = data ** 3 + got = data.applymap(lambda x: x ** 3) + assert_eq(expect, got, check_dtype=False) diff --git a/python/cudf/cudf/utils/cudautils.py b/python/cudf/cudf/utils/cudautils.py index 5fa091a0081..f0533dcaa72 100755 --- a/python/cudf/cudf/utils/cudautils.py +++ b/python/cudf/cudf/utils/cudautils.py @@ -216,12 +216,14 @@ def make_cache_key(udf, sig): recompiling the same function for the same set of types """ codebytes = udf.__code__.co_code + constants = udf.__code__.co_consts if udf.__closure__ is not None: cvars = tuple([x.cell_contents for x in udf.__closure__]) cvarbytes = dumps(cvars) else: cvarbytes = b"" - return codebytes, cvarbytes, sig + + return constants, codebytes, cvarbytes, sig def compile_udf(udf, type_signature):