diff --git a/python/cudf/cudf/core/udf/__init__.py b/python/cudf/cudf/core/udf/__init__.py
index 8421d763167..8092207e037 100644
--- a/python/cudf/cudf/core/udf/__init__.py
+++ b/python/cudf/cudf/core/udf/__init__.py
@@ -1,5 +1,7 @@
 # Copyright (c) 2022, NVIDIA CORPORATION.
 
+from functools import lru_cache
+
 from numba import types
 from numba.cuda.cudaimpl import lower as cuda_lower
 
@@ -21,6 +23,8 @@
 )
 _STRING_UDFS_ENABLED = False
 cudf_str_dtype = dtype(str)
+
+
 try:
     import strings_udf
     from strings_udf import ptxpath
@@ -47,7 +51,18 @@
         utils.JIT_SUPPORTED_TYPES |= STRING_TYPES
         _supported_masked_types |= {string_view, udf_string}
 
-        utils.launch_arg_getters[cudf_str_dtype] = column_to_string_view_array
+        @lru_cache(maxsize=None)
+        def set_initial_malloc_heap_size():
+            strings_udf.set_malloc_heap_size()
+
+        def column_to_string_view_array_init_heap(col):
+            # lazily allocate heap only when a string needs to be returned
+            set_initial_malloc_heap_size()
+            return column_to_string_view_array(col)
+
+        utils.launch_arg_getters[
+            cudf_str_dtype
+        ] = column_to_string_view_array_init_heap
         utils.output_col_getters[cudf_str_dtype] = column_from_udf_string_array
         utils.masked_array_types[cudf_str_dtype] = string_view
         row_function.itemsizes[cudf_str_dtype] = string_view.size_bytes
diff --git a/python/strings_udf/strings_udf/__init__.py b/python/strings_udf/strings_udf/__init__.py
index 2222fb72009..bf13b79ab90 100644
--- a/python/strings_udf/strings_udf/__init__.py
+++ b/python/strings_udf/strings_udf/__init__.py
@@ -3,6 +3,7 @@
 import os
 
 from cubinlinker.patch import _numba_version_ok, get_logger, new_patched_linker
+from cuda import cudart
 from numba import cuda
 from numba.cuda.cudadrv.driver import Linker
 from ptxcompiler.patch import NO_DRIVER, safe_get_versions
@@ -87,6 +88,41 @@ def _get_ptx_file():
     return regular_result[1]
 
 
+# Maximum size of a string column is 2 GiB
+_STRINGS_UDF_DEFAULT_HEAP_SIZE = os.environ.get(
+    "STRINGS_UDF_HEAP_SIZE", 2**31
+)
+heap_size = 0
+
+
+def set_malloc_heap_size(size=None):
+    """
+    Heap size control for strings_udf, size in bytes.
+
+    If ``size`` is None, the ``STRINGS_UDF_HEAP_SIZE`` environment
+    variable is used, falling back to 2**31 when it is unset. The
+    CUDA limit is only set when the size differs from the one
+    recorded by the last successful call.
+
+    Raises RuntimeError if ``cudaDeviceSetLimit`` reports an error.
+    """
+    global heap_size
+    if size is None:
+        size = _STRINGS_UDF_DEFAULT_HEAP_SIZE
+    # os.environ values are strings, so the default is a str whenever
+    # STRINGS_UDF_HEAP_SIZE is set; convert before comparing and
+    # before handing the value to the CUDA runtime.
+    size = int(size)
+    if size != heap_size:
+        (ret,) = cudart.cudaDeviceSetLimit(
+            cudart.cudaLimit.cudaLimitMallocHeapSize, size
+        )
+        if ret.value != 0:
+            raise RuntimeError("Unable to set cudaMalloc heap size")
+
+        heap_size = size
+
+
 ptxpath = None
 versions = safe_get_versions()
 if versions != NO_DRIVER: