
Allow setting malloc heap size in string udfs #12094

Merged 16 commits on Nov 17, 2022
Changes from 14 commits
22 changes: 21 additions & 1 deletion python/cudf/cudf/core/udf/__init__.py
@@ -1,5 +1,7 @@
# Copyright (c) 2022, NVIDIA CORPORATION.

from functools import lru_cache

from numba import types
from numba.cuda.cudaimpl import lower as cuda_lower

@@ -21,6 +23,13 @@
)
_STRING_UDFS_ENABLED = False
cudf_str_dtype = dtype(str)


@lru_cache(maxsize=None)
def set_initial_malloc_heap_size():
    strings_udf.set_malloc_heap_size()


try:
import strings_udf
from strings_udf import ptxpath
@@ -47,7 +56,18 @@
utils.JIT_SUPPORTED_TYPES |= STRING_TYPES
_supported_masked_types |= {string_view, udf_string}

utils.launch_arg_getters[cudf_str_dtype] = column_to_string_view_array
@lru_cache(maxsize=None)
def set_initial_malloc_heap_size():
    strings_udf.set_malloc_heap_size()

def column_to_string_view_array_init_heap(col):
    # lazily allocate heap only when a string needs to be returned
Contributor:

The structure of this code makes it look like idempotency is its main goal, not laziness.

strings_udf.set_malloc_heap_size() should only ever be called once, I think?

Maybe this function or module in cudf (and not strings_udf) should own the “is it allocated already?” logic via a cache or global of some kind, and then the strings_udf logic doesn’t need to read env vars for the allocation size, etc.
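
For illustration only, one minimal shape of that "cudf owns the already-applied logic" idea, using a hypothetical module-level flag (not code from this PR; the thread below converges on a cached-function variant):

import strings_udf

_heap_size_applied = False

def _ensure_heap_size():
    # cudf-side guard so strings_udf.set_malloc_heap_size() runs at most once per process
    global _heap_size_applied
    if not _heap_size_applied:
        strings_udf.set_malloc_heap_size()
        _heap_size_applied = True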

Contributor:

This is not an allocation. There is already a default malloc heap size (about 8MB on the GPUs I've tested on). This call updates the heap size to a new value, so "already allocated" is not the goal.
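
As a hedged illustration of that point (not part of the PR), the limit can be read and updated through the cuda-python bindings used in the diff below; nothing is allocated eagerly:

from cuda import cudart

# Read the current device malloc heap limit (a few MB by default on many GPUs).
err, current = cudart.cudaDeviceGetLimit(cudart.cudaLimit.cudaLimitMallocHeapSize)

# Update the limit; this replaces the previous value rather than allocating memory.
(err,) = cudart.cudaDeviceSetLimit(cudart.cudaLimit.cudaLimitMallocHeapSize, 2**28)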

Contributor:

Is set_malloc_heap_size intended to be a public API that can be called by a user, or is it an internal function? It's not entirely clear what the public API is here because this file doesn't define an __all__. If the function is just for internal use, why does it need to be called every time someone accesses a string column rather than just calling cudaDeviceSetLimit once when the package is imported (or after we can guarantee that the CUDA context/etc is initialized if that is the requirement that we're currently addressing by doing this "lazily")?
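
A rough sketch of the "set it once" alternative being asked about here, assuming numba.cuda is used to guarantee a CUDA context first (hypothetical helper name; not code from this PR):

from cuda import cudart
from numba import cuda

def _set_heap_size_once_at_import(size=2**31):
    # ensure a CUDA context exists before touching device limits
    cuda.current_context()
    (err,) = cudart.cudaDeviceSetLimit(
        cudart.cudaLimit.cudaLimitMallocHeapSize, size
    )
    if err.value != 0:
        raise RuntimeError("Unable to set cudaMalloc heap size")

# would be called a single time, e.g. at the end of the package's __init__.py
_set_heap_size_once_at_import()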

Contributor Author (@brandon-b-miller, Nov 16, 2022):

I think what we want to happen is:

  • The first time a user runs a string_udf, set the heap size to either default or what is retrieved from their environment.
  • The next time a user runs a string_udf, do not set the heap size. Or at least early return.
  • Reset the heap the next time the user runs a string_udf iff the user sets the value of STRINGS_UDF_HEAP_SIZE to something else during the session.

Does this seem reasonable?
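
A rough sketch of that proposed behavior (hypothetical helper; not the code that was eventually merged): apply the size on first use, early-return afterwards, and re-apply only if STRINGS_UDF_HEAP_SIZE changes during the session:

import os

import strings_udf

_last_applied_size = None

def _maybe_set_heap_size():
    global _last_applied_size
    requested = int(os.environ.get("STRINGS_UDF_HEAP_SIZE", 2**31))
    if requested == _last_applied_size:
        return  # already applied; nothing to do
    strings_udf.set_malloc_heap_size(requested)
    _last_applied_size = requested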

Contributor:

Seems we have 2 comment chains asking about the same thing.
#12094 (comment)

Contributor:

  • The first time a user runs a string_udf, set the heap size to either default or what is retrieved from their environment.

Yes.

  • The next time a user runs a string_udf, do not set the heap size. Or at least early return.

The "set" function should not be called. Avoid designs that try to set but exit early.

  • Reset the heap the next time the user runs a string_udf iff the user sets the value of STRINGS_UDF_HEAP_SIZE to something else during the session.

No. The STRINGS_UDF_HEAP_SIZE variable should not be modifiable (I argued separately that that variable should not exist at all). If the user needs to be able to set a specific size, then the "set" function should be public and the user should be able to call it explicitly with immediate effect (not waiting until the next UDF execution, because memory demands can change in the meantime).
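
What that explicit, immediate-effect call could look like from user code, under the assumption that set_malloc_heap_size is treated as public API (usage sketch only):

import strings_udf

# Takes effect right away, independent of when the next UDF runs (1 GiB here).
strings_udf.set_malloc_heap_size(2**30)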

Contributor Author:

With the changes in af7cd9f I still have heap_size available as a module-level attribute of strings_udf. What do you think about checking if heap_size == 0 within the getter to determine whether the function needs to be called or not?
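
A sketch of that heap_size == 0 check, assuming column_to_string_view_array is in scope as in the diff above (hypothetical wrapper; the thread settles on the cached approach suggested next):

import strings_udf

def column_to_string_view_array_init_heap(col):
    # only call the setter if no size has been applied yet in this process
    if strings_udf.heap_size == 0:
        strings_udf.set_malloc_heap_size()
    return column_to_string_view_array(col)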

Contributor (@bdice, Nov 16, 2022):

You could use a cached function for this instead of a global that tracks whether the function has already been run. Following ideas from https://mail.python.org/archives/list/[email protected]/thread/5OR3LJO7LOL6SC4OOGKFIVNNH4KADBPG/

from functools import lru_cache

# Only executes the body of the function on the first call
once = lru_cache(maxsize=None)

@once
def set_initial_malloc_heap_size():
    strings_udf.set_malloc_heap_size()

def column_to_string_view_array_init_heap(col):
    set_initial_malloc_heap_size()
    return column_to_string_view_array(col)

Contributor Author:

Thanks, this worked perfectly!

    set_initial_malloc_heap_size()
    return column_to_string_view_array(col)

utils.launch_arg_getters[
    cudf_str_dtype
] = column_to_string_view_array_init_heap
utils.output_col_getters[cudf_str_dtype] = column_from_udf_string_array
utils.masked_array_types[cudf_str_dtype] = string_view
row_function.itemsizes[cudf_str_dtype] = string_view.size_bytes
25 changes: 25 additions & 0 deletions python/strings_udf/strings_udf/__init__.py
@@ -3,6 +3,7 @@
import os

from cubinlinker.patch import _numba_version_ok, get_logger, new_patched_linker
from cuda import cudart
from numba import cuda
from numba.cuda.cudadrv.driver import Linker
from ptxcompiler.patch import NO_DRIVER, safe_get_versions
@@ -87,6 +88,30 @@ def _get_ptx_file():
return regular_result[1]


# Maximum size of a string column is 2gb
_STRINGS_UDF_DEFAULT_HEAP_SIZE = os.environ.get(
    "STRINGS_UDF_HEAP_SIZE", 2**31
)
heap_size = 0


def set_malloc_heap_size(size=None):
    """
    Heap size control for strings_udf, size in bytes.
    """
    global heap_size
    if size is None:
        size = _STRINGS_UDF_DEFAULT_HEAP_SIZE
    if size != heap_size:
        (ret,) = cudart.cudaDeviceSetLimit(
            cudart.cudaLimit.cudaLimitMallocHeapSize, size
        )
        if ret.value != 0:
            raise RuntimeError("Unable to set cudaMalloc heap size")

        heap_size = size


ptxpath = None
versions = safe_get_versions()
if versions != NO_DRIVER: