From 22bfac993193cebca1d32a21524847fc7da38838 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 8 Nov 2022 10:23:23 -0800 Subject: [PATCH 01/11] add lazy mechanism for setting the heap size --- python/cudf/cudf/core/udf/__init__.py | 9 ++++++++- python/strings_udf/strings_udf/__init__.py | 21 +++++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/udf/__init__.py b/python/cudf/cudf/core/udf/__init__.py index 926d2ea6cbf..f096a59a719 100644 --- a/python/cudf/cudf/core/udf/__init__.py +++ b/python/cudf/cudf/core/udf/__init__.py @@ -43,7 +43,14 @@ utils.JIT_SUPPORTED_TYPES |= STRING_TYPES _supported_masked_types |= {string_view} - utils.launch_arg_getters[cudf_str_dtype] = column_to_string_view_array + def column_to_string_view_array_init_heap(col): + # lazily allocate heap only when a string needs to be returned + strings_udf.set_malloc_heap_size() + return column_to_string_view_array(col) + + utils.launch_arg_getters[ + cudf_str_dtype + ] = column_to_string_view_array_init_heap utils.output_col_getters[cudf_str_dtype] = column_from_udf_string_array utils.masked_array_types[cudf_str_dtype] = string_view row_function.itemsizes[cudf_str_dtype] = string_view.size_bytes diff --git a/python/strings_udf/strings_udf/__init__.py b/python/strings_udf/strings_udf/__init__.py index 2222fb72009..0a0dae33306 100644 --- a/python/strings_udf/strings_udf/__init__.py +++ b/python/strings_udf/strings_udf/__init__.py @@ -3,6 +3,7 @@ import os from cubinlinker.patch import _numba_version_ok, get_logger, new_patched_linker +from cuda import cudart from numba import cuda from numba.cuda.cudadrv.driver import Linker from ptxcompiler.patch import NO_DRIVER, safe_get_versions @@ -87,6 +88,26 @@ def _get_ptx_file(): return regular_result[1] +default_heap_size = int(2e6) +heap_size = 0 + + +def set_malloc_heap_size(size=default_heap_size): + """ + Heap size control for strings_udf, size in bytes. + """ + global heap_size + if size == heap_size: + return + else: + (ret,) = cudart.cudaDeviceSetLimit(cudart.cudaLimit(2), size) + if ret.value != 0: + breakpoint() + raise RuntimeError("Unable to set cudaMalloc heap size") + + heap_size = size + + ptxpath = None versions = safe_get_versions() if versions != NO_DRIVER: From e7cc530c46b8a1106a39ad5184621d15bcb0d9f2 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 14 Nov 2022 07:22:19 -0800 Subject: [PATCH 02/11] introduce env var to control heap, cleanup --- python/strings_udf/strings_udf/__init__.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/python/strings_udf/strings_udf/__init__.py b/python/strings_udf/strings_udf/__init__.py index 0a0dae33306..b21c89f886f 100644 --- a/python/strings_udf/strings_udf/__init__.py +++ b/python/strings_udf/strings_udf/__init__.py @@ -88,11 +88,15 @@ def _get_ptx_file(): return regular_result[1] -default_heap_size = int(2e6) +# Maximum size of a string column is 2gb +STRINGS_UDF_DEFAULT_HEAP_SIZE = int(2e9) +STRINGS_UDF_HEAP_SIZE = os.environ.get( + "STRINGS_UDF_HEAP_SIZE", STRINGS_UDF_DEFAULT_HEAP_SIZE +) heap_size = 0 -def set_malloc_heap_size(size=default_heap_size): +def set_malloc_heap_size(size=STRINGS_UDF_DEFAULT_HEAP_SIZE): """ Heap size control for strings_udf, size in bytes. """ @@ -102,7 +106,6 @@ def set_malloc_heap_size(size=default_heap_size): else: (ret,) = cudart.cudaDeviceSetLimit(cudart.cudaLimit(2), size) if ret.value != 0: - breakpoint() raise RuntimeError("Unable to set cudaMalloc heap size") heap_size = size From 80459c5755108622116f94110cb567e8b9834cd2 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 14 Nov 2022 07:30:43 -0800 Subject: [PATCH 03/11] fix logic error --- python/strings_udf/strings_udf/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/strings_udf/strings_udf/__init__.py b/python/strings_udf/strings_udf/__init__.py index b21c89f886f..bd420d53765 100644 --- a/python/strings_udf/strings_udf/__init__.py +++ b/python/strings_udf/strings_udf/__init__.py @@ -96,7 +96,7 @@ def _get_ptx_file(): heap_size = 0 -def set_malloc_heap_size(size=STRINGS_UDF_DEFAULT_HEAP_SIZE): +def set_malloc_heap_size(size=STRINGS_UDF_HEAP_SIZE): """ Heap size control for strings_udf, size in bytes. """ From efff6c7c6a999b9ff97b1d1b3f5e7db37273cb83 Mon Sep 17 00:00:00 2001 From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com> Date: Wed, 16 Nov 2022 09:01:06 -0600 Subject: [PATCH 04/11] Apply suggestions from code review Co-authored-by: Bradley Dice --- python/strings_udf/strings_udf/__init__.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/python/strings_udf/strings_udf/__init__.py b/python/strings_udf/strings_udf/__init__.py index bd420d53765..dd5be6fac99 100644 --- a/python/strings_udf/strings_udf/__init__.py +++ b/python/strings_udf/strings_udf/__init__.py @@ -89,7 +89,7 @@ def _get_ptx_file(): # Maximum size of a string column is 2gb -STRINGS_UDF_DEFAULT_HEAP_SIZE = int(2e9) +STRINGS_UDF_DEFAULT_HEAP_SIZE = 2**31 STRINGS_UDF_HEAP_SIZE = os.environ.get( "STRINGS_UDF_HEAP_SIZE", STRINGS_UDF_DEFAULT_HEAP_SIZE ) @@ -101,10 +101,8 @@ def set_malloc_heap_size(size=STRINGS_UDF_HEAP_SIZE): Heap size control for strings_udf, size in bytes. """ global heap_size - if size == heap_size: - return - else: - (ret,) = cudart.cudaDeviceSetLimit(cudart.cudaLimit(2), size) + if size != heap_size: + (ret,) = cudart.cudaDeviceSetLimit(cudart.cudaLimit.cudaLimitMallocHeapSize, size) if ret.value != 0: raise RuntimeError("Unable to set cudaMalloc heap size") From 8949fcb4d4894e91758918180d9d2db16bb3e16d Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 16 Nov 2022 07:20:08 -0800 Subject: [PATCH 05/11] style fix --- python/strings_udf/strings_udf/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/strings_udf/strings_udf/__init__.py b/python/strings_udf/strings_udf/__init__.py index dd5be6fac99..f22ca773446 100644 --- a/python/strings_udf/strings_udf/__init__.py +++ b/python/strings_udf/strings_udf/__init__.py @@ -102,7 +102,9 @@ def set_malloc_heap_size(size=STRINGS_UDF_HEAP_SIZE): """ global heap_size if size != heap_size: - (ret,) = cudart.cudaDeviceSetLimit(cudart.cudaLimit.cudaLimitMallocHeapSize, size) + (ret,) = cudart.cudaDeviceSetLimit( + cudart.cudaLimit.cudaLimitMallocHeapSize, size + ) if ret.value != 0: raise RuntimeError("Unable to set cudaMalloc heap size") From af7cd9f85b2afa0a6b92f53e4abffbb538c9d02f Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 16 Nov 2022 13:00:33 -0800 Subject: [PATCH 06/11] adjust logic --- python/strings_udf/strings_udf/__init__.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/python/strings_udf/strings_udf/__init__.py b/python/strings_udf/strings_udf/__init__.py index f22ca773446..4178971468a 100644 --- a/python/strings_udf/strings_udf/__init__.py +++ b/python/strings_udf/strings_udf/__init__.py @@ -89,18 +89,20 @@ def _get_ptx_file(): # Maximum size of a string column is 2gb -STRINGS_UDF_DEFAULT_HEAP_SIZE = 2**31 -STRINGS_UDF_HEAP_SIZE = os.environ.get( - "STRINGS_UDF_HEAP_SIZE", STRINGS_UDF_DEFAULT_HEAP_SIZE -) +_STRINGS_UDF_DEFAULT_HEAP_SIZE = 2**31 heap_size = 0 -def set_malloc_heap_size(size=STRINGS_UDF_HEAP_SIZE): +def set_malloc_heap_size(size=None): """ Heap size control for strings_udf, size in bytes. """ global heap_size + if size == None: + size = os.environ.get( + "STRINGS_UDF_HEAP_SIZE", _STRINGS_UDF_DEFAULT_HEAP_SIZE + ) + if size != heap_size: (ret,) = cudart.cudaDeviceSetLimit( cudart.cudaLimit.cudaLimitMallocHeapSize, size From 889aba934fb53f72a8d335fc2c9cc33d89bde095 Mon Sep 17 00:00:00 2001 From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com> Date: Wed, 16 Nov 2022 15:39:30 -0600 Subject: [PATCH 07/11] Update python/strings_udf/strings_udf/__init__.py Co-authored-by: Bradley Dice --- python/strings_udf/strings_udf/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/strings_udf/strings_udf/__init__.py b/python/strings_udf/strings_udf/__init__.py index 4178971468a..e5dc9972e59 100644 --- a/python/strings_udf/strings_udf/__init__.py +++ b/python/strings_udf/strings_udf/__init__.py @@ -98,7 +98,7 @@ def set_malloc_heap_size(size=None): Heap size control for strings_udf, size in bytes. """ global heap_size - if size == None: + if size is None: size = os.environ.get( "STRINGS_UDF_HEAP_SIZE", _STRINGS_UDF_DEFAULT_HEAP_SIZE ) From 95e330ba2549e6dd2589a0fe62a06ece46accca2 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 16 Nov 2022 13:40:13 -0800 Subject: [PATCH 08/11] define _STRINGS_UDF_DEFAULT_HEAP_SIZE at init --- python/strings_udf/strings_udf/__init__.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/python/strings_udf/strings_udf/__init__.py b/python/strings_udf/strings_udf/__init__.py index e5dc9972e59..8e0abcab7a4 100644 --- a/python/strings_udf/strings_udf/__init__.py +++ b/python/strings_udf/strings_udf/__init__.py @@ -89,7 +89,9 @@ def _get_ptx_file(): # Maximum size of a string column is 2gb -_STRINGS_UDF_DEFAULT_HEAP_SIZE = 2**31 +_STRINGS_UDF_DEFAULT_HEAP_SIZE = os.environ.get( + "STRINGS_UDF_HEAP_SIZE", 2**31 +) heap_size = 0 @@ -99,10 +101,7 @@ def set_malloc_heap_size(size=None): """ global heap_size if size is None: - size = os.environ.get( - "STRINGS_UDF_HEAP_SIZE", _STRINGS_UDF_DEFAULT_HEAP_SIZE - ) - + size = _STRINGS_UDF_DEFAULT_HEAP_SIZE if size != heap_size: (ret,) = cudart.cudaDeviceSetLimit( cudart.cudaLimit.cudaLimitMallocHeapSize, size From 9bda9d6a0a2220ad8c2a634170d556f0d2efc179 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 16 Nov 2022 13:50:22 -0800 Subject: [PATCH 09/11] use an lru cache --- python/cudf/cudf/core/udf/__init__.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/udf/__init__.py b/python/cudf/cudf/core/udf/__init__.py index 23c7d88d992..3d2f7a8cff3 100644 --- a/python/cudf/cudf/core/udf/__init__.py +++ b/python/cudf/cudf/core/udf/__init__.py @@ -1,5 +1,7 @@ # Copyright (c) 2022, NVIDIA CORPORATION. +from functools import lru_cache + from numba import types from numba.cuda.cudaimpl import lower as cuda_lower @@ -21,6 +23,13 @@ ) _STRING_UDFS_ENABLED = False cudf_str_dtype = dtype(str) + + +@lru_cache(maxsize=None) +def set_initial_malloc_heap_size(): + strings_udf.set_malloc_heap_size() + + try: import strings_udf from strings_udf import ptxpath @@ -47,9 +56,13 @@ utils.JIT_SUPPORTED_TYPES |= STRING_TYPES _supported_masked_types |= {string_view, udf_string} + @lru_cache(maxsize=None) + def set_initial_malloc_heap_size(): + strings_udf.set_malloc_heap_size() + def column_to_string_view_array_init_heap(col): # lazily allocate heap only when a string needs to be returned - strings_udf.set_malloc_heap_size() + set_initial_malloc_heap_size() return column_to_string_view_array(col) utils.launch_arg_getters[ From 594104800980737d529de87565bf740cd361baa3 Mon Sep 17 00:00:00 2001 From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com> Date: Thu, 17 Nov 2022 09:00:19 -0600 Subject: [PATCH 10/11] Update python/strings_udf/strings_udf/__init__.py Co-authored-by: Bradley Dice --- python/strings_udf/strings_udf/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/strings_udf/strings_udf/__init__.py b/python/strings_udf/strings_udf/__init__.py index 8e0abcab7a4..bf13b79ab90 100644 --- a/python/strings_udf/strings_udf/__init__.py +++ b/python/strings_udf/strings_udf/__init__.py @@ -88,7 +88,7 @@ def _get_ptx_file(): return regular_result[1] -# Maximum size of a string column is 2gb +# Maximum size of a string column is 2 GiB _STRINGS_UDF_DEFAULT_HEAP_SIZE = os.environ.get( "STRINGS_UDF_HEAP_SIZE", 2**31 ) From b4001432e8732578354da2c7815b9eab51b69ef5 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 17 Nov 2022 07:00:58 -0800 Subject: [PATCH 11/11] cleanup --- python/cudf/cudf/core/udf/__init__.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/python/cudf/cudf/core/udf/__init__.py b/python/cudf/cudf/core/udf/__init__.py index 3d2f7a8cff3..8092207e037 100644 --- a/python/cudf/cudf/core/udf/__init__.py +++ b/python/cudf/cudf/core/udf/__init__.py @@ -25,11 +25,6 @@ cudf_str_dtype = dtype(str) -@lru_cache(maxsize=None) -def set_initial_malloc_heap_size(): - strings_udf.set_malloc_heap_size() - - try: import strings_udf from strings_udf import ptxpath