From f596e93ee07872dce02393e2a07f949829210673 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 10 Oct 2022 07:08:52 -0700 Subject: [PATCH 01/17] remove version guards --- python/cudf/cudf/core/udf/__init__.py | 53 +++++------ python/strings_udf/strings_udf/__init__.py | 104 +++++++-------------- 2 files changed, 61 insertions(+), 96 deletions(-) diff --git a/python/cudf/cudf/core/udf/__init__.py b/python/cudf/cudf/core/udf/__init__.py index 443466b28bd..87e1de348a9 100644 --- a/python/cudf/cudf/core/udf/__init__.py +++ b/python/cudf/cudf/core/udf/__init__.py @@ -26,34 +26,31 @@ try: import strings_udf - if strings_udf.ENABLED: - from . import strings_typing # isort: skip - from . import strings_lowering # isort: skip - from strings_udf import ptxpath - from strings_udf._lib.cudf_jit_udf import to_string_view_array - from strings_udf._typing import str_view_arg_handler, string_view - - # add an overload of MaskedType.__init__(string_view, bool) - cuda_lower(api.Masked, strings_typing.string_view, types.boolean)( - masked_lowering.masked_constructor - ) - - # add an overload of pack_return(string_view) - cuda_lower(api.pack_return, strings_typing.string_view)( - masked_lowering.pack_return_scalar_impl - ) - - _supported_masked_types |= {strings_typing.string_view} - utils.launch_arg_getters[dtype("O")] = to_string_view_array - utils.masked_array_types[dtype("O")] = string_view - utils.JIT_SUPPORTED_TYPES |= STRING_TYPES - utils.ptx_files.append(ptxpath) - utils.arg_handlers.append(str_view_arg_handler) - row_function.itemsizes[dtype("O")] = string_view.size_bytes - - _STRING_UDFS_ENABLED = True - else: - del strings_udf + from . import strings_typing # isort: skip + from . import strings_lowering # isort: skip + from strings_udf import ptxpath + from strings_udf._lib.cudf_jit_udf import to_string_view_array + from strings_udf._typing import str_view_arg_handler, string_view + + # add an overload of MaskedType.__init__(string_view, bool) + cuda_lower(api.Masked, strings_typing.string_view, types.boolean)( + masked_lowering.masked_constructor + ) + + # add an overload of pack_return(string_view) + cuda_lower(api.pack_return, strings_typing.string_view)( + masked_lowering.pack_return_scalar_impl + ) + + _supported_masked_types |= {strings_typing.string_view} + utils.launch_arg_getters[dtype("O")] = to_string_view_array + utils.masked_array_types[dtype("O")] = string_view + utils.JIT_SUPPORTED_TYPES |= STRING_TYPES + utils.ptx_files.append(ptxpath) + utils.arg_handlers.append(str_view_arg_handler) + row_function.itemsizes[dtype("O")] = string_view.size_bytes + + _STRING_UDFS_ENABLED = True except ImportError as e: # allow cuDF to work without strings_udf diff --git a/python/strings_udf/strings_udf/__init__.py b/python/strings_udf/strings_udf/__init__.py index 2cedc0288d1..a89f05b7fb1 100644 --- a/python/strings_udf/strings_udf/__init__.py +++ b/python/strings_udf/strings_udf/__init__.py @@ -12,27 +12,6 @@ __version__ = _version.get_versions()["version"] -ENABLED = False - - -def compiler_from_ptx_file(path): - """Parse a PTX file header and extract the CUDA version used to compile it. - Here is an example PTX header that this function should parse: - // Generated by NVIDIA NVVM Compiler - // - // Compiler Build ID: CL-30672275 - // Cuda compilation tools, release 11.5, V11.5.119 - // Based on NVVM 7 - """ - file = open(path).read() - major, minor = ( - re.search(r"Cuda compilation tools, release ([0-9\.]+)", file) - .group(1) - .split(".") - ) - return int(major), int(minor) - - def _get_appropriate_file(sms, cc): filtered_sms = list(filter(lambda x: x[0] <= cc, sms)) if filtered_sms: @@ -43,57 +22,46 @@ def _get_appropriate_file(sms, cc): # adapted from PTXCompiler cp = subprocess.run([sys.executable, "-c", CMD], capture_output=True) +# must have a driver to proceed if cp.returncode == 0: - # must have a driver to proceed - versions = [int(s) for s in cp.stdout.strip().split()] - driver_version = tuple(versions[:2]) - runtime_version = tuple(versions[2:]) - - # CUDA enhanced compatibility not yet enabled - if driver_version >= runtime_version: - # Load the highest compute capability file available that is less than - # the current device's. - dev = cuda.get_current_device() - cc = int("".join(str(x) for x in dev.compute_capability)) - files = glob.glob( - os.path.join(os.path.dirname(__file__), "shim_*.ptx") + # Load the highest compute capability file available that is less than + # the current device's. + dev = cuda.get_current_device() + cc = int("".join(str(x) for x in dev.compute_capability)) + files = glob.glob( + os.path.join(os.path.dirname(__file__), "shim_*.ptx") + ) + if len(files) == 0: + raise RuntimeError( + "This strings_udf installation is missing the necessary PTX " + "files. Please file an issue reporting this error and how you " + "installed cudf and strings_udf." ) - if len(files) == 0: - raise RuntimeError( - "This strings_udf installation is missing the necessary PTX " - "files. Please file an issue reporting this error and how you " - "installed cudf and strings_udf." - ) - suffix_a_sm = None - regular_sms = [] + suffix_a_sm = None + regular_sms = [] - for f in files: - file_name = os.path.basename(f) - sm_number = file_name.rstrip(".ptx").lstrip("shim_") - if sm_number.endswith("a"): - processed_sm_number = int(sm_number.rstrip("a")) - if processed_sm_number == cc: - suffix_a_sm = (processed_sm_number, f) - else: - regular_sms.append((int(sm_number), f)) - - regular_result = None + for f in files: + file_name = os.path.basename(f) + sm_number = file_name.rstrip(".ptx").lstrip("shim_") + if sm_number.endswith("a"): + processed_sm_number = int(sm_number.rstrip("a")) + if processed_sm_number == cc: + suffix_a_sm = (processed_sm_number, f) + else: + regular_sms.append((int(sm_number), f)) - if regular_sms: - regular_result = _get_appropriate_file(regular_sms, cc) + regular_result = None - if suffix_a_sm is None and regular_result is None: - raise RuntimeError( - "This strings_udf installation is missing the necessary PTX " - f"files that are <={cc}." - ) - elif suffix_a_sm is not None: - ptxpath = suffix_a_sm[1] - else: - ptxpath = regular_result[1] + if regular_sms: + regular_result = _get_appropriate_file(regular_sms, cc) - if driver_version >= compiler_from_ptx_file(ptxpath): - ENABLED = True - else: - del ptxpath + if suffix_a_sm is None and regular_result is None: + raise RuntimeError( + "This strings_udf installation is missing the necessary PTX " + f"files that are <={cc}." + ) + elif suffix_a_sm is not None: + ptxpath = suffix_a_sm[1] + else: + ptxpath = regular_result[1] From a70932b6e44140e52edb94338d84fb34a39aadda Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 10 Oct 2022 07:15:36 -0700 Subject: [PATCH 02/17] style --- python/cudf/cudf/core/udf/__init__.py | 6 +++--- python/strings_udf/strings_udf/__init__.py | 5 ++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/python/cudf/cudf/core/udf/__init__.py b/python/cudf/cudf/core/udf/__init__.py index 87e1de348a9..462243a8102 100644 --- a/python/cudf/cudf/core/udf/__init__.py +++ b/python/cudf/cudf/core/udf/__init__.py @@ -25,13 +25,13 @@ _STRING_UDFS_ENABLED = False try: import strings_udf - - from . import strings_typing # isort: skip - from . import strings_lowering # isort: skip from strings_udf import ptxpath from strings_udf._lib.cudf_jit_udf import to_string_view_array from strings_udf._typing import str_view_arg_handler, string_view + from . import strings_typing # isort: skip + from . import strings_lowering # isort: skip + # add an overload of MaskedType.__init__(string_view, bool) cuda_lower(api.Masked, strings_typing.string_view, types.boolean)( masked_lowering.masked_constructor diff --git a/python/strings_udf/strings_udf/__init__.py b/python/strings_udf/strings_udf/__init__.py index a89f05b7fb1..6aa1fb1bd91 100644 --- a/python/strings_udf/strings_udf/__init__.py +++ b/python/strings_udf/strings_udf/__init__.py @@ -12,6 +12,7 @@ __version__ = _version.get_versions()["version"] + def _get_appropriate_file(sms, cc): filtered_sms = list(filter(lambda x: x[0] <= cc, sms)) if filtered_sms: @@ -28,9 +29,7 @@ def _get_appropriate_file(sms, cc): # the current device's. dev = cuda.get_current_device() cc = int("".join(str(x) for x in dev.compute_capability)) - files = glob.glob( - os.path.join(os.path.dirname(__file__), "shim_*.ptx") - ) + files = glob.glob(os.path.join(os.path.dirname(__file__), "shim_*.ptx")) if len(files) == 0: raise RuntimeError( "This strings_udf installation is missing the necessary PTX " From 759804de1219c450c889ee87efd420a63aa283d1 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 10 Oct 2022 07:20:11 -0700 Subject: [PATCH 03/17] work around style conflict bug --- python/cudf/cudf/core/udf/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/cudf/cudf/core/udf/__init__.py b/python/cudf/cudf/core/udf/__init__.py index 462243a8102..3df1e0bd1d4 100644 --- a/python/cudf/cudf/core/udf/__init__.py +++ b/python/cudf/cudf/core/udf/__init__.py @@ -32,7 +32,6 @@ from . import strings_typing # isort: skip from . import strings_lowering # isort: skip - # add an overload of MaskedType.__init__(string_view, bool) cuda_lower(api.Masked, strings_typing.string_view, types.boolean)( masked_lowering.masked_constructor ) From bd2219d09edea3135b2ba5bb59c3e52017b6389b Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 10 Oct 2022 12:11:39 -0700 Subject: [PATCH 04/17] always test string udfs --- ci/gpu/build.sh | 16 ++++------------ .../strings_udf/tests/test_string_udfs.py | 3 --- 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index afcc80a6803..fbd2d284b85 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -283,19 +283,11 @@ gpuci_mamba_retry install strings_udf -c "${CONDA_BLD_DIR}" -c "${CONDA_ARTIFACT # only install strings_udf after cuDF is finished testing without its presence cd "$WORKSPACE/python/strings_udf/strings_udf" gpuci_logger "Python py.test for strings_udf" +py.test -n 8 --cache-clear --basetemp="$WORKSPACE/strings-udf-cuda-tmp" --junitxml="$WORKSPACE/junit-strings-udf.xml" -v --cov-config=.coveragerc --cov=strings_udf --cov-report=xml:"$WORKSPACE/python/strings_udf/strings-udf-coverage.xml" --cov-report term tests -STRINGS_UDF_PYTEST_RETCODE=0 -py.test -n 8 --cache-clear --basetemp="$WORKSPACE/strings-udf-cuda-tmp" --junitxml="$WORKSPACE/junit-strings-udf.xml" -v --cov-config=.coveragerc --cov=strings_udf --cov-report=xml:"$WORKSPACE/python/strings_udf/strings-udf-coverage.xml" --cov-report term tests || STRINGS_UDF_PYTEST_RETCODE=$? - -if [ ${STRINGS_UDF_PYTEST_RETCODE} -eq 5 ]; then - echo "No strings UDF tests were run, but this script will continue to execute." -elif [ ${STRINGS_UDF_PYTEST_RETCODE} -ne 0 ]; then - exit ${STRINGS_UDF_PYTEST_RETCODE} -else - cd "$WORKSPACE/python/cudf/cudf" - gpuci_logger "Python py.test retest cuDF UDFs" - py.test tests/test_udf_masked_ops.py -n 8 --cache-clear -fi +cd "$WORKSPACE/python/cudf/cudf" +gpuci_logger "Python py.test retest cuDF UDFs" +py.test tests/test_udf_masked_ops.py -n 8 --cache-clear # Run benchmarks with both cudf and pandas to ensure compatibility is maintained. # Benchmarks are run in DEBUG_ONLY mode, meaning that only small data sizes are used. diff --git a/python/strings_udf/strings_udf/tests/test_string_udfs.py b/python/strings_udf/strings_udf/tests/test_string_udfs.py index f214915ae12..1a5dfa00dd7 100644 --- a/python/strings_udf/strings_udf/tests/test_string_udfs.py +++ b/python/strings_udf/strings_udf/tests/test_string_udfs.py @@ -15,9 +15,6 @@ from strings_udf._lib.cudf_jit_udf import to_string_view_array from strings_udf._typing import str_view_arg_handler, string_view -if not strings_udf.ENABLED: - pytest.skip("Strings UDF not enabled.", allow_module_level=True) - def get_kernel(func, dtype): """ From 1f54f3938517e2ab4b94a1286975c90b67c4c2f9 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 12 Oct 2022 12:24:56 -0700 Subject: [PATCH 05/17] maybe patch numba linker --- python/strings_udf/strings_udf/__init__.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/python/strings_udf/strings_udf/__init__.py b/python/strings_udf/strings_udf/__init__.py index 6aa1fb1bd91..e89d17ef90b 100644 --- a/python/strings_udf/strings_udf/__init__.py +++ b/python/strings_udf/strings_udf/__init__.py @@ -5,13 +5,17 @@ import subprocess import sys +from cubinlinker.patch import _numba_version_ok, get_logger, new_patched_linker from numba import cuda +from numba.cuda.cudadrv.driver import Linker from ptxcompiler.patch import CMD from . import _version __version__ = _version.get_versions()["version"] +logger = get_logger() + def _get_appropriate_file(sms, cc): filtered_sms = list(filter(lambda x: x[0] <= cc, sms)) @@ -21,10 +25,23 @@ def _get_appropriate_file(sms, cc): return None +def maybe_patch_numba_linker(driver_version): + if driver_version < (11, 5): + logger.debug("Driver version %s.%s needs patching" % driver_version) + if _numba_version_ok: + logger.debug("Patching Numba Linker") + Linker.new = new_patched_linker + else: + logger.debug("Cannot patch Numba Linker - unsupported version") + + # adapted from PTXCompiler cp = subprocess.run([sys.executable, "-c", CMD], capture_output=True) # must have a driver to proceed if cp.returncode == 0: + versions = [int(s) for s in cp.stdout.strip().split()] + driver_version = tuple(versions[:2]) + maybe_patch_numba_linker(driver_version) # Load the highest compute capability file available that is less than # the current device's. dev = cuda.get_current_device() From 76fbf1e05711125a4f764dbe5df6430dcd39b9d4 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Fri, 14 Oct 2022 07:19:49 -0700 Subject: [PATCH 06/17] add context for maybe_patch_numba_linker --- python/strings_udf/strings_udf/__init__.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/python/strings_udf/strings_udf/__init__.py b/python/strings_udf/strings_udf/__init__.py index e89d17ef90b..0d4fa2fc504 100644 --- a/python/strings_udf/strings_udf/__init__.py +++ b/python/strings_udf/strings_udf/__init__.py @@ -16,6 +16,9 @@ logger = get_logger() +# TODO: embed this in the .so and read dynamically? +strings_udf_ptx_version = (11, 5) + def _get_appropriate_file(sms, cc): filtered_sms = list(filter(lambda x: x[0] <= cc, sms)) @@ -26,7 +29,10 @@ def _get_appropriate_file(sms, cc): def maybe_patch_numba_linker(driver_version): - if driver_version < (11, 5): + # Numba thinks cubinkinker is only needed if the driver is older than the ctk + # but when strings_udf is present, it might also need to patch because the PTX + # file strings_udf relies on may be newer than the driver as well + if driver_version < strings_udf_ptx_version: logger.debug("Driver version %s.%s needs patching" % driver_version) if _numba_version_ok: logger.debug("Patching Numba Linker") From a9125199b4267e86583746c47e5d4f8ef4a0d188 Mon Sep 17 00:00:00 2001 From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com> Date: Fri, 14 Oct 2022 11:11:51 -0500 Subject: [PATCH 07/17] Update ci/gpu/build.sh Co-authored-by: Vyas Ramasubramani --- ci/gpu/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index f47ff7d7438..88d88aa75d9 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -287,7 +287,7 @@ py.test -n 8 --cache-clear --basetemp="$WORKSPACE/strings-udf-cuda-tmp" --junitx # retest cuDF UDFs cd "$WORKSPACE/python/cudf/cudf" gpuci_logger "Python py.test retest cuDF UDFs" -py.test -n 8 tests/test_udf_masked_ops.py --cache-clear --basetemp="$WORKSPACE/cudf-cuda-strings-udf-tmp" --ignore="$WORKSPACE/python/cudf/cudf/benchmarks" --junitxml="$WORKSPACE/junit-cudf-strings-udf.xml" -v --cov-config="$WORKSPACE/python/cudf/.coveragerc" --cov=cudf --cov-report=xml:"$WORKSPACE/python/cudf/cudf-strings-udf-coverage.xml" --cov-report term --dist=loadscope tests +py.test -n 8 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-strings-udf-tmp" --ignore="$WORKSPACE/python/cudf/cudf/benchmarks" --junitxml="$WORKSPACE/junit-cudf-strings-udf.xml" -v --cov-config="$WORKSPACE/python/cudf/.coveragerc" --cov=cudf --cov-report=xml:"$WORKSPACE/python/cudf/cudf-strings-udf-coverage.xml" --cov-report term --dist=loadscope tests/test_udf_masked_ops.py # Run benchmarks with both cudf and pandas to ensure compatibility is maintained. From 29cf1c71a05267835033760de6042f25f20ebc54 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 17 Oct 2022 11:04:47 -0700 Subject: [PATCH 08/17] address reviews --- python/strings_udf/strings_udf/__init__.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/python/strings_udf/strings_udf/__init__.py b/python/strings_udf/strings_udf/__init__.py index 0d4fa2fc504..50487a74838 100644 --- a/python/strings_udf/strings_udf/__init__.py +++ b/python/strings_udf/strings_udf/__init__.py @@ -16,7 +16,6 @@ logger = get_logger() -# TODO: embed this in the .so and read dynamically? strings_udf_ptx_version = (11, 5) @@ -29,11 +28,14 @@ def _get_appropriate_file(sms, cc): def maybe_patch_numba_linker(driver_version): - # Numba thinks cubinkinker is only needed if the driver is older than the ctk + # Numba thinks cubinlinker is only needed if the driver is older than the ctk # but when strings_udf is present, it might also need to patch because the PTX # file strings_udf relies on may be newer than the driver as well if driver_version < strings_udf_ptx_version: - logger.debug("Driver version %s.%s needs patching" % driver_version) + logger.debug( + "Driver version %s.%s needs patching due to strings_udf" + % driver_version + ) if _numba_version_ok: logger.debug("Patching Numba Linker") Linker.new = new_patched_linker From d725aec7ba474c2900a1a3cf252671fdd4b55ae3 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 18 Oct 2022 08:40:49 -0700 Subject: [PATCH 09/17] half way address reviews --- python/strings_udf/strings_udf/__init__.py | 29 ++++++++++++++-------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/python/strings_udf/strings_udf/__init__.py b/python/strings_udf/strings_udf/__init__.py index 50487a74838..81b749f8833 100644 --- a/python/strings_udf/strings_udf/__init__.py +++ b/python/strings_udf/strings_udf/__init__.py @@ -16,7 +16,8 @@ logger = get_logger() -strings_udf_ptx_version = (11, 5) +# tracks the version of CUDA used to build the c++ and PTX components +STRINGS_UDF_PTX_VERSION = (11, 5) def _get_appropriate_file(sms, cc): @@ -31,7 +32,7 @@ def maybe_patch_numba_linker(driver_version): # Numba thinks cubinlinker is only needed if the driver is older than the ctk # but when strings_udf is present, it might also need to patch because the PTX # file strings_udf relies on may be newer than the driver as well - if driver_version < strings_udf_ptx_version: + if driver_version < STRINGS_UDF_PTX_VERSION: logger.debug( "Driver version %s.%s needs patching due to strings_udf" % driver_version @@ -43,15 +44,7 @@ def maybe_patch_numba_linker(driver_version): logger.debug("Cannot patch Numba Linker - unsupported version") -# adapted from PTXCompiler -cp = subprocess.run([sys.executable, "-c", CMD], capture_output=True) -# must have a driver to proceed -if cp.returncode == 0: - versions = [int(s) for s in cp.stdout.strip().split()] - driver_version = tuple(versions[:2]) - maybe_patch_numba_linker(driver_version) - # Load the highest compute capability file available that is less than - # the current device's. +def get_ptx_file(): dev = cuda.get_current_device() cc = int("".join(str(x) for x in dev.compute_capability)) files = glob.glob(os.path.join(os.path.dirname(__file__), "shim_*.ptx")) @@ -89,3 +82,17 @@ def maybe_patch_numba_linker(driver_version): ptxpath = suffix_a_sm[1] else: ptxpath = regular_result[1] + + return ptxpath + + +# adapted from PTXCompiler +cp = subprocess.run([sys.executable, "-c", CMD], capture_output=True) +# must have a driver to proceed +if cp.returncode == 0: + versions = [int(s) for s in cp.stdout.strip().split()] + driver_version = tuple(versions[:2]) + maybe_patch_numba_linker(driver_version) + ptxpath = get_ptx_file() + # Load the highest compute capability file available that is less than + # the current device's. From 1fdc3992f13fa14518ca7f9e9e13039eddfda42c Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 18 Oct 2022 11:25:01 -0700 Subject: [PATCH 10/17] address reviews --- python/strings_udf/strings_udf/__init__.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/python/strings_udf/strings_udf/__init__.py b/python/strings_udf/strings_udf/__init__.py index 81b749f8833..efaa7568c4f 100644 --- a/python/strings_udf/strings_udf/__init__.py +++ b/python/strings_udf/strings_udf/__init__.py @@ -79,11 +79,9 @@ def get_ptx_file(): f"files that are <={cc}." ) elif suffix_a_sm is not None: - ptxpath = suffix_a_sm[1] + return suffix_a_sm[1] else: - ptxpath = regular_result[1] - - return ptxpath + return regular_result[1] # adapted from PTXCompiler From b56108ad5b1b138bbfc4fffd896ded9762b6257d Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 18 Oct 2022 11:26:05 -0700 Subject: [PATCH 11/17] add comment --- python/strings_udf/strings_udf/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/strings_udf/strings_udf/__init__.py b/python/strings_udf/strings_udf/__init__.py index efaa7568c4f..de313e68a98 100644 --- a/python/strings_udf/strings_udf/__init__.py +++ b/python/strings_udf/strings_udf/__init__.py @@ -46,6 +46,9 @@ def maybe_patch_numba_linker(driver_version): def get_ptx_file(): dev = cuda.get_current_device() + + # Load the highest compute capability file available that is less than + # the current device's. cc = int("".join(str(x) for x in dev.compute_capability)) files = glob.glob(os.path.join(os.path.dirname(__file__), "shim_*.ptx")) if len(files) == 0: @@ -92,5 +95,3 @@ def get_ptx_file(): driver_version = tuple(versions[:2]) maybe_patch_numba_linker(driver_version) ptxpath = get_ptx_file() - # Load the highest compute capability file available that is less than - # the current device's. From 049e63a59f2c93e30440eb8fd1dd3fe60ccf1518 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 18 Oct 2022 19:24:01 -0700 Subject: [PATCH 12/17] refactor --- python/strings_udf/strings_udf/__init__.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/python/strings_udf/strings_udf/__init__.py b/python/strings_udf/strings_udf/__init__.py index de313e68a98..52434deef64 100644 --- a/python/strings_udf/strings_udf/__init__.py +++ b/python/strings_udf/strings_udf/__init__.py @@ -58,7 +58,6 @@ def get_ptx_file(): "installed cudf and strings_udf." ) - suffix_a_sm = None regular_sms = [] for f in files: @@ -67,7 +66,7 @@ def get_ptx_file(): if sm_number.endswith("a"): processed_sm_number = int(sm_number.rstrip("a")) if processed_sm_number == cc: - suffix_a_sm = (processed_sm_number, f) + return f else: regular_sms.append((int(sm_number), f)) @@ -76,13 +75,11 @@ def get_ptx_file(): if regular_sms: regular_result = _get_appropriate_file(regular_sms, cc) - if suffix_a_sm is None and regular_result is None: + if regular_result is None: raise RuntimeError( "This strings_udf installation is missing the necessary PTX " f"files that are <={cc}." ) - elif suffix_a_sm is not None: - return suffix_a_sm[1] else: return regular_result[1] From 63992b197bdec979830b382300c29a8d395411b1 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 26 Oct 2022 12:48:15 -0700 Subject: [PATCH 13/17] use updated ptxcompiler api --- conda/recipes/strings_udf/meta.yaml | 2 +- python/strings_udf/strings_udf/__init__.py | 11 ++++------- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/conda/recipes/strings_udf/meta.yaml b/conda/recipes/strings_udf/meta.yaml index e29fb55ce63..9dbd0e56ea1 100644 --- a/conda/recipes/strings_udf/meta.yaml +++ b/conda/recipes/strings_udf/meta.yaml @@ -50,7 +50,7 @@ requirements: - cudf ={{ version }} - {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }} - cachetools - - ptxcompiler # [linux64] # CUDA enhanced compatibility. See https://github.com/rapidsai/ptxcompiler + - ptxcompiler >=0.7.0 # [linux64] # CUDA enhanced compatibility. See https://github.com/rapidsai/ptxcompiler test: # [linux64] requires: # [linux64] - cudatoolkit {{ cuda_version }}.* # [linux64] diff --git a/python/strings_udf/strings_udf/__init__.py b/python/strings_udf/strings_udf/__init__.py index 52434deef64..970d40451d9 100644 --- a/python/strings_udf/strings_udf/__init__.py +++ b/python/strings_udf/strings_udf/__init__.py @@ -8,7 +8,7 @@ from cubinlinker.patch import _numba_version_ok, get_logger, new_patched_linker from numba import cuda from numba.cuda.cudadrv.driver import Linker -from ptxcompiler.patch import CMD +from ptxcompiler.patch import NO_DRIVER, safe_get_versions from . import _version @@ -84,11 +84,8 @@ def get_ptx_file(): return regular_result[1] -# adapted from PTXCompiler -cp = subprocess.run([sys.executable, "-c", CMD], capture_output=True) -# must have a driver to proceed -if cp.returncode == 0: - versions = [int(s) for s in cp.stdout.strip().split()] - driver_version = tuple(versions[:2]) +versions = safe_get_versions() +if not versions == NO_DRIVER: + driver_version, runtime_version = versions maybe_patch_numba_linker(driver_version) ptxpath = get_ptx_file() From 777e257ffbe08034d1892d0542cb3bd28eb6e1af Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 1 Nov 2022 07:18:18 -0700 Subject: [PATCH 14/17] fix up reviews --- python/cudf/cudf/core/udf/__init__.py | 6 +++++- python/strings_udf/strings_udf/__init__.py | 10 +++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/python/cudf/cudf/core/udf/__init__.py b/python/cudf/cudf/core/udf/__init__.py index 3df1e0bd1d4..18bfc70decb 100644 --- a/python/cudf/cudf/core/udf/__init__.py +++ b/python/cudf/cudf/core/udf/__init__.py @@ -26,6 +26,10 @@ try: import strings_udf from strings_udf import ptxpath + + if ptxpath: + utils.ptx_files.append(ptxpath) + from strings_udf._lib.cudf_jit_udf import to_string_view_array from strings_udf._typing import str_view_arg_handler, string_view @@ -45,7 +49,7 @@ utils.launch_arg_getters[dtype("O")] = to_string_view_array utils.masked_array_types[dtype("O")] = string_view utils.JIT_SUPPORTED_TYPES |= STRING_TYPES - utils.ptx_files.append(ptxpath) + utils.arg_handlers.append(str_view_arg_handler) row_function.itemsizes[dtype("O")] = string_view.size_bytes diff --git a/python/strings_udf/strings_udf/__init__.py b/python/strings_udf/strings_udf/__init__.py index 970d40451d9..ee5d19c1ec0 100644 --- a/python/strings_udf/strings_udf/__init__.py +++ b/python/strings_udf/strings_udf/__init__.py @@ -1,9 +1,6 @@ # Copyright (c) 2022, NVIDIA CORPORATION. import glob import os -import re -import subprocess -import sys from cubinlinker.patch import _numba_version_ok, get_logger, new_patched_linker from numba import cuda @@ -44,7 +41,7 @@ def maybe_patch_numba_linker(driver_version): logger.debug("Cannot patch Numba Linker - unsupported version") -def get_ptx_file(): +def _get_ptx_file(): dev = cuda.get_current_device() # Load the highest compute capability file available that is less than @@ -56,6 +53,7 @@ def get_ptx_file(): "This strings_udf installation is missing the necessary PTX " "files. Please file an issue reporting this error and how you " "installed cudf and strings_udf." + "https://github.com/rapidsai/cudf/issues" ) regular_sms = [] @@ -84,8 +82,10 @@ def get_ptx_file(): return regular_result[1] +ptxpath = None versions = safe_get_versions() if not versions == NO_DRIVER: driver_version, runtime_version = versions maybe_patch_numba_linker(driver_version) - ptxpath = get_ptx_file() + if "RAPIDS_NO_INITIALIZE" not in os.environ: + ptxpath = _get_ptx_file() From 08f384d6a1afb0318c77f8191cc0f0a95a03a44c Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 1 Nov 2022 07:19:10 -0700 Subject: [PATCH 15/17] minor update --- python/strings_udf/strings_udf/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/strings_udf/strings_udf/__init__.py b/python/strings_udf/strings_udf/__init__.py index ee5d19c1ec0..98e659e1ca0 100644 --- a/python/strings_udf/strings_udf/__init__.py +++ b/python/strings_udf/strings_udf/__init__.py @@ -84,7 +84,7 @@ def _get_ptx_file(): ptxpath = None versions = safe_get_versions() -if not versions == NO_DRIVER: +if versions != NO_DRIVER: driver_version, runtime_version = versions maybe_patch_numba_linker(driver_version) if "RAPIDS_NO_INITIALIZE" not in os.environ: From 600b97dc71e1815355a51e80c3444db5fa591f10 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 2 Nov 2022 07:07:23 -0700 Subject: [PATCH 16/17] adjust logic --- python/cudf/cudf/core/udf/__init__.py | 36 +++++++++++++-------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/python/cudf/cudf/core/udf/__init__.py b/python/cudf/cudf/core/udf/__init__.py index 18bfc70decb..246a5434e4a 100644 --- a/python/cudf/cudf/core/udf/__init__.py +++ b/python/cudf/cudf/core/udf/__init__.py @@ -30,30 +30,30 @@ if ptxpath: utils.ptx_files.append(ptxpath) - from strings_udf._lib.cudf_jit_udf import to_string_view_array - from strings_udf._typing import str_view_arg_handler, string_view + from strings_udf._lib.cudf_jit_udf import to_string_view_array + from strings_udf._typing import str_view_arg_handler, string_view - from . import strings_typing # isort: skip - from . import strings_lowering # isort: skip + from . import strings_typing # isort: skip + from . import strings_lowering # isort: skip - cuda_lower(api.Masked, strings_typing.string_view, types.boolean)( - masked_lowering.masked_constructor - ) + cuda_lower(api.Masked, strings_typing.string_view, types.boolean)( + masked_lowering.masked_constructor + ) - # add an overload of pack_return(string_view) - cuda_lower(api.pack_return, strings_typing.string_view)( - masked_lowering.pack_return_scalar_impl - ) + # add an overload of pack_return(string_view) + cuda_lower(api.pack_return, strings_typing.string_view)( + masked_lowering.pack_return_scalar_impl + ) - _supported_masked_types |= {strings_typing.string_view} - utils.launch_arg_getters[dtype("O")] = to_string_view_array - utils.masked_array_types[dtype("O")] = string_view - utils.JIT_SUPPORTED_TYPES |= STRING_TYPES + _supported_masked_types |= {strings_typing.string_view} + utils.launch_arg_getters[dtype("O")] = to_string_view_array + utils.masked_array_types[dtype("O")] = string_view + utils.JIT_SUPPORTED_TYPES |= STRING_TYPES - utils.arg_handlers.append(str_view_arg_handler) - row_function.itemsizes[dtype("O")] = string_view.size_bytes + utils.arg_handlers.append(str_view_arg_handler) + row_function.itemsizes[dtype("O")] = string_view.size_bytes - _STRING_UDFS_ENABLED = True + _STRING_UDFS_ENABLED = True except ImportError as e: # allow cuDF to work without strings_udf From 610e85aa53e4befd10ee62238be25c64f4c36a81 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 2 Nov 2022 09:36:45 -0700 Subject: [PATCH 17/17] add STRINGS_UDF_CC as an environment variable --- python/cudf/cudf/core/udf/__init__.py | 1 + python/cudf/cudf/core/udf/strings_utils.py | 0 python/strings_udf/strings_udf/__init__.py | 17 ++++++++++------- 3 files changed, 11 insertions(+), 7 deletions(-) create mode 100644 python/cudf/cudf/core/udf/strings_utils.py diff --git a/python/cudf/cudf/core/udf/__init__.py b/python/cudf/cudf/core/udf/__init__.py index 246a5434e4a..4730f1fa296 100644 --- a/python/cudf/cudf/core/udf/__init__.py +++ b/python/cudf/cudf/core/udf/__init__.py @@ -23,6 +23,7 @@ | {types.boolean} ) _STRING_UDFS_ENABLED = False + try: import strings_udf from strings_udf import ptxpath diff --git a/python/cudf/cudf/core/udf/strings_utils.py b/python/cudf/cudf/core/udf/strings_utils.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/strings_udf/strings_udf/__init__.py b/python/strings_udf/strings_udf/__init__.py index 98e659e1ca0..24f1a2d3bda 100644 --- a/python/strings_udf/strings_udf/__init__.py +++ b/python/strings_udf/strings_udf/__init__.py @@ -42,16 +42,20 @@ def maybe_patch_numba_linker(driver_version): def _get_ptx_file(): - dev = cuda.get_current_device() + if "RAPIDS_NO_INITIALIZE" in os.environ: + cc = int(os.environ.get("STRINGS_UDF_CC", "52")) + else: + dev = cuda.get_current_device() - # Load the highest compute capability file available that is less than - # the current device's. - cc = int("".join(str(x) for x in dev.compute_capability)) + # Load the highest compute capability file available that is less than + # the current device's. + cc = int("".join(str(x) for x in dev.compute_capability)) files = glob.glob(os.path.join(os.path.dirname(__file__), "shim_*.ptx")) if len(files) == 0: raise RuntimeError( "This strings_udf installation is missing the necessary PTX " - "files. Please file an issue reporting this error and how you " + f"files for compute capability {cc}. " + "Please file an issue reporting this error and how you " "installed cudf and strings_udf." "https://github.com/rapidsai/cudf/issues" ) @@ -87,5 +91,4 @@ def _get_ptx_file(): if versions != NO_DRIVER: driver_version, runtime_version = versions maybe_patch_numba_linker(driver_version) - if "RAPIDS_NO_INITIALIZE" not in os.environ: - ptxpath = _get_ptx_file() + ptxpath = _get_ptx_file()