diff --git a/build.sh b/build.sh index ac283d01fc9..bda3d83798a 100755 --- a/build.sh +++ b/build.sh @@ -329,7 +329,7 @@ fi if buildAll || hasArg cudf; then cd ${REPODIR}/python/cudf - python setup.py build_ext --inplace -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} ${EXTRA_CMAKE_ARGS} -- -j${PARALLEL_LEVEL:-1} + python setup.py build_ext --inplace -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} -DCMAKE_CUDA_ARCHITECTURES=${CUDF_CMAKE_CUDA_ARCHITECTURES} ${EXTRA_CMAKE_ARGS} -- -j${PARALLEL_LEVEL:-1} if [[ ${INSTALL_TARGET} != "" ]]; then python setup.py install --single-version-externally-managed --record=record.txt -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} ${EXTRA_CMAKE_ARGS} -- -j${PARALLEL_LEVEL:-1} fi @@ -338,7 +338,7 @@ fi if buildAll || hasArg strings_udf; then cd ${REPODIR}/python/strings_udf - python setup.py build_ext --inplace -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} ${EXTRA_CMAKE_ARGS} -- -j${PARALLEL_LEVEL:-1} + python setup.py build_ext --inplace -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} -DCMAKE_CUDA_ARCHITECTURES=${CUDF_CMAKE_CUDA_ARCHITECTURES} ${EXTRA_CMAKE_ARGS} -- -j${PARALLEL_LEVEL:-1} if [[ ${INSTALL_TARGET} != "" ]]; then python setup.py install --single-version-externally-managed --record=record.txt -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} ${EXTRA_CMAKE_ARGS} -- -j${PARALLEL_LEVEL:-1} fi diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 850afaa44e2..7f3ac81b5c6 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -269,6 +269,8 @@ cd "$WORKSPACE/python/custreamz" gpuci_logger "Python py.test for cuStreamz" py.test -n 8 --cache-clear --basetemp="$WORKSPACE/custreamz-cuda-tmp" --junitxml="$WORKSPACE/junit-custreamz.xml" -v --cov-config=.coveragerc --cov=custreamz --cov-report=xml:"$WORKSPACE/python/custreamz/custreamz-coverage.xml" --cov-report term custreamz + +# only install strings_udf after cuDF is finished testing without its presence gpuci_logger "Installing strings_udf" gpuci_mamba_retry install strings_udf -c "${CONDA_BLD_DIR}" -c "${CONDA_ARTIFACT_PATH}" @@ -276,11 +278,18 @@ gpuci_mamba_retry install strings_udf -c "${CONDA_BLD_DIR}" -c "${CONDA_ARTIFACT cd "$WORKSPACE/python/strings_udf/strings_udf" gpuci_logger "Python py.test for strings_udf" -# retest cudf with strings_udf present -cd $WORKSPACE/python/cudf/cudf -py.test -n 8 --cache-clear --basetemp="$WORKSPACE/strings-udf-cuda-tmp" --junitxml="$WORKSPACE/junit-strings-udf.xml" -v --cov-config=.coveragerc --cov=strings_udf --cov-report=xml:"$WORKSPACE/python/strings_udf/strings-udf-coverage.xml" --cov-report term tests -gpuci_logger "Python py.test retest cuDF UDFs" -py.test tests/test_udf_masked_ops.py -n 8 --cache-clear +STRINGS_UDF_PYTEST_RETCODE=0 +py.test -n 8 --cache-clear --basetemp="$WORKSPACE/strings-udf-cuda-tmp" --junitxml="$WORKSPACE/junit-strings-udf.xml" -v --cov-config=.coveragerc --cov=strings_udf --cov-report=xml:"$WORKSPACE/python/strings_udf/strings-udf-coverage.xml" --cov-report term tests || STRINGS_UDF_PYTEST_RETCODE=$? + +if [ ${STRINGS_UDF_PYTEST_RETCODE} -eq 5 ]; then + echo "No strings UDF tests were run, but this script will continue to execute." +elif [ ${STRINGS_UDF_PYTEST_RETCODE} -ne 0 ]; then + exit ${STRINGS_UDF_PYTEST_RETCODE} +else + cd "$WORKSPACE/python/cudf/cudf" + gpuci_logger "Python py.test retest cuDF UDFs" + py.test tests/test_udf_masked_ops.py -n 8 --cache-clear +fi # Run benchmarks with both cudf and pandas to ensure compatibility is maintained. # Benchmarks are run in DEBUG_ONLY mode, meaning that only small data sizes are used. diff --git a/python/cudf/cudf/core/udf/__init__.py b/python/cudf/cudf/core/udf/__init__.py index 97ed1be82c6..443466b28bd 100644 --- a/python/cudf/cudf/core/udf/__init__.py +++ b/python/cudf/cudf/core/udf/__init__.py @@ -1,6 +1,10 @@ # Copyright (c) 2022, NVIDIA CORPORATION. -from numba import types -from numba.cuda.cudaimpl import lower as cuda_lower +import numpy as np +from numba import cuda, types +from numba.cuda.cudaimpl import ( + lower as cuda_lower, + registry as cuda_lowering_registry, +) from cudf.core.dtypes import dtype from cudf.core.udf import api, row_function, utils @@ -11,8 +15,6 @@ _units = ["ns", "ms", "us", "s"] _datetime_cases = {types.NPDatetime(u) for u in _units} _timedelta_cases = {types.NPTimedelta(u) for u in _units} - - _supported_masked_types = ( types.integer_domain | types.real_domain @@ -20,37 +22,40 @@ | _timedelta_cases | {types.boolean} ) - _STRING_UDFS_ENABLED = False try: import strings_udf - from strings_udf import ptxpath - from strings_udf._lib.cudf_jit_udf import to_string_view_array - from strings_udf._typing import str_view_arg_handler, string_view - - from . import strings_typing # isort: skip - from . import strings_lowering # isort: skip - - cuda_lower(api.Masked, strings_typing.string_view, types.boolean)( - masked_lowering.masked_constructor - ) - - # add an overload of pack_return(string_view) - cuda_lower(api.pack_return, strings_typing.string_view)( - masked_lowering.pack_return_scalar_impl - ) - - _supported_masked_types |= {strings_typing.string_view} - utils.launch_arg_getters[dtype("O")] = to_string_view_array - utils.masked_array_types[dtype("O")] = string_view - utils.JIT_SUPPORTED_TYPES |= STRING_TYPES - utils.ptx_files.append(ptxpath) - utils.arg_handlers.append(str_view_arg_handler) - row_function.itemsizes[dtype("O")] = string_view.size_bytes - - _STRING_UDFS_ENABLED = True + + if strings_udf.ENABLED: + from . import strings_typing # isort: skip + from . import strings_lowering # isort: skip + from strings_udf import ptxpath + from strings_udf._lib.cudf_jit_udf import to_string_view_array + from strings_udf._typing import str_view_arg_handler, string_view + + # add an overload of MaskedType.__init__(string_view, bool) + cuda_lower(api.Masked, strings_typing.string_view, types.boolean)( + masked_lowering.masked_constructor + ) + + # add an overload of pack_return(string_view) + cuda_lower(api.pack_return, strings_typing.string_view)( + masked_lowering.pack_return_scalar_impl + ) + + _supported_masked_types |= {strings_typing.string_view} + utils.launch_arg_getters[dtype("O")] = to_string_view_array + utils.masked_array_types[dtype("O")] = string_view + utils.JIT_SUPPORTED_TYPES |= STRING_TYPES + utils.ptx_files.append(ptxpath) + utils.arg_handlers.append(str_view_arg_handler) + row_function.itemsizes[dtype("O")] = string_view.size_bytes + + _STRING_UDFS_ENABLED = True + else: + del strings_udf + except ImportError as e: # allow cuDF to work without strings_udf pass - masked_typing.register_masked_constructor(_supported_masked_types) diff --git a/python/strings_udf/CMakeLists.txt b/python/strings_udf/CMakeLists.txt index 53d31575363..3e54162b732 100644 --- a/python/strings_udf/CMakeLists.txt +++ b/python/strings_udf/CMakeLists.txt @@ -18,6 +18,9 @@ set(strings_udf_version 22.10.00) include(../../fetch_rapids.cmake) +include(rapids-cuda) +rapids_cuda_init_architectures(strings-udf-python) + project( strings-udf-python VERSION ${strings_udf_version} diff --git a/python/strings_udf/cpp/CMakeLists.txt b/python/strings_udf/cpp/CMakeLists.txt index 5bbb6ae4791..735e9ff5c27 100644 --- a/python/strings_udf/cpp/CMakeLists.txt +++ b/python/strings_udf/cpp/CMakeLists.txt @@ -19,9 +19,7 @@ include(rapids-cpm) include(rapids-cuda) include(rapids-find) -rapids_cpm_init() - -rapids_cuda_init_architectures(STRINGS_UDF) +rapids_cuda_init_architectures(strings-udf-cpp) # Create a project so that we can enable CUDA architectures in this file. project( @@ -30,6 +28,8 @@ project( LANGUAGES CUDA ) +rapids_cpm_init() + rapids_find_package( CUDAToolkit REQUIRED BUILD_EXPORT_SET strings-udf-exports diff --git a/python/strings_udf/strings_udf/__init__.py b/python/strings_udf/strings_udf/__init__.py index ed0c2a228a7..8a06f02d8f9 100644 --- a/python/strings_udf/strings_udf/__init__.py +++ b/python/strings_udf/strings_udf/__init__.py @@ -12,26 +12,62 @@ __version__ = _version.get_versions()["version"] +ENABLED = False + + +def compiler_from_ptx_file(path): + """Parse a PTX file header and extract the CUDA version used to compile it. + Here is an example PTX header that this function should parse: + // Generated by NVIDIA NVVM Compiler + // + // Compiler Build ID: CL-30672275 + // Cuda compilation tools, release 11.5, V11.5.119 + // Based on NVVM 7 + """ + file = open(path).read() + major, minor = ( + re.search(r"Cuda compilation tools, release ([0-9\.]+)", file) + .group(1) + .split(".") + ) + return int(major), int(minor) + + # adapted from PTXCompiler cp = subprocess.run([sys.executable, "-c", CMD], capture_output=True) - -# must have a driver to proceed if cp.returncode == 0: + # must have a driver to proceed + versions = [int(s) for s in cp.stdout.strip().split()] + driver_version = tuple(versions[:2]) + runtime_version = tuple(versions[2:]) - # Load the highest compute capability file available that is less than - # the current device's. - files = glob.glob(os.path.join(os.path.dirname(__file__), "shim_*.ptx")) - dev = cuda.get_current_device() - cc = "".join(str(x) for x in dev.compute_capability) - files = glob.glob(os.path.join(os.path.dirname(__file__), "shim_*.ptx")) - if len(files) == 0: - raise RuntimeError( - "This strings_udf installation is missing the necessary PTX " - "files. Please file an issue reporting this error and how you " - "installed cudf and strings_udf." + # CUDA enhanced compatibility not yet enabled + if driver_version >= runtime_version: + # Load the highest compute capability file available that is less than + # the current device's. + files = glob.glob( + os.path.join(os.path.dirname(__file__), "shim_*.ptx") ) - sms = [os.path.basename(f).rstrip(".ptx").lstrip("shim_") for f in files] - selected_sm = max(sm for sm in sms if sm < cc) - ptxpath = os.path.join( - os.path.dirname(__file__), f"shim_{selected_sm}.ptx" - ) + dev = cuda.get_current_device() + cc = "".join(str(x) for x in dev.compute_capability) + files = glob.glob( + os.path.join(os.path.dirname(__file__), "shim_*.ptx") + ) + if len(files) == 0: + raise RuntimeError( + "This strings_udf installation is missing the necessary PTX " + "files. Please file an issue reporting this error and how you " + "installed cudf and strings_udf." + ) + sms = [ + os.path.basename(f).rstrip(".ptx").lstrip("shim_") for f in files + ] + selected_sm = max(sm for sm in sms if sm < cc) + ptxpath = os.path.join( + os.path.dirname(__file__), f"shim_{selected_sm}.ptx" + ) + + if driver_version >= compiler_from_ptx_file(ptxpath): + ENABLED = True + else: + del ptxpath diff --git a/python/strings_udf/strings_udf/tests/test_string_udfs.py b/python/strings_udf/strings_udf/tests/test_string_udfs.py index 1a5dfa00dd7..f214915ae12 100644 --- a/python/strings_udf/strings_udf/tests/test_string_udfs.py +++ b/python/strings_udf/strings_udf/tests/test_string_udfs.py @@ -15,6 +15,9 @@ from strings_udf._lib.cudf_jit_udf import to_string_view_array from strings_udf._typing import str_view_arg_handler, string_view +if not strings_udf.ENABLED: + pytest.skip("Strings UDF not enabled.", allow_module_level=True) + def get_kernel(func, dtype): """