diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings_udf.pxd b/python/cudf/cudf/_lib/pylibcudf/libcudf/strings_udf.pxd index b895d5e6925..804ad30dfb1 100644 --- a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings_udf.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/libcudf/strings_udf.pxd @@ -18,6 +18,7 @@ cdef extern from "cudf/strings/udf/udf_string.hpp" namespace \ cdef extern from "cudf/strings/udf/udf_apis.hpp" namespace \ "cudf::strings::udf" nogil: + cdef int get_cuda_build_version() except + cdef unique_ptr[device_buffer] to_string_view_array(column_view) except + cdef unique_ptr[column] column_from_udf_string_array( udf_string* strings, size_type size, diff --git a/python/cudf/cudf/_lib/strings_udf.pyx b/python/cudf/cudf/_lib/strings_udf.pyx index e952492c45d..7610cad0b40 100644 --- a/python/cudf/cudf/_lib/strings_udf.pyx +++ b/python/cudf/cudf/_lib/strings_udf.pyx @@ -22,11 +22,16 @@ from cudf._lib.pylibcudf.libcudf.column.column cimport column, column_view from cudf._lib.pylibcudf.libcudf.strings_udf cimport ( column_from_udf_string_array as cpp_column_from_udf_string_array, free_udf_string_array as cpp_free_udf_string_array, + get_cuda_build_version as cpp_get_cuda_build_version, to_string_view_array as cpp_to_string_view_array, udf_string, ) +def get_cuda_build_version(): + return cpp_get_cuda_build_version() + + def column_to_string_view_array(Column strings_col): cdef unique_ptr[device_buffer] c_buffer cdef column_view input_view = strings_col.view() diff --git a/python/cudf/cudf/utils/_numba.py b/python/cudf/cudf/utils/_numba.py index 494b48b3cfd..d9dde58d998 100644 --- a/python/cudf/cudf/utils/_numba.py +++ b/python/cudf/cudf/utils/_numba.py @@ -12,16 +12,14 @@ # strings_udf. 
This is the easiest way to break an otherwise circular import # loop of _lib.*->cudautils->_numba->_lib.strings_udf @lru_cache -def _get_cc_60_ptx_file(): +def _get_cuda_build_version(): from cudf._lib import strings_udf - return os.path.join( - os.path.dirname(strings_udf.__file__), - "..", - "core", - "udf", - "shim_60.ptx", - ) + # The version is an integer, parsed as 1000 * major + 10 * minor + cuda_build_version = strings_udf.get_cuda_build_version() + cuda_major_version = cuda_build_version // 1000 + cuda_minor_version = (cuda_build_version % 1000) // 10 + return (cuda_major_version, cuda_minor_version) def _get_best_ptx_file(archs, max_compute_capability): @@ -38,8 +36,8 @@ def _get_best_ptx_file(archs, max_compute_capability): def _get_ptx_file(path, prefix): if "RAPIDS_NO_INITIALIZE" in os.environ: - # cc=60 ptx is always built - cc = int(os.environ.get("STRINGS_UDF_CC", "60")) + # cc=70 ptx is always built + cc = int(os.environ.get("STRINGS_UDF_CC", "70")) else: from numba import cuda @@ -120,15 +118,13 @@ def _setup_numba(): versions = safe_get_versions() if versions != NO_DRIVER: driver_version, runtime_version = versions - ptx_toolkit_version = _get_cuda_version_from_ptx_file( - _get_cc_60_ptx_file() - ) + shim_ptx_cuda_version = _get_cuda_build_version() # MVC is required whenever any PTX is newer than the driver - # This could be the shipped PTX file or the PTX emitted by - # the version of NVVM on the user system, the latter aligning - # with the runtime version - if (driver_version < ptx_toolkit_version) or ( + # This could be the shipped shim PTX file (determined by the CUDA + # version used at build time) or the PTX emitted by the version of NVVM + # on the user system (determined by the user's CUDA runtime version) + if (driver_version < shim_ptx_cuda_version) or ( driver_version < runtime_version ): if driver_version < (12, 0): @@ -139,60 +135,6 @@ def _setup_numba(): patch_numba_linker() -def _get_cuda_version_from_ptx_file(path): - """ - 
https://docs.nvidia.com/cuda/parallel-thread-execution/ - Each PTX module must begin with a .version - directive specifying the PTX language version - - example header: - // - // Generated by NVIDIA NVVM Compiler - // - // Compiler Build ID: CL-31057947 - // Cuda compilation tools, release 11.6, V11.6.124 - // Based on NVVM 7.0.1 - // - - .version 7.6 - .target sm_52 - .address_size 64 - - """ - with open(path) as ptx_file: - for line in ptx_file: - if line.startswith(".version"): - ver_line = line - break - else: - raise ValueError("Could not read CUDA version from ptx file.") - version = ver_line.strip("\n").split(" ")[1] - # This dictionary maps from supported versions of NVVM to the - # PTX version it produces. The lowest value should be the minimum - # CUDA version required to compile the library. Currently CUDA 11.5 - # or higher is required to build cudf. New CUDA versions should - # be added to this dictionary when officially supported. - ver_map = { - "7.5": (11, 5), - "7.6": (11, 6), - "7.7": (11, 7), - "7.8": (11, 8), - "8.0": (12, 0), - "8.1": (12, 1), - "8.2": (12, 2), - "8.3": (12, 3), - "8.4": (12, 4), - } - - cuda_ver = ver_map.get(version) - if cuda_ver is None: - raise ValueError( - f"Could not map PTX version {version} to a CUDA version" - ) - - return cuda_ver - - class _CUDFNumbaConfig: def __enter__(self): self.CUDA_LOW_OCCUPANCY_WARNINGS = ( diff --git a/python/cudf/udf_cpp/CMakeLists.txt b/python/cudf/udf_cpp/CMakeLists.txt index fe7f9d0b00d..fa7855cfc65 100644 --- a/python/cudf/udf_cpp/CMakeLists.txt +++ b/python/cudf/udf_cpp/CMakeLists.txt @@ -60,7 +60,7 @@ set(SHIM_CUDA_FLAGS --expt-relaxed-constexpr -rdc=true) # always build a default PTX file in case RAPIDS_NO_INITIALIZE is set and the device cc can't be # safely queried through a context -list(INSERT CMAKE_CUDA_ARCHITECTURES 0 "60") +list(INSERT CMAKE_CUDA_ARCHITECTURES 0 "70") list(TRANSFORM CMAKE_CUDA_ARCHITECTURES REPLACE "-real" "") list(TRANSFORM CMAKE_CUDA_ARCHITECTURES REPLACE 
"-virtual" "") diff --git a/python/cudf/udf_cpp/strings/include/cudf/strings/udf/udf_apis.hpp b/python/cudf/udf_cpp/strings/include/cudf/strings/udf/udf_apis.hpp index 219dbe27682..8635b1280de 100644 --- a/python/cudf/udf_cpp/strings/include/cudf/strings/udf/udf_apis.hpp +++ b/python/cudf/udf_cpp/strings/include/cudf/strings/udf/udf_apis.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,6 +27,13 @@ namespace cudf { namespace strings { namespace udf { +/** + * @brief Get the CUDA version used at build time. + * + * @return The CUDA version as an integer, encoded as major * 1000 + minor * 10. + */ +int get_cuda_build_version(); + class udf_string; /** diff --git a/python/cudf/udf_cpp/strings/src/strings/udf/udf_apis.cu b/python/cudf/udf_cpp/strings/src/strings/udf/udf_apis.cu index 9cf86b5ea48..941e61e6787 100644 --- a/python/cudf/udf_cpp/strings/src/strings/udf/udf_apis.cu +++ b/python/cudf/udf_cpp/strings/src/strings/udf/udf_apis.cu @@ -101,6 +101,8 @@ void free_udf_string_array(cudf::strings::udf::udf_string* d_strings, // external APIs +int get_cuda_build_version() { return CUDA_VERSION; } + std::unique_ptr<rmm::device_buffer> to_string_view_array(cudf::column_view const input) { return detail::to_string_view_array(input, cudf::get_default_stream());