Skip to content

Commit

Permalink
Support arbitrary CUDA versions in UDF code (#15950)
Browse files Browse the repository at this point in the history
This PR eliminates the manual mapping from PTX versions to CUDA versions, to help support CUDA 12.5 and newer without requiring a manual update to `_numba.py` for every CUDA release. This also updates the minimum compute capability PTX file from arch 60 to arch 70, since that is now the minimum required by RAPIDS.

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Graham Markall (https://github.com/gmarkall)
  - https://github.com/brandon-b-miller

URL: #15950
  • Loading branch information
bdice authored Jun 10, 2024
1 parent ae12634 commit 9b2c35f
Show file tree
Hide file tree
Showing 6 changed files with 30 additions and 73 deletions.
1 change: 1 addition & 0 deletions python/cudf/cudf/_lib/pylibcudf/libcudf/strings_udf.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ cdef extern from "cudf/strings/udf/udf_string.hpp" namespace \

cdef extern from "cudf/strings/udf/udf_apis.hpp" namespace \
"cudf::strings::udf" nogil:
cdef int get_cuda_build_version() except +
cdef unique_ptr[device_buffer] to_string_view_array(column_view) except +
cdef unique_ptr[column] column_from_udf_string_array(
udf_string* strings, size_type size,
Expand Down
5 changes: 5 additions & 0 deletions python/cudf/cudf/_lib/strings_udf.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,16 @@ from cudf._lib.pylibcudf.libcudf.column.column cimport column, column_view
from cudf._lib.pylibcudf.libcudf.strings_udf cimport (
column_from_udf_string_array as cpp_column_from_udf_string_array,
free_udf_string_array as cpp_free_udf_string_array,
get_cuda_build_version as cpp_get_cuda_build_version,
to_string_view_array as cpp_to_string_view_array,
udf_string,
)


def get_cuda_build_version():
    """Return the CUDA build version reported by the strings UDF library.

    Thin Python-visible wrapper around the C++ function
    ``cudf::strings::udf::get_cuda_build_version``; the integer it
    produces is handed back unchanged.
    """
    build_version = cpp_get_cuda_build_version()
    return build_version


def column_to_string_view_array(Column strings_col):
cdef unique_ptr[device_buffer] c_buffer
cdef column_view input_view = strings_col.view()
Expand Down
84 changes: 13 additions & 71 deletions python/cudf/cudf/utils/_numba.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,14 @@
# strings_udf. This is the easiest way to break an otherwise circular import
# loop of _lib.*->cudautils->_numba->_lib.strings_udf
@lru_cache
def _get_cc_60_ptx_file():
def _get_cuda_build_version():
from cudf._lib import strings_udf

return os.path.join(
os.path.dirname(strings_udf.__file__),
"..",
"core",
"udf",
"shim_60.ptx",
)
# The version is an integer, encoded as 1000 * major + 10 * minor
cuda_build_version = strings_udf.get_cuda_build_version()
cuda_major_version = cuda_build_version // 1000
cuda_minor_version = (cuda_build_version % 1000) // 10
return (cuda_major_version, cuda_minor_version)


def _get_best_ptx_file(archs, max_compute_capability):
Expand All @@ -38,8 +36,8 @@ def _get_best_ptx_file(archs, max_compute_capability):

def _get_ptx_file(path, prefix):
if "RAPIDS_NO_INITIALIZE" in os.environ:
# cc=60 ptx is always built
cc = int(os.environ.get("STRINGS_UDF_CC", "60"))
# cc=70 ptx is always built
cc = int(os.environ.get("STRINGS_UDF_CC", "70"))
else:
from numba import cuda

Expand Down Expand Up @@ -120,15 +118,13 @@ def _setup_numba():
versions = safe_get_versions()
if versions != NO_DRIVER:
driver_version, runtime_version = versions
ptx_toolkit_version = _get_cuda_version_from_ptx_file(
_get_cc_60_ptx_file()
)
shim_ptx_cuda_version = _get_cuda_build_version()

# MVC is required whenever any PTX is newer than the driver
# This could be the shipped PTX file or the PTX emitted by
# the version of NVVM on the user system, the latter aligning
# with the runtime version
if (driver_version < ptx_toolkit_version) or (
# This could be the shipped shim PTX file (determined by the CUDA
# version used at build time) or the PTX emitted by the version of NVVM
# on the user system (determined by the user's CUDA runtime version)
if (driver_version < shim_ptx_cuda_version) or (
driver_version < runtime_version
):
if driver_version < (12, 0):
Expand All @@ -139,60 +135,6 @@ def _setup_numba():
patch_numba_linker()


def _get_cuda_version_from_ptx_file(path):
"""
https://docs.nvidia.com/cuda/parallel-thread-execution/
Each PTX module must begin with a .version
directive specifying the PTX language version
example header:
//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-31057947
// Cuda compilation tools, release 11.6, V11.6.124
// Based on NVVM 7.0.1
//
.version 7.6
.target sm_52
.address_size 64
"""
with open(path) as ptx_file:
for line in ptx_file:
if line.startswith(".version"):
ver_line = line
break
else:
raise ValueError("Could not read CUDA version from ptx file.")
version = ver_line.strip("\n").split(" ")[1]
# This dictionary maps from supported versions of NVVM to the
# PTX version it produces. The lowest value should be the minimum
# CUDA version required to compile the library. Currently CUDA 11.5
# or higher is required to build cudf. New CUDA versions should
# be added to this dictionary when officially supported.
ver_map = {
"7.5": (11, 5),
"7.6": (11, 6),
"7.7": (11, 7),
"7.8": (11, 8),
"8.0": (12, 0),
"8.1": (12, 1),
"8.2": (12, 2),
"8.3": (12, 3),
"8.4": (12, 4),
}

cuda_ver = ver_map.get(version)
if cuda_ver is None:
raise ValueError(
f"Could not map PTX version {version} to a CUDA version"
)

return cuda_ver


class _CUDFNumbaConfig:
def __enter__(self):
self.CUDA_LOW_OCCUPANCY_WARNINGS = (
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/udf_cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ set(SHIM_CUDA_FLAGS --expt-relaxed-constexpr -rdc=true)

# always build a default PTX file in case RAPIDS_NO_INITIALIZE is set and the device cc can't be
# safely queried through a context
list(INSERT CMAKE_CUDA_ARCHITECTURES 0 "60")
list(INSERT CMAKE_CUDA_ARCHITECTURES 0 "70")

list(TRANSFORM CMAKE_CUDA_ARCHITECTURES REPLACE "-real" "")
list(TRANSFORM CMAKE_CUDA_ARCHITECTURES REPLACE "-virtual" "")
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -27,6 +27,13 @@ namespace cudf {
namespace strings {
namespace udf {

/**
 * @brief Get the CUDA version used at build time.
 *
 * @return The CUDA version as an integer, encoded as major * 1000 + minor * 10
 *         (for example, 12040 for CUDA 12.4).
 */
int get_cuda_build_version();

class udf_string;

/**
Expand Down
2 changes: 2 additions & 0 deletions python/cudf/udf_cpp/strings/src/strings/udf/udf_apis.cu
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ void free_udf_string_array(cudf::strings::udf::udf_string* d_strings,

// external APIs

/// Reports the CUDA version that was in effect when this file was compiled.
int get_cuda_build_version()
{
  // NOTE(review): CUDA_VERSION is presumably the macro supplied by the CUDA
  // toolkit headers, so the value is fixed when this file is built.
  int const build_version = CUDA_VERSION;
  return build_version;
}

std::unique_ptr<rmm::device_buffer> to_string_view_array(cudf::column_view const input)
{
return detail::to_string_view_array(input, cudf::get_default_stream());
Expand Down

0 comments on commit 9b2c35f

Please sign in to comment.