Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support arbitrary CUDA versions in UDF code #15950

Merged
merged 5 commits into from
Jun 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions python/cudf/cudf/_lib/pylibcudf/libcudf/strings_udf.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ cdef extern from "cudf/strings/udf/udf_string.hpp" namespace \

cdef extern from "cudf/strings/udf/udf_apis.hpp" namespace \
"cudf::strings::udf" nogil:
cdef int get_cuda_build_version() except +
cdef unique_ptr[device_buffer] to_string_view_array(column_view) except +
cdef unique_ptr[column] column_from_udf_string_array(
udf_string* strings, size_type size,
Expand Down
5 changes: 5 additions & 0 deletions python/cudf/cudf/_lib/strings_udf.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,16 @@ from cudf._lib.pylibcudf.libcudf.column.column cimport column, column_view
from cudf._lib.pylibcudf.libcudf.strings_udf cimport (
column_from_udf_string_array as cpp_column_from_udf_string_array,
free_udf_string_array as cpp_free_udf_string_array,
get_cuda_build_version as cpp_get_cuda_build_version,
to_string_view_array as cpp_to_string_view_array,
udf_string,
)


def get_cuda_build_version():
    """Return the CUDA build version reported by the strings UDF C++ library.

    Thin Python-visible wrapper: forwards to the C++
    ``get_cuda_build_version`` (cimported in this module as
    ``cpp_get_cuda_build_version``) and hands its integer result back
    unchanged.
    """
    build_version = cpp_get_cuda_build_version()
    return build_version


def column_to_string_view_array(Column strings_col):
cdef unique_ptr[device_buffer] c_buffer
cdef column_view input_view = strings_col.view()
Expand Down
84 changes: 13 additions & 71 deletions python/cudf/cudf/utils/_numba.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,14 @@
# strings_udf. This is the easiest way to break an otherwise circular import
# loop of _lib.*->cudautils->_numba->_lib.strings_udf
@lru_cache
def _get_cc_60_ptx_file():
def _get_cuda_build_version():
from cudf._lib import strings_udf

return os.path.join(
os.path.dirname(strings_udf.__file__),
"..",
"core",
"udf",
"shim_60.ptx",
)
# The version is a single integer encoded as 1000 * major + 10 * minor
cuda_build_version = strings_udf.get_cuda_build_version()
cuda_major_version = cuda_build_version // 1000
cuda_minor_version = (cuda_build_version % 1000) // 10
return (cuda_major_version, cuda_minor_version)


def _get_best_ptx_file(archs, max_compute_capability):
Expand All @@ -38,8 +36,8 @@ def _get_best_ptx_file(archs, max_compute_capability):

def _get_ptx_file(path, prefix):
if "RAPIDS_NO_INITIALIZE" in os.environ:
# cc=60 ptx is always built
cc = int(os.environ.get("STRINGS_UDF_CC", "60"))
# cc=70 ptx is always built
cc = int(os.environ.get("STRINGS_UDF_CC", "70"))
else:
from numba import cuda

Expand Down Expand Up @@ -120,15 +118,13 @@ def _setup_numba():
versions = safe_get_versions()
if versions != NO_DRIVER:
driver_version, runtime_version = versions
ptx_toolkit_version = _get_cuda_version_from_ptx_file(
_get_cc_60_ptx_file()
)
shim_ptx_cuda_version = _get_cuda_build_version()

# MVC is required whenever any PTX is newer than the driver
# This could be the shipped PTX file or the PTX emitted by
# the version of NVVM on the user system, the latter aligning
# with the runtime version
if (driver_version < ptx_toolkit_version) or (
# This could be the shipped shim PTX file (determined by the CUDA
# version used at build time) or the PTX emitted by the version of NVVM
# on the user system (determined by the user's CUDA runtime version)
if (driver_version < shim_ptx_cuda_version) or (
driver_version < runtime_version
):
if driver_version < (12, 0):
Expand All @@ -139,60 +135,6 @@ def _setup_numba():
patch_numba_linker()


def _get_cuda_version_from_ptx_file(path):
"""
https://docs.nvidia.com/cuda/parallel-thread-execution/
Each PTX module must begin with a .version
directive specifying the PTX language version

example header:
//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-31057947
// Cuda compilation tools, release 11.6, V11.6.124
// Based on NVVM 7.0.1
//

.version 7.6
.target sm_52
.address_size 64

"""
with open(path) as ptx_file:
for line in ptx_file:
if line.startswith(".version"):
ver_line = line
break
else:
raise ValueError("Could not read CUDA version from ptx file.")
version = ver_line.strip("\n").split(" ")[1]
# This dictionary maps from supported versions of NVVM to the
# PTX version it produces. The lowest value should be the minimum
# CUDA version required to compile the library. Currently CUDA 11.5
# or higher is required to build cudf. New CUDA versions should
# be added to this dictionary when officially supported.
ver_map = {
"7.5": (11, 5),
"7.6": (11, 6),
"7.7": (11, 7),
"7.8": (11, 8),
"8.0": (12, 0),
"8.1": (12, 1),
"8.2": (12, 2),
"8.3": (12, 3),
"8.4": (12, 4),
}
Comment on lines -175 to -185
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The main point of this PR is to get rid of this mapping!


cuda_ver = ver_map.get(version)
if cuda_ver is None:
raise ValueError(
f"Could not map PTX version {version} to a CUDA version"
)

return cuda_ver


class _CUDFNumbaConfig:
def __enter__(self):
self.CUDA_LOW_OCCUPANCY_WARNINGS = (
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/udf_cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ set(SHIM_CUDA_FLAGS --expt-relaxed-constexpr -rdc=true)

# always build a default PTX file in case RAPIDS_NO_INITIALIZE is set and the device cc can't be
# safely queried through a context
list(INSERT CMAKE_CUDA_ARCHITECTURES 0 "60")
list(INSERT CMAKE_CUDA_ARCHITECTURES 0 "70")

list(TRANSFORM CMAKE_CUDA_ARCHITECTURES REPLACE "-real" "")
list(TRANSFORM CMAKE_CUDA_ARCHITECTURES REPLACE "-virtual" "")
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -27,6 +27,13 @@ namespace cudf {
namespace strings {
namespace udf {

/**
* @brief Get the CUDA version used at build time.
*
* @return The CUDA version as an integer, encoded as major * 1000 + minor * 10.
*/
int get_cuda_build_version();

class udf_string;

/**
Expand Down
2 changes: 2 additions & 0 deletions python/cudf/udf_cpp/strings/src/strings/udf/udf_apis.cu
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ void free_udf_string_array(cudf::strings::udf::udf_string* d_strings,

// external APIs

// Returns the value of the CUDA_VERSION macro as it was defined when this
// translation unit was compiled.
int get_cuda_build_version()
{
  return CUDA_VERSION;
}
bdice marked this conversation as resolved.
Show resolved Hide resolved

std::unique_ptr<rmm::device_buffer> to_string_view_array(cudf::column_view const input)
{
return detail::to_string_view_array(input, cudf::get_default_stream());
Expand Down
Loading