Skip to content

Commit

Permalink
Merge pull request #11851 from rapidsai/branch-22.10
Browse files Browse the repository at this point in the history
[gpuCI] Forward-merge branch-22.10 to branch-22.12 [skip gpuci]
  • Loading branch information
GPUtester authored Oct 3, 2022
2 parents 71167d7 + d9ddd83 commit 8df6dbf
Show file tree
Hide file tree
Showing 7 changed files with 115 additions and 59 deletions.
4 changes: 2 additions & 2 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ fi
if buildAll || hasArg cudf; then

cd ${REPODIR}/python/cudf
python setup.py build_ext --inplace -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} ${EXTRA_CMAKE_ARGS} -- -j${PARALLEL_LEVEL:-1}
python setup.py build_ext --inplace -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} -DCMAKE_CUDA_ARCHITECTURES=${CUDF_CMAKE_CUDA_ARCHITECTURES} ${EXTRA_CMAKE_ARGS} -- -j${PARALLEL_LEVEL:-1}
if [[ ${INSTALL_TARGET} != "" ]]; then
python setup.py install --single-version-externally-managed --record=record.txt -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} ${EXTRA_CMAKE_ARGS} -- -j${PARALLEL_LEVEL:-1}
fi
Expand All @@ -338,7 +338,7 @@ fi
if buildAll || hasArg strings_udf; then

cd ${REPODIR}/python/strings_udf
python setup.py build_ext --inplace -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} ${EXTRA_CMAKE_ARGS} -- -j${PARALLEL_LEVEL:-1}
python setup.py build_ext --inplace -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} -DCMAKE_CUDA_ARCHITECTURES=${CUDF_CMAKE_CUDA_ARCHITECTURES} ${EXTRA_CMAKE_ARGS} -- -j${PARALLEL_LEVEL:-1}
if [[ ${INSTALL_TARGET} != "" ]]; then
python setup.py install --single-version-externally-managed --record=record.txt -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} ${EXTRA_CMAKE_ARGS} -- -j${PARALLEL_LEVEL:-1}
fi
Expand Down
19 changes: 14 additions & 5 deletions ci/gpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -269,18 +269,27 @@ cd "$WORKSPACE/python/custreamz"
gpuci_logger "Python py.test for cuStreamz"
py.test -n 8 --cache-clear --basetemp="$WORKSPACE/custreamz-cuda-tmp" --junitxml="$WORKSPACE/junit-custreamz.xml" -v --cov-config=.coveragerc --cov=custreamz --cov-report=xml:"$WORKSPACE/python/custreamz/custreamz-coverage.xml" --cov-report term custreamz


# only install strings_udf after cuDF is finished testing without its presence
gpuci_logger "Installing strings_udf"
gpuci_mamba_retry install strings_udf -c "${CONDA_BLD_DIR}" -c "${CONDA_ARTIFACT_PATH}"

# only install strings_udf after cuDF is finished testing without its presence
cd "$WORKSPACE/python/strings_udf/strings_udf"
gpuci_logger "Python py.test for strings_udf"

# retest cudf with strings_udf present
cd $WORKSPACE/python/cudf/cudf
py.test -n 8 --cache-clear --basetemp="$WORKSPACE/strings-udf-cuda-tmp" --junitxml="$WORKSPACE/junit-strings-udf.xml" -v --cov-config=.coveragerc --cov=strings_udf --cov-report=xml:"$WORKSPACE/python/strings_udf/strings-udf-coverage.xml" --cov-report term tests
gpuci_logger "Python py.test retest cuDF UDFs"
py.test tests/test_udf_masked_ops.py -n 8 --cache-clear
STRINGS_UDF_PYTEST_RETCODE=0
py.test -n 8 --cache-clear --basetemp="$WORKSPACE/strings-udf-cuda-tmp" --junitxml="$WORKSPACE/junit-strings-udf.xml" -v --cov-config=.coveragerc --cov=strings_udf --cov-report=xml:"$WORKSPACE/python/strings_udf/strings-udf-coverage.xml" --cov-report term tests || STRINGS_UDF_PYTEST_RETCODE=$?

if [ ${STRINGS_UDF_PYTEST_RETCODE} -eq 5 ]; then
echo "No strings UDF tests were run, but this script will continue to execute."
elif [ ${STRINGS_UDF_PYTEST_RETCODE} -ne 0 ]; then
exit ${STRINGS_UDF_PYTEST_RETCODE}
else
cd "$WORKSPACE/python/cudf/cudf"
gpuci_logger "Python py.test retest cuDF UDFs"
py.test tests/test_udf_masked_ops.py -n 8 --cache-clear
fi

# Run benchmarks with both cudf and pandas to ensure compatibility is maintained.
# Benchmarks are run in DEBUG_ONLY mode, meaning that only small data sizes are used.
Expand Down
67 changes: 36 additions & 31 deletions python/cudf/cudf/core/udf/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
# Copyright (c) 2022, NVIDIA CORPORATION.
from numba import types
from numba.cuda.cudaimpl import lower as cuda_lower
import numpy as np
from numba import cuda, types
from numba.cuda.cudaimpl import (
lower as cuda_lower,
registry as cuda_lowering_registry,
)

from cudf.core.dtypes import dtype
from cudf.core.udf import api, row_function, utils
Expand All @@ -11,46 +15,47 @@
_units = ["ns", "ms", "us", "s"]
_datetime_cases = {types.NPDatetime(u) for u in _units}
_timedelta_cases = {types.NPTimedelta(u) for u in _units}


_supported_masked_types = (
types.integer_domain
| types.real_domain
| _datetime_cases
| _timedelta_cases
| {types.boolean}
)

_STRING_UDFS_ENABLED = False
try:
import strings_udf
from strings_udf import ptxpath
from strings_udf._lib.cudf_jit_udf import to_string_view_array
from strings_udf._typing import str_view_arg_handler, string_view

from . import strings_typing # isort: skip
from . import strings_lowering # isort: skip

cuda_lower(api.Masked, strings_typing.string_view, types.boolean)(
masked_lowering.masked_constructor
)

# add an overload of pack_return(string_view)
cuda_lower(api.pack_return, strings_typing.string_view)(
masked_lowering.pack_return_scalar_impl
)

_supported_masked_types |= {strings_typing.string_view}
utils.launch_arg_getters[dtype("O")] = to_string_view_array
utils.masked_array_types[dtype("O")] = string_view
utils.JIT_SUPPORTED_TYPES |= STRING_TYPES
utils.ptx_files.append(ptxpath)
utils.arg_handlers.append(str_view_arg_handler)
row_function.itemsizes[dtype("O")] = string_view.size_bytes

_STRING_UDFS_ENABLED = True

if strings_udf.ENABLED:
from . import strings_typing # isort: skip
from . import strings_lowering # isort: skip
from strings_udf import ptxpath
from strings_udf._lib.cudf_jit_udf import to_string_view_array
from strings_udf._typing import str_view_arg_handler, string_view

# add an overload of MaskedType.__init__(string_view, bool)
cuda_lower(api.Masked, strings_typing.string_view, types.boolean)(
masked_lowering.masked_constructor
)

# add an overload of pack_return(string_view)
cuda_lower(api.pack_return, strings_typing.string_view)(
masked_lowering.pack_return_scalar_impl
)

_supported_masked_types |= {strings_typing.string_view}
utils.launch_arg_getters[dtype("O")] = to_string_view_array
utils.masked_array_types[dtype("O")] = string_view
utils.JIT_SUPPORTED_TYPES |= STRING_TYPES
utils.ptx_files.append(ptxpath)
utils.arg_handlers.append(str_view_arg_handler)
row_function.itemsizes[dtype("O")] = string_view.size_bytes

_STRING_UDFS_ENABLED = True
else:
del strings_udf

except ImportError as e:
# allow cuDF to work without strings_udf
pass

masked_typing.register_masked_constructor(_supported_masked_types)
3 changes: 3 additions & 0 deletions python/strings_udf/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ set(strings_udf_version 22.12.00)

include(../../fetch_rapids.cmake)

include(rapids-cuda)
rapids_cuda_init_architectures(strings-udf-python)

project(
strings-udf-python
VERSION ${strings_udf_version}
Expand Down
6 changes: 3 additions & 3 deletions python/strings_udf/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,7 @@ include(rapids-cpm)
include(rapids-cuda)
include(rapids-find)

rapids_cpm_init()

rapids_cuda_init_architectures(STRINGS_UDF)
rapids_cuda_init_architectures(strings-udf-cpp)

# Create a project so that we can enable CUDA architectures in this file.
project(
Expand All @@ -30,6 +28,8 @@ project(
LANGUAGES CUDA
)

rapids_cpm_init()

rapids_find_package(
CUDAToolkit REQUIRED
BUILD_EXPORT_SET strings-udf-exports
Expand Down
72 changes: 54 additions & 18 deletions python/strings_udf/strings_udf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,26 +12,62 @@

__version__ = _version.get_versions()["version"]

ENABLED = False


def compiler_from_ptx_file(path):
    """Parse a PTX file header and extract the CUDA version used to compile it.

    Here is an example PTX header that this function should parse:
        // Generated by NVIDIA NVVM Compiler
        //
        // Compiler Build ID: CL-30672275
        // Cuda compilation tools, release 11.5, V11.5.119
        // Based on NVVM 7

    Parameters
    ----------
    path : str or os.PathLike
        Location of the PTX file to inspect.

    Returns
    -------
    tuple of (int, int)
        The ``(major, minor)`` CUDA release named in the header,
        e.g. ``(11, 5)`` for the example above.

    Raises
    ------
    ValueError
        If the file has no "Cuda compilation tools, release X.Y" line.
    """
    # Read through a context manager so the handle is closed promptly
    # instead of being left for the garbage collector.
    with open(path) as ptx_file:
        contents = ptx_file.read()

    # Capture major and minor directly; any further components
    # (e.g. a patch level) are ignored rather than breaking the unpack.
    match = re.search(
        r"Cuda compilation tools, release (\d+)\.(\d+)", contents
    )
    if match is None:
        # Fail with an explicit message rather than an AttributeError
        # raised from calling .group() on None.
        raise ValueError(
            f"Could not find a CUDA release version in PTX file: {path}"
        )

    major, minor = match.groups()
    return int(major), int(minor)


# adapted from PTXCompiler
cp = subprocess.run([sys.executable, "-c", CMD], capture_output=True)

# must have a driver to proceed
if cp.returncode == 0:
# must have a driver to proceed
versions = [int(s) for s in cp.stdout.strip().split()]
driver_version = tuple(versions[:2])
runtime_version = tuple(versions[2:])

# Load the highest compute capability file available that is less than
# the current device's.
files = glob.glob(os.path.join(os.path.dirname(__file__), "shim_*.ptx"))
dev = cuda.get_current_device()
cc = "".join(str(x) for x in dev.compute_capability)
files = glob.glob(os.path.join(os.path.dirname(__file__), "shim_*.ptx"))
if len(files) == 0:
raise RuntimeError(
"This strings_udf installation is missing the necessary PTX "
"files. Please file an issue reporting this error and how you "
"installed cudf and strings_udf."
# CUDA enhanced compatibility not yet enabled
if driver_version >= runtime_version:
# Load the highest compute capability file available that is less than
# the current device's.
files = glob.glob(
os.path.join(os.path.dirname(__file__), "shim_*.ptx")
)
sms = [os.path.basename(f).rstrip(".ptx").lstrip("shim_") for f in files]
selected_sm = max(sm for sm in sms if sm < cc)
ptxpath = os.path.join(
os.path.dirname(__file__), f"shim_{selected_sm}.ptx"
)
dev = cuda.get_current_device()
cc = "".join(str(x) for x in dev.compute_capability)
files = glob.glob(
os.path.join(os.path.dirname(__file__), "shim_*.ptx")
)
if len(files) == 0:
raise RuntimeError(
"This strings_udf installation is missing the necessary PTX "
"files. Please file an issue reporting this error and how you "
"installed cudf and strings_udf."
)
sms = [
os.path.basename(f).rstrip(".ptx").lstrip("shim_") for f in files
]
selected_sm = max(sm for sm in sms if sm < cc)
ptxpath = os.path.join(
os.path.dirname(__file__), f"shim_{selected_sm}.ptx"
)

if driver_version >= compiler_from_ptx_file(ptxpath):
ENABLED = True
else:
del ptxpath
3 changes: 3 additions & 0 deletions python/strings_udf/strings_udf/tests/test_string_udfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
from strings_udf._lib.cudf_jit_udf import to_string_view_array
from strings_udf._typing import str_view_arg_handler, string_view

if not strings_udf.ENABLED:
pytest.skip("Strings UDF not enabled.", allow_module_level=True)


def get_kernel(func, dtype):
"""
Expand Down

0 comments on commit 8df6dbf

Please sign in to comment.