diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index d02825b73d1..dd4482375b9 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -133,5 +133,6 @@ jobs:
with:
build_type: pull-request
package-name: dask_cudf
- test-before: "RAPIDS_PY_WHEEL_NAME=cudf_cu11 rapids-download-wheels-from-s3 ./local-cudf-dep && python -m pip install --no-deps ./local-cudf-dep/cudf*.whl"
+ # Install the cudf we just built, and also test against latest dask/distributed/dask-cuda.
+ test-before: "RAPIDS_PY_WHEEL_NAME=cudf_cu11 rapids-download-wheels-from-s3 ./local-cudf-dep && python -m pip install --no-deps ./local-cudf-dep/cudf*.whl && pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.04"
test-unittest: "python -m pytest -v -n 8 ./python/dask_cudf/dask_cudf/tests"
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index c808e1475e6..a4bd14439b0 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -97,4 +97,6 @@ jobs:
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
package-name: dask_cudf
+ # Test against latest dask/distributed/dask-cuda.
+ test-before: "pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.04"
test-unittest: "python -m pytest -v -n 8 ./python/dask_cudf/dask_cudf/tests"
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 1eb2c508db9..8b46eb25950 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -161,7 +161,7 @@ repos:
^CHANGELOG.md$
)
- repo: https://github.com/rapidsai/dependency-file-generator
- rev: v1.4.0
+ rev: v1.5.1
hooks:
- id: rapids-dependency-file-generator
args: ["--clean"]
diff --git a/build.sh b/build.sh
index bee66d819b4..7cbd0fceb5a 100755
--- a/build.sh
+++ b/build.sh
@@ -300,8 +300,7 @@ if buildAll || hasArg libcudf; then
# Record build times
if [[ "$BUILD_REPORT_METRICS" == "ON" && -f "${LIB_BUILD_DIR}/.ninja_log" ]]; then
echo "Formatting build metrics"
- python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt xml > ${LIB_BUILD_DIR}/ninja_log.xml
- MSG="<p>"
+ MSG=""
# get some sccache stats after the compile
if [[ "$BUILD_REPORT_INCL_CACHE_STATS" == "ON" && -x "$(command -v sccache)" ]]; then
COMPILE_REQUESTS=$(sccache -s | grep "Compile requests \+ [0-9]\+$" | awk '{ print $NF }')
@@ -318,7 +317,9 @@ if buildAll || hasArg libcudf; then
BMR_DIR=${RAPIDS_ARTIFACTS_DIR:-"${LIB_BUILD_DIR}"}
echo "Metrics output dir: [$BMR_DIR]"
mkdir -p ${BMR_DIR}
- python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt html --msg "$MSG" > ${BMR_DIR}/ninja_log.html
+ MSG_OUTFILE="$(mktemp)"
+ echo "$MSG" > "${MSG_OUTFILE}"
+ python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt html --msg "${MSG_OUTFILE}" > ${BMR_DIR}/ninja_log.html
cp ${LIB_BUILD_DIR}/.ninja_log ${BMR_DIR}/ninja.log
fi
diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh
index b68c2bdbef6..bc27e7d76b0 100755
--- a/ci/build_cpp.sh
+++ b/ci/build_cpp.sh
@@ -14,29 +14,3 @@ rapids-logger "Begin cpp build"
rapids-mamba-retry mambabuild conda/recipes/libcudf
rapids-upload-conda-to-s3 cpp
-
-echo "++++++++++++++++++++++++++++++++++++++++++++"
-
-if [[ -d $RAPIDS_ARTIFACTS_DIR ]]; then
- ls -l ${RAPIDS_ARTIFACTS_DIR}
-fi
-
-echo "++++++++++++++++++++++++++++++++++++++++++++"
-
-FILE=${RAPIDS_ARTIFACTS_DIR}/ninja.log
-if [[ -f $FILE ]]; then
- echo -e "\x1B[33;1m\x1B[48;5;240m Ninja log for this build available at the following link \x1B[0m"
- UPLOAD_NAME=cpp_cuda${RAPIDS_CUDA_VERSION%%.*}_$(arch).ninja.log
- rapids-upload-to-s3 "${UPLOAD_NAME}" "${FILE}"
-fi
-
-echo "++++++++++++++++++++++++++++++++++++++++++++"
-
-FILE=${RAPIDS_ARTIFACTS_DIR}/ninja_log.html
-if [[ -f $FILE ]]; then
- echo -e "\x1B[33;1m\x1B[48;5;240m Build Metrics Report for this build available at the following link \x1B[0m"
- UPLOAD_NAME=cpp_cuda${RAPIDS_CUDA_VERSION%%.*}_$(arch).BuildMetricsReport.html
- rapids-upload-to-s3 "${UPLOAD_NAME}" "${FILE}"
-fi
-
-echo "++++++++++++++++++++++++++++++++++++++++++++"
diff --git a/ci/build_docs.sh b/ci/build_docs.sh
index 6daedb59733..4955fe08982 100755
--- a/ci/build_docs.sh
+++ b/ci/build_docs.sh
@@ -33,16 +33,25 @@ aws s3 cp s3://rapidsai-docs/librmm/${VERSION_NUMBER}/html/rmm.tag . || echo "Fa
doxygen Doxyfile
popd
-rapids-logger "Build Sphinx docs"
+rapids-logger "Build cuDF Sphinx docs"
pushd docs/cudf
sphinx-build -b dirhtml source _html
sphinx-build -b text source _text
popd
+rapids-logger "Build dask-cuDF Sphinx docs"
+pushd docs/dask_cudf
+sphinx-build -b dirhtml source _html
+sphinx-build -b text source _text
+popd
+
+
if [[ ${RAPIDS_BUILD_TYPE} == "branch" ]]; then
rapids-logger "Upload Docs to S3"
aws s3 sync --no-progress --delete cpp/doxygen/html "s3://rapidsai-docs/libcudf/${VERSION_NUMBER}/html"
aws s3 sync --no-progress --delete docs/cudf/_html "s3://rapidsai-docs/cudf/${VERSION_NUMBER}/html"
aws s3 sync --no-progress --delete docs/cudf/_text "s3://rapidsai-docs/cudf/${VERSION_NUMBER}/txt"
+ aws s3 sync --no-progress --delete docs/dask_cudf/_html "s3://rapidsai-docs/dask-cudf/${VERSION_NUMBER}/html"
+ aws s3 sync --no-progress --delete docs/dask_cudf/_text "s3://rapidsai-docs/dask-cudf/${VERSION_NUMBER}/txt"
fi
diff --git a/ci/release/apply_wheel_modifications.sh b/ci/release/apply_wheel_modifications.sh
index 9d9758f1f15..0c55c4b9141 100755
--- a/ci/release/apply_wheel_modifications.sh
+++ b/ci/release/apply_wheel_modifications.sh
@@ -6,12 +6,6 @@
VERSION=${1}
CUDA_SUFFIX=${2}
-# __init__.py versions
-sed -i "s/__version__ = .*/__version__ = \"${VERSION}\"/g" python/cudf/cudf/__init__.py
-sed -i "s/__version__ = .*/__version__ = \"${VERSION}\"/g" python/dask_cudf/dask_cudf/__init__.py
-sed -i "s/__version__ = .*/__version__ = \"${VERSION}\"/g" python/cudf_kafka/cudf_kafka/__init__.py
-sed -i "s/__version__ = .*/__version__ = \"${VERSION}\"/g" python/custreamz/custreamz/__init__.py
-
# pyproject.toml versions
sed -i "s/^version = .*/version = \"${VERSION}\"/g" python/cudf/pyproject.toml
sed -i "s/^version = .*/version = \"${VERSION}\"/g" python/dask_cudf/pyproject.toml
diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh
index e5c9ba0569f..dc5ea6015f9 100755
--- a/ci/release/update-version.sh
+++ b/ci/release/update-version.sh
@@ -24,6 +24,11 @@ NEXT_MINOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[2]}')
NEXT_SHORT_TAG=${NEXT_MAJOR}.${NEXT_MINOR}
NEXT_UCX_PY_VERSION="$(curl -sL https://version.gpuci.io/rapids/${NEXT_SHORT_TAG}).*"
+# Need to distutils-normalize the versions for some use cases
+CURRENT_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${CURRENT_SHORT_TAG}'))")
+NEXT_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_SHORT_TAG}'))")
+echo "current is ${CURRENT_SHORT_TAG_PEP440}, next is ${NEXT_SHORT_TAG_PEP440}"
+
echo "Preparing release $CURRENT_TAG => $NEXT_FULL_TAG"
# Inplace sed replace; workaround for Linux and Mac
@@ -70,9 +75,10 @@ sed_runner 's/release = .*/release = '"'${NEXT_FULL_TAG}'"'/g' docs/cudf/source/
# bump rmm & dask-cuda
for FILE in conda/environments/*.yaml dependencies.yaml; do
- sed_runner "s/dask-cuda=${CURRENT_SHORT_TAG}/dask-cuda=${NEXT_SHORT_TAG}/g" ${FILE};
- sed_runner "s/rmm=${CURRENT_SHORT_TAG}/rmm=${NEXT_SHORT_TAG}/g" ${FILE};
- sed_runner "s/rmm-cu11=${CURRENT_SHORT_TAG}/rmm-cu11=${NEXT_SHORT_TAG}/g" ${FILE};
+ sed_runner "s/dask-cuda==${CURRENT_SHORT_TAG_PEP440}/dask-cuda==${NEXT_SHORT_TAG_PEP440}/g" ${FILE};
+ sed_runner "s/rmm==${CURRENT_SHORT_TAG_PEP440}/rmm==${NEXT_SHORT_TAG_PEP440}/g" ${FILE};
+ sed_runner "s/cudf==${CURRENT_SHORT_TAG_PEP440}/cudf==${NEXT_SHORT_TAG_PEP440}/g" ${FILE};
+ sed_runner "s/cudf_kafka==${CURRENT_SHORT_TAG_PEP440}/cudf_kafka==${NEXT_SHORT_TAG_PEP440}/g" ${FILE};
done
# Doxyfile update
@@ -86,13 +92,11 @@ sed_runner "s/cudf=${CURRENT_SHORT_TAG}/cudf=${NEXT_SHORT_TAG}/g" README.md
sed_runner "s/CUDF_TAG branch-${CURRENT_SHORT_TAG}/CUDF_TAG branch-${NEXT_SHORT_TAG}/" cpp/examples/basic/CMakeLists.txt
sed_runner "s/CUDF_TAG branch-${CURRENT_SHORT_TAG}/CUDF_TAG branch-${NEXT_SHORT_TAG}/" cpp/examples/strings/CMakeLists.txt
-# Need to distutils-normalize the original version
-NEXT_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_SHORT_TAG}'))")
-
# Dependency versions in pyproject.toml
sed_runner "s/rmm==.*\",/rmm==${NEXT_SHORT_TAG_PEP440}.*\",/g" python/cudf/pyproject.toml
sed_runner "s/cudf==.*\",/cudf==${NEXT_SHORT_TAG_PEP440}.*\",/g" python/dask_cudf/pyproject.toml
for FILE in .github/workflows/*.yaml; do
sed_runner "/shared-action-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}"
+ sed_runner "s/dask-cuda.git@branch-[^\"\s]\+/dask-cuda.git@branch-${NEXT_SHORT_TAG}/g" ${FILE};
done
diff --git a/ci/test_cpp.sh b/ci/test_cpp.sh
index bd7a82afbea..846b90c78e5 100755
--- a/ci/test_cpp.sh
+++ b/ci/test_cpp.sh
@@ -8,35 +8,34 @@ trap "EXITCODE=1" ERR
set +e
# Get library for finding incorrect default stream usage.
-STREAM_IDENTIFY_LIB="${CONDA_PREFIX}/lib/libcudf_identify_stream_usage.so"
+STREAM_IDENTIFY_LIB_MODE_CUDF="${CONDA_PREFIX}/lib/libcudf_identify_stream_usage_mode_cudf.so"
+STREAM_IDENTIFY_LIB_MODE_TESTING="${CONDA_PREFIX}/lib/libcudf_identify_stream_usage_mode_testing.so"
-echo "STREAM_IDENTIFY_LIB=${STREAM_IDENTIFY_LIB}"
+echo "STREAM_IDENTIFY_LIB=${STREAM_IDENTIFY_LIB_MODE_CUDF}"
# Run libcudf and libcudf_kafka gtests from libcudf-tests package
rapids-logger "Run gtests"
-# TODO: exit code handling is too verbose. Find a cleaner solution.
-
-for gt in "$CONDA_PREFIX"/bin/gtests/{libcudf,libcudf_kafka}/* ; do
- test_name=$(basename ${gt})
- echo "Running gtest $test_name"
-
- # TODO: This strategy for using the stream lib will need to change when we
- # switch to invoking ctest. For one, we will want to set the test
- # properties to use the lib (which means that the decision will be made at
- # CMake-configure time instead of runtime). We may also need to leverage
- # something like gtest_discover_tests to be able to filter on the
- # underlying test names.
- if [[ ${test_name} == "SPAN_TEST" ]]; then
- # This one test is specifically designed to test using a thrust device
- # vector, so we expect and allow it to include default stream usage.
- gtest_filter="SpanTest.CanConstructFromDeviceContainers"
- GTEST_CUDF_STREAM_MODE="custom" LD_PRELOAD=${STREAM_IDENTIFY_LIB} ${gt} --gtest_output=xml:${RAPIDS_TESTS_DIR} --gtest_filter="-${gtest_filter}" && \
- ${gt} --gtest_output=xml:${RAPIDS_TESTS_DIR} --gtest_filter="${gtest_filter}"
- else
- GTEST_CUDF_STREAM_MODE="custom" LD_PRELOAD=${STREAM_IDENTIFY_LIB} ${gt} --gtest_output=xml:${RAPIDS_TESTS_DIR}
- fi
-done
+cd $CONDA_PREFIX/bin/gtests/libcudf/
+export GTEST_CUDF_STREAM_MODE="new_cudf_default"
+export GTEST_OUTPUT=xml:${RAPIDS_TESTS_DIR}/
+export LD_PRELOAD=${STREAM_IDENTIFY_LIB_MODE_CUDF}
+
+ctest -E SPAN_TEST -j20 --output-on-failure
+
+# This one test is specifically designed to test using a thrust device vector,
+# so we expect and allow it to include default stream usage.
+_allowlist_filter="SpanTest.CanConstructFromDeviceContainers"
+GTEST_FILTER="-${_allowlist_filter}" ctest -R SPAN_TEST -VV
+LD_PRELOAD= GTEST_CUDF_STREAM_MODE=default GTEST_FILTER="${_allowlist_filter}" ctest -R SPAN_TEST -VV
+
+SUITEERROR=$?
+
+if (( ${SUITEERROR} == 0 )); then
+ cd $CONDA_PREFIX/bin/gtests/libcudf_kafka/
+ ctest -j20 --output-on-failure
+ SUITEERROR=$?
+fi
rapids-logger "Test script exiting with value: $EXITCODE"
exit ${EXITCODE}
diff --git a/ci/test_cpp_memcheck.sh b/ci/test_cpp_memcheck.sh
index db9ce143d51..0e85268cb72 100755
--- a/ci/test_cpp_memcheck.sh
+++ b/ci/test_cpp_memcheck.sh
@@ -11,7 +11,7 @@ set +e
rapids-logger "Memcheck gtests with rmm_mode=cuda"
export GTEST_CUDF_RMM_MODE=cuda
COMPUTE_SANITIZER_CMD="compute-sanitizer --tool memcheck"
-for gt in "$CONDA_PREFIX"/bin/gtests/libcudf/* ; do
+for gt in "$CONDA_PREFIX"/bin/gtests/libcudf/*_TEST ; do
test_name=$(basename ${gt})
if [[ "$test_name" == "ERROR_TEST" ]] || [[ "$test_name" == "STREAM_IDENTIFICATION_TEST" ]]; then
continue
diff --git a/ci/test_java.sh b/ci/test_java.sh
index f905aaa1178..e4df62501cc 100755
--- a/ci/test_java.sh
+++ b/ci/test_java.sh
@@ -38,7 +38,7 @@ set +e
rapids-logger "Run Java tests"
pushd java
-mvn test -B -DCUDF_JNI_ARROW_STATIC=OFF -DCUDF_JNI_ENABLE_PROFILING=OFF
+mvn test -B -DCUDF_JNI_ENABLE_PROFILING=OFF
popd
rapids-logger "Test script exiting with value: $EXITCODE"
diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index 66d375910d4..890cb199419 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -21,7 +21,7 @@ dependencies:
- cupy>=9.5.0,<12.0.0a0
- cxx-compiler
- cython>=0.29,<0.30
-- dask-cuda=23.04.*
+- dask-cuda==23.4.*
- dask>=2023.1.1
- distributed>=2023.1.1
- dlpack>=0.5,<0.6.0a0
@@ -30,18 +30,21 @@ dependencies:
- fmt>=9.1.0,<10
- fsspec>=0.6.0
- gcc_linux-64=11.*
+- gmock==1.10.0.*
+- gtest==1.10.0.*
- hypothesis
- ipython
-- libarrow=10
+- libarrow==10.0.1.*
- librdkafka=1.7.0
-- librmm=23.04.*
+- librmm==23.4.*
- mimesis>=4.1.0
- moto>=4.0.8
+- msgpack-python
- myst-nb
- nbsphinx
- ninja
- notebook
-- numba>=0.56.2
+- numba>=0.56.4,<0.57
- numpy>=1.21
- numpydoc
- nvcc_linux-64=11.8
@@ -53,7 +56,7 @@ dependencies:
- pre-commit
- protobuf>=4.21.6,<4.22
- ptxcompiler
-- pyarrow=10
+- pyarrow==10.0.1.*
- pydata-sphinx-theme
- pyorc
- pytest
@@ -61,11 +64,11 @@ dependencies:
- pytest-cases
- pytest-cov
- pytest-xdist
-- python-confluent-kafka=1.7.0
+- python-confluent-kafka==1.7.0
- python-snappy>=0.6.0
- python>=3.8,<3.11
- pytorch<1.12.0
-- rmm=23.04.*
+- rmm==23.4.*
- s3fs>=2022.3.0
- scikit-build>=0.13.1
- scipy
diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml
index 6b23c8953d3..bbd9961320a 100644
--- a/conda/recipes/cudf/meta.yaml
+++ b/conda/recipes/cudf/meta.yaml
@@ -52,7 +52,7 @@ requirements:
- cython >=0.29,<0.30
- scikit-build >=0.13.1
- setuptools
- - numba >=0.56.2
+ - numba >=0.56.4,<0.57
- dlpack >=0.5,<0.6.0a0
- pyarrow =10
- libcudf ={{ version }}
@@ -64,7 +64,7 @@ requirements:
- typing_extensions
- pandas >=1.3,<1.6.0dev0
- cupy >=9.5.0,<12.0.0a0
- - numba >=0.56.2
+ - numba >=0.56.4,<0.57
- numpy >=1.21
- {{ pin_compatible('pyarrow', max_pin='x.x.x') }}
- libcudf {{ version }}
diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml
index 770a234b56e..469c25fb673 100644
--- a/conda/recipes/libcudf/meta.yaml
+++ b/conda/recipes/libcudf/meta.yaml
@@ -51,6 +51,8 @@ requirements:
- librdkafka {{ librdkafka_version }}
- fmt {{ fmt_version }}
- spdlog {{ spdlog_version }}
+ - gtest {{ gtest_version }}
+ - gmock {{ gtest_version }}
outputs:
- name: libcudf
@@ -71,10 +73,14 @@ outputs:
- librmm ={{ minor_version }}
- libarrow {{ libarrow_version }}
- dlpack {{ dlpack_version }}
+ - gtest {{ gtest_version }}
+ - gmock {{ gtest_version }}
test:
commands:
- test -f $PREFIX/lib/libcudf.so
- test -f $PREFIX/lib/libcudftestutil.a
+ - test -f $PREFIX/lib/libcudf_identify_stream_usage_mode_cudf.so
+ - test -f $PREFIX/lib/libcudf_identify_stream_usage_mode_testing.so
- test -f $PREFIX/include/cudf/aggregation.hpp
- test -f $PREFIX/include/cudf/ast/detail/expression_parser.hpp
- test -f $PREFIX/include/cudf/ast/detail/operators.hpp
@@ -86,6 +92,7 @@ outputs:
- test -f $PREFIX/include/cudf/concatenate.hpp
- test -f $PREFIX/include/cudf/copying.hpp
- test -f $PREFIX/include/cudf/datetime.hpp
+ - test -f $PREFIX/include/cudf/timezone.hpp
- test -f $PREFIX/include/cudf/detail/aggregation/aggregation.hpp
- test -f $PREFIX/include/cudf/detail/aggregation/result_cache.hpp
- test -f $PREFIX/include/cudf/detail/binaryop.hpp
@@ -107,7 +114,6 @@ outputs:
- test -f $PREFIX/include/cudf/detail/nvtx/nvtx3.hpp
- test -f $PREFIX/include/cudf/detail/nvtx/ranges.hpp
- test -f $PREFIX/include/cudf/detail/quantiles.hpp
- - test -f $PREFIX/include/cudf/detail/reduction_functions.hpp
- test -f $PREFIX/include/cudf/detail/repeat.hpp
- test -f $PREFIX/include/cudf/detail/replace.hpp
- test -f $PREFIX/include/cudf/detail/reshape.hpp
@@ -116,12 +122,13 @@ outputs:
- test -f $PREFIX/include/cudf/detail/scan.hpp
- test -f $PREFIX/include/cudf/detail/scatter.hpp
- test -f $PREFIX/include/cudf/detail/search.hpp
- - test -f $PREFIX/include/cudf/detail/segmented_reduction_functions.hpp
- test -f $PREFIX/include/cudf/detail/sequence.hpp
- test -f $PREFIX/include/cudf/detail/sorting.hpp
- test -f $PREFIX/include/cudf/detail/stream_compaction.hpp
- test -f $PREFIX/include/cudf/detail/structs/utilities.hpp
- test -f $PREFIX/include/cudf/detail/tdigest/tdigest.hpp
+ - test -f $PREFIX/include/cudf/detail/timezone.cuh
+ - test -f $PREFIX/include/cudf/detail/timezone.hpp
- test -f $PREFIX/include/cudf/detail/transform.hpp
- test -f $PREFIX/include/cudf/detail/transpose.hpp
- test -f $PREFIX/include/cudf/detail/unary.hpp
@@ -209,6 +216,8 @@ outputs:
- test -f $PREFIX/include/cudf/partitioning.hpp
- test -f $PREFIX/include/cudf/quantiles.hpp
- test -f $PREFIX/include/cudf/reduction.hpp
+ - test -f $PREFIX/include/cudf/reduction/detail/reduction_functions.hpp
+ - test -f $PREFIX/include/cudf/reduction/detail/segmented_reduction_functions.hpp
- test -f $PREFIX/include/cudf/replace.hpp
- test -f $PREFIX/include/cudf/reshape.hpp
- test -f $PREFIX/include/cudf/rolling.hpp
@@ -294,11 +303,12 @@ outputs:
- test -f $PREFIX/include/cudf_test/column_wrapper.hpp
- test -f $PREFIX/include/cudf_test/cudf_gtest.hpp
- test -f $PREFIX/include/cudf_test/cxxopts.hpp
+ - test -f $PREFIX/include/cudf_test/default_stream.hpp
- test -f $PREFIX/include/cudf_test/detail/column_utilities.hpp
- test -f $PREFIX/include/cudf_test/file_utilities.hpp
- test -f $PREFIX/include/cudf_test/io_metadata_utilities.hpp
- test -f $PREFIX/include/cudf_test/iterator_utilities.hpp
- - test -f $PREFIX/include/cudf_test/stream_checking_resource_adapter.hpp
+ - test -f $PREFIX/include/cudf_test/stream_checking_resource_adaptor.hpp
- test -f $PREFIX/include/cudf_test/table_utilities.hpp
- test -f $PREFIX/include/cudf_test/timestamp_utilities.cuh
- test -f $PREFIX/include/cudf_test/type_list_utilities.hpp
@@ -376,8 +386,6 @@ outputs:
- {{ pin_subpackage('libcudf', exact=True) }}
- {{ pin_subpackage('libcudf_kafka', exact=True) }}
- cudatoolkit {{ cuda_spec }}
- - gtest {{ gtest_version }}
- - gmock {{ gtest_version }}
about:
home: https://rapids.ai/
license: Apache-2.0
diff --git a/conda/recipes/libcudf/post-link.sh b/conda/recipes/libcudf/post-link.sh
index 64e0b1ad305..8ae2349f791 100644
--- a/conda/recipes/libcudf/post-link.sh
+++ b/conda/recipes/libcudf/post-link.sh
@@ -1,6 +1,6 @@
#!/bin/bash
-# Copyright (c) 2022, NVIDIA CORPORATION.
+# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Only add the license notice to libcudf and not our examples / tests
if [[ "$PKG_NAME" == "libcudf" ]]; then
- cat ./nvlink.txt >> $PREFIX/.messages.txt
+ cat ./nvcomp.txt >> $PREFIX/.messages.txt
fi
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index a261049d3f0..127df03c54d 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -73,7 +73,7 @@ option(CUDA_WARNINGS_AS_ERRORS "Enable -Werror=all-warnings for all CUDA compila
option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF)
set(DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL ON)
-if(${CUDA_STATIC_RUNTIME})
+if(CUDA_STATIC_RUNTIME OR NOT BUILD_SHARED_LIBS)
set(DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL OFF)
endif()
option(
@@ -369,7 +369,7 @@ add_library(
src/io/orc/stripe_data.cu
src/io/orc/stripe_enc.cu
src/io/orc/stripe_init.cu
- src/io/orc/timezone.cpp
+ src/datetime/timezone.cpp
src/io/orc/writer_impl.cu
src/io/parquet/compact_protocol_reader.cpp
src/io/parquet/compact_protocol_writer.cpp
@@ -464,6 +464,7 @@ add_library(
src/reductions/segmented/max.cu
src/reductions/segmented/mean.cu
src/reductions/segmented/min.cu
+ src/reductions/segmented/nunique.cu
src/reductions/segmented/product.cu
src/reductions/segmented/reductions.cpp
src/reductions/segmented/std.cu
@@ -547,6 +548,7 @@ add_library(
src/strings/regex/regex_program.cpp
src/strings/repeat_strings.cu
src/strings/replace/backref_re.cu
+ src/strings/replace/multi.cu
src/strings/replace/multi_re.cu
src/strings/replace/replace.cu
src/strings/replace/replace_re.cu
@@ -739,6 +741,35 @@ add_library(cudf::cudf ALIAS cudf)
# * build cudftestutil ----------------------------------------------------------------------------
if(CUDF_BUILD_TESTUTIL)
+ add_library(
+ cudftest_default_stream
+ # When compiled as a dynamic library, this allows us to use LD_PRELOAD injection of symbols. We
+ # currently leverage this for stream-related library validation and may make use of it for
+ # other similar features in the future.
+ tests/utilities/default_stream.cpp
+ )
+ set_target_properties(
+ cudftest_default_stream
+ PROPERTIES BUILD_RPATH "\$ORIGIN"
+ INSTALL_RPATH "\$ORIGIN"
+ # set target compile options
+ CXX_STANDARD 17
+ CXX_STANDARD_REQUIRED ON
+ CUDA_STANDARD 17
+ CUDA_STANDARD_REQUIRED ON
+ POSITION_INDEPENDENT_CODE ON
+ INTERFACE_POSITION_INDEPENDENT_CODE ON
+ )
+ target_link_libraries(
+ cudftest_default_stream
+ PUBLIC cudf
+ PRIVATE $<TARGET_NAME_IF_EXISTS:conda_env>
+ )
+
+ add_library(cudf::cudftest_default_stream ALIAS cudftest_default_stream)
+
+ # Needs to be static so that we support static builds of gtest, which isn't compiled with fPIC
+ # enabled and therefore can't be embedded into shared libraries.
add_library(
cudftestutil STATIC
tests/io/metadata_utilities.cpp
@@ -768,7 +799,7 @@ if(CUDF_BUILD_TESTUTIL)
target_link_libraries(
cudftestutil
- PUBLIC GTest::gmock GTest::gtest Threads::Threads cudf
+ PUBLIC GTest::gmock GTest::gtest Threads::Threads cudf cudftest_default_stream
PRIVATE $<TARGET_NAME_IF_EXISTS:conda_env>
)
@@ -790,18 +821,27 @@ if(CUDF_BUILD_STREAMS_TEST_UTIL)
)
endif()
- # Libraries for stream-related testing.
- add_library(cudf_identify_stream_usage SHARED tests/utilities/identify_stream_usage.cpp)
+ # Libraries for stream-related testing. We build the library twice, once with STREAM_MODE_TESTING
+ # on and once with it off. Each test is then configured via ctest to use the appropriate library,
+ # depending on whether it has been updated to expose public stream APIs.
+ foreach(_mode cudf testing)
+ set(_tgt "cudf_identify_stream_usage_mode_${_mode}")
+ add_library(${_tgt} SHARED tests/utilities/identify_stream_usage.cpp)
+
+ set_target_properties(
+ ${_tgt}
+ PROPERTIES # set target compile options
+ CXX_STANDARD 17
+ CXX_STANDARD_REQUIRED ON
+ POSITION_INDEPENDENT_CODE ON
+ )
+ target_link_libraries(${_tgt} PUBLIC CUDA::cudart rmm::rmm)
+ add_library(cudf::${_tgt} ALIAS ${_tgt})
- set_target_properties(
- cudf_identify_stream_usage
- PROPERTIES # set target compile options
- CXX_STANDARD 17
- CXX_STANDARD_REQUIRED ON
- POSITION_INDEPENDENT_CODE ON
- )
- target_link_libraries(cudf_identify_stream_usage PUBLIC CUDA::cudart rmm::rmm)
- add_library(cudf::cudf_identify_stream_usage ALIAS cudf_identify_stream_usage)
+ if("${_mode}" STREQUAL "testing")
+ target_compile_definitions(${_tgt} PUBLIC STREAM_MODE_TESTING)
+ endif()
+ endforeach()
endif()
# ##################################################################################################
@@ -851,33 +891,23 @@ install(
EXPORT cudf-exports
)
-install(DIRECTORY ${CUDF_SOURCE_DIR}/include/cudf ${CUDF_SOURCE_DIR}/include/cudf_test
- ${CUDF_SOURCE_DIR}/include/nvtext DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
-)
-
-if(CUDF_BUILD_TESTUTIL)
+set(_components_export_string)
+if(TARGET cudftestutil)
install(
- TARGETS cudftestutil
+ TARGETS cudftest_default_stream cudftestutil
DESTINATION ${lib_dir}
EXPORT cudf-testing-exports
)
-
- install(
- EXPORT cudf-testing-exports
- FILE cudf-testing-targets.cmake
- NAMESPACE cudf::
- DESTINATION "${lib_dir}/cmake/cudf"
- )
-
- include("${rapids-cmake-dir}/export/write_dependencies.cmake")
- rapids_export_write_dependencies(
- INSTALL cudf-testing-exports
- "${PROJECT_BINARY_DIR}/rapids-cmake/cudf/export/cudf-testing-dependencies.cmake"
- )
+ set(_components_export_string COMPONENTS testing COMPONENTS_EXPORT_SET cudf-testing-exports)
endif()
+install(DIRECTORY ${CUDF_SOURCE_DIR}/include/cudf ${CUDF_SOURCE_DIR}/include/cudf_test
+ ${CUDF_SOURCE_DIR}/include/nvtext DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
+)
+
if(CUDF_BUILD_STREAMS_TEST_UTIL)
- install(TARGETS cudf_identify_stream_usage DESTINATION ${lib_dir})
+ install(TARGETS cudf_identify_stream_usage_mode_cudf DESTINATION ${lib_dir})
+ install(TARGETS cudf_identify_stream_usage_mode_testing DESTINATION ${lib_dir})
endif()
set(doc_string
@@ -936,12 +966,6 @@ string(
[=[
if(testing IN_LIST cudf_FIND_COMPONENTS)
enable_language(CUDA)
- if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/cudf-testing-dependencies.cmake")
- include("${CMAKE_CURRENT_LIST_DIR}/cudf-testing-dependencies.cmake")
- endif()
- if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/cudf-testing-targets.cmake")
- include("${CMAKE_CURRENT_LIST_DIR}/cudf-testing-targets.cmake")
- endif()
endif()
]=]
)
@@ -949,8 +973,8 @@ string(APPEND install_code_string "${common_code_string}")
rapids_export(
INSTALL cudf
- EXPORT_SET cudf-exports
- GLOBAL_TARGETS cudf
+ EXPORT_SET cudf-exports ${_components_export_string}
+ GLOBAL_TARGETS cudf cudftestutil
NAMESPACE cudf::
DOCUMENTATION doc_string
FINAL_CODE_BLOCK install_code_string
@@ -973,23 +997,13 @@ string(APPEND build_code_string "${common_code_string}")
rapids_export(
BUILD cudf
- EXPORT_SET cudf-exports
- GLOBAL_TARGETS cudf
+ EXPORT_SET cudf-exports ${_components_export_string}
+ GLOBAL_TARGETS cudf cudftestutil
NAMESPACE cudf::
DOCUMENTATION doc_string
FINAL_CODE_BLOCK build_code_string
)
-if(CUDF_BUILD_TESTUTIL)
- export(
- EXPORT cudf-testing-exports
- FILE ${CUDF_BINARY_DIR}/cudf-testing-targets.cmake
- NAMESPACE cudf::
- )
- rapids_export_write_dependencies(
- BUILD cudf-testing-exports "${CUDF_BINARY_DIR}/cudf-testing-dependencies.cmake"
- )
-endif()
# ##################################################################################################
# * make documentation ----------------------------------------------------------------------------
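
The two `cudf_identify_stream_usage_mode_*` targets above come from compiling one source file twice,
with the `STREAM_MODE_TESTING` definition toggling which default stream the preloaded library
validates against. A minimal sketch of the pattern, assuming a hypothetical `validate_stream` hook
(the real `identify_stream_usage.cpp` interposes on actual stream-taking entry points instead):

```cpp
#include <cstdio>
#include <cstdlib>

// Built as a shared library and injected with LD_PRELOAD, this definition
// interposes on the identically named symbol in the test process.
extern "C" void validate_stream(void const* stream, void const* expected)
{
  if (stream != expected) {
#ifdef STREAM_MODE_TESTING
    std::fprintf(stderr, "unexpected stream: expected the testing default stream\n");
#else
    std::fprintf(stderr, "unexpected stream: expected cudf's default stream\n");
#endif
    std::abort();
  }
}
```
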
diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
index cc0b642a337..b9c15e244de 100644
--- a/cpp/benchmarks/CMakeLists.txt
+++ b/cpp/benchmarks/CMakeLists.txt
@@ -150,6 +150,7 @@ ConfigureBench(APPLY_BOOLEAN_MASK_BENCH stream_compaction/apply_boolean_mask.cpp
# * stream_compaction benchmark -------------------------------------------------------------------
ConfigureNVBench(
STREAM_COMPACTION_NVBENCH stream_compaction/distinct.cpp stream_compaction/unique.cpp
+ stream_compaction/unique_count.cpp
)
# ##################################################################################################
@@ -191,7 +192,7 @@ ConfigureBench(
)
ConfigureNVBench(
REDUCTION_NVBENCH reduction/distinct_count.cpp reduction/rank.cpp reduction/scan_structs.cpp
- reduction/segment_reduce.cu
+ reduction/segmented_reduce.cpp
)
# ##################################################################################################
diff --git a/cpp/benchmarks/common/generate_input.cu b/cpp/benchmarks/common/generate_input.cu
index edb19b7b0ca..762e9640d12 100644
--- a/cpp/benchmarks/common/generate_input.cu
+++ b/cpp/benchmarks/common/generate_input.cu
@@ -31,6 +31,7 @@
#include
#include
+#include
#include
#include
@@ -429,8 +430,12 @@ std::unique_ptr create_random_column(data_profile const& profile,
null_mask.begin());
}
- auto [result_bitmask, null_count] = cudf::detail::valid_if(
- null_mask.begin(), null_mask.end(), thrust::identity{}, cudf::get_default_stream());
+ auto [result_bitmask, null_count] =
+ cudf::detail::valid_if(null_mask.begin(),
+ null_mask.end(),
+ thrust::identity{},
+ cudf::get_default_stream(),
+ rmm::mr::get_current_device_resource());
return std::make_unique(
dtype,
@@ -508,8 +513,12 @@ std::unique_ptr create_random_utf8_string_column(data_profile cons
thrust::make_zip_iterator(offsets.begin(), offsets.begin() + 1),
num_rows,
string_generator{chars.data(), engine});
- auto [result_bitmask, null_count] = cudf::detail::valid_if(
- null_mask.begin(), null_mask.end() - 1, thrust::identity{}, cudf::get_default_stream());
+ auto [result_bitmask, null_count] =
+ cudf::detail::valid_if(null_mask.begin(),
+ null_mask.end() - 1,
+ thrust::identity{},
+ cudf::get_default_stream(),
+ rmm::mr::get_current_device_resource());
return cudf::make_strings_column(
num_rows,
std::move(offsets),
@@ -542,7 +551,8 @@ std::unique_ptr create_random_column(data_profi
sample_indices,
cudf::out_of_bounds_policy::DONT_CHECK,
cudf::detail::negative_index_policy::NOT_ALLOWED,
- cudf::get_default_stream());
+ cudf::get_default_stream(),
+ rmm::mr::get_current_device_resource());
return std::move(str_table->release()[0]);
}
@@ -626,8 +636,11 @@ std::unique_ptr create_random_column(data_profi
auto [null_mask, null_count] = [&]() {
if (profile.get_null_probability().has_value()) {
auto valids = valid_dist(engine, num_rows);
- return cudf::detail::valid_if(
- valids.begin(), valids.end(), thrust::identity{}, cudf::get_default_stream());
+ return cudf::detail::valid_if(valids.begin(),
+ valids.end(),
+ thrust::identity{},
+ cudf::get_default_stream(),
+ rmm::mr::get_current_device_resource());
}
return std::pair{};
}();
@@ -710,9 +723,12 @@ std::unique_ptr create_random_column(data_profile
auto offsets_column = std::make_unique(
cudf::data_type{cudf::type_id::INT32}, num_rows + 1, offsets.release());
- auto [null_mask, null_count] = cudf::detail::valid_if(
- valids.begin(), valids.end(), thrust::identity{}, cudf::get_default_stream());
- list_column = cudf::make_lists_column(
+ auto [null_mask, null_count] = cudf::detail::valid_if(valids.begin(),
+ valids.end(),
+ thrust::identity{},
+ cudf::get_default_stream(),
+ rmm::mr::get_current_device_resource());
+ list_column = cudf::make_lists_column(
num_rows,
std::move(offsets_column),
std::move(current_child_column),
@@ -838,7 +854,8 @@ std::pair create_random_null_mask(
return cudf::detail::valid_if(thrust::make_counting_iterator(0),
thrust::make_counting_iterator(size),
bool_generator{seed, 1.0 - *null_probability},
- cudf::get_default_stream());
+ cudf::get_default_stream(),
+ rmm::mr::get_current_device_resource());
}
}
diff --git a/cpp/benchmarks/io/json/nested_json.cpp b/cpp/benchmarks/io/json/nested_json.cpp
index 416cf403671..d03f36ca81f 100644
--- a/cpp/benchmarks/io/json/nested_json.cpp
+++ b/cpp/benchmarks/io/json/nested_json.cpp
@@ -171,7 +171,8 @@ void BM_NESTED_JSON(nvbench::state& state)
cudf::io::json::detail::device_parse_nested_json(
cudf::device_span{input->data(), static_cast(input->size())},
default_options,
- cudf::get_default_stream());
+ cudf::get_default_stream(),
+ rmm::mr::get_current_device_resource());
});
auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
@@ -202,7 +203,7 @@ void BM_NESTED_JSON_DEPTH(nvbench::state& state)
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
// Allocate device-side temporary storage & run algorithm
cudf::io::json::detail::device_parse_nested_json(
- input, default_options, cudf::get_default_stream());
+ input, default_options, cudf::get_default_stream(), rmm::mr::get_current_device_resource());
});
auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
diff --git a/cpp/benchmarks/iterator/iterator.cu b/cpp/benchmarks/iterator/iterator.cu
index 73060200d00..1b1cf9b7e9d 100644
--- a/cpp/benchmarks/iterator/iterator.cu
+++ b/cpp/benchmarks/iterator/iterator.cu
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -140,8 +140,8 @@ void BM_iterator(benchmark::State& state)
cudf::column_view hasnull_F = wrap_hasnull_F;
// Initialize dev_result to false
- auto dev_result =
- cudf::detail::make_zeroed_device_uvector_sync(1, cudf::get_default_stream());
+ auto dev_result = cudf::detail::make_zeroed_device_uvector_sync(
+ 1, cudf::get_default_stream(), rmm::mr::get_current_device_resource());
for (auto _ : state) {
cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0
if (cub_or_thrust) {
@@ -210,7 +210,7 @@ void BM_pair_iterator(benchmark::State& state)
// Initialize dev_result to false
auto dev_result = cudf::detail::make_zeroed_device_uvector_sync>(
- 1, cudf::get_default_stream());
+ 1, cudf::get_default_stream(), rmm::mr::get_current_device_resource());
for (auto _ : state) {
cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0
if (cub_or_thrust) {
diff --git a/cpp/benchmarks/join/join_common.hpp b/cpp/benchmarks/join/join_common.hpp
index e37a4ca1193..70036a95377 100644
--- a/cpp/benchmarks/join/join_common.hpp
+++ b/cpp/benchmarks/join/join_common.hpp
@@ -104,8 +104,11 @@ void BM_join(state_type& state, Join JoinFunc)
// roughly 75% nulls
auto validity =
thrust::make_transform_iterator(thrust::make_counting_iterator(0), null75_generator{});
- return cudf::detail::valid_if(
- validity, validity + size, thrust::identity{}, cudf::get_default_stream())
+ return cudf::detail::valid_if(validity,
+ validity + size,
+ thrust::identity{},
+ cudf::get_default_stream(),
+ rmm::mr::get_current_device_resource())
.first;
};
diff --git a/cpp/benchmarks/reduction/segment_reduce.cu b/cpp/benchmarks/reduction/segmented_reduce.cpp
similarity index 58%
rename from cpp/benchmarks/reduction/segment_reduce.cu
rename to cpp/benchmarks/reduction/segmented_reduce.cpp
index 127b3598dae..590a014ad76 100644
--- a/cpp/benchmarks/reduction/segment_reduce.cu
+++ b/cpp/benchmarks/reduction/segmented_reduce.cpp
@@ -20,17 +20,15 @@
#include
#include
-#include
+#include
#include
+#include
#include
#include
#include
-#include
-
#include
-#include
bool constexpr is_boolean_output_agg(cudf::segmented_reduce_aggregation::Kind kind)
{
@@ -38,8 +36,15 @@ bool constexpr is_boolean_output_agg(cudf::segmented_reduce_aggregation::Kind ki
kind == cudf::segmented_reduce_aggregation::ANY;
}
+bool constexpr is_float_output_agg(cudf::segmented_reduce_aggregation::Kind kind)
+{
+ return kind == cudf::segmented_reduce_aggregation::MEAN ||
+ kind == cudf::segmented_reduce_aggregation::VARIANCE ||
+ kind == cudf::segmented_reduce_aggregation::STD;
+}
+
template
-std::unique_ptr make_simple_aggregation()
+std::unique_ptr make_reduce_aggregation()
{
switch (kind) {
case cudf::segmented_reduce_aggregation::SUM:
@@ -54,12 +59,22 @@ std::unique_ptr make_simple_aggregation()
return cudf::make_all_aggregation();
case cudf::segmented_reduce_aggregation::ANY:
return cudf::make_any_aggregation();
- default: CUDF_FAIL("Unsupported simple segmented aggregation");
+ case cudf::segmented_reduce_aggregation::SUM_OF_SQUARES:
+ return cudf::make_sum_of_squares_aggregation();
+ case cudf::segmented_reduce_aggregation::MEAN:
+ return cudf::make_mean_aggregation();
+ case cudf::segmented_reduce_aggregation::VARIANCE:
+ return cudf::make_variance_aggregation();
+ case cudf::segmented_reduce_aggregation::STD:
+ return cudf::make_std_aggregation();
+ case cudf::segmented_reduce_aggregation::NUNIQUE:
+ return cudf::make_nunique_aggregation();
+ default: CUDF_FAIL("Unsupported segmented reduce aggregation in this benchmark");
}
}
template
-std::pair, thrust::device_vector> make_test_data(
+std::pair, std::unique_ptr> make_test_data(
nvbench::state& state)
{
auto const column_size{cudf::size_type(state.get_int64("column_size"))};
@@ -72,28 +87,30 @@ std::pair, thrust::device_vector>
dtype, distribution_id::UNIFORM, 0, 100);
auto input = create_random_column(dtype, row_count{column_size}, profile);
- auto offset_it = cudf::detail::make_counting_transform_iterator(
- 0, [column_size, segment_length] __device__(auto i) {
- return column_size < i * segment_length ? column_size : i * segment_length;
- });
-
- thrust::device_vector d_offsets(offset_it, offset_it + num_segments + 1);
-
- return std::pair(std::move(input), d_offsets);
+ auto offsets = cudf::sequence(num_segments + 1,
+ cudf::numeric_scalar(0),
+ cudf::numeric_scalar(segment_length));
+ return std::pair(std::move(input), std::move(offsets));
}
template
-void BM_Simple_Segmented_Reduction(nvbench::state& state,
- nvbench::type_list>)
+void BM_Segmented_Reduction(nvbench::state& state,
+ nvbench::type_list>)
{
auto const column_size{cudf::size_type(state.get_int64("column_size"))};
auto const num_segments{cudf::size_type(state.get_int64("num_segments"))};
auto [input, offsets] = make_test_data(state);
- auto agg = make_simple_aggregation();
+ auto agg = make_reduce_aggregation();
- auto output_type = is_boolean_output_agg(kind) ? cudf::data_type{cudf::type_id::BOOL8}
- : cudf::data_type{cudf::type_to_id()};
+ auto const output_type = [] {
+ if (is_boolean_output_agg(kind)) { return cudf::data_type{cudf::type_id::BOOL8}; }
+ if (is_float_output_agg(kind)) { return cudf::data_type{cudf::type_id::FLOAT64}; }
+ if (kind == cudf::segmented_reduce_aggregation::NUNIQUE) {
+ return cudf::data_type{cudf::type_to_id()};
+ }
+ return cudf::data_type{cudf::type_to_id()};
+ }();
state.add_element_count(column_size);
state.add_global_memory_reads(column_size);
@@ -103,8 +120,10 @@ void BM_Simple_Segmented_Reduction(nvbench::state& state,
state.add_global_memory_writes(num_segments);
}
- auto const input_view = input->view();
- auto const offset_span = cudf::device_span{offsets};
+ auto const input_view = input->view();
+ auto const offsets_view = offsets->view();
+ auto const offset_span = cudf::device_span{
+ offsets_view.template data(), static_cast(offsets_view.size())};
state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
state.exec(
@@ -115,13 +134,17 @@ void BM_Simple_Segmented_Reduction(nvbench::state& state,
using Types = nvbench::type_list;
// Skip benchmarking MAX/ANY since they are covered by MIN/ALL respectively.
+// Also VARIANCE includes STD calculation.
using AggKinds = nvbench::enum_type_list;
+ cudf::aggregation::ALL,
+ cudf::aggregation::MEAN,
+ cudf::aggregation::VARIANCE,
+ cudf::aggregation::NUNIQUE>;
-NVBENCH_BENCH_TYPES(BM_Simple_Segmented_Reduction, NVBENCH_TYPE_AXES(Types, AggKinds))
- .set_name("segmented_reduction_simple")
+NVBENCH_BENCH_TYPES(BM_Segmented_Reduction, NVBENCH_TYPE_AXES(Types, AggKinds))
+ .set_name("segmented_reduction")
.set_type_axes_names({"DataType", "AggregationKinds"})
.add_int64_axis("column_size", {100'000, 1'000'000, 10'000'000, 100'000'000})
.add_int64_axis("num_segments", {1'000, 10'000, 100'000});
diff --git a/cpp/benchmarks/stream_compaction/unique_count.cpp b/cpp/benchmarks/stream_compaction/unique_count.cpp
new file mode 100644
index 00000000000..f8319e0385c
--- /dev/null
+++ b/cpp/benchmarks/stream_compaction/unique_count.cpp
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+
+template
+void nvbench_unique_count(nvbench::state& state, nvbench::type_list)
+{
+ auto const num_rows = static_cast(state.get_int64("NumRows"));
+ auto const nulls = state.get_float64("NullProbability");
+
+ data_profile profile = data_profile_builder().cardinality(0).null_probability(nulls).distribution(
+ cudf::type_to_id(), distribution_id::UNIFORM, 0, num_rows / 100);
+
+ auto source_column = create_random_column(cudf::type_to_id(), row_count{num_rows}, profile);
+ auto sorted_table = cudf::sort(cudf::table_view({source_column->view()}));
+
+ auto input = sorted_table->view();
+
+ state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
+ state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+ cudf::unique_count(input, cudf::null_equality::EQUAL);
+ });
+}
+
+using data_type = nvbench::type_list;
+
+NVBENCH_BENCH_TYPES(nvbench_unique_count, NVBENCH_TYPE_AXES(data_type))
+ .set_name("unique_count")
+ .set_type_axes_names({"Type"})
+ .add_int64_axis("NumRows", {10'000, 100'000, 1'000'000, 10'000'000})
+ .add_float64_axis("NullProbability", {0.0, 0.1});
diff --git a/cpp/benchmarks/string/replace.cpp b/cpp/benchmarks/string/replace.cpp
index b25af14ec2a..cb570020f0e 100644
--- a/cpp/benchmarks/string/replace.cpp
+++ b/cpp/benchmarks/string/replace.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -69,7 +69,7 @@ static void generate_bench_args(benchmark::internal::Benchmark* b)
int const row_mult = 8;
int const min_rowlen = 1 << 5;
int const max_rowlen = 1 << 13;
- int const len_mult = 4;
+ int const len_mult = 2;
generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult);
}
diff --git a/cpp/cmake/config.json b/cpp/cmake/config.json
index f7d7b001856..a65afe9e58d 100644
--- a/cpp/cmake/config.json
+++ b/cpp/cmake/config.json
@@ -13,7 +13,11 @@
}
},
"ConfigureTest": {
- "flags": ["TEST_NAME", "TEST_SRC"]
+ "flags": ["TEST_NAME", "TEST_SRC"],
+ "kwargs": {
+ "GPUS": 1,
+ "PERCENT": 1
+ }
},
"ConfigureBench": {
"flags": ["BENCH_NAME", "BENCH_SRC"]
diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake
index 943b89238e0..a716995182d 100644
--- a/cpp/cmake/thirdparty/get_arrow.cmake
+++ b/cpp/cmake/thirdparty/get_arrow.cmake
@@ -379,6 +379,8 @@ endfunction()
if(NOT DEFINED CUDF_VERSION_Arrow)
set(CUDF_VERSION_Arrow
+ # This version must be kept in sync with the libarrow version pinned for builds in
+ # dependencies.yaml.
10.0.1
CACHE STRING "The version of Arrow to find (or build)"
)
diff --git a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md
index 8cd4f8c6d27..91c3dccfdc6 100644
--- a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md
+++ b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md
@@ -121,8 +121,8 @@ recommend watching Sean Parent's [C++ Seasoning talk](https://www.youtube.com/wa
and we try to follow his rules: "No raw loops. No raw pointers. No raw synchronization primitives."
* Prefer algorithms from STL and Thrust to raw loops.
- * Prefer libcudf and RMM [owning data structures and views](#libcudf-data-structures) to raw pointers
- and raw memory allocation.
+ * Prefer libcudf and RMM [owning data structures and views](#libcudf-data-structures) to raw
+ pointers and raw memory allocation.
* libcudf doesn't have a lot of CPU-thread concurrency, but there is some. And currently libcudf
does use raw synchronization primitives. So we should revisit Parent's third rule and improve
here.
@@ -146,8 +146,8 @@ The following guidelines apply to organizing `#include` lines.
* Separate groups by a blank line.
* Order the groups from "nearest" to "farthest". In other words, local includes, then includes
from other RAPIDS libraries, then includes from related libraries, like ``, then
- includes from dependencies installed with cuDF, and then standard headers (for example ``,
- ``).
+ includes from dependencies installed with cuDF, and then standard headers (for example
+ ``, ``).
* Use `<>` instead of `""` unless the header is in the same directory as the source file.
* Tools like `clangd` often auto-insert includes when they can, but they usually get the grouping
and brackets wrong.
@@ -269,6 +269,15 @@ An *immutable*, non-owning view of a table.
A *mutable*, non-owning view of a table.
+## cudf::size_type
+
+The `cudf::size_type` is the type used for the number of elements in a column, offsets to elements
+within a column, indices to address specific elements, segments for subsets of column elements, etc.
+It is equivalent to a signed, 32-bit integer type and therefore has a maximum value of 2147483647.
+Some APIs also accept negative index values, and those functions support a minimum value of
+-2147483648. This fundamental type also constrains output values: not just column sizes but
+element counts are limited by it as well.
+
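
A standalone snippet (not part of this diff) illustrating the limits just described:

```cpp
#include <cudf/types.hpp>

#include <cstdint>
#include <cstdio>
#include <limits>
#include <type_traits>

static_assert(std::is_same_v<cudf::size_type, int32_t>,
              "cudf::size_type is a signed 32-bit integer");

int main()
{
  // Maximum number of elements addressable in a single column.
  std::printf("%d\n", std::numeric_limits<cudf::size_type>::max());  // 2147483647
  // Minimum value for APIs that accept negative indices.
  std::printf("%d\n", std::numeric_limits<cudf::size_type>::min());  // -2147483648
}
```
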
## Spans
libcudf provides `span` classes that mimic C++20 `std::span`, which is a lightweight
@@ -336,8 +345,8 @@ auto s1 = static_cast(s.get());
```
### Passing to device
-Each scalar type, except `list_scalar`, has a corresponding non-owning device view class which allows
-access to the value and its validity from the device. This can be obtained using the function
+Each scalar type, except `list_scalar`, has a corresponding non-owning device view class which
+allows access to the value and its validity from the device. This can be obtained using the function
`get_scalar_device_view(ScalarType s)`. Note that a device view is not provided for a base scalar
object, only for the derived typed scalar class objects.
@@ -348,68 +357,84 @@ data, a specialized device view for list columns can be constructed via
# libcudf Policies and Design Principles
-`libcudf` is designed to provide thread-safe, single-GPU accelerated algorithm primitives for solving a wide variety of problems that arise in data science.
-APIs are written to execute on the default GPU, which can be controlled by the caller through standard CUDA device APIs or environment variables like `CUDA_VISIBLE_DEVICES`.
-Our goal is to enable diverse use cases like Spark or Pandas to benefit from the performance of GPUs, and libcudf relies on these higher-level layers like Spark or Dask to orchestrate multi-GPU tasks.
+`libcudf` is designed to provide thread-safe, single-GPU accelerated algorithm primitives for
+solving a wide variety of problems that arise in data science. APIs are written to execute on the
+default GPU, which can be controlled by the caller through standard CUDA device APIs or environment
+variables like `CUDA_VISIBLE_DEVICES`. Our goal is to enable diverse use cases like Spark or Pandas
+to benefit from the performance of GPUs, and libcudf relies on these higher-level layers like Spark
+or Dask to orchestrate multi-GPU tasks.
-To best satisfy these use-cases, libcudf prioritizes performance and flexibility, which sometimes may come at the cost of convenience.
-While we welcome users to use libcudf directly, we design with the expectation that most users will be consuming libcudf through higher-level layers like Spark or cuDF Python that handle some of details that direct users of libcudf must handle on their own.
-We document these policies and the reasons behind them here.
+To best satisfy these use-cases, libcudf prioritizes performance and flexibility, which sometimes
+may come at the cost of convenience. While we welcome users to use libcudf directly, we design with
+the expectation that most users will be consuming libcudf through higher-level layers like Spark or
+cuDF Python that handle some of the details that direct users of libcudf must handle on their own. We
+document these policies and the reasons behind them here.
## libcudf does not introspect data
libcudf APIs generally do not perform deep introspection and validation of input data.
There are numerous reasons for this:
1. It violates the single responsibility principle: validation is separate from execution.
-2. Since libcudf data structures store data on the GPU, any validation incurs _at minimum_ the overhead of a kernel launch, and may in general be prohibitively expensive.
+2. Since libcudf data structures store data on the GPU, any validation incurs _at minimum_ the
+ overhead of a kernel launch, and may in general be prohibitively expensive.
3. API promises around data introspection often significantly complicate implementation.
Users are therefore responsible for passing valid data into such APIs.
_Note that this policy does not mean that libcudf performs no validation whatsoever_.
libcudf APIs should still perform any validation that does not require introspection.
-To give some idea of what should or should not be validated, here are (non-exhaustive) lists of examples.
+To give some idea of what should or should not be validated, here are (non-exhaustive) lists of
+examples.
**Things that libcudf should validate**:
-- Input column/table sizes or dtypes
+- Input column/table sizes or data types
**Things that libcudf should not validate**:
- Integer overflow
-- Ensuring that outputs will not exceed the 2GB size limit for a given set of inputs
+- Ensuring that outputs will not exceed the [2GB size](#cudfsize_type) limit for a given set of
+ inputs
## libcudf expects nested types to have sanitized null masks
-Various libcudf APIs accepting columns of nested dtypes (such as `LIST` or `STRUCT`) may assume that these columns have been sanitized.
-In this context, sanitization refers to ensuring that the null elements in a column with a nested dtype are compatible with the elements of nested columns.
+Various libcudf APIs accepting columns of nested data types (such as `LIST` or `STRUCT`) may assume
+that these columns have been sanitized. In this context, sanitization refers to ensuring that the
+null elements in a column with a nested dtype are compatible with the elements of nested columns.
Specifically:
-- Null elements of list columns should also be empty. The starting offset of a null element should be equal to the ending offset.
+- Null elements of list columns should also be empty. The starting offset of a null element should
+ be equal to the ending offset.
- Null elements of struct columns should also be null elements in the underlying structs.
-- For compound columns, nulls should only be present at the level of the parent column. Child columns should not contain nulls.
+- For compound columns, nulls should only be present at the level of the parent column. Child
+ columns should not contain nulls.
- Slice operations on nested columns do not propagate offsets to child columns.
-libcudf APIs _should_ promise to never return "dirty" columns, i.e. columns containing unsanitized data.
-Therefore, the only problem is if users construct input columns that are not correctly sanitized and then pass those into libcudf APIs.
+libcudf APIs _should_ promise to never return "dirty" columns, i.e. columns containing unsanitized
+data. Therefore, the only problem is if users construct input columns that are not correctly
+sanitized and then pass those into libcudf APIs.
## Treat libcudf APIs as if they were asynchronous
libcudf APIs called on the host do not guarantee that the stream is synchronized before returning.
-Work in libcudf occurs on `cudf::get_default_stream().value`, which defaults to the CUDA default stream (stream 0).
-Note that the stream 0 behavior differs if [per-thread default stream is enabled](https://docs.nvidia.com/cuda/cuda-runtime-api/stream-sync-behavior.html) via `CUDF_USE_PER_THREAD_DEFAULT_STREAM`.
-Any data provided to or returned by libcudf that uses a separate non-blocking stream requires synchronization with the default libcudf stream to ensure stream safety.
+Work in libcudf occurs on `cudf::get_default_stream().value`, which defaults to the CUDA default
+stream (stream 0). Note that the stream 0 behavior differs if [per-thread default stream is
+enabled](https://docs.nvidia.com/cuda/cuda-runtime-api/stream-sync-behavior.html) via
+`CUDF_USE_PER_THREAD_DEFAULT_STREAM`. Any data provided to or returned by libcudf that uses a
+separate non-blocking stream requires synchronization with the default libcudf stream to ensure
+stream safety.
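
For example, a caller that must hand results to work on another non-blocking stream could do the
following (a minimal sketch; `concatenate` stands in for any libcudf call):

```cpp
#include <cudf/concatenate.hpp>
#include <cudf/table/table.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/utilities/default_stream.hpp>

#include <memory>
#include <vector>

std::unique_ptr<cudf::table> copy_then_sync(cudf::table_view const& tv)
{
  std::vector<cudf::table_view> views{tv, tv};
  auto result = cudf::concatenate(views);
  // The call above may return before the device work completes; synchronize
  // on the default libcudf stream before using `result` on another stream.
  cudf::get_default_stream().synchronize();
  return result;
}
```
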
## libcudf generally does not make ordering guarantees
-Functions like merge or groupby in libcudf make no guarantees about the order of entries in the output.
-Promising deterministic ordering is not, in general, conducive to fast parallel algorithms.
+Functions like merge or groupby in libcudf make no guarantees about the order of entries in the
+output. Promising deterministic ordering is not, in general, conducive to fast parallel algorithms.
Calling code is responsible for performing sorts after the fact if sorted outputs are needed.
## libcudf does not promise specific exception messages
-libcudf documents the exceptions that will be thrown by an API for different kinds of invalid inputs.
-The types of those exceptions (e.g. `cudf::logic_error`) are part of the public API.
-However, the explanatory string returned by the `what` method of those exceptions is not part of the API and is subject to change.
-Calling code should not rely on the contents of libcudf error messages to determine the nature of the error.
-For information on the types of exceptions that libcudf throws under different circumstances, see the [section on error handling](#errors).
+libcudf documents the exceptions that will be thrown by an API for different kinds of invalid
+inputs. The types of those exceptions (e.g. `cudf::logic_error`) are part of the public API.
+However, the explanatory string returned by the `what` method of those exceptions is not part of the
+API and is subject to change. Calling code should not rely on the contents of libcudf error
+messages to determine the nature of the error. For information on the types of exceptions that
+libcudf throws under different circumstances, see the [section on error handling](#errors).
# libcudf API and Implementation
@@ -468,14 +493,6 @@ asynchrony if and when we add an asynchronous API to libcudf.
**Note:** `cudaDeviceSynchronize()` should *never* be used.
This limits the ability to do any multi-stream/multi-threaded work with libcudf APIs.
- ### NVTX Ranges
-
-In order to aid in performance optimization and debugging, all compute intensive libcudf functions
-should have a corresponding NVTX range. In libcudf, we have a convenience macro `CUDF_FUNC_RANGE()`
-that will automatically annotate the lifetime of the enclosing function and use the function's name
-as the name of the NVTX range. For more information about NVTX, see
-[here](https://github.com/NVIDIA/NVTX/tree/dev/c).
-
### Stream Creation
There may be times in implementing libcudf features where it would be advantageous to use streams
@@ -487,8 +504,8 @@ should avoid creating streams (even if it is slightly less efficient). It is a g
## Memory Allocation
-Device [memory resources](#rmmdevice_memory_resource) are used in libcudf to abstract and control how device
-memory is allocated.
+Device [memory resources](#rmmdevice_memory_resource) are used in libcudf to abstract and control
+how device memory is allocated.
### Output Memory
@@ -508,6 +525,12 @@ std::unique_ptr returns_output_memory(
void does_not_allocate_output_memory(...);
```
+This rule automatically applies to all detail APIs that allocate memory. Any detail API may be
+called by any public API, and therefore could be allocating memory that is returned to the user.
+To support such use cases, all detail APIs that allocate memory should accept an `mr`
+parameter. Callers are responsible for either passing through a provided `mr` or passing
+`rmm::mr::get_current_device_resource()` as needed.
+
### Temporary Memory
Not all memory allocated within a libcudf API is returned to the caller. Often algorithms must
@@ -528,7 +551,7 @@ rmm::device_buffer some_function(
### Memory Management
libcudf code generally eschews raw pointers and direct memory allocation. Use RMM classes built to
-use `device_memory_resource`(*)s for device memory allocation with automated lifetime management.
+use `device_memory_resource`s for device memory allocation with automated lifetime management.
#### rmm::device_buffer
Allocates a specified number of bytes of untyped, uninitialized device memory using a
@@ -610,6 +633,32 @@ rmm::mr::device_memory_resource * mr = new my_custom_resource{...};
rmm::device_uvector v2{100, s, mr};
```
+## Default Parameters
+
+While public libcudf APIs are free to include default function parameters, detail functions should
+not. Default memory resource parameters make it easy for developers to accidentally allocate memory
+using the incorrect resource. Avoiding default memory resources forces developers to consider each
+memory allocation carefully.
+
+While streams are not currently exposed in libcudf's public API, we plan to expose them
+eventually. As a result, the same reasoning that applies to memory resources also applies to
+streams. Public APIs default to using `cudf::get_default_stream()`. However, including the same
+default in detail APIs opens the door for developers to forget to forward a user-provided stream
+that was passed to a public API. Forcing every detail API call to pass a stream explicitly is
+intended to prevent such mistakes.
+
+The memory resource (and eventually, the stream) is the final parameter of essentially all public
+APIs. For consistency, the same is true throughout libcudf's internals. Therefore, a consequence
+of not allowing default streams or memory resources is that no parameters in detail APIs may have
+defaults.
+
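+As an illustration (reusing the hypothetical `detail::do_thing` from the sketch above), a
+defaulted stream in a detail API lets the following bug compile silently:
+
+```c++
+// Suppose detail::do_thing (wrongly) declared defaults for its stream and mr parameters.
+// Then this compiles, quietly running on the default stream:
+std::unique_ptr<column> do_thing_on(column_view const& input, rmm::cuda_stream_view user_stream)
+{
+  return detail::do_thing(input);  // oops: user_stream was never forwarded
+}
+```
+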
+## NVTX Ranges
+
+In order to aid in performance optimization and debugging, all compute-intensive libcudf functions
+should have a corresponding NVTX range. libcudf has a convenience macro `CUDF_FUNC_RANGE()` that
+automatically annotates the lifetime of the enclosing function and uses the function's name as
+the name of the NVTX range. For more information about NVTX, see
+[here](https://github.com/NVIDIA/NVTX/tree/dev/c).
+
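+For example, a compute-intensive function would open with the macro (a sketch; the function name
+is hypothetical):
+
+```c++
+std::unique_ptr<column> my_expensive_function(column_view const& input,
+                                              rmm::cuda_stream_view stream,
+                                              rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();  // opens an NVTX range named after this function for its lifetime
+  // ... kernel launches and allocations ...
+}
+```
+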
## Input/Output Style
The preferred style for how inputs are passed in and outputs are returned is the following:
@@ -746,8 +795,8 @@ where compile time was a problem is in types used to store indices, which can be
The "Indexalator", or index-normalizing iterator (`include/cudf/detail/indexalator.cuh`), can be
used for index types (integers) without requiring a type-specific instance. It can be used for any
iterator interface for reading an array of integer values of type `int8`, `int16`, `int32`,
-`int64`, `uint8`, `uint16`, `uint32`, or `uint64`. Reading specific elements always return a
-`cudf::size_type` integer.
+`int64`, `uint8`, `uint16`, `uint32`, or `uint64`. Reading specific elements always returns a
+[`cudf::size_type`](#cudfsize_type) integer.
Use the `indexalator_factory` to create an appropriate input iterator from a column_view. Example
input iterator usage:
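
A sketch along these lines (assuming the factory's `make_input_iterator` interface and an
in-scope `stream`):

```c++
auto begin = cudf::detail::indexalator_factory::make_input_iterator(gather_map);
auto end   = begin + gather_map.size();
// Values read through the iterator are normalized to cudf::size_type.
auto it    = thrust::find(rmm::exec_policy(stream), begin, end, cudf::size_type{7});
```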
@@ -879,9 +928,9 @@ CUDF_FAIL("This code path should not be reached.");
### CUDA Error Checking
-Use the `CUDF_CUDA_TRY` macro to check for the successful completion of CUDA runtime API functions. This
-macro throws a `cudf::cuda_error` exception if the CUDA API return value is not `cudaSuccess`. The
-thrown exception includes a description of the CUDA error code in its `what()` message.
+Use the `CUDF_CUDA_TRY` macro to check for the successful completion of CUDA runtime API functions.
+This macro throws a `cudf::cuda_error` exception if the CUDA API return value is not `cudaSuccess`.
+The thrown exception includes a description of the CUDA error code in its `what()` message.
Example:
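
A representative sketch (assuming `dst`, `src`, `num_bytes`, and `stream` are in scope):

```c++
// Throws cudf::cuda_error if the asynchronous copy fails to launch.
CUDF_CUDA_TRY(cudaMemcpyAsync(dst, src, num_bytes, cudaMemcpyDefault, stream.value()));
```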
@@ -1104,8 +1153,8 @@ For list columns, the parent column's type is `LIST` and contains no data, but i
the number of lists in the column, and its null mask represents the validity of each list element.
The parent has two children.
-1. A non-nullable column of `INT32` elements that indicates the offset to the beginning of each list
- in a dense column of elements.
+1. A non-nullable column of [`size_type`](#cudfsize_type) elements that indicates the offset to the
+ beginning of each list in a dense column of elements.
2. A column containing the actual data and optional null mask for all elements of all the lists
packed together.
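
For example, a list column `[[1, 2], [], [3]]` stores an offsets child `[0, 2, 2, 3]` and a data
child `[1, 2, 3]`.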
@@ -1152,7 +1201,7 @@ a non-nullable column of `INT8` data. The parent column's type is `STRING` and c
but its size represents the number of strings in the column, and its null mask represents the
validity of each string. To summarize, the strings column children are:
-1. A non-nullable column of `INT32` elements that indicates the offset to the beginning of each
+1. A non-nullable column of [`size_type`](#cudfsize_type) elements that indicates the offset to the beginning of each
string in a dense column of all characters.
2. A non-nullable column of `INT8` elements of all the characters across all the strings packed
together.
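
For example, a strings column `["this", "is"]` stores an offsets child `[0, 4, 6]` and a
characters child holding `thisis`.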
@@ -1264,9 +1313,9 @@ libcudf provides view types for nested column types as well as for the data elem
`cudf::strings_column_view` is a view of a strings column, like `cudf::column_view` is a view of
any `cudf::column`. `cudf::string_view` is a view of a single string, and therefore
`cudf::string_view` is the data type of a `cudf::column` of type `STRING` just like `int32_t` is the
-data type for a `cudf::column` of type `INT32`. As it's name implies, this is a read-only object
-instance that points to device memory inside the strings column. It's lifespan is the same (or less)
-as the column it views.
+data type for a `cudf::column` of type `INT32`. As its name implies, this is a
+read-only object instance that points to device memory inside the strings column. Its lifespan is
+the same as (or shorter than) that of the column it views.
Use the `column_device_view::element` method to access an individual row element. Like any other
column, do not call `element()` on a row that is null.
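
A sketch of device-side access (assuming a STRING column `input` and an in-scope `stream`):

```c++
auto d_col = cudf::column_device_view::create(input, stream);
thrust::for_each_n(rmm::exec_policy(stream),
                   thrust::make_counting_iterator<cudf::size_type>(0),
                   input.size(),
                   [d = *d_col] __device__(cudf::size_type row) {
                     if (d.is_valid(row)) {
                       auto const v = d.element<cudf::string_view>(row);
                       // ... use v ...
                     }
                   });
```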
diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp
index e269d4d2e13..b688bf3d445 100644
--- a/cpp/include/cudf/detail/aggregation/aggregation.hpp
+++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp
@@ -535,7 +535,9 @@ class argmin_aggregation final : public rolling_aggregation, public groupby_aggr
/**
* @brief Derived class for specifying a nunique aggregation
*/
-class nunique_aggregation final : public groupby_aggregation, public reduce_aggregation {
+class nunique_aggregation final : public groupby_aggregation,
+ public reduce_aggregation,
+ public segmented_reduce_aggregation {
public:
nunique_aggregation(null_policy null_handling)
: aggregation{NUNIQUE}, _null_handling{null_handling}
diff --git a/cpp/include/cudf/detail/binaryop.hpp b/cpp/include/cudf/detail/binaryop.hpp
index ffd8be971ab..e5609568d10 100644
--- a/cpp/include/cudf/detail/binaryop.hpp
+++ b/cpp/include/cudf/detail/binaryop.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2018-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -30,13 +30,12 @@ namespace detail {
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr<column> binary_operation(
- column_view const& lhs,
- column_view const& rhs,
- std::string const& ptx,
- data_type output_type,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> binary_operation(column_view const& lhs,
+ column_view const& rhs,
+ std::string const& ptx,
+ data_type output_type,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::binary_operation(scalar const&, column_view const&, binary_operator,
@@ -44,13 +43,12 @@ std::unique_ptr<column> binary_operation(
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr<column> binary_operation(
- scalar const& lhs,
- column_view const& rhs,
- binary_operator op,
- data_type output_type,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> binary_operation(scalar const& lhs,
+ column_view const& rhs,
+ binary_operator op,
+ data_type output_type,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::binary_operation(column_view const&, scalar const&, binary_operator,
@@ -58,13 +56,12 @@ std::unique_ptr<column> binary_operation(
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr<column> binary_operation(
- column_view const& lhs,
- scalar const& rhs,
- binary_operator op,
- data_type output_type,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> binary_operation(column_view const& lhs,
+ scalar const& rhs,
+ binary_operator op,
+ data_type output_type,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::binary_operation(column_view const&, column_view const&,
@@ -72,12 +69,11 @@ std::unique_ptr<column> binary_operation(
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr<column> binary_operation(
- column_view const& lhs,
- column_view const& rhs,
- binary_operator op,
- data_type output_type,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> binary_operation(column_view const& lhs,
+ column_view const& rhs,
+ binary_operator op,
+ data_type output_type,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
} // namespace detail
} // namespace cudf
diff --git a/cpp/include/cudf/detail/calendrical_month_sequence.cuh b/cpp/include/cudf/detail/calendrical_month_sequence.cuh
index 9dba0ba8961..59fb6758973 100644
--- a/cpp/include/cudf/detail/calendrical_month_sequence.cuh
+++ b/cpp/include/cudf/detail/calendrical_month_sequence.cuh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -38,7 +38,7 @@ struct calendrical_month_sequence_functor {
scalar const& input,
size_type months,
rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+ rmm::mr::device_memory_resource* mr)
{
// Return empty column if n = 0
if (n == 0) return cudf::make_empty_column(input.type());
diff --git a/cpp/include/cudf/detail/concatenate.hpp b/cpp/include/cudf/detail/concatenate.hpp
index 925029597a6..442814bc4fd 100644
--- a/cpp/include/cudf/detail/concatenate.hpp
+++ b/cpp/include/cudf/detail/concatenate.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -33,20 +33,18 @@ namespace detail {
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr<column> concatenate(
-  host_span<column_view const> columns_to_concat,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> concatenate(host_span<column_view const> columns_to_concat,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::concatenate(host_span<table_view const>,rmm::mr::device_memory_resource*)
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr<table> concatenate(
-  host_span<table_view const> tables_to_concat,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<table> concatenate(host_span<table_view const> tables_to_concat,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
} // namespace detail
} // namespace cudf
diff --git a/cpp/include/cudf/detail/copy.hpp b/cpp/include/cudf/detail/copy.hpp
index 8c3f315284d..83395f8fa90 100644
--- a/cpp/include/cudf/detail/copy.hpp
+++ b/cpp/include/cudf/detail/copy.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2018-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -144,12 +144,11 @@ std::vector<table_view> split(table_view const& input,
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr<column> shift(
- column_view const& input,
- size_type offset,
- scalar const& fill_value,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> shift(column_view const& input,
+ size_type offset,
+ scalar const& fill_value,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @brief Performs segmented shifts for specified values.
@@ -184,24 +183,22 @@ std::unique_ptr<column> shift(
*
* @note If `offset == 0`, a copy of @p segmented_values is returned.
*/
-std::unique_ptr<column> segmented_shift(
-  column_view const& segmented_values,
-  device_span<size_type const> segment_offsets,
- size_type offset,
- scalar const& fill_value,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> segmented_shift(column_view const& segmented_values,
+                                        device_span<size_type const> segment_offsets,
+ size_type offset,
+ scalar const& fill_value,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::contiguous_split
*
* @param stream CUDA stream used for device memory operations and kernel launches.
**/
-std::vector<packed_table> contiguous_split(
-  cudf::table_view const& input,
-  std::vector<size_type> const& splits,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::vector<packed_table> contiguous_split(cudf::table_view const& input,
+                                           std::vector<size_type> const& splits,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::pack
@@ -210,7 +207,7 @@ std::vector<packed_table> contiguous_split(
**/
packed_columns pack(cudf::table_view const& input,
rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::allocate_like(column_view const&, size_type, mask_allocation_policy,
@@ -218,12 +215,11 @@ packed_columns pack(cudf::table_view const& input,
*
* @param[in] stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr<column> allocate_like(
- column_view const& input,
- size_type size,
- mask_allocation_policy mask_alloc = mask_allocation_policy::RETAIN,
- rmm::cuda_stream_view stream = cudf::get_default_stream(),
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> allocate_like(column_view const& input,
+ size_type size,
+ mask_allocation_policy mask_alloc,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::copy_if_else( column_view const&, column_view const&,
@@ -231,12 +227,11 @@ std::unique_ptr<column> allocate_like(
*
* @param[in] stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr<column> copy_if_else(
- column_view const& lhs,
- column_view const& rhs,
- column_view const& boolean_mask,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> copy_if_else(column_view const& lhs,
+ column_view const& rhs,
+ column_view const& boolean_mask,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::copy_if_else( scalar const&, column_view const&,
@@ -244,12 +239,11 @@ std::unique_ptr<column> copy_if_else(
*
* @param[in] stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr<column> copy_if_else(
- scalar const& lhs,
- column_view const& rhs,
- column_view const& boolean_mask,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> copy_if_else(scalar const& lhs,
+ column_view const& rhs,
+ column_view const& boolean_mask,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::copy_if_else( column_view const&, scalar const&,
@@ -257,12 +251,11 @@ std::unique_ptr<column> copy_if_else(
*
* @param[in] stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr<column> copy_if_else(
- column_view const& lhs,
- scalar const& rhs,
- column_view const& boolean_mask,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> copy_if_else(column_view const& lhs,
+ scalar const& rhs,
+ column_view const& boolean_mask,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::copy_if_else( scalar const&, scalar const&,
@@ -270,36 +263,33 @@ std::unique_ptr<column> copy_if_else(
*
* @param[in] stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr<column> copy_if_else(
- scalar const& lhs,
- scalar const& rhs,
- column_view const& boolean_mask,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> copy_if_else(scalar const& lhs,
+ scalar const& rhs,
+ column_view const& boolean_mask,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::sample
*
* @param[in] stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr<table> sample(
- table_view const& input,
- size_type const n,
- sample_with_replacement replacement = sample_with_replacement::FALSE,
- int64_t const seed = 0,
- rmm::cuda_stream_view stream = cudf::get_default_stream(),
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<table> sample(table_view const& input,
+ size_type const n,
+ sample_with_replacement replacement,
+ int64_t const seed,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::get_element
*
* @param[in] stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr<scalar> get_element(
- column_view const& input,
- size_type index,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<scalar> get_element(column_view const& input,
+ size_type index,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::has_nonempty_nulls
@@ -320,10 +310,9 @@ bool may_have_nonempty_nulls(column_view const& input, rmm::cuda_stream_view str
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr<column> purge_nonempty_nulls(
- column_view const& input,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> purge_nonempty_nulls(column_view const& input,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
} // namespace detail
} // namespace cudf
diff --git a/cpp/include/cudf/detail/copy_if.cuh b/cpp/include/cudf/detail/copy_if.cuh
index 6eea72a1e0d..2870a891f87 100644
--- a/cpp/include/cudf/detail/copy_if.cuh
+++ b/cpp/include/cudf/detail/copy_if.cuh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -229,14 +229,13 @@ struct DeviceType()>> {
template <typename Filter, int block_size>
struct scatter_gather_functor {
  template <typename T, std::enable_if_t<cudf::is_fixed_width<T>()>* = nullptr>
-  std::unique_ptr<cudf::column> operator()(
- cudf::column_view const& input,
- cudf::size_type const& output_size,
- cudf::size_type const* block_offsets,
- Filter filter,
- cudf::size_type per_thread,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+  std::unique_ptr<cudf::column> operator()(cudf::column_view const& input,
+ cudf::size_type const& output_size,
+ cudf::size_type const* block_offsets,
+ Filter filter,
+ cudf::size_type per_thread,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr)
{
auto output_column = cudf::detail::allocate_like(
input, output_size, cudf::mask_allocation_policy::RETAIN, stream, mr);
@@ -277,14 +276,13 @@ struct scatter_gather_functor {
  template <typename T, std::enable_if_t<!cudf::is_fixed_width<T>() and !cudf::is_fixed_point<T>()>* = nullptr>
-  std::unique_ptr<cudf::column> operator()(
- cudf::column_view const& input,
- cudf::size_type const& output_size,
- cudf::size_type const*,
- Filter filter,
- cudf::size_type,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+  std::unique_ptr<cudf::column> operator()(cudf::column_view const& input,
+ cudf::size_type const& output_size,
+ cudf::size_type const*,
+ Filter filter,
+ cudf::size_type,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr)
{
rmm::device_uvector<cudf::size_type> indices(output_size, stream);
@@ -320,11 +318,10 @@ struct scatter_gather_functor {
* @return unique_ptr<table> The table generated from filtered `input`.
*/
template <typename Filter>
-std::unique_ptr<table> copy_if(
- table_view const& input,
- Filter filter,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<table> copy_if(table_view const& input,
+ Filter filter,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
diff --git a/cpp/include/cudf/detail/copy_if_else.cuh b/cpp/include/cudf/detail/copy_if_else.cuh
index b20753239ab..083b12edbf8 100644
--- a/cpp/include/cudf/detail/copy_if_else.cuh
+++ b/cpp/include/cudf/detail/copy_if_else.cuh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -145,15 +145,14 @@ __launch_bounds__(block_size) __global__
* by `filter[i]`
*/
template <typename FilterFn, typename LeftIter, typename RightIter>
-std::unique_ptr<column> copy_if_else(
- bool nullable,
- LeftIter lhs_begin,
- LeftIter lhs_end,
- RightIter rhs,
- FilterFn filter,
- cudf::data_type output_type,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> copy_if_else(bool nullable,
+ LeftIter lhs_begin,
+ LeftIter lhs_end,
+ RightIter rhs,
+ FilterFn filter,
+ cudf::data_type output_type,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr)
{
// This is the type of the thrust::optional element in the passed iterators
using Element = typename thrust::iterator_traits<LeftIter>::value_type::value_type;
diff --git a/cpp/include/cudf/detail/copy_range.cuh b/cpp/include/cudf/detail/copy_range.cuh
index 22714e97dfa..0d5aa509e08 100644
--- a/cpp/include/cudf/detail/copy_range.cuh
+++ b/cpp/include/cudf/detail/copy_range.cuh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -203,14 +203,13 @@ void copy_range_in_place(column_view const& source,
* @param stream CUDA stream used for device memory operations and kernel launches.
* @return std::unique_ptr<column> The result target column
*/
-std::unique_ptr<column> copy_range(
- column_view const& source,
- column_view const& target,
- size_type source_begin,
- size_type source_end,
- size_type target_begin,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> copy_range(column_view const& source,
+ column_view const& target,
+ size_type source_begin,
+ size_type source_end,
+ size_type target_begin,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
} // namespace detail
} // namespace cudf
diff --git a/cpp/include/cudf/detail/datetime.hpp b/cpp/include/cudf/detail/datetime.hpp
index c2e3c32b65f..c5160958165 100644
--- a/cpp/include/cudf/detail/datetime.hpp
+++ b/cpp/include/cudf/detail/datetime.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -29,70 +29,63 @@ namespace detail {
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr<cudf::column> extract_year(
- cudf::column_view const& column,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<cudf::column> extract_year(cudf::column_view const& column,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::extract_month(cudf::column_view const&, rmm::mr::device_memory_resource *)
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr<cudf::column> extract_month(
- cudf::column_view const& column,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<cudf::column> extract_month(cudf::column_view const& column,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::extract_day(cudf::column_view const&, rmm::mr::device_memory_resource *)
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr<cudf::column> extract_day(
- cudf::column_view const& column,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<cudf::column> extract_day(cudf::column_view const& column,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::extract_weekday(cudf::column_view const&, rmm::mr::device_memory_resource *)
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr<cudf::column> extract_weekday(
- cudf::column_view const& column,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<cudf::column> extract_weekday(cudf::column_view const& column,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::extract_hour(cudf::column_view const&, rmm::mr::device_memory_resource *)
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr<cudf::column> extract_hour(
- cudf::column_view const& column,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<cudf::column> extract_hour(cudf::column_view const& column,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::extract_minute(cudf::column_view const&, rmm::mr::device_memory_resource *)
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr<cudf::column> extract_minute(
- cudf::column_view const& column,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<cudf::column> extract_minute(cudf::column_view const& column,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::extract_second(cudf::column_view const&, rmm::mr::device_memory_resource *)
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr<cudf::column> extract_second(
- cudf::column_view const& column,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<cudf::column> extract_second(cudf::column_view const& column,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::extract_millisecond_fraction(cudf::column_view const&,
@@ -100,10 +93,9 @@ std::unique_ptr<cudf::column> extract_second(
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr<cudf::column> extract_millisecond_fraction(
- cudf::column_view const& column,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<cudf::column> extract_millisecond_fraction(cudf::column_view const& column,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::extract_microsecond_fraction(cudf::column_view const&,
@@ -111,10 +103,9 @@ std::unique_ptr<cudf::column> extract_millisecond_fraction(
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr<cudf::column> extract_microsecond_fraction(
- cudf::column_view const& column,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<cudf::column> extract_microsecond_fraction(cudf::column_view const& column,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::extract_nanosecond_fraction(cudf::column_view const&,
@@ -122,30 +113,27 @@ std::unique_ptr<cudf::column> extract_microsecond_fraction(
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr<cudf::column> extract_nanosecond_fraction(
- cudf::column_view const& column,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<cudf::column> extract_nanosecond_fraction(cudf::column_view const& column,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::last_day_of_month(cudf::column_view const&, rmm::mr::device_memory_resource *)
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr<cudf::column> last_day_of_month(
- cudf::column_view const& column,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<cudf::column> last_day_of_month(cudf::column_view const& column,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::day_of_year(cudf::column_view const&, rmm::mr::device_memory_resource *)
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr<cudf::column> day_of_year(
- cudf::column_view const& column,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<cudf::column> day_of_year(cudf::column_view const& column,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::add_calendrical_months(cudf::column_view const&, cudf::column_view const&,
@@ -153,11 +141,10 @@ std::unique_ptr<cudf::column> day_of_year(
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr<cudf::column> add_calendrical_months(
- cudf::column_view const& timestamps,
- cudf::column_view const& months,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<cudf::column> add_calendrical_months(cudf::column_view const& timestamps,
+ cudf::column_view const& months,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::add_calendrical_months(cudf::column_view const&, cudf::scalar const&,
@@ -165,26 +152,23 @@ std::unique_ptr<cudf::column> add_calendrical_months(
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr<cudf::column> add_calendrical_months(
- cudf::column_view const& timestamps,
- cudf::scalar const& months,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<cudf::column> add_calendrical_months(cudf::column_view const& timestamps,
+ cudf::scalar const& months,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::is_leap_year(cudf::column_view const&, rmm::mr::device_memory_resource *)
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr<cudf::column> is_leap_year(
- cudf::column_view const& column,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<cudf::column> is_leap_year(cudf::column_view const& column,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
-std::unique_ptr<cudf::column> extract_quarter(
- cudf::column_view const& column,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<cudf::column> extract_quarter(cudf::column_view const& column,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
} // namespace detail
} // namespace datetime
diff --git a/cpp/include/cudf/detail/fill.hpp b/cpp/include/cudf/detail/fill.hpp
index e34acfff6b9..caaccfb4851 100644
--- a/cpp/include/cudf/detail/fill.hpp
+++ b/cpp/include/cudf/detail/fill.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -43,13 +43,12 @@ void fill_in_place(mutable_column_view& destination,
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr<column> fill(
- column_view const& input,
- size_type begin,
- size_type end,
- scalar const& value,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> fill(column_view const& input,
+ size_type begin,
+ size_type end,
+ scalar const& value,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
} // namespace detail
} // namespace cudf
diff --git a/cpp/include/cudf/detail/gather.cuh b/cpp/include/cudf/detail/gather.cuh
index 57d834e6277..5460a0e5a76 100644
--- a/cpp/include/cudf/detail/gather.cuh
+++ b/cpp/include/cudf/detail/gather.cuh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -583,10 +583,12 @@ void gather_bitmask(table_view const& source,
std::transform(target.begin(), target.end(), target_masks.begin(), [](auto const& col) {
return col->mutable_view().null_mask();
});
- auto d_target_masks = make_device_uvector_async(target_masks, stream);
+ auto d_target_masks =
+ make_device_uvector_async(target_masks, stream, rmm::mr::get_current_device_resource());
auto const device_source = table_device_view::create(source, stream);
-  auto d_valid_counts = make_zeroed_device_uvector_async<size_type>(target.size(), stream);
+  auto d_valid_counts = make_zeroed_device_uvector_async<size_type>(
+ target.size(), stream, rmm::mr::get_current_device_resource());
// Dispatch operation enum to get implementation
auto const impl = [op]() {
@@ -647,13 +649,12 @@ void gather_bitmask(table_view const& source,
* @return cudf::table Result of the gather
*/
template <typename MapIterator>
-std::unique_ptr<table> gather(
- table_view const& source_table,
- MapIterator gather_map_begin,
- MapIterator gather_map_end,
- out_of_bounds_policy bounds_policy = out_of_bounds_policy::DONT_CHECK,
- rmm::cuda_stream_view stream = cudf::get_default_stream(),
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<table> gather(table_view const& source_table,
+ MapIterator gather_map_begin,
+ MapIterator gather_map_end,
+ out_of_bounds_policy bounds_policy,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr)
{
std::vector<std::unique_ptr<column>> destination_columns;
diff --git a/cpp/include/cudf/detail/gather.hpp b/cpp/include/cudf/detail/gather.hpp
index 9d61a8de184..034eb6c1282 100644
--- a/cpp/include/cudf/detail/gather.hpp
+++ b/cpp/include/cudf/detail/gather.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -61,13 +61,12 @@ enum class negative_index_policy : bool { ALLOWED, NOT_ALLOWED };
* @param[in] mr Device memory resource used to allocate the returned table's device memory
* @return Result of the gather
*/
-std::unique_ptr<table> gather(
- table_view const& source_table,
- column_view const& gather_map,
- out_of_bounds_policy bounds_policy,
- negative_index_policy neg_indices,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<table> gather(table_view const& source_table,
+ column_view const& gather_map,
+ out_of_bounds_policy bounds_policy,
+ negative_index_policy neg_indices,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::detail::gather(table_view const&,column_view const&,table_view
@@ -76,13 +75,12 @@ std::unique_ptr<table> gather(
*
* @throws cudf::logic_error if `gather_map` span size is larger than max of `size_type`.
*/
-std::unique_ptr<table> gather(
-  table_view const& source_table,
-  device_span<size_type const> const gather_map,
- out_of_bounds_policy bounds_policy,
- negative_index_policy neg_indices,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<table> gather(table_view const& source_table,
+                              device_span<size_type const> const gather_map,
+ out_of_bounds_policy bounds_policy,
+ negative_index_policy neg_indices,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
} // namespace detail
} // namespace cudf
diff --git a/cpp/include/cudf/detail/groupby/group_replace_nulls.hpp b/cpp/include/cudf/detail/groupby/group_replace_nulls.hpp
index 9e64048b7b4..e081a626c75 100644
--- a/cpp/include/cudf/detail/groupby/group_replace_nulls.hpp
+++ b/cpp/include/cudf/detail/groupby/group_replace_nulls.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -36,12 +36,11 @@ namespace detail {
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param[in] mr Device memory resource used to allocate device memory of the returned column.
*/
-std::unique_ptr<column> group_replace_nulls(
-  cudf::column_view const& grouped_value,
-  device_span<size_type const> group_labels,
- cudf::replace_policy replace_policy,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> group_replace_nulls(cudf::column_view const& grouped_value,
+                                            device_span<size_type const> group_labels,
+ cudf::replace_policy replace_policy,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
} // namespace detail
} // namespace groupby
diff --git a/cpp/include/cudf/detail/groupby/sort_helper.hpp b/cpp/include/cudf/detail/groupby/sort_helper.hpp
index e2510d75a83..663ff44ca56 100644
--- a/cpp/include/cudf/detail/groupby/sort_helper.hpp
+++ b/cpp/include/cudf/detail/groupby/sort_helper.hpp
@@ -85,10 +85,9 @@ struct sort_groupby_helper {
* @param values The value column to group and sort
* @return the sorted and grouped column
*/
-  std::unique_ptr<column> sorted_values(
- column_view const& values,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+  std::unique_ptr<column> sorted_values(column_view const& values,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @brief Groups a column of values according to `keys`
@@ -100,28 +99,25 @@ struct sort_groupby_helper {
* @param values The value column to group
* @return the grouped column
*/
-  std::unique_ptr<column> grouped_values(
- column_view const& values,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+  std::unique_ptr<column> grouped_values(column_view const& values,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @brief Get a table of sorted unique keys
*
* @return a new table in which each row is a unique row in the sorted key table.
*/
-  std::unique_ptr<table> unique_keys(
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+  std::unique_ptr<table> unique_keys(rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @brief Get a table of sorted keys
*
* @return a new table containing the sorted keys.
*/
-  std::unique_ptr<table> sorted_keys(
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+  std::unique_ptr<table> sorted_keys(rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @brief Get the number of groups in `keys`
diff --git a/cpp/include/cudf/detail/hashing.hpp b/cpp/include/cudf/detail/hashing.hpp
index b7469d80a8d..771b3e150ec 100644
--- a/cpp/include/cudf/detail/hashing.hpp
+++ b/cpp/include/cudf/detail/hashing.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -31,29 +31,25 @@ namespace detail {
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr<column> hash(
- table_view const& input,
- hash_id hash_function = hash_id::HASH_MURMUR3,
- uint32_t seed = cudf::DEFAULT_HASH_SEED,
- rmm::cuda_stream_view stream = cudf::get_default_stream(),
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> hash(table_view const& input,
+ hash_id hash_function,
+ uint32_t seed,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
-std::unique_ptr<column> murmur_hash3_32(
- table_view const& input,
- uint32_t seed = cudf::DEFAULT_HASH_SEED,
- rmm::cuda_stream_view stream = cudf::get_default_stream(),
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> murmur_hash3_32(table_view const& input,
+ uint32_t seed,
+ rmm::cuda_stream_view,
+ rmm::mr::device_memory_resource* mr);
-std::unique_ptr<column> spark_murmur_hash3_32(
- table_view const& input,
- uint32_t seed = cudf::DEFAULT_HASH_SEED,
- rmm::cuda_stream_view stream = cudf::get_default_stream(),
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> spark_murmur_hash3_32(table_view const& input,
+ uint32_t seed,
+ rmm::cuda_stream_view,
+ rmm::mr::device_memory_resource* mr);
-std::unique_ptr<column> md5_hash(
- table_view const& input,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr