diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index d02825b73d1..dd4482375b9 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -133,5 +133,6 @@ jobs: with: build_type: pull-request package-name: dask_cudf - test-before: "RAPIDS_PY_WHEEL_NAME=cudf_cu11 rapids-download-wheels-from-s3 ./local-cudf-dep && python -m pip install --no-deps ./local-cudf-dep/cudf*.whl" + # Install the cudf we just built, and also test against latest dask/distributed/dask-cuda. + test-before: "RAPIDS_PY_WHEEL_NAME=cudf_cu11 rapids-download-wheels-from-s3 ./local-cudf-dep && python -m pip install --no-deps ./local-cudf-dep/cudf*.whl && pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.04" test-unittest: "python -m pytest -v -n 8 ./python/dask_cudf/dask_cudf/tests" diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index c808e1475e6..a4bd14439b0 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -97,4 +97,6 @@ jobs: date: ${{ inputs.date }} sha: ${{ inputs.sha }} package-name: dask_cudf + # Test against latest dask/distributed/dask-cuda. 
+ test-before: "pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.04" test-unittest: "python -m pytest -v -n 8 ./python/dask_cudf/dask_cudf/tests" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1eb2c508db9..8b46eb25950 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -161,7 +161,7 @@ repos: ^CHANGELOG.md$ ) - repo: https://github.com/rapidsai/dependency-file-generator - rev: v1.4.0 + rev: v1.5.1 hooks: - id: rapids-dependency-file-generator args: ["--clean"] diff --git a/build.sh b/build.sh index bee66d819b4..7cbd0fceb5a 100755 --- a/build.sh +++ b/build.sh @@ -300,8 +300,7 @@ if buildAll || hasArg libcudf; then # Record build times if [[ "$BUILD_REPORT_METRICS" == "ON" && -f "${LIB_BUILD_DIR}/.ninja_log" ]]; then echo "Formatting build metrics" - python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt xml > ${LIB_BUILD_DIR}/ninja_log.xml - MSG="

" + MSG="" # get some sccache stats after the compile if [[ "$BUILD_REPORT_INCL_CACHE_STATS" == "ON" && -x "$(command -v sccache)" ]]; then COMPILE_REQUESTS=$(sccache -s | grep "Compile requests \+ [0-9]\+$" | awk '{ print $NF }') @@ -318,7 +317,9 @@ if buildAll || hasArg libcudf; then BMR_DIR=${RAPIDS_ARTIFACTS_DIR:-"${LIB_BUILD_DIR}"} echo "Metrics output dir: [$BMR_DIR]" mkdir -p ${BMR_DIR} - python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt html --msg "$MSG" > ${BMR_DIR}/ninja_log.html + MSG_OUTFILE="$(mktemp)" + echo "$MSG" > "${MSG_OUTFILE}" + python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt html --msg "${MSG_OUTFILE}" > ${BMR_DIR}/ninja_log.html cp ${LIB_BUILD_DIR}/.ninja_log ${BMR_DIR}/ninja.log fi diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh index b68c2bdbef6..bc27e7d76b0 100755 --- a/ci/build_cpp.sh +++ b/ci/build_cpp.sh @@ -14,29 +14,3 @@ rapids-logger "Begin cpp build" rapids-mamba-retry mambabuild conda/recipes/libcudf rapids-upload-conda-to-s3 cpp - -echo "++++++++++++++++++++++++++++++++++++++++++++" - -if [[ -d $RAPIDS_ARTIFACTS_DIR ]]; then - ls -l ${RAPIDS_ARTIFACTS_DIR} -fi - -echo "++++++++++++++++++++++++++++++++++++++++++++" - -FILE=${RAPIDS_ARTIFACTS_DIR}/ninja.log -if [[ -f $FILE ]]; then - echo -e "\x1B[33;1m\x1B[48;5;240m Ninja log for this build available at the following link \x1B[0m" - UPLOAD_NAME=cpp_cuda${RAPIDS_CUDA_VERSION%%.*}_$(arch).ninja.log - rapids-upload-to-s3 "${UPLOAD_NAME}" "${FILE}" -fi - -echo "++++++++++++++++++++++++++++++++++++++++++++" - -FILE=${RAPIDS_ARTIFACTS_DIR}/ninja_log.html -if [[ -f $FILE ]]; then - echo -e "\x1B[33;1m\x1B[48;5;240m Build Metrics Report for this build available at the following link \x1B[0m" - UPLOAD_NAME=cpp_cuda${RAPIDS_CUDA_VERSION%%.*}_$(arch).BuildMetricsReport.html - rapids-upload-to-s3 "${UPLOAD_NAME}" "${FILE}" -fi - -echo "++++++++++++++++++++++++++++++++++++++++++++" diff --git a/ci/build_docs.sh b/ci/build_docs.sh 
index 6daedb59733..4955fe08982 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -33,16 +33,25 @@ aws s3 cp s3://rapidsai-docs/librmm/${VERSION_NUMBER}/html/rmm.tag . || echo "Fa doxygen Doxyfile popd -rapids-logger "Build Sphinx docs" +rapids-logger "Build cuDF Sphinx docs" pushd docs/cudf sphinx-build -b dirhtml source _html sphinx-build -b text source _text popd +rapids-logger "Build dask-cuDF Sphinx docs" +pushd docs/dask_cudf +sphinx-build -b dirhtml source _html +sphinx-build -b text source _text +popd + + if [[ ${RAPIDS_BUILD_TYPE} == "branch" ]]; then rapids-logger "Upload Docs to S3" aws s3 sync --no-progress --delete cpp/doxygen/html "s3://rapidsai-docs/libcudf/${VERSION_NUMBER}/html" aws s3 sync --no-progress --delete docs/cudf/_html "s3://rapidsai-docs/cudf/${VERSION_NUMBER}/html" aws s3 sync --no-progress --delete docs/cudf/_text "s3://rapidsai-docs/cudf/${VERSION_NUMBER}/txt" + aws s3 sync --no-progress --delete docs/dask_cudf/_html "s3://rapidsai-docs/dask-cudf/${VERSION_NUMBER}/html" + aws s3 sync --no-progress --delete docs/dask_cudf/_text "s3://rapidsai-docs/dask-cudf/${VERSION_NUMBER}/txt" fi diff --git a/ci/release/apply_wheel_modifications.sh b/ci/release/apply_wheel_modifications.sh index 9d9758f1f15..0c55c4b9141 100755 --- a/ci/release/apply_wheel_modifications.sh +++ b/ci/release/apply_wheel_modifications.sh @@ -6,12 +6,6 @@ VERSION=${1} CUDA_SUFFIX=${2} -# __init__.py versions -sed -i "s/__version__ = .*/__version__ = \"${VERSION}\"/g" python/cudf/cudf/__init__.py -sed -i "s/__version__ = .*/__version__ = \"${VERSION}\"/g" python/dask_cudf/dask_cudf/__init__.py -sed -i "s/__version__ = .*/__version__ = \"${VERSION}\"/g" python/cudf_kafka/cudf_kafka/__init__.py -sed -i "s/__version__ = .*/__version__ = \"${VERSION}\"/g" python/custreamz/custreamz/__init__.py - # pyproject.toml versions sed -i "s/^version = .*/version = \"${VERSION}\"/g" python/cudf/pyproject.toml sed -i "s/^version = .*/version = \"${VERSION}\"/g" 
python/dask_cudf/pyproject.toml diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index e5c9ba0569f..dc5ea6015f9 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -24,6 +24,11 @@ NEXT_MINOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[2]}') NEXT_SHORT_TAG=${NEXT_MAJOR}.${NEXT_MINOR} NEXT_UCX_PY_VERSION="$(curl -sL https://version.gpuci.io/rapids/${NEXT_SHORT_TAG}).*" +# Need to distutils-normalize the versions for some use cases +CURRENT_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${CURRENT_SHORT_TAG}'))") +NEXT_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_SHORT_TAG}'))") +echo "current is ${CURRENT_SHORT_TAG_PEP440}, next is ${NEXT_SHORT_TAG_PEP440}" + echo "Preparing release $CURRENT_TAG => $NEXT_FULL_TAG" # Inplace sed replace; workaround for Linux and Mac @@ -70,9 +75,10 @@ sed_runner 's/release = .*/release = '"'${NEXT_FULL_TAG}'"'/g' docs/cudf/source/ # bump rmm & dask-cuda for FILE in conda/environments/*.yaml dependencies.yaml; do - sed_runner "s/dask-cuda=${CURRENT_SHORT_TAG}/dask-cuda=${NEXT_SHORT_TAG}/g" ${FILE}; - sed_runner "s/rmm=${CURRENT_SHORT_TAG}/rmm=${NEXT_SHORT_TAG}/g" ${FILE}; - sed_runner "s/rmm-cu11=${CURRENT_SHORT_TAG}/rmm-cu11=${NEXT_SHORT_TAG}/g" ${FILE}; + sed_runner "s/dask-cuda==${CURRENT_SHORT_TAG_PEP440}/dask-cuda==${NEXT_SHORT_TAG_PEP440}/g" ${FILE}; + sed_runner "s/rmm==${CURRENT_SHORT_TAG_PEP440}/rmm==${NEXT_SHORT_TAG_PEP440}/g" ${FILE}; + sed_runner "s/cudf==${CURRENT_SHORT_TAG_PEP440}/cudf==${NEXT_SHORT_TAG_PEP440}/g" ${FILE}; + sed_runner "s/cudf_kafka==${CURRENT_SHORT_TAG_PEP440}/cudf_kafka==${NEXT_SHORT_TAG_PEP440}/g" ${FILE}; done # Doxyfile update @@ -86,13 +92,11 @@ sed_runner "s/cudf=${CURRENT_SHORT_TAG}/cudf=${NEXT_SHORT_TAG}/g" README.md sed_runner "s/CUDF_TAG branch-${CURRENT_SHORT_TAG}/CUDF_TAG branch-${NEXT_SHORT_TAG}/" 
cpp/examples/basic/CMakeLists.txt sed_runner "s/CUDF_TAG branch-${CURRENT_SHORT_TAG}/CUDF_TAG branch-${NEXT_SHORT_TAG}/" cpp/examples/strings/CMakeLists.txt -# Need to distutils-normalize the original version -NEXT_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_SHORT_TAG}'))") - # Dependency versions in pyproject.toml sed_runner "s/rmm==.*\",/rmm==${NEXT_SHORT_TAG_PEP440}.*\",/g" python/cudf/pyproject.toml sed_runner "s/cudf==.*\",/cudf==${NEXT_SHORT_TAG_PEP440}.*\",/g" python/dask_cudf/pyproject.toml for FILE in .github/workflows/*.yaml; do sed_runner "/shared-action-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}" + sed_runner "s/dask-cuda.git@branch-[^\"\s]\+/dask-cuda.git@branch-${NEXT_SHORT_TAG}/g" ${FILE}; done diff --git a/ci/test_cpp.sh b/ci/test_cpp.sh index bd7a82afbea..846b90c78e5 100755 --- a/ci/test_cpp.sh +++ b/ci/test_cpp.sh @@ -8,35 +8,34 @@ trap "EXITCODE=1" ERR set +e # Get library for finding incorrect default stream usage. -STREAM_IDENTIFY_LIB="${CONDA_PREFIX}/lib/libcudf_identify_stream_usage.so" +STREAM_IDENTIFY_LIB_MODE_CUDF="${CONDA_PREFIX}/lib/libcudf_identify_stream_usage_mode_cudf.so" +STREAM_IDENTIFY_LIB_MODE_TESTING="${CONDA_PREFIX}/lib/libcudf_identify_stream_usage_mode_testing.so" -echo "STREAM_IDENTIFY_LIB=${STREAM_IDENTIFY_LIB}" +echo "STREAM_IDENTIFY_LIB=${STREAM_IDENTIFY_LIB_MODE_CUDF}" # Run libcudf and libcudf_kafka gtests from libcudf-tests package rapids-logger "Run gtests" -# TODO: exit code handling is too verbose. Find a cleaner solution. - -for gt in "$CONDA_PREFIX"/bin/gtests/{libcudf,libcudf_kafka}/* ; do - test_name=$(basename ${gt}) - echo "Running gtest $test_name" - - # TODO: This strategy for using the stream lib will need to change when we - # switch to invoking ctest. For one, we will want to set the test - # properties to use the lib (which means that the decision will be made at - # CMake-configure time instead of runtime). 
We may also need to leverage - # something like gtest_discover_tests to be able to filter on the - # underlying test names. - if [[ ${test_name} == "SPAN_TEST" ]]; then - # This one test is specifically designed to test using a thrust device - # vector, so we expect and allow it to include default stream usage. - gtest_filter="SpanTest.CanConstructFromDeviceContainers" - GTEST_CUDF_STREAM_MODE="custom" LD_PRELOAD=${STREAM_IDENTIFY_LIB} ${gt} --gtest_output=xml:${RAPIDS_TESTS_DIR} --gtest_filter="-${gtest_filter}" && \ - ${gt} --gtest_output=xml:${RAPIDS_TESTS_DIR} --gtest_filter="${gtest_filter}" - else - GTEST_CUDF_STREAM_MODE="custom" LD_PRELOAD=${STREAM_IDENTIFY_LIB} ${gt} --gtest_output=xml:${RAPIDS_TESTS_DIR} - fi -done +cd $CONDA_PREFIX/bin/gtests/libcudf/ +export GTEST_CUDF_STREAM_MODE="new_cudf_default" +export GTEST_OUTPUT=xml:${RAPIDS_TESTS_DIR}/ +export LD_PRELOAD=${STREAM_IDENTIFY_LIB_MODE_CUDF} + +ctest -E SPAN_TEST -j20 --output-on-failure + +# This one test is specifically designed to test using a thrust device vector, +# so we expect and allow it to include default stream usage. +_allowlist_filter="SpanTest.CanConstructFromDeviceContainers" +GTEST_FILTER="-${_allowlist_filter}" ctest -R SPAN_TEST -VV +LD_PRELOAD= GTEST_CUDF_STREAM_MODE=default GTEST_FILTER="${_allowlist_filter}" ctest -R SPAN_TEST -VV + +SUITEERROR=$? + +if (( ${SUITEERROR} == 0 )); then + cd $CONDA_PREFIX/bin/gtests/libcudf_kafka/ + ctest -j20 --output-on-failure + SUITEERROR=$? 
+fi rapids-logger "Test script exiting with value: $EXITCODE" exit ${EXITCODE} diff --git a/ci/test_cpp_memcheck.sh b/ci/test_cpp_memcheck.sh index db9ce143d51..0e85268cb72 100755 --- a/ci/test_cpp_memcheck.sh +++ b/ci/test_cpp_memcheck.sh @@ -11,7 +11,7 @@ set +e rapids-logger "Memcheck gtests with rmm_mode=cuda" export GTEST_CUDF_RMM_MODE=cuda COMPUTE_SANITIZER_CMD="compute-sanitizer --tool memcheck" -for gt in "$CONDA_PREFIX"/bin/gtests/libcudf/* ; do +for gt in "$CONDA_PREFIX"/bin/gtests/libcudf/*_TEST ; do test_name=$(basename ${gt}) if [[ "$test_name" == "ERROR_TEST" ]] || [[ "$test_name" == "STREAM_IDENTIFICATION_TEST" ]]; then continue diff --git a/ci/test_java.sh b/ci/test_java.sh index f905aaa1178..e4df62501cc 100755 --- a/ci/test_java.sh +++ b/ci/test_java.sh @@ -38,7 +38,7 @@ set +e rapids-logger "Run Java tests" pushd java -mvn test -B -DCUDF_JNI_ARROW_STATIC=OFF -DCUDF_JNI_ENABLE_PROFILING=OFF +mvn test -B -DCUDF_JNI_ENABLE_PROFILING=OFF popd rapids-logger "Test script exiting with value: $EXITCODE" diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 66d375910d4..890cb199419 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -21,7 +21,7 @@ dependencies: - cupy>=9.5.0,<12.0.0a0 - cxx-compiler - cython>=0.29,<0.30 -- dask-cuda=23.04.* +- dask-cuda==23.4.* - dask>=2023.1.1 - distributed>=2023.1.1 - dlpack>=0.5,<0.6.0a0 @@ -30,18 +30,21 @@ dependencies: - fmt>=9.1.0,<10 - fsspec>=0.6.0 - gcc_linux-64=11.* +- gmock==1.10.0.* +- gtest==1.10.0.* - hypothesis - ipython -- libarrow=10 +- libarrow==10.0.1.* - librdkafka=1.7.0 -- librmm=23.04.* +- librmm==23.4.* - mimesis>=4.1.0 - moto>=4.0.8 +- msgpack-python - myst-nb - nbsphinx - ninja - notebook -- numba>=0.56.2 +- numba>=0.56.4,<0.57 - numpy>=1.21 - numpydoc - nvcc_linux-64=11.8 @@ -53,7 +56,7 @@ dependencies: - pre-commit - protobuf>=4.21.6,<4.22 - ptxcompiler -- 
pyarrow=10 +- pyarrow==10.0.1.* - pydata-sphinx-theme - pyorc - pytest @@ -61,11 +64,11 @@ dependencies: - pytest-cases - pytest-cov - pytest-xdist -- python-confluent-kafka=1.7.0 +- python-confluent-kafka==1.7.0 - python-snappy>=0.6.0 - python>=3.8,<3.11 - pytorch<1.12.0 -- rmm=23.04.* +- rmm==23.4.* - s3fs>=2022.3.0 - scikit-build>=0.13.1 - scipy diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index 6b23c8953d3..bbd9961320a 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -52,7 +52,7 @@ requirements: - cython >=0.29,<0.30 - scikit-build >=0.13.1 - setuptools - - numba >=0.56.2 + - numba >=0.56.4,<0.57 - dlpack >=0.5,<0.6.0a0 - pyarrow =10 - libcudf ={{ version }} @@ -64,7 +64,7 @@ requirements: - typing_extensions - pandas >=1.3,<1.6.0dev0 - cupy >=9.5.0,<12.0.0a0 - - numba >=0.56.2 + - numba >=0.56.4,<0.57 - numpy >=1.21 - {{ pin_compatible('pyarrow', max_pin='x.x.x') }} - libcudf {{ version }} diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index 770a234b56e..469c25fb673 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -51,6 +51,8 @@ requirements: - librdkafka {{ librdkafka_version }} - fmt {{ fmt_version }} - spdlog {{ spdlog_version }} + - gtest {{ gtest_version }} + - gmock {{ gtest_version }} outputs: - name: libcudf @@ -71,10 +73,14 @@ outputs: - librmm ={{ minor_version }} - libarrow {{ libarrow_version }} - dlpack {{ dlpack_version }} + - gtest {{ gtest_version }} + - gmock {{ gtest_version }} test: commands: - test -f $PREFIX/lib/libcudf.so - test -f $PREFIX/lib/libcudftestutil.a + - test -f $PREFIX/lib/libcudf_identify_stream_usage_mode_cudf.so + - test -f $PREFIX/lib/libcudf_identify_stream_usage_mode_testing.so - test -f $PREFIX/include/cudf/aggregation.hpp - test -f $PREFIX/include/cudf/ast/detail/expression_parser.hpp - test -f $PREFIX/include/cudf/ast/detail/operators.hpp @@ -86,6 +92,7 @@ outputs: - test -f 
$PREFIX/include/cudf/concatenate.hpp - test -f $PREFIX/include/cudf/copying.hpp - test -f $PREFIX/include/cudf/datetime.hpp + - test -f $PREFIX/include/cudf/timezone.hpp - test -f $PREFIX/include/cudf/detail/aggregation/aggregation.hpp - test -f $PREFIX/include/cudf/detail/aggregation/result_cache.hpp - test -f $PREFIX/include/cudf/detail/binaryop.hpp @@ -107,7 +114,6 @@ outputs: - test -f $PREFIX/include/cudf/detail/nvtx/nvtx3.hpp - test -f $PREFIX/include/cudf/detail/nvtx/ranges.hpp - test -f $PREFIX/include/cudf/detail/quantiles.hpp - - test -f $PREFIX/include/cudf/detail/reduction_functions.hpp - test -f $PREFIX/include/cudf/detail/repeat.hpp - test -f $PREFIX/include/cudf/detail/replace.hpp - test -f $PREFIX/include/cudf/detail/reshape.hpp @@ -116,12 +122,13 @@ outputs: - test -f $PREFIX/include/cudf/detail/scan.hpp - test -f $PREFIX/include/cudf/detail/scatter.hpp - test -f $PREFIX/include/cudf/detail/search.hpp - - test -f $PREFIX/include/cudf/detail/segmented_reduction_functions.hpp - test -f $PREFIX/include/cudf/detail/sequence.hpp - test -f $PREFIX/include/cudf/detail/sorting.hpp - test -f $PREFIX/include/cudf/detail/stream_compaction.hpp - test -f $PREFIX/include/cudf/detail/structs/utilities.hpp - test -f $PREFIX/include/cudf/detail/tdigest/tdigest.hpp + - test -f $PREFIX/include/cudf/detail/timezone.cuh + - test -f $PREFIX/include/cudf/detail/timezone.hpp - test -f $PREFIX/include/cudf/detail/transform.hpp - test -f $PREFIX/include/cudf/detail/transpose.hpp - test -f $PREFIX/include/cudf/detail/unary.hpp @@ -209,6 +216,8 @@ outputs: - test -f $PREFIX/include/cudf/partitioning.hpp - test -f $PREFIX/include/cudf/quantiles.hpp - test -f $PREFIX/include/cudf/reduction.hpp + - test -f $PREFIX/include/cudf/reduction/detail/reduction_functions.hpp + - test -f $PREFIX/include/cudf/reduction/detail/segmented_reduction_functions.hpp - test -f $PREFIX/include/cudf/replace.hpp - test -f $PREFIX/include/cudf/reshape.hpp - test -f $PREFIX/include/cudf/rolling.hpp @@ 
-294,11 +303,12 @@ outputs: - test -f $PREFIX/include/cudf_test/column_wrapper.hpp - test -f $PREFIX/include/cudf_test/cudf_gtest.hpp - test -f $PREFIX/include/cudf_test/cxxopts.hpp + - test -f $PREFIX/include/cudf_test/default_stream.hpp - test -f $PREFIX/include/cudf_test/detail/column_utilities.hpp - test -f $PREFIX/include/cudf_test/file_utilities.hpp - test -f $PREFIX/include/cudf_test/io_metadata_utilities.hpp - test -f $PREFIX/include/cudf_test/iterator_utilities.hpp - - test -f $PREFIX/include/cudf_test/stream_checking_resource_adapter.hpp + - test -f $PREFIX/include/cudf_test/stream_checking_resource_adaptor.hpp - test -f $PREFIX/include/cudf_test/table_utilities.hpp - test -f $PREFIX/include/cudf_test/timestamp_utilities.cuh - test -f $PREFIX/include/cudf_test/type_list_utilities.hpp @@ -376,8 +386,6 @@ outputs: - {{ pin_subpackage('libcudf', exact=True) }} - {{ pin_subpackage('libcudf_kafka', exact=True) }} - cudatoolkit {{ cuda_spec }} - - gtest {{ gtest_version }} - - gmock {{ gtest_version }} about: home: https://rapids.ai/ license: Apache-2.0 diff --git a/conda/recipes/libcudf/post-link.sh b/conda/recipes/libcudf/post-link.sh index 64e0b1ad305..8ae2349f791 100644 --- a/conda/recipes/libcudf/post-link.sh +++ b/conda/recipes/libcudf/post-link.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. 
# Only add the license notice to libcudf and not our examples / tests if [[ "$PKG_NAME" == "libcudf" ]]; then - cat ./nvlink.txt >> $PREFIX/.messages.txt + cat ./nvcomp.txt >> $PREFIX/.messages.txt fi diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index a261049d3f0..127df03c54d 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -73,7 +73,7 @@ option(CUDA_WARNINGS_AS_ERRORS "Enable -Werror=all-warnings for all CUDA compila option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF) set(DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL ON) -if(${CUDA_STATIC_RUNTIME}) +if(CUDA_STATIC_RUNTIME OR NOT BUILD_SHARED_LIBS) set(DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL OFF) endif() option( @@ -369,7 +369,7 @@ add_library( src/io/orc/stripe_data.cu src/io/orc/stripe_enc.cu src/io/orc/stripe_init.cu - src/io/orc/timezone.cpp + src/datetime/timezone.cpp src/io/orc/writer_impl.cu src/io/parquet/compact_protocol_reader.cpp src/io/parquet/compact_protocol_writer.cpp @@ -464,6 +464,7 @@ add_library( src/reductions/segmented/max.cu src/reductions/segmented/mean.cu src/reductions/segmented/min.cu + src/reductions/segmented/nunique.cu src/reductions/segmented/product.cu src/reductions/segmented/reductions.cpp src/reductions/segmented/std.cu @@ -547,6 +548,7 @@ add_library( src/strings/regex/regex_program.cpp src/strings/repeat_strings.cu src/strings/replace/backref_re.cu + src/strings/replace/multi.cu src/strings/replace/multi_re.cu src/strings/replace/replace.cu src/strings/replace/replace_re.cu @@ -739,6 +741,35 @@ add_library(cudf::cudf ALIAS cudf) # * build cudftestutil ---------------------------------------------------------------------------- if(CUDF_BUILD_TESTUTIL) + add_library( + cudftest_default_stream + # When compiled as a dynamic library allows us to use LD_PRELOAD injection of symbols. We + # currently leverage this for stream-related library validation and may make use of it for + # other similar features in the future. 
+ tests/utilities/default_stream.cpp + ) + set_target_properties( + cudftest_default_stream + PROPERTIES BUILD_RPATH "\$ORIGIN" + INSTALL_RPATH "\$ORIGIN" + # set target compile options + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON + POSITION_INDEPENDENT_CODE ON + INTERFACE_POSITION_INDEPENDENT_CODE ON + ) + target_link_libraries( + cudftest_default_stream + PUBLIC cudf + PRIVATE $ + ) + + add_library(cudf::cudftest_default_stream ALIAS cudftest_default_stream) + + # Needs to be static so that we support usage of static builds of gtest which doesn't compile with + # fPIC enabled and therefore can't be embedded into shared libraries. add_library( cudftestutil STATIC tests/io/metadata_utilities.cpp @@ -768,7 +799,7 @@ if(CUDF_BUILD_TESTUTIL) target_link_libraries( cudftestutil - PUBLIC GTest::gmock GTest::gtest Threads::Threads cudf + PUBLIC GTest::gmock GTest::gtest Threads::Threads cudf cudftest_default_stream PRIVATE $ ) @@ -790,18 +821,27 @@ if(CUDF_BUILD_STREAMS_TEST_UTIL) ) endif() - # Libraries for stream-related testing. - add_library(cudf_identify_stream_usage SHARED tests/utilities/identify_stream_usage.cpp) + # Libraries for stream-related testing. We build the library twice, one with STREAM_MODE_TESTING + # on and one with it set to off. Each test will then be configured to use the appropriate library + # depending via ctest and whether it has been updated to expose public stream APIs. 
+ foreach(_mode cudf testing) + set(_tgt "cudf_identify_stream_usage_mode_${_mode}") + add_library(${_tgt} SHARED tests/utilities/identify_stream_usage.cpp) + + set_target_properties( + ${_tgt} + PROPERTIES # set target compile options + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + POSITION_INDEPENDENT_CODE ON + ) + target_link_libraries(${_tgt} PUBLIC CUDA::cudart rmm::rmm) + add_library(cudf::${_tgt} ALIAS ${_tgt}) - set_target_properties( - cudf_identify_stream_usage - PROPERTIES # set target compile options - CXX_STANDARD 17 - CXX_STANDARD_REQUIRED ON - POSITION_INDEPENDENT_CODE ON - ) - target_link_libraries(cudf_identify_stream_usage PUBLIC CUDA::cudart rmm::rmm) - add_library(cudf::cudf_identify_stream_usage ALIAS cudf_identify_stream_usage) + if("${_mode}" STREQUAL "testing") + target_compile_definitions(${_tgt} PUBLIC STREAM_MODE_TESTING) + endif() + endforeach() endif() # ################################################################################################## @@ -851,33 +891,23 @@ install( EXPORT cudf-exports ) -install(DIRECTORY ${CUDF_SOURCE_DIR}/include/cudf ${CUDF_SOURCE_DIR}/include/cudf_test - ${CUDF_SOURCE_DIR}/include/nvtext DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} -) - -if(CUDF_BUILD_TESTUTIL) +set(_components_export_string) +if(TARGET cudftestutil) install( - TARGETS cudftestutil + TARGETS cudftest_default_stream cudftestutil DESTINATION ${lib_dir} EXPORT cudf-testing-exports ) - - install( - EXPORT cudf-testing-exports - FILE cudf-testing-targets.cmake - NAMESPACE cudf:: - DESTINATION "${lib_dir}/cmake/cudf" - ) - - include("${rapids-cmake-dir}/export/write_dependencies.cmake") - rapids_export_write_dependencies( - INSTALL cudf-testing-exports - "${PROJECT_BINARY_DIR}/rapids-cmake/cudf/export/cudf-testing-dependencies.cmake" - ) + set(_components_export_string COMPONENTS testing COMPONENTS_EXPORT_SET cudf-testing-exports) endif() +install(DIRECTORY ${CUDF_SOURCE_DIR}/include/cudf ${CUDF_SOURCE_DIR}/include/cudf_test + 
${CUDF_SOURCE_DIR}/include/nvtext DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} +) + if(CUDF_BUILD_STREAMS_TEST_UTIL) - install(TARGETS cudf_identify_stream_usage DESTINATION ${lib_dir}) + install(TARGETS cudf_identify_stream_usage_mode_cudf DESTINATION ${lib_dir}) + install(TARGETS cudf_identify_stream_usage_mode_testing DESTINATION ${lib_dir}) endif() set(doc_string @@ -936,12 +966,6 @@ string( [=[ if(testing IN_LIST cudf_FIND_COMPONENTS) enable_language(CUDA) - if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/cudf-testing-dependencies.cmake") - include("${CMAKE_CURRENT_LIST_DIR}/cudf-testing-dependencies.cmake") - endif() - if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/cudf-testing-targets.cmake") - include("${CMAKE_CURRENT_LIST_DIR}/cudf-testing-targets.cmake") - endif() endif() ]=] ) @@ -949,8 +973,8 @@ string(APPEND install_code_string "${common_code_string}") rapids_export( INSTALL cudf - EXPORT_SET cudf-exports - GLOBAL_TARGETS cudf + EXPORT_SET cudf-exports ${_components_export_string} + GLOBAL_TARGETS cudf cudftestutil NAMESPACE cudf:: DOCUMENTATION doc_string FINAL_CODE_BLOCK install_code_string @@ -973,23 +997,13 @@ string(APPEND build_code_string "${common_code_string}") rapids_export( BUILD cudf - EXPORT_SET cudf-exports - GLOBAL_TARGETS cudf + EXPORT_SET cudf-exports ${_components_export_string} + GLOBAL_TARGETS cudf cudftestutil NAMESPACE cudf:: DOCUMENTATION doc_string FINAL_CODE_BLOCK build_code_string ) -if(CUDF_BUILD_TESTUTIL) - export( - EXPORT cudf-testing-exports - FILE ${CUDF_BINARY_DIR}/cudf-testing-targets.cmake - NAMESPACE cudf:: - ) - rapids_export_write_dependencies( - BUILD cudf-testing-exports "${CUDF_BINARY_DIR}/cudf-testing-dependencies.cmake" - ) -endif() # ################################################################################################## # * make documentation ---------------------------------------------------------------------------- diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index cc0b642a337..b9c15e244de 
100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -150,6 +150,7 @@ ConfigureBench(APPLY_BOOLEAN_MASK_BENCH stream_compaction/apply_boolean_mask.cpp # * stream_compaction benchmark ------------------------------------------------------------------- ConfigureNVBench( STREAM_COMPACTION_NVBENCH stream_compaction/distinct.cpp stream_compaction/unique.cpp + stream_compaction/unique_count.cpp ) # ################################################################################################## @@ -191,7 +192,7 @@ ConfigureBench( ) ConfigureNVBench( REDUCTION_NVBENCH reduction/distinct_count.cpp reduction/rank.cpp reduction/scan_structs.cpp - reduction/segment_reduce.cu + reduction/segmented_reduce.cpp ) # ################################################################################################## diff --git a/cpp/benchmarks/common/generate_input.cu b/cpp/benchmarks/common/generate_input.cu index edb19b7b0ca..762e9640d12 100644 --- a/cpp/benchmarks/common/generate_input.cu +++ b/cpp/benchmarks/common/generate_input.cu @@ -31,6 +31,7 @@ #include #include +#include #include #include @@ -429,8 +430,12 @@ std::unique_ptr create_random_column(data_profile const& profile, null_mask.begin()); } - auto [result_bitmask, null_count] = cudf::detail::valid_if( - null_mask.begin(), null_mask.end(), thrust::identity{}, cudf::get_default_stream()); + auto [result_bitmask, null_count] = + cudf::detail::valid_if(null_mask.begin(), + null_mask.end(), + thrust::identity{}, + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); return std::make_unique( dtype, @@ -508,8 +513,12 @@ std::unique_ptr create_random_utf8_string_column(data_profile cons thrust::make_zip_iterator(offsets.begin(), offsets.begin() + 1), num_rows, string_generator{chars.data(), engine}); - auto [result_bitmask, null_count] = cudf::detail::valid_if( - null_mask.begin(), null_mask.end() - 1, thrust::identity{}, cudf::get_default_stream()); + auto [result_bitmask, 
null_count] = + cudf::detail::valid_if(null_mask.begin(), + null_mask.end() - 1, + thrust::identity{}, + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); return cudf::make_strings_column( num_rows, std::move(offsets), @@ -542,7 +551,8 @@ std::unique_ptr create_random_column(data_profi sample_indices, cudf::out_of_bounds_policy::DONT_CHECK, cudf::detail::negative_index_policy::NOT_ALLOWED, - cudf::get_default_stream()); + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); return std::move(str_table->release()[0]); } @@ -626,8 +636,11 @@ std::unique_ptr create_random_column(data_profi auto [null_mask, null_count] = [&]() { if (profile.get_null_probability().has_value()) { auto valids = valid_dist(engine, num_rows); - return cudf::detail::valid_if( - valids.begin(), valids.end(), thrust::identity{}, cudf::get_default_stream()); + return cudf::detail::valid_if(valids.begin(), + valids.end(), + thrust::identity{}, + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); } return std::pair{}; }(); @@ -710,9 +723,12 @@ std::unique_ptr create_random_column(data_profile auto offsets_column = std::make_unique( cudf::data_type{cudf::type_id::INT32}, num_rows + 1, offsets.release()); - auto [null_mask, null_count] = cudf::detail::valid_if( - valids.begin(), valids.end(), thrust::identity{}, cudf::get_default_stream()); - list_column = cudf::make_lists_column( + auto [null_mask, null_count] = cudf::detail::valid_if(valids.begin(), + valids.end(), + thrust::identity{}, + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); + list_column = cudf::make_lists_column( num_rows, std::move(offsets_column), std::move(current_child_column), @@ -838,7 +854,8 @@ std::pair create_random_null_mask( return cudf::detail::valid_if(thrust::make_counting_iterator(0), thrust::make_counting_iterator(size), bool_generator{seed, 1.0 - *null_probability}, - cudf::get_default_stream()); + cudf::get_default_stream(), + 
rmm::mr::get_current_device_resource()); } } diff --git a/cpp/benchmarks/io/json/nested_json.cpp b/cpp/benchmarks/io/json/nested_json.cpp index 416cf403671..d03f36ca81f 100644 --- a/cpp/benchmarks/io/json/nested_json.cpp +++ b/cpp/benchmarks/io/json/nested_json.cpp @@ -171,7 +171,8 @@ void BM_NESTED_JSON(nvbench::state& state) cudf::io::json::detail::device_parse_nested_json( cudf::device_span{input->data(), static_cast(input->size())}, default_options, - cudf::get_default_stream()); + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); }); auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); @@ -202,7 +203,7 @@ void BM_NESTED_JSON_DEPTH(nvbench::state& state) state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { // Allocate device-side temporary storage & run algorithm cudf::io::json::detail::device_parse_nested_json( - input, default_options, cudf::get_default_stream()); + input, default_options, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); }); auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); diff --git a/cpp/benchmarks/iterator/iterator.cu b/cpp/benchmarks/iterator/iterator.cu index 73060200d00..1b1cf9b7e9d 100644 --- a/cpp/benchmarks/iterator/iterator.cu +++ b/cpp/benchmarks/iterator/iterator.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -140,8 +140,8 @@ void BM_iterator(benchmark::State& state) cudf::column_view hasnull_F = wrap_hasnull_F; // Initialize dev_result to false - auto dev_result = - cudf::detail::make_zeroed_device_uvector_sync(1, cudf::get_default_stream()); + auto dev_result = cudf::detail::make_zeroed_device_uvector_sync( + 1, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 if (cub_or_thrust) { @@ -210,7 +210,7 @@ void BM_pair_iterator(benchmark::State& state) // Initialize dev_result to false auto dev_result = cudf::detail::make_zeroed_device_uvector_sync>( - 1, cudf::get_default_stream()); + 1, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 if (cub_or_thrust) { diff --git a/cpp/benchmarks/join/join_common.hpp b/cpp/benchmarks/join/join_common.hpp index e37a4ca1193..70036a95377 100644 --- a/cpp/benchmarks/join/join_common.hpp +++ b/cpp/benchmarks/join/join_common.hpp @@ -104,8 +104,11 @@ void BM_join(state_type& state, Join JoinFunc) // roughly 75% nulls auto validity = thrust::make_transform_iterator(thrust::make_counting_iterator(0), null75_generator{}); - return cudf::detail::valid_if( - validity, validity + size, thrust::identity{}, cudf::get_default_stream()) + return cudf::detail::valid_if(validity, + validity + size, + thrust::identity{}, + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()) .first; }; diff --git a/cpp/benchmarks/reduction/segment_reduce.cu b/cpp/benchmarks/reduction/segmented_reduce.cpp similarity index 58% rename from cpp/benchmarks/reduction/segment_reduce.cu rename to cpp/benchmarks/reduction/segmented_reduce.cpp index 127b3598dae..590a014ad76 100644 --- a/cpp/benchmarks/reduction/segment_reduce.cu +++ b/cpp/benchmarks/reduction/segmented_reduce.cpp @@ -20,17 +20,15 @@ #include #include -#include 
+#include #include +#include #include #include #include -#include - #include -#include bool constexpr is_boolean_output_agg(cudf::segmented_reduce_aggregation::Kind kind) { @@ -38,8 +36,15 @@ bool constexpr is_boolean_output_agg(cudf::segmented_reduce_aggregation::Kind ki kind == cudf::segmented_reduce_aggregation::ANY; } +bool constexpr is_float_output_agg(cudf::segmented_reduce_aggregation::Kind kind) +{ + return kind == cudf::segmented_reduce_aggregation::MEAN || + kind == cudf::segmented_reduce_aggregation::VARIANCE || + kind == cudf::segmented_reduce_aggregation::STD; +} + template -std::unique_ptr make_simple_aggregation() +std::unique_ptr make_reduce_aggregation() { switch (kind) { case cudf::segmented_reduce_aggregation::SUM: @@ -54,12 +59,22 @@ std::unique_ptr make_simple_aggregation() return cudf::make_all_aggregation(); case cudf::segmented_reduce_aggregation::ANY: return cudf::make_any_aggregation(); - default: CUDF_FAIL("Unsupported simple segmented aggregation"); + case cudf::segmented_reduce_aggregation::SUM_OF_SQUARES: + return cudf::make_sum_of_squares_aggregation(); + case cudf::segmented_reduce_aggregation::MEAN: + return cudf::make_mean_aggregation(); + case cudf::segmented_reduce_aggregation::VARIANCE: + return cudf::make_variance_aggregation(); + case cudf::segmented_reduce_aggregation::STD: + return cudf::make_std_aggregation(); + case cudf::segmented_reduce_aggregation::NUNIQUE: + return cudf::make_nunique_aggregation(); + default: CUDF_FAIL("Unsupported segmented reduce aggregation in this benchmark"); } } template -std::pair, thrust::device_vector> make_test_data( +std::pair, std::unique_ptr> make_test_data( nvbench::state& state) { auto const column_size{cudf::size_type(state.get_int64("column_size"))}; @@ -72,28 +87,30 @@ std::pair, thrust::device_vector> dtype, distribution_id::UNIFORM, 0, 100); auto input = create_random_column(dtype, row_count{column_size}, profile); - auto offset_it = cudf::detail::make_counting_transform_iterator( - 
0, [column_size, segment_length] __device__(auto i) { - return column_size < i * segment_length ? column_size : i * segment_length; - }); - - thrust::device_vector d_offsets(offset_it, offset_it + num_segments + 1); - - return std::pair(std::move(input), d_offsets); + auto offsets = cudf::sequence(num_segments + 1, + cudf::numeric_scalar(0), + cudf::numeric_scalar(segment_length)); + return std::pair(std::move(input), std::move(offsets)); } template -void BM_Simple_Segmented_Reduction(nvbench::state& state, - nvbench::type_list>) +void BM_Segmented_Reduction(nvbench::state& state, + nvbench::type_list>) { auto const column_size{cudf::size_type(state.get_int64("column_size"))}; auto const num_segments{cudf::size_type(state.get_int64("num_segments"))}; auto [input, offsets] = make_test_data(state); - auto agg = make_simple_aggregation(); + auto agg = make_reduce_aggregation(); - auto output_type = is_boolean_output_agg(kind) ? cudf::data_type{cudf::type_id::BOOL8} - : cudf::data_type{cudf::type_to_id()}; + auto const output_type = [] { + if (is_boolean_output_agg(kind)) { return cudf::data_type{cudf::type_id::BOOL8}; } + if (is_float_output_agg(kind)) { return cudf::data_type{cudf::type_id::FLOAT64}; } + if (kind == cudf::segmented_reduce_aggregation::NUNIQUE) { + return cudf::data_type{cudf::type_to_id()}; + } + return cudf::data_type{cudf::type_to_id()}; + }(); state.add_element_count(column_size); state.add_global_memory_reads(column_size); @@ -103,8 +120,10 @@ void BM_Simple_Segmented_Reduction(nvbench::state& state, state.add_global_memory_writes(num_segments); } - auto const input_view = input->view(); - auto const offset_span = cudf::device_span{offsets}; + auto const input_view = input->view(); + auto const offsets_view = offsets->view(); + auto const offset_span = cudf::device_span{ + offsets_view.template data(), static_cast(offsets_view.size())}; state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); state.exec( @@ 
-115,13 +134,17 @@ void BM_Simple_Segmented_Reduction(nvbench::state& state, using Types = nvbench::type_list; // Skip benchmarking MAX/ANY since they are covered by MIN/ALL respectively. +// Also VARIANCE includes STD calculation. using AggKinds = nvbench::enum_type_list; + cudf::aggregation::ALL, + cudf::aggregation::MEAN, + cudf::aggregation::VARIANCE, + cudf::aggregation::NUNIQUE>; -NVBENCH_BENCH_TYPES(BM_Simple_Segmented_Reduction, NVBENCH_TYPE_AXES(Types, AggKinds)) - .set_name("segmented_reduction_simple") +NVBENCH_BENCH_TYPES(BM_Segmented_Reduction, NVBENCH_TYPE_AXES(Types, AggKinds)) + .set_name("segmented_reduction") .set_type_axes_names({"DataType", "AggregationKinds"}) .add_int64_axis("column_size", {100'000, 1'000'000, 10'000'000, 100'000'000}) .add_int64_axis("num_segments", {1'000, 10'000, 100'000}); diff --git a/cpp/benchmarks/stream_compaction/unique_count.cpp b/cpp/benchmarks/stream_compaction/unique_count.cpp new file mode 100644 index 00000000000..f8319e0385c --- /dev/null +++ b/cpp/benchmarks/stream_compaction/unique_count.cpp @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include +#include +#include +#include + +#include + +template +void nvbench_unique_count(nvbench::state& state, nvbench::type_list) +{ + auto const num_rows = static_cast(state.get_int64("NumRows")); + auto const nulls = state.get_float64("NullProbability"); + + data_profile profile = data_profile_builder().cardinality(0).null_probability(nulls).distribution( + cudf::type_to_id(), distribution_id::UNIFORM, 0, num_rows / 100); + + auto source_column = create_random_column(cudf::type_to_id(), row_count{num_rows}, profile); + auto sorted_table = cudf::sort(cudf::table_view({source_column->view()})); + + auto input = sorted_table->view(); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + cudf::unique_count(input, cudf::null_equality::EQUAL); + }); +} + +using data_type = nvbench::type_list; + +NVBENCH_BENCH_TYPES(nvbench_unique_count, NVBENCH_TYPE_AXES(data_type)) + .set_name("unique_count") + .set_type_axes_names({"Type"}) + .add_int64_axis("NumRows", {10'000, 100'000, 1'000'000, 10'000'000}) + .add_float64_axis("NullProbability", {0.0, 0.1}); diff --git a/cpp/benchmarks/string/replace.cpp b/cpp/benchmarks/string/replace.cpp index b25af14ec2a..cb570020f0e 100644 --- a/cpp/benchmarks/string/replace.cpp +++ b/cpp/benchmarks/string/replace.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -69,7 +69,7 @@ static void generate_bench_args(benchmark::internal::Benchmark* b) int const row_mult = 8; int const min_rowlen = 1 << 5; int const max_rowlen = 1 << 13; - int const len_mult = 4; + int const len_mult = 2; generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult); } diff --git a/cpp/cmake/config.json b/cpp/cmake/config.json index f7d7b001856..a65afe9e58d 100644 --- a/cpp/cmake/config.json +++ b/cpp/cmake/config.json @@ -13,7 +13,11 @@ } }, "ConfigureTest": { - "flags": ["TEST_NAME", "TEST_SRC"] + "flags": ["TEST_NAME", "TEST_SRC"], + "kwargs": { + "GPUS": 1, + "PERCENT": 1 + } }, "ConfigureBench": { "flags": ["BENCH_NAME", "BENCH_SRC"] diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake index 943b89238e0..a716995182d 100644 --- a/cpp/cmake/thirdparty/get_arrow.cmake +++ b/cpp/cmake/thirdparty/get_arrow.cmake @@ -379,6 +379,8 @@ endfunction() if(NOT DEFINED CUDF_VERSION_Arrow) set(CUDF_VERSION_Arrow + # This version must be kept in sync with the libarrow version pinned for builds in + # dependencies.yaml. 10.0.1 CACHE STRING "The version of Arrow to find (or build)" ) diff --git a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md index 8cd4f8c6d27..91c3dccfdc6 100644 --- a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md +++ b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md @@ -121,8 +121,8 @@ recommend watching Sean Parent's [C++ Seasoning talk](https://www.youtube.com/wa and we try to follow his rules: "No raw loops. No raw pointers. No raw synchronization primitives." * Prefer algorithms from STL and Thrust to raw loops. - * Prefer libcudf and RMM [owning data structures and views](#libcudf-data-structures) to raw pointers - and raw memory allocation. + * Prefer libcudf and RMM [owning data structures and views](#libcudf-data-structures) to raw + pointers and raw memory allocation. 
* libcudf doesn't have a lot of CPU-thread concurrency, but there is some. And currently libcudf does use raw synchronization primitives. So we should revisit Parent's third rule and improve here. @@ -146,8 +146,8 @@ The following guidelines apply to organizing `#include` lines. * Separate groups by a blank line. * Order the groups from "nearest" to "farthest". In other words, local includes, then includes from other RAPIDS libraries, then includes from related libraries, like ``, then - includes from dependencies installed with cuDF, and then standard headers (for example ``, - ``). + includes from dependencies installed with cuDF, and then standard headers (for example + ``, ``). * Use `<>` instead of `""` unless the header is in the same directory as the source file. * Tools like `clangd` often auto-insert includes when they can, but they usually get the grouping and brackets wrong. @@ -269,6 +269,15 @@ An *immutable*, non-owning view of a table. A *mutable*, non-owning view of a table. +## cudf::size_type + +The `cudf::size_type` is the type used for the number of elements in a column, offsets to elements +within a column, indices to address specific elements, segments for subsets of column elements, etc. +It is equivalent to a signed, 32-bit integer type and therefore has a maximum value of 2147483647. +Some APIs also accept negative index values and those functions support a minimum value of +-2147483648. This fundamental type also influences output values not just for column size limits +but for counting elements as well. + ## Spans libcudf provides `span` classes that mimic C++20 `std::span`, which is a lightweight @@ -336,8 +345,8 @@ auto s1 = static_cast(s.get()); ``` ### Passing to device -Each scalar type, except `list_scalar`, has a corresponding non-owning device view class which allows -access to the value and its validity from the device. 
This can be obtained using the function +Each scalar type, except `list_scalar`, has a corresponding non-owning device view class which +allows access to the value and its validity from the device. This can be obtained using the function `get_scalar_device_view(ScalarType s)`. Note that a device view is not provided for a base scalar object, only for the derived typed scalar class objects. @@ -348,68 +357,84 @@ data, a specialized device view for list columns can be constructed via # libcudf Policies and Design Principles -`libcudf` is designed to provide thread-safe, single-GPU accelerated algorithm primitives for solving a wide variety of problems that arise in data science. -APIs are written to execute on the default GPU, which can be controlled by the caller through standard CUDA device APIs or environment variables like `CUDA_VISIBLE_DEVICES`. -Our goal is to enable diverse use cases like Spark or Pandas to benefit from the performance of GPUs, and libcudf relies on these higher-level layers like Spark or Dask to orchestrate multi-GPU tasks. +`libcudf` is designed to provide thread-safe, single-GPU accelerated algorithm primitives for +solving a wide variety of problems that arise in data science. APIs are written to execute on the +default GPU, which can be controlled by the caller through standard CUDA device APIs or environment +variables like `CUDA_VISIBLE_DEVICES`. Our goal is to enable diverse use cases like Spark or Pandas +to benefit from the performance of GPUs, and libcudf relies on these higher-level layers like Spark +or Dask to orchestrate multi-GPU tasks. -To best satisfy these use-cases, libcudf prioritizes performance and flexibility, which sometimes may come at the cost of convenience. -While we welcome users to use libcudf directly, we design with the expectation that most users will be consuming libcudf through higher-level layers like Spark or cuDF Python that handle some of details that direct users of libcudf must handle on their own. 
-We document these policies and the reasons behind them here. +To best satisfy these use-cases, libcudf prioritizes performance and flexibility, which sometimes +may come at the cost of convenience. While we welcome users to use libcudf directly, we design with +the expectation that most users will be consuming libcudf through higher-level layers like Spark or +cuDF Python that handle some of details that direct users of libcudf must handle on their own. We +document these policies and the reasons behind them here. ## libcudf does not introspect data libcudf APIs generally do not perform deep introspection and validation of input data. There are numerous reasons for this: 1. It violates the single responsibility principle: validation is separate from execution. -2. Since libcudf data structures store data on the GPU, any validation incurs _at minimum_ the overhead of a kernel launch, and may in general be prohibitively expensive. +2. Since libcudf data structures store data on the GPU, any validation incurs _at minimum_ the + overhead of a kernel launch, and may in general be prohibitively expensive. 3. API promises around data introspection often significantly complicate implementation. Users are therefore responsible for passing valid data into such APIs. _Note that this policy does not mean that libcudf performs no validation whatsoever_. libcudf APIs should still perform any validation that does not require introspection. -To give some idea of what should or should not be validated, here are (non-exhaustive) lists of examples. +To give some idea of what should or should not be validated, here are (non-exhaustive) lists of +examples. 
**Things that libcudf should validate**: -- Input column/table sizes or dtypes +- Input column/table sizes or data types **Things that libcudf should not validate**: - Integer overflow -- Ensuring that outputs will not exceed the 2GB size limit for a given set of inputs +- Ensuring that outputs will not exceed the [2GB size](#cudfsize_type) limit for a given set of + inputs ## libcudf expects nested types to have sanitized null masks -Various libcudf APIs accepting columns of nested dtypes (such as `LIST` or `STRUCT`) may assume that these columns have been sanitized. -In this context, sanitization refers to ensuring that the null elements in a column with a nested dtype are compatible with the elements of nested columns. +Various libcudf APIs accepting columns of nested data types (such as `LIST` or `STRUCT`) may assume +that these columns have been sanitized. In this context, sanitization refers to ensuring that the +null elements in a column with a nested dtype are compatible with the elements of nested columns. Specifically: -- Null elements of list columns should also be empty. The starting offset of a null element should be equal to the ending offset. +- Null elements of list columns should also be empty. The starting offset of a null element should + be equal to the ending offset. - Null elements of struct columns should also be null elements in the underlying structs. -- For compound columns, nulls should only be present at the level of the parent column. Child columns should not contain nulls. +- For compound columns, nulls should only be present at the level of the parent column. Child + columns should not contain nulls. - Slice operations on nested columns do not propagate offsets to child columns. -libcudf APIs _should_ promise to never return "dirty" columns, i.e. columns containing unsanitized data. -Therefore, the only problem is if users construct input columns that are not correctly sanitized and then pass those into libcudf APIs. 
+libcudf APIs _should_ promise to never return "dirty" columns, i.e. columns containing unsanitized +data. Therefore, the only problem is if users construct input columns that are not correctly +sanitized and then pass those into libcudf APIs. ## Treat libcudf APIs as if they were asynchronous libcudf APIs called on the host do not guarantee that the stream is synchronized before returning. -Work in libcudf occurs on `cudf::get_default_stream().value`, which defaults to the CUDA default stream (stream 0). -Note that the stream 0 behavior differs if [per-thread default stream is enabled](https://docs.nvidia.com/cuda/cuda-runtime-api/stream-sync-behavior.html) via `CUDF_USE_PER_THREAD_DEFAULT_STREAM`. -Any data provided to or returned by libcudf that uses a separate non-blocking stream requires synchronization with the default libcudf stream to ensure stream safety. +Work in libcudf occurs on `cudf::get_default_stream().value`, which defaults to the CUDA default +stream (stream 0). Note that the stream 0 behavior differs if [per-thread default stream is +enabled](https://docs.nvidia.com/cuda/cuda-runtime-api/stream-sync-behavior.html) via +`CUDF_USE_PER_THREAD_DEFAULT_STREAM`. Any data provided to or returned by libcudf that uses a +separate non-blocking stream requires synchronization with the default libcudf stream to ensure +stream safety. ## libcudf generally does not make ordering guarantees -Functions like merge or groupby in libcudf make no guarantees about the order of entries in the output. -Promising deterministic ordering is not, in general, conducive to fast parallel algorithms. +Functions like merge or groupby in libcudf make no guarantees about the order of entries in the +output. Promising deterministic ordering is not, in general, conducive to fast parallel algorithms. Calling code is responsible for performing sorts after the fact if sorted outputs are needed. 
## libcudf does not promise specific exception messages -libcudf documents the exceptions that will be thrown by an API for different kinds of invalid inputs. -The types of those exceptions (e.g. `cudf::logic_error`) are part of the public API. -However, the explanatory string returned by the `what` method of those exceptions is not part of the API and is subject to change. -Calling code should not rely on the contents of libcudf error messages to determine the nature of the error. -For information on the types of exceptions that libcudf throws under different circumstances, see the [section on error handling](#errors). +libcudf documents the exceptions that will be thrown by an API for different kinds of invalid +inputs. The types of those exceptions (e.g. `cudf::logic_error`) are part of the public API. +However, the explanatory string returned by the `what` method of those exceptions is not part of the +API and is subject to change. Calling code should not rely on the contents of libcudf error +messages to determine the nature of the error. For information on the types of exceptions that +libcudf throws under different circumstances, see the [section on error handling](#errors). # libcudf API and Implementation @@ -468,14 +493,6 @@ asynchrony if and when we add an asynchronous API to libcudf. **Note:** `cudaDeviceSynchronize()` should *never* be used. This limits the ability to do any multi-stream/multi-threaded work with libcudf APIs. - ### NVTX Ranges - -In order to aid in performance optimization and debugging, all compute intensive libcudf functions -should have a corresponding NVTX range. In libcudf, we have a convenience macro `CUDF_FUNC_RANGE()` -that will automatically annotate the lifetime of the enclosing function and use the function's name -as the name of the NVTX range. For more information about NVTX, see -[here](https://github.com/NVIDIA/NVTX/tree/dev/c). 
- ### Stream Creation There may be times in implementing libcudf features where it would be advantageous to use streams @@ -487,8 +504,8 @@ should avoid creating streams (even if it is slightly less efficient). It is a g ## Memory Allocation -Device [memory resources](#rmmdevice_memory_resource) are used in libcudf to abstract and control how device -memory is allocated. +Device [memory resources](#rmmdevice_memory_resource) are used in libcudf to abstract and control +how device memory is allocated. ### Output Memory @@ -508,6 +525,12 @@ std::unique_ptr returns_output_memory( void does_not_allocate_output_memory(...); ``` +This rule automatically applies to all detail APIs that allocate memory. Any detail API may be +called by any public API, and therefore could be allocating memory that is returned to the user. +To support such use cases, all detail APIs allocating memory resources should accept an `mr` +parameter. Callers are responsible for either passing through a provided `mr` or +`rmm::mr::get_current_device_resource()` as needed. + ### Temporary Memory Not all memory allocated within a libcudf API is returned to the caller. Often algorithms must @@ -528,7 +551,7 @@ rmm::device_buffer some_function( ### Memory Management libcudf code generally eschews raw pointers and direct memory allocation. Use RMM classes built to -use `device_memory_resource`(*)s for device memory allocation with automated lifetime management. +use `device_memory_resource`s for device memory allocation with automated lifetime management. #### rmm::device_buffer Allocates a specified number of bytes of untyped, uninitialized device memory using a @@ -610,6 +633,32 @@ rmm::mr::device_memory_resource * mr = new my_custom_resource{...}; rmm::device_uvector v2{100, s, mr}; ``` +## Default Parameters + +While public libcudf APIs are free to include default function parameters, detail functions should
Default memory resource parameters make it easy for developers to accidentally allocate memory +using the incorrect resource. Avoiding default memory resources forces developers to consider each +memory allocation carefully. + +While streams are not currently exposed in libcudf's API, we plan to do so eventually. As a result, +the same reasons for memory resources also apply to streams. Public APIs default to using +`cudf::get_default_stream()`. However, including the same default in detail APIs opens the door for +developers to forget to pass in a user-provided stream if one is passed to a public API. Forcing +every detail API call to explicitly pass a stream is intended to prevent such mistakes. + +The memory resources (and eventually, the stream) are the final parameters for essentially all +public APIs. For API consistency, the same is true throughout libcudf's internals. Therefore, a +consequence of not allowing default streams or MRs is that no parameters in detail APIs may have +defaults. + +## NVTX Ranges + +In order to aid in performance optimization and debugging, all compute intensive libcudf functions +should have a corresponding NVTX range. libcudf has a convenience macro `CUDF_FUNC_RANGE()` that +automatically annotates the lifetime of the enclosing function and uses the function's name as +the name of the NVTX range. For more information about NVTX, see +[here](https://github.com/NVIDIA/NVTX/tree/dev/c). + ## Input/Output Style The preferred style for how inputs are passed in and outputs are returned is the following: @@ -746,8 +795,8 @@ where compile time was a problem is in types used to store indices, which can be The "Indexalator", or index-normalizing iterator (`include/cudf/detail/indexalator.cuh`), can be used for index types (integers) without requiring a type-specific instance. 
It can be used for any iterator interface for reading an array of integer values of type `int8`, `int16`, `int32`, -`int64`, `uint8`, `uint16`, `uint32`, or `uint64`. Reading specific elements always return a -`cudf::size_type` integer. +`int64`, `uint8`, `uint16`, `uint32`, or `uint64`. Reading specific elements always returns a +[`cudf::size_type`](#cudfsize_type) integer. Use the `indexalator_factory` to create an appropriate input iterator from a column_view. Example input iterator usage: @@ -879,9 +928,9 @@ CUDF_FAIL("This code path should not be reached."); ### CUDA Error Checking -Use the `CUDF_CUDA_TRY` macro to check for the successful completion of CUDA runtime API functions. This -macro throws a `cudf::cuda_error` exception if the CUDA API return value is not `cudaSuccess`. The -thrown exception includes a description of the CUDA error code in its `what()` message. +Use the `CUDF_CUDA_TRY` macro to check for the successful completion of CUDA runtime API functions. +This macro throws a `cudf::cuda_error` exception if the CUDA API return value is not `cudaSuccess`. +The thrown exception includes a description of the CUDA error code in its `what()` message. Example: @@ -1104,8 +1153,8 @@ For list columns, the parent column's type is `LIST` and contains no data, but i the number of lists in the column, and its null mask represents the validity of each list element. The parent has two children. -1. A non-nullable column of `INT32` elements that indicates the offset to the beginning of each list - in a dense column of elements. +1. A non-nullable column of [`size_type`](#cudfsize_type) elements that indicates the offset to the + beginning of each list in a dense column of elements. 2. A column containing the actual data and optional null mask for all elements of all the lists packed together. @@ -1152,7 +1201,7 @@ a non-nullable column of `INT8` data. 
The parent column's type is `STRING` and c but its size represents the number of strings in the column, and its null mask represents the validity of each string. To summarize, the strings column children are: -1. A non-nullable column of `INT32` elements that indicates the offset to the beginning of each +1. A non-nullable column of [`size_type`](#cudfsize_type) elements that indicates the offset to the beginning of each string in a dense column of all characters. 2. A non-nullable column of `INT8` elements of all the characters across all the strings packed together. @@ -1264,9 +1313,9 @@ libcudf provides view types for nested column types as well as for the data elem `cudf::strings_column_view` is a view of a strings column, like `cudf::column_view` is a view of any `cudf::column`. `cudf::string_view` is a view of a single string, and therefore `cudf::string_view` is the data type of a `cudf::column` of type `STRING` just like `int32_t` is the -data type for a `cudf::column` of type `INT32`. As it's name implies, this is a read-only object -instance that points to device memory inside the strings column. It's lifespan is the same (or less) -as the column it views. +data type for a `cudf::column` of type [`size_type`](#cudfsize_type). As its name implies, this is a +read-only object instance that points to device memory inside the strings column. Its lifespan is +the same (or less) as the column it views. Use the `column_device_view::element` method to access an individual row element. Like any other column, do not call `element()` on a row that is null.
diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index e269d4d2e13..b688bf3d445 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -535,7 +535,9 @@ class argmin_aggregation final : public rolling_aggregation, public groupby_aggr /** * @brief Derived class for specifying a nunique aggregation */ -class nunique_aggregation final : public groupby_aggregation, public reduce_aggregation { +class nunique_aggregation final : public groupby_aggregation, + public reduce_aggregation, + public segmented_reduce_aggregation { public: nunique_aggregation(null_policy null_handling) : aggregation{NUNIQUE}, _null_handling{null_handling} diff --git a/cpp/include/cudf/detail/binaryop.hpp b/cpp/include/cudf/detail/binaryop.hpp index ffd8be971ab..e5609568d10 100644 --- a/cpp/include/cudf/detail/binaryop.hpp +++ b/cpp/include/cudf/detail/binaryop.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * Copyright (c) 2018-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,13 +30,12 @@ namespace detail { * * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ -std::unique_ptr binary_operation( - column_view const& lhs, - column_view const& rhs, - std::string const& ptx, - data_type output_type, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr binary_operation(column_view const& lhs, + column_view const& rhs, + std::string const& ptx, + data_type output_type, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::binary_operation(scalar const&, column_view const&, binary_operator, @@ -44,13 +43,12 @@ std::unique_ptr binary_operation( * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr binary_operation( - scalar const& lhs, - column_view const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr binary_operation(scalar const& lhs, + column_view const& rhs, + binary_operator op, + data_type output_type, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::binary_operation(column_view const&, scalar const&, binary_operator, @@ -58,13 +56,12 @@ std::unique_ptr binary_operation( * * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ -std::unique_ptr binary_operation( - column_view const& lhs, - scalar const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr binary_operation(column_view const& lhs, + scalar const& rhs, + binary_operator op, + data_type output_type, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::binary_operation(column_view const&, column_view const&, @@ -72,12 +69,11 @@ std::unique_ptr binary_operation( * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr binary_operation( - column_view const& lhs, - column_view const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr binary_operation(column_view const& lhs, + column_view const& rhs, + binary_operator op, + data_type output_type, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/calendrical_month_sequence.cuh b/cpp/include/cudf/detail/calendrical_month_sequence.cuh index 9dba0ba8961..59fb6758973 100644 --- a/cpp/include/cudf/detail/calendrical_month_sequence.cuh +++ b/cpp/include/cudf/detail/calendrical_month_sequence.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -38,7 +38,7 @@ struct calendrical_month_sequence_functor { scalar const& input, size_type months, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + rmm::mr::device_memory_resource* mr) { // Return empty column if n = 0 if (n == 0) return cudf::make_empty_column(input.type()); diff --git a/cpp/include/cudf/detail/concatenate.hpp b/cpp/include/cudf/detail/concatenate.hpp index 925029597a6..442814bc4fd 100644 --- a/cpp/include/cudf/detail/concatenate.hpp +++ b/cpp/include/cudf/detail/concatenate.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,20 +33,18 @@ namespace detail { * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr concatenate( - host_span columns_to_concat, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr concatenate(host_span columns_to_concat, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::concatenate(host_span,rmm::mr::device_memory_resource*) * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr concatenate( - host_span tables_to_concat, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr
concatenate(host_span tables_to_concat, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/copy.hpp b/cpp/include/cudf/detail/copy.hpp index 8c3f315284d..83395f8fa90 100644 --- a/cpp/include/cudf/detail/copy.hpp +++ b/cpp/include/cudf/detail/copy.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * Copyright (c) 2018-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -144,12 +144,11 @@ std::vector split(table_view const& input, * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr shift( - column_view const& input, - size_type offset, - scalar const& fill_value, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr shift(column_view const& input, + size_type offset, + scalar const& fill_value, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Performs segmented shifts for specified values. @@ -184,24 +183,22 @@ std::unique_ptr shift( * * @note If `offset == 0`, a copy of @p segmented_values is returned. */ -std::unique_ptr segmented_shift( - column_view const& segmented_values, - device_span segment_offsets, - size_type offset, - scalar const& fill_value, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr segmented_shift(column_view const& segmented_values, + device_span segment_offsets, + size_type offset, + scalar const& fill_value, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::contiguous_split * * @param stream CUDA stream used for device memory operations and kernel launches. 
**/ -std::vector contiguous_split( - cudf::table_view const& input, - std::vector const& splits, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::vector contiguous_split(cudf::table_view const& input, + std::vector const& splits, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::pack @@ -210,7 +207,7 @@ std::vector contiguous_split( **/ packed_columns pack(cudf::table_view const& input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::allocate_like(column_view const&, size_type, mask_allocation_policy, @@ -218,12 +215,11 @@ packed_columns pack(cudf::table_view const& input, * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr allocate_like( - column_view const& input, - size_type size, - mask_allocation_policy mask_alloc = mask_allocation_policy::RETAIN, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr allocate_like(column_view const& input, + size_type size, + mask_allocation_policy mask_alloc, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::copy_if_else( column_view const&, column_view const&, @@ -231,12 +227,11 @@ std::unique_ptr allocate_like( * * @param[in] stream CUDA stream used for device memory operations and kernel launches. 
*/ -std::unique_ptr copy_if_else( - column_view const& lhs, - column_view const& rhs, - column_view const& boolean_mask, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr copy_if_else(column_view const& lhs, + column_view const& rhs, + column_view const& boolean_mask, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::copy_if_else( scalar const&, column_view const&, @@ -244,12 +239,11 @@ std::unique_ptr copy_if_else( * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr copy_if_else( - scalar const& lhs, - column_view const& rhs, - column_view const& boolean_mask, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr copy_if_else(scalar const& lhs, + column_view const& rhs, + column_view const& boolean_mask, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::copy_if_else( column_view const&, scalar const&, @@ -257,12 +251,11 @@ std::unique_ptr copy_if_else( * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr copy_if_else( - column_view const& lhs, - scalar const& rhs, - column_view const& boolean_mask, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr copy_if_else(column_view const& lhs, + scalar const& rhs, + column_view const& boolean_mask, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::copy_if_else( scalar const&, scalar const&, @@ -270,36 +263,33 @@ std::unique_ptr copy_if_else( * * @param[in] stream CUDA stream used for device memory operations and kernel launches. 
*/ -std::unique_ptr copy_if_else( - scalar const& lhs, - scalar const& rhs, - column_view const& boolean_mask, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr copy_if_else(scalar const& lhs, + scalar const& rhs, + column_view const& boolean_mask, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::sample * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr
sample( - table_view const& input, - size_type const n, - sample_with_replacement replacement = sample_with_replacement::FALSE, - int64_t const seed = 0, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr
sample(table_view const& input, + size_type const n, + sample_with_replacement replacement, + int64_t const seed, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::get_element * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr get_element( - column_view const& input, - size_type index, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr get_element(column_view const& input, + size_type index, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::has_nonempty_nulls @@ -320,10 +310,9 @@ bool may_have_nonempty_nulls(column_view const& input, rmm::cuda_stream_view str * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr purge_nonempty_nulls( - column_view const& input, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr purge_nonempty_nulls(column_view const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/copy_if.cuh b/cpp/include/cudf/detail/copy_if.cuh index 6eea72a1e0d..2870a891f87 100644 --- a/cpp/include/cudf/detail/copy_if.cuh +++ b/cpp/include/cudf/detail/copy_if.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -229,14 +229,13 @@ struct DeviceType()>> { template struct scatter_gather_functor { template ()>* = nullptr> - std::unique_ptr operator()( - cudf::column_view const& input, - cudf::size_type const& output_size, - cudf::size_type const* block_offsets, - Filter filter, - cudf::size_type per_thread, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + std::unique_ptr operator()(cudf::column_view const& input, + cudf::size_type const& output_size, + cudf::size_type const* block_offsets, + Filter filter, + cudf::size_type per_thread, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto output_column = cudf::detail::allocate_like( input, output_size, cudf::mask_allocation_policy::RETAIN, stream, mr); @@ -277,14 +276,13 @@ struct scatter_gather_functor { template () and !cudf::is_fixed_point()>* = nullptr> - std::unique_ptr operator()( - cudf::column_view const& input, - cudf::size_type const& output_size, - cudf::size_type const*, - Filter filter, - cudf::size_type, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + std::unique_ptr operator()(cudf::column_view const& input, + cudf::size_type const& output_size, + cudf::size_type const*, + Filter filter, + cudf::size_type, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { rmm::device_uvector indices(output_size, stream); @@ -320,11 +318,10 @@ struct scatter_gather_functor { * @return unique_ptr
The table generated from filtered `input`. */ template -std::unique_ptr
copy_if( - table_view const& input, - Filter filter, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +std::unique_ptr
copy_if(table_view const& input, + Filter filter, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); diff --git a/cpp/include/cudf/detail/copy_if_else.cuh b/cpp/include/cudf/detail/copy_if_else.cuh index b20753239ab..083b12edbf8 100644 --- a/cpp/include/cudf/detail/copy_if_else.cuh +++ b/cpp/include/cudf/detail/copy_if_else.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -145,15 +145,14 @@ __launch_bounds__(block_size) __global__ * by `filter[i]` */ template -std::unique_ptr copy_if_else( - bool nullable, - LeftIter lhs_begin, - LeftIter lhs_end, - RightIter rhs, - FilterFn filter, - cudf::data_type output_type, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +std::unique_ptr copy_if_else(bool nullable, + LeftIter lhs_begin, + LeftIter lhs_end, + RightIter rhs, + FilterFn filter, + cudf::data_type output_type, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { // This is the type of the thrust::optional element in the passed iterators using Element = typename thrust::iterator_traits::value_type::value_type; diff --git a/cpp/include/cudf/detail/copy_range.cuh b/cpp/include/cudf/detail/copy_range.cuh index 22714e97dfa..0d5aa509e08 100644 --- a/cpp/include/cudf/detail/copy_range.cuh +++ b/cpp/include/cudf/detail/copy_range.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -203,14 +203,13 @@ void copy_range_in_place(column_view const& source, * @param stream CUDA stream used for device memory operations and kernel launches. 
* @return std::unique_ptr The result target column */ -std::unique_ptr copy_range( - column_view const& source, - column_view const& target, - size_type source_begin, - size_type source_end, - size_type target_begin, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr copy_range(column_view const& source, + column_view const& target, + size_type source_begin, + size_type source_end, + size_type target_begin, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/datetime.hpp b/cpp/include/cudf/detail/datetime.hpp index c2e3c32b65f..c5160958165 100644 --- a/cpp/include/cudf/detail/datetime.hpp +++ b/cpp/include/cudf/detail/datetime.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,70 +29,63 @@ namespace detail { * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr extract_year( - cudf::column_view const& column, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr extract_year(cudf::column_view const& column, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::extract_month(cudf::column_view const&, rmm::mr::device_memory_resource *) * * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ -std::unique_ptr extract_month( - cudf::column_view const& column, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr extract_month(cudf::column_view const& column, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::extract_day(cudf::column_view const&, rmm::mr::device_memory_resource *) * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr extract_day( - cudf::column_view const& column, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr extract_day(cudf::column_view const& column, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::extract_weekday(cudf::column_view const&, rmm::mr::device_memory_resource *) * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr extract_weekday( - cudf::column_view const& column, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr extract_weekday(cudf::column_view const& column, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::extract_hour(cudf::column_view const&, rmm::mr::device_memory_resource *) * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr extract_hour( - cudf::column_view const& column, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr extract_hour(cudf::column_view const& column, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::extract_minute(cudf::column_view const&, rmm::mr::device_memory_resource *) * * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ -std::unique_ptr extract_minute( - cudf::column_view const& column, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr extract_minute(cudf::column_view const& column, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::extract_second(cudf::column_view const&, rmm::mr::device_memory_resource *) * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr extract_second( - cudf::column_view const& column, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr extract_second(cudf::column_view const& column, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::extract_millisecond_fraction(cudf::column_view const&, @@ -100,10 +93,9 @@ std::unique_ptr extract_second( * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr extract_millisecond_fraction( - cudf::column_view const& column, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr extract_millisecond_fraction(cudf::column_view const& column, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::extract_microsecond_fraction(cudf::column_view const&, @@ -111,10 +103,9 @@ std::unique_ptr extract_millisecond_fraction( * * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ -std::unique_ptr extract_microsecond_fraction( - cudf::column_view const& column, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr extract_microsecond_fraction(cudf::column_view const& column, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::extract_nanosecond_fraction(cudf::column_view const&, @@ -122,30 +113,27 @@ std::unique_ptr extract_microsecond_fraction( * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr extract_nanosecond_fraction( - cudf::column_view const& column, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr extract_nanosecond_fraction(cudf::column_view const& column, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::last_day_of_month(cudf::column_view const&, rmm::mr::device_memory_resource *) * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr last_day_of_month( - cudf::column_view const& column, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr last_day_of_month(cudf::column_view const& column, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::day_of_year(cudf::column_view const&, rmm::mr::device_memory_resource *) * * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ -std::unique_ptr day_of_year( - cudf::column_view const& column, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr day_of_year(cudf::column_view const& column, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::add_calendrical_months(cudf::column_view const&, cudf::column_view const&, @@ -153,11 +141,10 @@ std::unique_ptr day_of_year( * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr add_calendrical_months( - cudf::column_view const& timestamps, - cudf::column_view const& months, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr add_calendrical_months(cudf::column_view const& timestamps, + cudf::column_view const& months, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::add_calendrical_months(cudf::column_view const&, cudf::scalar const&, @@ -165,26 +152,23 @@ std::unique_ptr add_calendrical_months( * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr add_calendrical_months( - cudf::column_view const& timestamps, - cudf::scalar const& months, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr add_calendrical_months(cudf::column_view const& timestamps, + cudf::scalar const& months, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::is_leap_year(cudf::column_view const&, rmm::mr::device_memory_resource *) * * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ -std::unique_ptr is_leap_year( - cudf::column_view const& column, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr is_leap_year(cudf::column_view const& column, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); -std::unique_ptr extract_quarter( - cudf::column_view const& column, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr extract_quarter(cudf::column_view const& column, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace datetime diff --git a/cpp/include/cudf/detail/fill.hpp b/cpp/include/cudf/detail/fill.hpp index e34acfff6b9..caaccfb4851 100644 --- a/cpp/include/cudf/detail/fill.hpp +++ b/cpp/include/cudf/detail/fill.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -43,13 +43,12 @@ void fill_in_place(mutable_column_view& destination, * * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ -std::unique_ptr fill( - column_view const& input, - size_type begin, - size_type end, - scalar const& value, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr fill(column_view const& input, + size_type begin, + size_type end, + scalar const& value, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/gather.cuh b/cpp/include/cudf/detail/gather.cuh index 57d834e6277..5460a0e5a76 100644 --- a/cpp/include/cudf/detail/gather.cuh +++ b/cpp/include/cudf/detail/gather.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -583,10 +583,12 @@ void gather_bitmask(table_view const& source, std::transform(target.begin(), target.end(), target_masks.begin(), [](auto const& col) { return col->mutable_view().null_mask(); }); - auto d_target_masks = make_device_uvector_async(target_masks, stream); + auto d_target_masks = + make_device_uvector_async(target_masks, stream, rmm::mr::get_current_device_resource()); auto const device_source = table_device_view::create(source, stream); - auto d_valid_counts = make_zeroed_device_uvector_async(target.size(), stream); + auto d_valid_counts = make_zeroed_device_uvector_async( + target.size(), stream, rmm::mr::get_current_device_resource()); // Dispatch operation enum to get implementation auto const impl = [op]() { @@ -647,13 +649,12 @@ void gather_bitmask(table_view const& source, * @return cudf::table Result of the gather */ template -std::unique_ptr
gather( - table_view const& source_table, - MapIterator gather_map_begin, - MapIterator gather_map_end, - out_of_bounds_policy bounds_policy = out_of_bounds_policy::DONT_CHECK, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +std::unique_ptr
gather(table_view const& source_table, + MapIterator gather_map_begin, + MapIterator gather_map_end, + out_of_bounds_policy bounds_policy, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { std::vector> destination_columns; diff --git a/cpp/include/cudf/detail/gather.hpp b/cpp/include/cudf/detail/gather.hpp index 9d61a8de184..034eb6c1282 100644 --- a/cpp/include/cudf/detail/gather.hpp +++ b/cpp/include/cudf/detail/gather.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -61,13 +61,12 @@ enum class negative_index_policy : bool { ALLOWED, NOT_ALLOWED }; * @param[in] mr Device memory resource used to allocate the returned table's device memory * @return Result of the gather */ -std::unique_ptr
gather( - table_view const& source_table, - column_view const& gather_map, - out_of_bounds_policy bounds_policy, - negative_index_policy neg_indices, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr
gather(table_view const& source_table, + column_view const& gather_map, + out_of_bounds_policy bounds_policy, + negative_index_policy neg_indices, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::detail::gather(table_view const&,column_view const&,table_view @@ -76,13 +75,12 @@ std::unique_ptr
gather( * * @throws cudf::logic_error if `gather_map` span size is larger than max of `size_type`. */ -std::unique_ptr
gather( - table_view const& source_table, - device_span const gather_map, - out_of_bounds_policy bounds_policy, - negative_index_policy neg_indices, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr
gather(table_view const& source_table, + device_span const gather_map, + out_of_bounds_policy bounds_policy, + negative_index_policy neg_indices, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/groupby/group_replace_nulls.hpp b/cpp/include/cudf/detail/groupby/group_replace_nulls.hpp index 9e64048b7b4..e081a626c75 100644 --- a/cpp/include/cudf/detail/groupby/group_replace_nulls.hpp +++ b/cpp/include/cudf/detail/groupby/group_replace_nulls.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -36,12 +36,11 @@ namespace detail { * @param stream CUDA stream used for device memory operations and kernel launches. * @param[in] mr Device memory resource used to allocate device memory of the returned column. 
*/ -std::unique_ptr group_replace_nulls( - cudf::column_view const& grouped_value, - device_span group_labels, - cudf::replace_policy replace_policy, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr group_replace_nulls(cudf::column_view const& grouped_value, + device_span group_labels, + cudf::replace_policy replace_policy, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace groupby diff --git a/cpp/include/cudf/detail/groupby/sort_helper.hpp b/cpp/include/cudf/detail/groupby/sort_helper.hpp index e2510d75a83..663ff44ca56 100644 --- a/cpp/include/cudf/detail/groupby/sort_helper.hpp +++ b/cpp/include/cudf/detail/groupby/sort_helper.hpp @@ -85,10 +85,9 @@ struct sort_groupby_helper { * @param values The value column to group and sort * @return the sorted and grouped column */ - std::unique_ptr sorted_values( - column_view const& values, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + std::unique_ptr sorted_values(column_view const& values, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Groups a column of values according to `keys` @@ -100,28 +99,25 @@ struct sort_groupby_helper { * @param values The value column to group * @return the grouped column */ - std::unique_ptr grouped_values( - column_view const& values, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + std::unique_ptr grouped_values(column_view const& values, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Get a table of sorted unique keys * * @return a new table in which each row is a unique row in the sorted key table. */ - std::unique_ptr
unique_keys( - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + std::unique_ptr
unique_keys(rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Get a table of sorted keys * * @return a new table containing the sorted keys. */ - std::unique_ptr
sorted_keys( - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + std::unique_ptr
sorted_keys(rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Get the number of groups in `keys` diff --git a/cpp/include/cudf/detail/hashing.hpp b/cpp/include/cudf/detail/hashing.hpp index b7469d80a8d..771b3e150ec 100644 --- a/cpp/include/cudf/detail/hashing.hpp +++ b/cpp/include/cudf/detail/hashing.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -31,29 +31,25 @@ namespace detail { * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr hash( - table_view const& input, - hash_id hash_function = hash_id::HASH_MURMUR3, - uint32_t seed = cudf::DEFAULT_HASH_SEED, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr hash(table_view const& input, + hash_id hash_function, + uint32_t seed, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); -std::unique_ptr murmur_hash3_32( - table_view const& input, - uint32_t seed = cudf::DEFAULT_HASH_SEED, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr murmur_hash3_32(table_view const& input, + uint32_t seed, + rmm::cuda_stream_view, + rmm::mr::device_memory_resource* mr); -std::unique_ptr spark_murmur_hash3_32( - table_view const& input, - uint32_t seed = cudf::DEFAULT_HASH_SEED, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr spark_murmur_hash3_32(table_view const& input, + uint32_t seed, + rmm::cuda_stream_view, + rmm::mr::device_memory_resource* mr); -std::unique_ptr md5_hash( - table_view const& 
input, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr md5_hash(table_view const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /* Copyright 2005-2014 Daniel James. * diff --git a/cpp/include/cudf/detail/interop.hpp b/cpp/include/cudf/detail/interop.hpp index 25ce5b09eb8..7117517487c 100644 --- a/cpp/include/cudf/detail/interop.hpp +++ b/cpp/include/cudf/detail/interop.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -40,20 +40,18 @@ namespace detail { * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr
from_dlpack( - DLManagedTensor const* managed_tensor, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr
from_dlpack(DLManagedTensor const* managed_tensor, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::to_dlpack * * @param stream CUDA stream used for device memory operations and kernel launches. */ -DLManagedTensor* to_dlpack( - table_view const& input, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +DLManagedTensor* to_dlpack(table_view const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); // Creating arrow as per given type_id and buffer arguments template @@ -111,19 +109,18 @@ data_type arrow_to_cudf_type(arrow::DataType const& arrow_type); * @param stream CUDA stream used for device memory operations and kernel launches. */ std::shared_ptr to_arrow(table_view input, - std::vector const& metadata = {}, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - arrow::MemoryPool* ar_mr = arrow::default_memory_pool()); + std::vector const& metadata, + rmm::cuda_stream_view stream, + arrow::MemoryPool* ar_mr); /** * @copydoc cudf::arrow_to_cudf * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr
from_arrow( - arrow::Table const& input_table, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr
from_arrow(arrow::Table const& input_table, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/label_bins.hpp b/cpp/include/cudf/detail/label_bins.hpp index f556c81c371..7f3cf033e66 100644 --- a/cpp/include/cudf/detail/label_bins.hpp +++ b/cpp/include/cudf/detail/label_bins.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -45,14 +45,13 @@ namespace detail { * * @param stream Stream view on which to allocate resources and queue execution. */ -std::unique_ptr label_bins( - column_view const& input, - column_view const& left_edges, - inclusive left_inclusive, - column_view const& right_edges, - inclusive right_inclusive, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr label_bins(column_view const& input, + column_view const& left_edges, + inclusive left_inclusive, + column_view const& right_edges, + inclusive right_inclusive, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** @} */ // end of group } // namespace detail diff --git a/cpp/include/cudf/detail/null_mask.cuh b/cpp/include/cudf/detail/null_mask.cuh index cb9ced6fc28..3ff3bb4cf3c 100644 --- a/cpp/include/cudf/detail/null_mask.cuh +++ b/cpp/include/cudf/detail/null_mask.cuh @@ -114,13 +114,12 @@ __global__ void offset_bitmask_binop(Binop op, * @param stream CUDA stream used for device memory operations and kernel launches */ template -std::pair bitmask_binop( - Binop op, - host_span masks, - host_span masks_begin_bits, - size_type mask_size_bits, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +std::pair bitmask_binop(Binop op, + 
host_span masks, + host_span masks_begin_bits, + size_type mask_size_bits, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto dest_mask = rmm::device_buffer{bitmask_allocation_size_bytes(mask_size_bits), stream, mr}; auto null_count = @@ -426,7 +425,8 @@ std::vector segmented_count_bits(bitmask_type const* bitmask, // Construct a contiguous host buffer of indices and copy to device. auto const h_indices = std::vector(indices_begin, indices_end); - auto const d_indices = make_device_uvector_async(h_indices, stream); + auto const d_indices = + make_device_uvector_async(h_indices, stream, rmm::mr::get_current_device_resource()); // Compute the bit counts over each segment. auto first_bit_indices_begin = thrust::make_transform_iterator( diff --git a/cpp/include/cudf/detail/null_mask.hpp b/cpp/include/cudf/detail/null_mask.hpp index a0e04d7b215..7f1b15893c5 100644 --- a/cpp/include/cudf/detail/null_mask.hpp +++ b/cpp/include/cudf/detail/null_mask.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -31,11 +31,10 @@ namespace detail { * * @param stream CUDA stream used for device memory operations and kernel launches. */ -rmm::device_buffer create_null_mask( - size_type size, - mask_state state, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +rmm::device_buffer create_null_mask(size_type size, + mask_state state, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::set_null_mask(bitmask_type*, size_type, size_type, bool) @@ -209,22 +208,20 @@ std::vector segmented_null_count(bitmask_type const* bitmask, * * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ -rmm::device_buffer copy_bitmask( - bitmask_type const* mask, - size_type begin_bit, - size_type end_bit, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +rmm::device_buffer copy_bitmask(bitmask_type const* mask, + size_type begin_bit, + size_type end_bit, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::copy_bitmask(column_view const& view, rmm::mr::device_memory_resource*) * * @param stream CUDA stream used for device memory operations and kernel launches. */ -rmm::device_buffer copy_bitmask( - column_view const& view, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +rmm::device_buffer copy_bitmask(column_view const& view, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc bitmask_and(host_span, host_span const, @@ -232,32 +229,29 @@ rmm::device_buffer copy_bitmask( * * @param stream CUDA stream used for device memory operations and kernel launches */ -std::pair bitmask_and( - host_span masks, - host_span masks_begin_bits, - size_type mask_size_bits, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::pair bitmask_and(host_span masks, + host_span masks_begin_bits, + size_type mask_size_bits, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::bitmask_and * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ -std::pair bitmask_and( - table_view const& view, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::pair bitmask_and(table_view const& view, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::bitmask_or * * @param[in] stream CUDA stream used for device memory operations and kernel launches. 
*/ -std::pair bitmask_or( - table_view const& view, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::pair bitmask_or(table_view const& view, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Performs a bitwise AND of the specified bitmasks, diff --git a/cpp/include/cudf/detail/repeat.hpp b/cpp/include/cudf/detail/repeat.hpp index 69d9705556f..883d5d158fb 100644 --- a/cpp/include/cudf/detail/repeat.hpp +++ b/cpp/include/cudf/detail/repeat.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -32,12 +32,11 @@ namespace detail { * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr
repeat( - table_view const& input_table, - column_view const& count, - bool check_count, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr
repeat(table_view const& input_table, + column_view const& count, + bool check_count, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::repeat(table_view const&, size_type, @@ -45,11 +44,10 @@ std::unique_ptr
repeat( * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr
repeat( - table_view const& input_table, - size_type count, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr
repeat(table_view const& input_table, + size_type count, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/replace.hpp b/cpp/include/cudf/detail/replace.hpp index 9721c6e9849..da83f7b285d 100644 --- a/cpp/include/cudf/detail/replace.hpp +++ b/cpp/include/cudf/detail/replace.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * Copyright (c) 2018-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -31,11 +31,10 @@ namespace detail { * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr replace_nulls( - column_view const& input, - cudf::column_view const& replacement, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr replace_nulls(column_view const& input, + cudf::column_view const& replacement, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::replace_nulls(column_view const&, scalar const&, @@ -43,11 +42,10 @@ std::unique_ptr replace_nulls( * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr replace_nulls( - column_view const& input, - scalar const& replacement, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr replace_nulls(column_view const& input, + scalar const& replacement, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::replace_nulls(column_view const&, replace_policy const&, @@ -55,11 +53,10 @@ std::unique_ptr replace_nulls( * * @param[in] stream CUDA stream used for device memory operations and kernel launches. 
*/ -std::unique_ptr replace_nulls( - column_view const& input, - replace_policy const& replace_policy, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr replace_nulls(column_view const& input, + replace_policy const& replace_policy, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::replace_nans(column_view const&, column_view const&, @@ -67,11 +64,10 @@ std::unique_ptr replace_nulls( * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr replace_nans( - column_view const& input, - column_view const& replacement, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr replace_nans(column_view const& input, + column_view const& replacement, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::replace_nans(column_view const&, scalar const&, @@ -79,33 +75,30 @@ std::unique_ptr replace_nans( * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr replace_nans( - column_view const& input, - scalar const& replacement, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr replace_nans(column_view const& input, + scalar const& replacement, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::find_and_replace_all * * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ -std::unique_ptr find_and_replace_all( - column_view const& input_col, - column_view const& values_to_replace, - column_view const& replacement_values, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr find_and_replace_all(column_view const& input_col, + column_view const& values_to_replace, + column_view const& replacement_values, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::normalize_nans_and_zeros * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr normalize_nans_and_zeros( - column_view const& input, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr normalize_nans_and_zeros(column_view const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/reshape.hpp b/cpp/include/cudf/detail/reshape.hpp index ccffcbc61df..5ab53690a23 100644 --- a/cpp/include/cudf/detail/reshape.hpp +++ b/cpp/include/cudf/detail/reshape.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,21 +30,19 @@ namespace detail { * * @param stream CUDA stream used for device memory operations and kernel launches */ -std::unique_ptr
tile( - table_view const& input, - size_type count, - rmm::cuda_stream_view, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr
tile(table_view const& input, + size_type count, + rmm::cuda_stream_view, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::interleave_columns * * @param stream CUDA stream used for device memory operations and kernel launches */ -std::unique_ptr interleave_columns( - table_view const& input, - rmm::cuda_stream_view, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr interleave_columns(table_view const& input, + rmm::cuda_stream_view, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/rolling.hpp b/cpp/include/cudf/detail/rolling.hpp index dcaece2bafc..da90217c254 100644 --- a/cpp/include/cudf/detail/rolling.hpp +++ b/cpp/include/cudf/detail/rolling.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -39,14 +39,13 @@ namespace detail { * * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ -std::unique_ptr rolling_window( - column_view const& input, - column_view const& preceding_window, - column_view const& following_window, - size_type min_periods, - rolling_aggregation const& agg, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr rolling_window(column_view const& input, + column_view const& preceding_window, + column_view const& following_window, + size_type min_periods, + rolling_aggregation const& agg, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/round.hpp b/cpp/include/cudf/detail/round.hpp index 1e5612919f4..cdfc7caef37 100644 --- a/cpp/include/cudf/detail/round.hpp +++ b/cpp/include/cudf/detail/round.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -31,12 +31,11 @@ namespace detail { * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr round( - column_view const& input, - int32_t decimal_places, - rounding_method method, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr round(column_view const& input, + int32_t decimal_places, + rounding_method method, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh index c8b17e22df2..dbf7bfa9527 100644 --- a/cpp/include/cudf/detail/scatter.cuh +++ b/cpp/include/cudf/detail/scatter.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -386,13 +386,12 @@ struct column_scatterer_impl { * @return Result of scattering values from source to target */ template -std::unique_ptr
scatter( - table_view const& source, - MapIterator scatter_map_begin, - MapIterator scatter_map_end, - table_view const& target, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +std::unique_ptr
scatter(table_view const& source, + MapIterator scatter_map_begin, + MapIterator scatter_map_end, + table_view const& target, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); diff --git a/cpp/include/cudf/detail/scatter.hpp b/cpp/include/cudf/detail/scatter.hpp index 7c4b04537ea..39ae4fe1944 100644 --- a/cpp/include/cudf/detail/scatter.hpp +++ b/cpp/include/cudf/detail/scatter.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -59,12 +59,11 @@ namespace detail { * @param mr Device memory resource used to allocate the returned table's device memory * @return Result of scattering values from source to target */ -std::unique_ptr
scatter( - table_view const& source, - column_view const& scatter_map, - table_view const& target, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr
scatter(table_view const& source, + column_view const& scatter_map, + table_view const& target, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::detail::scatter(table_view const&,column_view const&,table_view @@ -72,12 +71,11 @@ std::unique_ptr
scatter( * * @throws cudf::logic_error if `scatter_map` span size is larger than max of `size_type`. */ -std::unique_ptr
scatter( - table_view const& source, - device_span const scatter_map, - table_view const& target, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr
scatter(table_view const& source, + device_span const scatter_map, + table_view const& target, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Scatters a row of scalar values into a copy of the target table @@ -108,12 +106,11 @@ std::unique_ptr
scatter( * @param mr Device memory resource used to allocate the returned table's device memory * @return Result of scattering values from source to target */ -std::unique_ptr
scatter( - std::vector> const& source, - column_view const& indices, - table_view const& target, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr
scatter(std::vector> const& source, + column_view const& indices, + table_view const& target, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::boolean_mask_scatter( @@ -123,12 +120,11 @@ std::unique_ptr
scatter( * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr
boolean_mask_scatter( - table_view const& source, - table_view const& target, - column_view const& boolean_mask, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr
boolean_mask_scatter(table_view const& source, + table_view const& target, + column_view const& boolean_mask, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::boolean_mask_scatter( @@ -144,7 +140,7 @@ std::unique_ptr
boolean_mask_scatter( table_view const& target, column_view const& boolean_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/search.hpp b/cpp/include/cudf/detail/search.hpp index 56d41fd635c..4c4ad7834f4 100644 --- a/cpp/include/cudf/detail/search.hpp +++ b/cpp/include/cudf/detail/search.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -89,12 +89,11 @@ std::unique_ptr contains(column_view const& haystack, * @param mr Device memory resource used to allocate the returned vector * @return A vector of bools indicating if each row in `needles` has matching rows in `haystack` */ -rmm::device_uvector contains( - table_view const& haystack, - table_view const& needles, - null_equality compare_nulls, - nan_equality compare_nans, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +rmm::device_uvector contains(table_view const& haystack, + table_view const& needles, + null_equality compare_nulls, + nan_equality compare_nans, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace cudf::detail diff --git a/cpp/include/cudf/detail/sequence.hpp b/cpp/include/cudf/detail/sequence.hpp index 4a9bf5c74e1..3c3d1d0ed9e 100644 --- a/cpp/include/cudf/detail/sequence.hpp +++ b/cpp/include/cudf/detail/sequence.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -32,12 +32,11 @@ namespace detail { * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr sequence( - size_type size, - scalar const& init, - scalar const& step, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr sequence(size_type size, + scalar const& init, + scalar const& step, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::sequence(size_type size, scalar const& init, @@ -46,11 +45,10 @@ std::unique_ptr sequence( * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr sequence( - size_type size, - scalar const& init, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr sequence(size_type size, + scalar const& init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::calendrical_month_sequence(size_type size, @@ -60,12 +58,11 @@ std::unique_ptr sequence( * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr calendrical_month_sequence( - size_type size, - scalar const& init, - size_type months, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr calendrical_month_sequence(size_type size, + scalar const& init, + size_type months, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/stream_compaction.hpp b/cpp/include/cudf/detail/stream_compaction.hpp index e725718ed22..e0fc7b71cd9 100644 --- a/cpp/include/cudf/detail/stream_compaction.hpp +++ b/cpp/include/cudf/detail/stream_compaction.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. 
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -32,12 +32,11 @@ namespace detail { * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr
drop_nulls( - table_view const& input, - std::vector const& keys, - cudf::size_type keep_threshold, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr
drop_nulls(table_view const& input, + std::vector const& keys, + cudf::size_type keep_threshold, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::drop_nans(table_view const&, std::vector const&, @@ -45,50 +44,46 @@ std::unique_ptr
drop_nulls( * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr
drop_nans( - table_view const& input, - std::vector const& keys, - cudf::size_type keep_threshold, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr
drop_nans(table_view const& input, + std::vector const& keys, + cudf::size_type keep_threshold, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::apply_boolean_mask * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr
apply_boolean_mask( - table_view const& input, - column_view const& boolean_mask, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr
apply_boolean_mask(table_view const& input, + column_view const& boolean_mask, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::unique * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr
unique( - table_view const& input, - std::vector const& keys, - duplicate_keep_option keep, - null_equality nulls_equal = null_equality::EQUAL, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr
unique(table_view const& input, + std::vector const& keys, + duplicate_keep_option keep, + null_equality nulls_equal, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::distinct * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr
distinct( - table_view const& input, - std::vector const& keys, - duplicate_keep_option keep = duplicate_keep_option::KEEP_ANY, - null_equality nulls_equal = null_equality::EQUAL, - nan_equality nans_equal = nan_equality::ALL_EQUAL, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr
distinct(table_view const& input, + std::vector const& keys, + duplicate_keep_option keep, + null_equality nulls_equal, + nan_equality nans_equal, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Create a new table without duplicate rows. @@ -110,14 +105,13 @@ std::unique_ptr
distinct( * @param mr Device memory resource used to allocate the returned table * @return A table containing the resulting distinct rows */ -std::unique_ptr
stable_distinct( - table_view const& input, - std::vector const& keys, - duplicate_keep_option keep = duplicate_keep_option::KEEP_ANY, - null_equality nulls_equal = null_equality::EQUAL, - nan_equality nans_equal = nan_equality::ALL_EQUAL, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr
stable_distinct(table_view const& input, + std::vector const& keys, + duplicate_keep_option keep, + null_equality nulls_equal, + nan_equality nans_equal, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Create a column of indices of all distinct rows in the input table. @@ -133,13 +127,12 @@ std::unique_ptr
stable_distinct( * @param mr Device memory resource used to allocate the returned vector * @return A device_uvector containing the result indices */ -rmm::device_uvector get_distinct_indices( - table_view const& input, - duplicate_keep_option keep = duplicate_keep_option::KEEP_ANY, - null_equality nulls_equal = null_equality::EQUAL, - nan_equality nans_equal = nan_equality::ALL_EQUAL, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +rmm::device_uvector get_distinct_indices(table_view const& input, + duplicate_keep_option keep, + null_equality nulls_equal, + nan_equality nans_equal, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::unique_count(column_view const&, null_policy, nan_policy) @@ -157,8 +150,8 @@ cudf::size_type unique_count(column_view const& input, * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ cudf::size_type unique_count(table_view const& input, - null_equality nulls_equal = null_equality::EQUAL, - rmm::cuda_stream_view stream = cudf::get_default_stream()); + null_equality nulls_equal, + rmm::cuda_stream_view stream); /** * @copydoc cudf::distinct_count(column_view const&, null_policy, nan_policy) @@ -176,8 +169,8 @@ cudf::size_type distinct_count(column_view const& input, * @param[in] stream CUDA stream used for device memory operations and kernel launches. 
*/ cudf::size_type distinct_count(table_view const& input, - null_equality nulls_equal = null_equality::EQUAL, - rmm::cuda_stream_view stream = cudf::get_default_stream()); + null_equality nulls_equal, + rmm::cuda_stream_view stream); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/structs/utilities.hpp b/cpp/include/cudf/detail/structs/utilities.hpp index 4a708d2fb51..5fcc331a382 100644 --- a/cpp/include/cudf/detail/structs/utilities.hpp +++ b/cpp/include/cudf/detail/structs/utilities.hpp @@ -175,7 +175,7 @@ class flattened_table { std::vector const& null_precedence, column_nullability nullability, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::mr::device_memory_resource* mr); /** * @brief Superimpose nulls from a given null mask into the input column, using bitwise AND. @@ -222,9 +222,7 @@ class flattened_table { * to be kept alive. */ [[nodiscard]] std::pair push_down_nulls( - column_view const& input, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); /** * @brief Push down nulls from columns of the input table into their children columns, using @@ -251,9 +249,7 @@ class flattened_table { * to be kept alive. */ [[nodiscard]] std::pair push_down_nulls( - table_view const& input, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + table_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); /** * @brief Checks if a column or any of its children is a struct column with structs that are null. 
diff --git a/cpp/include/cudf/detail/tdigest/tdigest.hpp b/cpp/include/cudf/detail/tdigest/tdigest.hpp index 9df3f9daf3f..d9fb0efed45 100644 --- a/cpp/include/cudf/detail/tdigest/tdigest.hpp +++ b/cpp/include/cudf/detail/tdigest/tdigest.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -131,15 +131,14 @@ std::unique_ptr group_merge_tdigest(column_view const& values, * * @returns The constructed tdigest column. */ -std::unique_ptr make_tdigest_column( - size_type num_rows, - std::unique_ptr&& centroid_means, - std::unique_ptr&& centroid_weights, - std::unique_ptr&& tdigest_offsets, - std::unique_ptr&& min_values, - std::unique_ptr&& max_values, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr make_tdigest_column(size_type num_rows, + std::unique_ptr&& centroid_means, + std::unique_ptr&& centroid_weights, + std::unique_ptr&& tdigest_offsets, + std::unique_ptr&& min_values, + std::unique_ptr&& max_values, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Create an empty tdigest column. @@ -151,9 +150,8 @@ std::unique_ptr make_tdigest_column( * * @returns An empty tdigest column. */ -std::unique_ptr make_empty_tdigest_column( - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr make_empty_tdigest_column(rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Create an empty tdigest scalar. @@ -165,9 +163,8 @@ std::unique_ptr make_empty_tdigest_column( * * @returns An empty tdigest scalar. 
*/ -std::unique_ptr make_empty_tdigest_scalar( - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr make_empty_tdigest_scalar(rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Generate a tdigest column from a grouped, sorted set of numeric input values. diff --git a/cpp/include/cudf/detail/timezone.cuh b/cpp/include/cudf/detail/timezone.cuh new file mode 100644 index 00000000000..830ee1a7fa6 --- /dev/null +++ b/cpp/include/cudf/detail/timezone.cuh @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include + +#include +#include + +namespace cudf::detail { + +/** + * @brief Returns the UT offset for a given date and given timezone table. 
+ * + * @param transition_times Transition times; trailing `solar_cycle_entry_count` entries are used for + * all times beyond the one covered by the TZif file + * @param offsets Time offsets in specific intervals; trailing `solar_cycle_entry_count` entries are + * used for all times beyond the one covered by the TZif file + * @param ts ORC timestamp + * + * @return offset from UT, in seconds + */ +inline __device__ duration_s get_ut_offset(table_device_view tz_table, timestamp_s ts) +{ + if (tz_table.num_rows() == 0) { return duration_s{0}; } + + cudf::device_span transition_times(tz_table.column(0).head(), + static_cast(tz_table.num_rows())); + + auto const ts_ttime_it = [&]() { + auto last_less_equal = [](auto begin, auto end, auto value) { + auto const first_larger = thrust::upper_bound(thrust::seq, begin, end, value); + // Return start of the range if all elements are larger than the value + if (first_larger == begin) return begin; + // Element before the first larger element is the last one less or equal + return first_larger - 1; + }; + + auto const file_entry_end = + transition_times.begin() + (transition_times.size() - solar_cycle_entry_count); + + if (ts <= *(file_entry_end - 1)) { + // Search the file entries if the timestamp is in range + return last_less_equal(transition_times.begin(), file_entry_end, ts); + } else { + auto project_to_cycle = [](timestamp_s ts) { + // Years divisible by four are leap years + // Exceptions are years divisible by 100, but not divisible by 400 + static constexpr int32_t num_leap_years_in_cycle = + solar_cycle_years / 4 - (solar_cycle_years / 100 - solar_cycle_years / 400); + static constexpr duration_s cycle_s = cuda::std::chrono::duration_cast( + duration_D{365 * solar_cycle_years + num_leap_years_in_cycle}); + return timestamp_s{(ts.time_since_epoch() + cycle_s) % cycle_s}; + }; + // Search the 400-year cycle if outside of the file entries range + return last_less_equal(file_entry_end, transition_times.end(), 
project_to_cycle(ts)); + } + }(); + + return tz_table.column(1).element(ts_ttime_it - transition_times.begin()); +} + +} // namespace cudf::detail diff --git a/cpp/include/cudf/detail/timezone.hpp b/cpp/include/cudf/detail/timezone.hpp new file mode 100644 index 00000000000..f7f97c0a7c2 --- /dev/null +++ b/cpp/include/cudf/detail/timezone.hpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include + +#include + +namespace cudf::detail { + +/** + * @copydoc cudf::make_timezone_transition_table(std::optional, std::string_view, + * rmm::mr::device_memory_resource*) + * + * @param stream CUDA stream used for device memory operations and kernel launches. + */ +std::unique_ptr
make_timezone_transition_table( + std::optional tzif_dir, + std::string_view timezone_name, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +} // namespace cudf::detail diff --git a/cpp/include/cudf/detail/transform.hpp b/cpp/include/cudf/detail/transform.hpp index 8e19ebb8da7..5b64f61f11a 100644 --- a/cpp/include/cudf/detail/transform.hpp +++ b/cpp/include/cudf/detail/transform.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,24 +29,22 @@ namespace detail { * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr transform( - column_view const& input, - std::string const& unary_udf, - data_type output_type, - bool is_ptx, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr transform(column_view const& input, + std::string const& unary_udf, + data_type output_type, + bool is_ptx, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::compute_column * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr compute_column( - table_view const table, - ast::operation const& expr, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr compute_column(table_view const table, + ast::operation const& expr, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::nans_to_nulls @@ -54,9 +52,7 @@ std::unique_ptr compute_column( * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ std::pair, size_type> nans_to_nulls( - column_view const& input, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::bools_to_mask @@ -64,9 +60,7 @@ std::pair, size_type> nans_to_nulls( * @param stream CUDA stream used for device memory operations and kernel launches. */ std::pair, cudf::size_type> bools_to_mask( - column_view const& input, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::encode @@ -74,42 +68,37 @@ std::pair, cudf::size_type> bools_to_mask( * @param stream CUDA stream used for device memory operations and kernel launches. */ std::pair, std::unique_ptr> encode( - cudf::table_view const& input, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + cudf::table_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::one_hot_encode * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::pair, table_view> one_hot_encode( - column_view const& input, - column_view const& categories, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::pair, table_view> one_hot_encode(column_view const& input, + column_view const& categories, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::mask_to_bools * * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ -std::unique_ptr mask_to_bools( - bitmask_type const* null_mask, - size_type begin_bit, - size_type end_bit, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr mask_to_bools(bitmask_type const* null_mask, + size_type begin_bit, + size_type end_bit, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::row_bit_count * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr row_bit_count( - table_view const& t, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr row_bit_count(table_view const& t, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/transpose.hpp b/cpp/include/cudf/detail/transpose.hpp index 0470d625edc..d0be51860b2 100644 --- a/cpp/include/cudf/detail/transpose.hpp +++ b/cpp/include/cudf/detail/transpose.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,10 +28,9 @@ namespace detail { * * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ -std::pair, table_view> transpose( - table_view const& input, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::pair, table_view> transpose(table_view const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/unary.hpp b/cpp/include/cudf/detail/unary.hpp index b7ecedc1489..3fbdf4a5a8f 100644 --- a/cpp/include/cudf/detail/unary.hpp +++ b/cpp/include/cudf/detail/unary.hpp @@ -45,13 +45,12 @@ namespace detail { */ template -std::unique_ptr true_if( - InputIterator begin, - InputIterator end, - size_type size, - Predicate p, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +std::unique_ptr true_if(InputIterator begin, + InputIterator end, + size_type size, + Predicate p, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto output = make_numeric_column(data_type(type_id::BOOL8), size, mask_state::UNALLOCATED, stream, mr); @@ -68,52 +67,47 @@ std::unique_ptr true_if( * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr unary_operation( - cudf::column_view const& input, - cudf::unary_operator op, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr unary_operation(cudf::column_view const& input, + cudf::unary_operator op, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::is_valid * * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ -std::unique_ptr is_valid( - cudf::column_view const& input, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr is_valid(cudf::column_view const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::cast * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr cast( - column_view const& input, - data_type type, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr cast(column_view const& input, + data_type type, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::is_nan * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr is_nan( - cudf::column_view const& input, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr is_nan(cudf::column_view const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::is_not_nan * * @param[in] stream CUDA stream used for device memory operations and kernel launches. 
*/ -std::unique_ptr is_not_nan( - cudf::column_view const& input, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr is_not_nan(cudf::column_view const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/utilities/vector_factories.hpp b/cpp/include/cudf/detail/utilities/vector_factories.hpp index 75df0d92d0a..c446a7b5148 100644 --- a/cpp/include/cudf/detail/utilities/vector_factories.hpp +++ b/cpp/include/cudf/detail/utilities/vector_factories.hpp @@ -48,10 +48,9 @@ namespace detail { * @return A device_uvector containing zeros */ template -rmm::device_uvector make_zeroed_device_uvector_async( - std::size_t size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +rmm::device_uvector make_zeroed_device_uvector_async(std::size_t size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { rmm::device_uvector ret(size, stream, mr); CUDF_CUDA_TRY(cudaMemsetAsync(ret.data(), 0, size * sizeof(T), stream.value())); @@ -70,10 +69,9 @@ rmm::device_uvector make_zeroed_device_uvector_async( * @return A device_uvector containing zeros */ template -rmm::device_uvector make_zeroed_device_uvector_sync( - std::size_t size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +rmm::device_uvector make_zeroed_device_uvector_sync(std::size_t size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { rmm::device_uvector ret(size, stream, mr); CUDF_CUDA_TRY(cudaMemsetAsync(ret.data(), 0, size * sizeof(T), stream.value())); @@ -94,10 +92,9 @@ rmm::device_uvector make_zeroed_device_uvector_sync( * @return A device_uvector containing the copied data */ template -rmm::device_uvector make_device_uvector_async( - host_span source_data, - 
rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +rmm::device_uvector make_device_uvector_async(host_span source_data, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { rmm::device_uvector ret(source_data.size(), stream, mr); CUDF_CUDA_TRY(cudaMemcpyAsync(ret.data(), @@ -126,9 +123,7 @@ template < std::enable_if_t< std::is_convertible_v>>* = nullptr> rmm::device_uvector make_device_uvector_async( - Container const& c, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + Container const& c, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { return make_device_uvector_async(host_span{c}, stream, mr); } @@ -146,10 +141,9 @@ rmm::device_uvector make_device_uvector_async( * @return A device_uvector containing the copied data */ template -rmm::device_uvector make_device_uvector_async( - device_span source_data, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +rmm::device_uvector make_device_uvector_async(device_span source_data, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { rmm::device_uvector ret(source_data.size(), stream, mr); CUDF_CUDA_TRY(cudaMemcpyAsync(ret.data(), @@ -178,9 +172,7 @@ template < std::enable_if_t< std::is_convertible_v>>* = nullptr> rmm::device_uvector make_device_uvector_async( - Container const& c, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + Container const& c, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { return make_device_uvector_async( device_span{c}, stream, mr); @@ -199,10 +191,9 @@ rmm::device_uvector make_device_uvector_async( * @return A device_uvector containing the copied data */ template -rmm::device_uvector make_device_uvector_sync( - host_span source_data, - rmm::cuda_stream_view stream, - 
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +rmm::device_uvector make_device_uvector_sync(host_span source_data, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto ret = make_device_uvector_async(source_data, stream, mr); stream.synchronize(); @@ -227,9 +218,7 @@ template < std::enable_if_t< std::is_convertible_v>>* = nullptr> rmm::device_uvector make_device_uvector_sync( - Container const& c, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + Container const& c, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { return make_device_uvector_sync(host_span{c}, stream, mr); } @@ -247,10 +236,9 @@ rmm::device_uvector make_device_uvector_sync( * @return A device_uvector containing the copied data */ template -rmm::device_uvector make_device_uvector_sync( - device_span source_data, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +rmm::device_uvector make_device_uvector_sync(device_span source_data, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto ret = make_device_uvector_async(source_data, stream, mr); stream.synchronize(); @@ -275,9 +263,7 @@ template < std::enable_if_t< std::is_convertible_v>>* = nullptr> rmm::device_uvector make_device_uvector_sync( - Container const& c, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + Container const& c, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { return make_device_uvector_sync(device_span{c}, stream, mr); } diff --git a/cpp/include/cudf/detail/valid_if.cuh b/cpp/include/cudf/detail/valid_if.cuh index 04c78bed17d..76d6fd719a4 100644 --- a/cpp/include/cudf/detail/valid_if.cuh +++ b/cpp/include/cudf/detail/valid_if.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. 
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -86,12 +86,11 @@ __global__ void valid_if_kernel( * null count */ template -std::pair valid_if( - InputIterator begin, - InputIterator end, - Predicate p, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +std::pair valid_if(InputIterator begin, + InputIterator end, + Predicate p, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(begin <= end, "Invalid range."); diff --git a/cpp/include/cudf/io/detail/avro.hpp b/cpp/include/cudf/io/detail/avro.hpp index c141e25f939..fede8e62d9f 100644 --- a/cpp/include/cudf/io/detail/avro.hpp +++ b/cpp/include/cudf/io/detail/avro.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -36,11 +36,10 @@ namespace avro { * * @return The set of columns along with table metadata */ -table_with_metadata read_avro( - std::unique_ptr&& source, - avro_reader_options const& options, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +table_with_metadata read_avro(std::unique_ptr&& source, + avro_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace avro } // namespace detail diff --git a/cpp/include/cudf/io/detail/csv.hpp b/cpp/include/cudf/io/detail/csv.hpp index 90d730338fc..9fdc7a47fb9 100644 --- a/cpp/include/cudf/io/detail/csv.hpp +++ b/cpp/include/cudf/io/detail/csv.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -56,7 +56,7 @@ void write_csv(data_sink* sink, host_span column_names, csv_writer_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::mr::device_memory_resource* mr); } // namespace csv } // namespace detail diff --git a/cpp/include/cudf/io/detail/json.hpp b/cpp/include/cudf/io/detail/json.hpp index 7d2884880e7..7b0350e9bc8 100644 --- a/cpp/include/cudf/io/detail/json.hpp +++ b/cpp/include/cudf/io/detail/json.hpp @@ -33,11 +33,10 @@ namespace cudf::io::json::detail { * * @return cudf::table object that contains the array of cudf::column. */ -table_with_metadata read_json( - std::vector>& sources, - json_reader_options const& options, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +table_with_metadata read_json(std::vector>& sources, + json_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Write an entire dataset to JSON format. 
@@ -52,5 +51,5 @@ void write_json(data_sink* sink, table_view const& table, json_writer_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::mr::device_memory_resource* mr); } // namespace cudf::io::json::detail diff --git a/cpp/include/cudf/io/detail/tokenize_json.hpp b/cpp/include/cudf/io/detail/tokenize_json.hpp index b03dbd4fb70..4914f434c98 100644 --- a/cpp/include/cudf/io/detail/tokenize_json.hpp +++ b/cpp/include/cudf/io/detail/tokenize_json.hpp @@ -131,7 +131,7 @@ std::pair, rmm::device_uvector> ge device_span json_in, cudf::io::json_reader_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::mr::device_memory_resource* mr); } // namespace detail diff --git a/cpp/include/cudf/io/text/detail/tile_state.hpp b/cpp/include/cudf/io/text/detail/tile_state.hpp index bf833d4720c..6ae399fbe75 100644 --- a/cpp/include/cudf/io/text/detail/tile_state.hpp +++ b/cpp/include/cudf/io/text/detail/tile_state.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -82,7 +82,7 @@ struct scan_tile_state { scan_tile_state(cudf::size_type num_tiles, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + rmm::mr::device_memory_resource* mr) : tile_status(rmm::device_uvector>( num_tiles, stream, mr)), tile_state_partial(rmm::device_uvector(num_tiles, stream, mr)), diff --git a/cpp/include/cudf/io/text/detail/trie.hpp b/cpp/include/cudf/io/text/detail/trie.hpp index a908a9fa227..7bb2e4e2ece 100644 --- a/cpp/include/cudf/io/text/detail/trie.hpp +++ b/cpp/include/cudf/io/text/detail/trie.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -165,7 +165,7 @@ struct trie { */ static trie create(std::string const& pattern, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + rmm::mr::device_memory_resource* mr) { return create(std::vector{pattern}, stream, mr); @@ -181,7 +181,7 @@ struct trie { */ static trie create(std::vector const& patterns, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + rmm::mr::device_memory_resource* mr) { std::vector tokens; std::vector transitions; diff --git a/cpp/include/cudf/io/types.hpp b/cpp/include/cudf/io/types.hpp index 6f97eb768d9..7426811a18d 100644 --- a/cpp/include/cudf/io/types.hpp +++ b/cpp/include/cudf/io/types.hpp @@ -519,8 +519,6 @@ class column_in_metadata { /** * @brief Set the nullability of this column * - * Only valid in case of chunked writes. In single writes, this option is ignored. 
- * * @param nullable Whether this column is nullable * @return this for chaining */ diff --git a/cpp/include/cudf/lists/contains.hpp b/cpp/include/cudf/lists/contains.hpp index fbe931f945d..21c2ca1d64e 100644 --- a/cpp/include/cudf/lists/contains.hpp +++ b/cpp/include/cudf/lists/contains.hpp @@ -42,7 +42,7 @@ namespace lists { * * @param lists Lists column whose `n` rows are to be searched * @param search_key The scalar key to be looked up in each list row - * @param mr Device memory resource used to allocate the returned column's device memory. + * @param mr Device memory resource used to allocate the returned column's device memory * @return BOOL8 column of `n` rows with the result of the lookup */ std::unique_ptr contains( @@ -64,7 +64,7 @@ std::unique_ptr contains( * * @param lists Lists column whose `n` rows are to be searched * @param search_keys Column of elements to be looked up in each list row - * @param mr Device memory resource used to allocate the returned column's device memory. + * @param mr Device memory resource used to allocate the returned column's device memory * @return BOOL8 column of `n` rows with the result of the lookup */ std::unique_ptr contains( @@ -85,7 +85,7 @@ std::unique_ptr contains( * Nulls inside non-null nested elements (such as lists or structs) are not considered. * * @param lists Lists column whose `n` rows are to be searched - * @param mr Device memory resource used to allocate the returned column's device memory. 
+ * @param mr Device memory resource used to allocate the returned column's device memory * @return BOOL8 column of `n` rows with the result of the lookup */ std::unique_ptr contains_nulls( @@ -102,7 +102,7 @@ enum class duplicate_find_option : int32_t { }; /** - * @brief Create a column of `size_type` values indicating the position of a search key + * @brief Create a column of values indicating the position of a search key * within each list row in the `lists` column * * The output column has as many elements as there are rows in the input `lists` column. @@ -119,14 +119,14 @@ enum class duplicate_find_option : int32_t { * If `find_option == FIND_LAST`, the position of the last match in the list row is * returned. * + * @throw cudf::data_type_error If `search_keys` type does not match the element type in `lists` + * * @param lists Lists column whose `n` rows are to be searched * @param search_key The scalar key to be looked up in each list row * @param find_option Whether to return the position of the first match (`FIND_FIRST`) or * last (`FIND_LAST`) - * @param mr Device memory resource used to allocate the returned column's device memory. 
- * @return INT32 column of `n` rows with the location of the `search_key` - * - * @throw cudf::data_type_error If `search_keys` type does not match the element type in `lists` + * @param mr Device memory resource used to allocate the returned column's device memory + * @return column of `n` rows with the location of the `search_key` */ std::unique_ptr index_of( cudf::lists_column_view const& lists, @@ -135,7 +135,7 @@ std::unique_ptr index_of( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Create a column of `size_type` values indicating the position of a search key + * @brief Create a column of values indicating the position of a search key * row within the corresponding list row in the `lists` column * * The output column has as many elements as there are rows in the input `lists` column. @@ -152,16 +152,16 @@ std::unique_ptr index_of( * If `find_option == FIND_LAST`, the position of the last match in the list row is * returned. * + * @throw cudf::logic_error If `search_keys` does not match `lists` in its number of rows + * @throw cudf::data_type_error If `search_keys` type does not match the element type in `lists` + * * @param lists Lists column whose `n` rows are to be searched * @param search_keys A column of search keys to be looked up in each corresponding row of * `lists` * @param find_option Whether to return the position of the first match (`FIND_FIRST`) or * last (`FIND_LAST`) - * @param mr Device memory resource used to allocate the returned column's device memory. 
- * @return INT32 column of `n` rows with the location of the `search_key` - * - * @throw cudf::logic_error If `search_keys` does not match `lists` in its number of rows - * @throw cudf::data_type_error If `search_keys` type does not match the element type in `lists` + * @param mr Device memory resource used to allocate the returned column's device memory + * @return column of `n` rows with the location of the `search_key` */ std::unique_ptr index_of( cudf::lists_column_view const& lists, diff --git a/cpp/include/cudf/lists/count_elements.hpp b/cpp/include/cudf/lists/count_elements.hpp index dac6c1b5bf8..552ba058b93 100644 --- a/cpp/include/cudf/lists/count_elements.hpp +++ b/cpp/include/cudf/lists/count_elements.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -44,9 +44,9 @@ namespace lists { * Any null input element will result in a corresponding null entry * in the output column. * - * @param input Input lists column. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New INT32 column with the number of elements for each row. + * @param input Input lists column + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New column with the number of elements for each row */ std::unique_ptr count_elements( lists_column_view const& input, diff --git a/cpp/include/cudf/lists/detail/combine.hpp b/cpp/include/cudf/lists/detail/combine.hpp index 9f28074173a..4bc45e48a9f 100644 --- a/cpp/include/cudf/lists/detail/combine.hpp +++ b/cpp/include/cudf/lists/detail/combine.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,22 +27,20 @@ namespace detail { * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr concatenate_rows( - table_view const& input, - concatenate_null_policy null_policy, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr concatenate_rows(table_view const& input, + concatenate_null_policy null_policy, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::lists::concatenate_list_elements * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr concatenate_list_elements( - column_view const& input, - concatenate_null_policy null_policy, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr concatenate_list_elements(column_view const& input, + concatenate_null_policy null_policy, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace lists diff --git a/cpp/include/cudf/lists/detail/concatenate.hpp b/cpp/include/cudf/lists/detail/concatenate.hpp index 5a8b4bc3bf3..a1f149d4ccf 100644 --- a/cpp/include/cudf/lists/detail/concatenate.hpp +++ b/cpp/include/cudf/lists/detail/concatenate.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -43,10 +43,9 @@ namespace detail { * @param mr Device memory resource used to allocate the returned column's device memory. * @return New column with concatenated results. 
*/ -std::unique_ptr concatenate( - host_span columns, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr concatenate(host_span columns, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace lists diff --git a/cpp/include/cudf/lists/detail/contains.hpp b/cpp/include/cudf/lists/detail/contains.hpp index 24318e72e98..58ec18cb9ef 100644 --- a/cpp/include/cudf/lists/detail/contains.hpp +++ b/cpp/include/cudf/lists/detail/contains.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,12 +29,11 @@ namespace detail { * rmm::mr::device_memory_resource*) * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr index_of( - cudf::lists_column_view const& lists, - cudf::scalar const& search_key, - cudf::lists::duplicate_find_option find_option, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr index_of(cudf::lists_column_view const& lists, + cudf::scalar const& search_key, + cudf::lists::duplicate_find_option find_option, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::lists::index_of(cudf::lists_column_view const&, @@ -43,12 +42,11 @@ std::unique_ptr index_of( * rmm::mr::device_memory_resource*) * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ -std::unique_ptr index_of( - cudf::lists_column_view const& lists, - cudf::column_view const& search_keys, - cudf::lists::duplicate_find_option find_option, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr index_of(cudf::lists_column_view const& lists, + cudf::column_view const& search_keys, + cudf::lists::duplicate_find_option find_option, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::lists::contains(cudf::lists_column_view const&, @@ -56,11 +54,10 @@ std::unique_ptr index_of( * rmm::mr::device_memory_resource*) * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr contains( - cudf::lists_column_view const& lists, - cudf::scalar const& search_key, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr contains(cudf::lists_column_view const& lists, + cudf::scalar const& search_key, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::lists::contains(cudf::lists_column_view const&, @@ -68,11 +65,10 @@ std::unique_ptr contains( * rmm::mr::device_memory_resource*) * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ -std::unique_ptr contains( - cudf::lists_column_view const& lists, - cudf::column_view const& search_keys, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr contains(cudf::lists_column_view const& lists, + cudf::column_view const& search_keys, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace lists } // namespace cudf diff --git a/cpp/include/cudf/lists/detail/extract.hpp b/cpp/include/cudf/lists/detail/extract.hpp index 44c31c9ddb2..013f9b491dd 100644 --- a/cpp/include/cudf/lists/detail/extract.hpp +++ b/cpp/include/cudf/lists/detail/extract.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,22 +27,20 @@ namespace detail { * rmm::mr::device_memory_resource*) * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr extract_list_element( - lists_column_view lists_column, - size_type const index, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr extract_list_element(lists_column_view lists_column, + size_type const index, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::lists::extract_list_element(lists_column_view, column_view const&, * rmm::mr::device_memory_resource*) * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ -std::unique_ptr extract_list_element( - lists_column_view lists_column, - column_view const& indices, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr extract_list_element(lists_column_view lists_column, + column_view const& indices, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace lists diff --git a/cpp/include/cudf/lists/detail/gather.cuh b/cpp/include/cudf/lists/detail/gather.cuh index 48c0ed8f6e9..83710a49f6a 100644 --- a/cpp/include/cudf/lists/detail/gather.cuh +++ b/cpp/include/cudf/lists/detail/gather.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -285,11 +285,10 @@ gather_data make_gather_data(cudf::lists_column_view const& source_column, * * @returns column with elements gathered based on `gather_data` */ -std::unique_ptr gather_list_nested( - lists_column_view const& list, - gather_data& gd, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr gather_list_nested(lists_column_view const& list, + gather_data& gd, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Gather a leaf column from a hierarchy of list columns. 
@@ -303,11 +302,10 @@ std::unique_ptr gather_list_nested( * * @returns column with elements gathered based on `gather_data` */ -std::unique_ptr gather_list_leaf( - column_view const& column, - gather_data const& gd, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr gather_list_leaf(column_view const& column, + gather_data const& gd, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::lists::segmented_gather(lists_column_view const& source_column, @@ -317,13 +315,11 @@ std::unique_ptr gather_list_leaf( * * @param stream CUDA stream on which to execute kernels */ -std::unique_ptr segmented_gather( - lists_column_view const& source_column, - lists_column_view const& gather_map_list, - out_of_bounds_policy bounds_policy = out_of_bounds_policy::DONT_CHECK, - // Move before bounds_policy? - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr segmented_gather(lists_column_view const& source_column, + lists_column_view const& gather_map_list, + out_of_bounds_policy bounds_policy, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace lists diff --git a/cpp/include/cudf/lists/detail/interleave_columns.hpp b/cpp/include/cudf/lists/detail/interleave_columns.hpp index 7ae90779fdc..a5cf67c95b9 100644 --- a/cpp/include/cudf/lists/detail/interleave_columns.hpp +++ b/cpp/include/cudf/lists/detail/interleave_columns.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -44,11 +44,10 @@ namespace detail { * @param mr Device memory resource used to allocate the returned column's device memory. 
* @return The interleaved columns as a single column. */ -std::unique_ptr interleave_columns( - table_view const& input, - bool has_null_mask, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr interleave_columns(table_view const& input, + bool has_null_mask, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace lists diff --git a/cpp/include/cudf/lists/detail/reverse.hpp b/cpp/include/cudf/lists/detail/reverse.hpp index d467a9ac70e..6e3b952a3b0 100644 --- a/cpp/include/cudf/lists/detail/reverse.hpp +++ b/cpp/include/cudf/lists/detail/reverse.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,9 +23,8 @@ namespace cudf::lists::detail { * @copydoc cudf::lists::reverse * @param stream CUDA stream used for device memory operations and kernel launches */ -std::unique_ptr reverse( - lists_column_view const& input, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr reverse(lists_column_view const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace cudf::lists::detail diff --git a/cpp/include/cudf/lists/detail/scatter.cuh b/cpp/include/cudf/lists/detail/scatter.cuh index c2b4778aac8..856914b445e 100644 --- a/cpp/include/cudf/lists/detail/scatter.cuh +++ b/cpp/include/cudf/lists/detail/scatter.cuh @@ -89,15 +89,14 @@ rmm::device_uvector list_vector_from_column( * @return New lists column. 
*/ template -std::unique_ptr scatter_impl( - rmm::device_uvector const& source_vector, - rmm::device_uvector& target_vector, - MapIterator scatter_map_begin, - MapIterator scatter_map_end, - column_view const& source, - column_view const& target, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +std::unique_ptr scatter_impl(rmm::device_uvector const& source_vector, + rmm::device_uvector& target_vector, + MapIterator scatter_map_begin, + MapIterator scatter_map_end, + column_view const& source, + column_view const& target, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(column_types_equal(source, target), "Mismatched column types."); @@ -170,13 +169,12 @@ std::unique_ptr scatter_impl( * @return New lists column. */ template -std::unique_ptr scatter( - column_view const& source, - MapIterator scatter_map_begin, - MapIterator scatter_map_end, - column_view const& target, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +std::unique_ptr scatter(column_view const& source, + MapIterator scatter_map_begin, + MapIterator scatter_map_end, + column_view const& target, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto const num_rows = target.size(); if (num_rows == 0) { return cudf::empty_like(target); } @@ -227,13 +225,12 @@ std::unique_ptr scatter( * @return New lists column. 
*/ template -std::unique_ptr scatter( - scalar const& slr, - MapIterator scatter_map_begin, - MapIterator scatter_map_end, - column_view const& target, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +std::unique_ptr scatter(scalar const& slr, + MapIterator scatter_map_begin, + MapIterator scatter_map_end, + column_view const& target, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto const num_rows = target.size(); if (num_rows == 0) { return cudf::empty_like(target); } diff --git a/cpp/include/cudf/lists/detail/set_operations.hpp b/cpp/include/cudf/lists/detail/set_operations.hpp index ef4255de430..1411c65448e 100644 --- a/cpp/include/cudf/lists/detail/set_operations.hpp +++ b/cpp/include/cudf/lists/detail/set_operations.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,52 +30,48 @@ namespace cudf::lists::detail { * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr have_overlap( - lists_column_view const& lhs, - lists_column_view const& rhs, - null_equality nulls_equal, - nan_equality nans_equal, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr have_overlap(lists_column_view const& lhs, + lists_column_view const& rhs, + null_equality nulls_equal, + nan_equality nans_equal, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::list::intersect_distinct * * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ -std::unique_ptr intersect_distinct( - lists_column_view const& lhs, - lists_column_view const& rhs, - null_equality nulls_equal, - nan_equality nans_equal, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr intersect_distinct(lists_column_view const& lhs, + lists_column_view const& rhs, + null_equality nulls_equal, + nan_equality nans_equal, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::list::union_distinct * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr union_distinct( - lists_column_view const& lhs, - lists_column_view const& rhs, - null_equality nulls_equal, - nan_equality nans_equal, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr union_distinct(lists_column_view const& lhs, + lists_column_view const& rhs, + null_equality nulls_equal, + nan_equality nans_equal, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::list::difference_distinct * * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ -std::unique_ptr difference_distinct( - lists_column_view const& lhs, - lists_column_view const& rhs, - null_equality nulls_equal, - nan_equality nans_equal, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr difference_distinct(lists_column_view const& lhs, + lists_column_view const& rhs, + null_equality nulls_equal, + nan_equality nans_equal, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** @} */ // end of group } // namespace cudf::lists::detail diff --git a/cpp/include/cudf/lists/detail/sorting.hpp b/cpp/include/cudf/lists/detail/sorting.hpp index 1068a4c4b69..c378ca8cf06 100644 --- a/cpp/include/cudf/lists/detail/sorting.hpp +++ b/cpp/include/cudf/lists/detail/sorting.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,24 +28,22 @@ namespace detail { * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr sort_lists( - lists_column_view const& input, - order column_order, - null_order null_precedence, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr sort_lists(lists_column_view const& input, + order column_order, + null_order null_precedence, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::lists::stable_sort_lists * * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ -std::unique_ptr stable_sort_lists( - lists_column_view const& input, - order column_order, - null_order null_precedence, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr stable_sort_lists(lists_column_view const& input, + order column_order, + null_order null_precedence, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace lists diff --git a/cpp/include/cudf/lists/detail/stream_compaction.hpp b/cpp/include/cudf/lists/detail/stream_compaction.hpp index ba3dbb6594b..7ab9cf9a343 100644 --- a/cpp/include/cudf/lists/detail/stream_compaction.hpp +++ b/cpp/include/cudf/lists/detail/stream_compaction.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,22 +28,20 @@ namespace cudf::lists::detail { * * @param stream CUDA stream used for device memory operations and kernel launches */ -std::unique_ptr apply_boolean_mask( - lists_column_view const& input, - lists_column_view const& boolean_mask, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr apply_boolean_mask(lists_column_view const& input, + lists_column_view const& boolean_mask, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::list::distinct * * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ -std::unique_ptr distinct( - lists_column_view const& input, - null_equality nulls_equal, - nan_equality nans_equal, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr distinct(lists_column_view const& input, + null_equality nulls_equal, + nan_equality nans_equal, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace cudf::lists::detail diff --git a/cpp/include/cudf/lists/lists_column_factories.hpp b/cpp/include/cudf/lists/lists_column_factories.hpp index a6eacb97e91..fea1118748c 100644 --- a/cpp/include/cudf/lists/lists_column_factories.hpp +++ b/cpp/include/cudf/lists/lists_column_factories.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,11 +35,10 @@ namespace detail { * @param[in] stream CUDA stream used for device memory operations and kernel launches. * @param[in] mr Device memory resource used to allocate the returned column's device memory. 
*/ -std::unique_ptr make_lists_column_from_scalar( - list_scalar const& value, - size_type size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr make_lists_column_from_scalar(list_scalar const& value, + size_type size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace lists diff --git a/cpp/include/cudf/detail/reduction.cuh b/cpp/include/cudf/reduction/detail/reduction.cuh similarity index 99% rename from cpp/include/cudf/detail/reduction.cuh rename to cpp/include/cudf/reduction/detail/reduction.cuh index 9dc3b996afc..1620635e0e3 100644 --- a/cpp/include/cudf/detail/reduction.cuh +++ b/cpp/include/cudf/reduction/detail/reduction.cuh @@ -16,7 +16,7 @@ #pragma once -#include +#include "reduction_operators.cuh" #include #include @@ -31,6 +31,8 @@ #include #include +#include + namespace cudf { namespace reduction { namespace detail { diff --git a/cpp/include/cudf/detail/reduction_functions.hpp b/cpp/include/cudf/reduction/detail/reduction_functions.hpp similarity index 70% rename from cpp/include/cudf/detail/reduction_functions.hpp rename to cpp/include/cudf/reduction/detail/reduction_functions.hpp index 1f892bb90c5..014a6ba70eb 100644 --- a/cpp/include/cudf/detail/reduction_functions.hpp +++ b/cpp/include/cudf/reduction/detail/reduction_functions.hpp @@ -27,6 +27,7 @@ namespace cudf { namespace reduction { +namespace detail { /** * @brief Computes sum of elements in input column * @@ -42,12 +43,11 @@ namespace reduction { * @param mr Device memory resource used to allocate the returned scalar's device memory * @return Sum as scalar of type `output_dtype` */ -std::unique_ptr sum( - column_view const& col, - data_type const output_dtype, - std::optional> init, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr sum(column_view const& col, + data_type 
const output_dtype, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Computes minimum of elements in input column @@ -63,12 +63,11 @@ std::unique_ptr sum( * @param mr Device memory resource used to allocate the returned scalar's device memory * @return Minimum element as scalar of type `output_dtype` */ -std::unique_ptr min( - column_view const& col, - data_type const output_dtype, - std::optional> init, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr min(column_view const& col, + data_type const output_dtype, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Computes maximum of elements in input column @@ -84,12 +83,11 @@ std::unique_ptr min( * @param mr Device memory resource used to allocate the returned scalar's device memory * @return Maximum element as scalar of type `output_dtype` */ -std::unique_ptr max( - column_view const& col, - data_type const output_dtype, - std::optional> init, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr max(column_view const& col, + data_type const output_dtype, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Computes any of elements in input column is true when typecasted to bool @@ -106,12 +104,11 @@ std::unique_ptr max( * @param mr Device memory resource used to allocate the returned scalar's device memory * @return bool scalar if any of elements is true when typecasted to bool */ -std::unique_ptr any( - column_view const& col, - data_type const output_dtype, - std::optional> init, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr any(column_view const& col, + data_type const output_dtype, + std::optional> init, + 
rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Computes all of elements in input column is true when typecasted to bool @@ -128,12 +125,11 @@ std::unique_ptr any( * @param mr Device memory resource used to allocate the returned scalar's device memory * @return bool scalar if all of elements is true when typecasted to bool */ -std::unique_ptr all( - column_view const& col, - data_type const output_dtype, - std::optional> init, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr all(column_view const& col, + data_type const output_dtype, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Computes product of elements in input column @@ -150,12 +146,11 @@ std::unique_ptr all( * @param mr Device memory resource used to allocate the returned scalar's device memory * @return Product as scalar of type `output_dtype` */ -std::unique_ptr product( - column_view const& col, - data_type const output_dtype, - std::optional> init, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr product(column_view const& col, + data_type const output_dtype, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Computes sum of squares of elements in input column @@ -171,11 +166,10 @@ std::unique_ptr product( * @param mr Device memory resource used to allocate the returned scalar's device memory * @return Sum of squares as scalar of type `output_dtype` */ -std::unique_ptr sum_of_squares( - column_view const& col, - data_type const output_dtype, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr sum_of_squares(column_view const& col, + data_type const output_dtype, + rmm::cuda_stream_view stream, + 
rmm::mr::device_memory_resource* mr); /** * @brief Computes mean of elements in input column @@ -191,11 +185,10 @@ std::unique_ptr sum_of_squares( * @param mr Device memory resource used to allocate the returned scalar's device memory * @return Mean as scalar of type `output_dtype` */ -std::unique_ptr mean( - column_view const& col, - data_type const output_dtype, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr mean(column_view const& col, + data_type const output_dtype, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Computes variance of elements in input column @@ -213,12 +206,11 @@ std::unique_ptr mean( * @param mr Device memory resource used to allocate the returned scalar's device memory * @return Variance as scalar of type `output_dtype` */ -std::unique_ptr variance( - column_view const& col, - data_type const output_dtype, - size_type ddof, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr variance(column_view const& col, + data_type const output_dtype, + size_type ddof, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Computes standard deviation of elements in input column @@ -236,12 +228,11 @@ std::unique_ptr variance( * @param mr Device memory resource used to allocate the returned scalar's device memory * @return Standard deviation as scalar of type `output_dtype` */ -std::unique_ptr standard_deviation( - column_view const& col, - data_type const output_dtype, - size_type ddof, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr standard_deviation(column_view const& col, + data_type const output_dtype, + size_type ddof, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Returns nth element in input column @@ 
-267,12 +258,11 @@ std::unique_ptr standard_deviation( * @param mr Device memory resource used to allocate the returned scalar's device memory * @return nth element as scalar */ -std::unique_ptr nth_element( - column_view const& col, - size_type n, - null_policy null_handling, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr nth_element(column_view const& col, + size_type n, + null_policy null_handling, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Collect input column into a (list) scalar @@ -283,11 +273,10 @@ std::unique_ptr nth_element( * @param mr Device memory resource used to allocate the returned scalar's device memory * @return collected list as scalar */ -std::unique_ptr collect_list( - column_view const& col, - null_policy null_handling, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr collect_list(column_view const& col, + null_policy null_handling, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Merge a bunch of list scalars into single list scalar @@ -297,10 +286,9 @@ std::unique_ptr collect_list( * @param mr Device memory resource used to allocate the returned scalar's device memory * @return merged list as scalar */ -std::unique_ptr merge_lists( - lists_column_view const& col, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr merge_lists(lists_column_view const& col, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Collect input column into a (list) scalar without duplicated elements @@ -313,13 +301,12 @@ std::unique_ptr merge_lists( * @param mr Device memory resource used to allocate the returned scalar's device memory * @return collected list with unique elements as scalar */ -std::unique_ptr 
collect_set( - column_view const& col, - null_policy null_handling, - null_equality nulls_equal, - nan_equality nans_equal, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr collect_set(column_view const& col, + null_policy null_handling, + null_equality nulls_equal, + nan_equality nans_equal, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Merge a bunch of list scalars into single list scalar then drop duplicated elements @@ -331,12 +318,12 @@ std::unique_ptr collect_set( * @param mr Device memory resource used to allocate the returned scalar's device memory * @return collected list with unique elements as scalar */ -std::unique_ptr merge_sets( - lists_column_view const& col, - null_equality nulls_equal, - nan_equality nans_equal, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr merge_sets(lists_column_view const& col, + null_equality nulls_equal, + nan_equality nans_equal, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); +} // namespace detail } // namespace reduction } // namespace cudf diff --git a/cpp/include/cudf/detail/reduction_operators.cuh b/cpp/include/cudf/reduction/detail/reduction_operators.cuh similarity index 97% rename from cpp/include/cudf/detail/reduction_operators.cuh rename to cpp/include/cudf/reduction/detail/reduction_operators.cuh index 5a0cb4c1714..0dba84a0b28 100644 --- a/cpp/include/cudf/detail/reduction_operators.cuh +++ b/cpp/include/cudf/reduction/detail/reduction_operators.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -26,6 +26,7 @@ namespace cudf { namespace reduction { +namespace detail { // intermediate data structure to compute `var`, `std` template struct var_std { @@ -244,7 +245,7 @@ struct variance : public compound_op { using op = cudf::DeviceSum; template - using transformer = cudf::reduction::transformer_var_std; + using transformer = cudf::reduction::detail::transformer_var_std; template struct intermediate { @@ -270,7 +271,7 @@ struct standard_deviation : public compound_op { using op = cudf::DeviceSum; template - using transformer = cudf::reduction::transformer_var_std; + using transformer = cudf::reduction::detail::transformer_var_std; template struct intermediate { @@ -288,7 +289,7 @@ struct standard_deviation : public compound_op { }; }; }; - } // namespace op +} // namespace detail } // namespace reduction } // namespace cudf diff --git a/cpp/include/cudf/detail/segmented_reduction.cuh b/cpp/include/cudf/reduction/detail/segmented_reduction.cuh similarity index 99% rename from cpp/include/cudf/detail/segmented_reduction.cuh rename to cpp/include/cudf/reduction/detail/segmented_reduction.cuh index 1c39d5eab1e..5c2eaf8cdcb 100644 --- a/cpp/include/cudf/detail/segmented_reduction.cuh +++ b/cpp/include/cudf/reduction/detail/segmented_reduction.cuh @@ -16,7 +16,7 @@ #pragma once -#include +#include "reduction_operators.cuh" #include #include diff --git a/cpp/include/cudf/detail/segmented_reduction_functions.hpp b/cpp/include/cudf/reduction/detail/segmented_reduction_functions.hpp similarity index 63% rename from cpp/include/cudf/detail/segmented_reduction_functions.hpp rename to cpp/include/cudf/reduction/detail/segmented_reduction_functions.hpp index 7b5628fa49a..3902a7200a9 100644 --- a/cpp/include/cudf/detail/segmented_reduction_functions.hpp +++ b/cpp/include/cudf/reduction/detail/segmented_reduction_functions.hpp @@ -27,6 +27,7 @@ namespace cudf { namespace reduction { +namespace detail { /** * @brief Compute sum of each segment in the input column @@ -50,14 
+51,13 @@ namespace reduction { * @param mr Device memory resource used to allocate the returned column's device memory * @return Sums of segments as type `output_dtype` */ -std::unique_ptr segmented_sum( - column_view const& col, - device_span offsets, - data_type const output_dtype, - null_policy null_handling, - std::optional> init, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr segmented_sum(column_view const& col, + device_span offsets, + data_type const output_dtype, + null_policy null_handling, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Computes product of each segment in the input column @@ -81,14 +81,13 @@ std::unique_ptr segmented_sum( * @param mr Device memory resource used to allocate the returned column's device memory * @return Product of segments as type `output_dtype` */ -std::unique_ptr segmented_product( - column_view const& col, - device_span offsets, - data_type const output_dtype, - null_policy null_handling, - std::optional> init, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr segmented_product(column_view const& col, + device_span offsets, + data_type const output_dtype, + null_policy null_handling, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Compute minimum of each segment in the input column @@ -111,14 +110,13 @@ std::unique_ptr segmented_product( * @param mr Device memory resource used to allocate the returned column's device memory * @return Minimums of segments as type `output_dtype` */ -std::unique_ptr segmented_min( - column_view const& col, - device_span offsets, - data_type const output_dtype, - null_policy null_handling, - std::optional> init, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = 
rmm::mr::get_current_device_resource()); +std::unique_ptr segmented_min(column_view const& col, + device_span offsets, + data_type const output_dtype, + null_policy null_handling, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Compute maximum of each segment in the input column @@ -141,14 +139,13 @@ std::unique_ptr segmented_min( * @param mr Device memory resource used to allocate the returned column's device memory * @return Maximums of segments as type `output_dtype` */ -std::unique_ptr segmented_max( - column_view const& col, - device_span offsets, - data_type const output_dtype, - null_policy null_handling, - std::optional> init, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr segmented_max(column_view const& col, + device_span offsets, + data_type const output_dtype, + null_policy null_handling, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Compute if any of the values in the segment are true when typecasted to bool @@ -172,14 +169,13 @@ std::unique_ptr segmented_max( * @param mr Device memory resource used to allocate the returned column's device memory * @return Column of type BOOL8 for the results of the segments */ -std::unique_ptr segmented_any( - column_view const& col, - device_span offsets, - data_type const output_dtype, - null_policy null_handling, - std::optional> init, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr segmented_any(column_view const& col, + device_span offsets, + data_type const output_dtype, + null_policy null_handling, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Compute if all of the values in the segment are true when typecasted to bool @@ -203,14 +199,13 @@ std::unique_ptr 
segmented_any( * @param mr Device memory resource used to allocate the returned column's device memory * @return Column of BOOL8 for the results of the segments */ -std::unique_ptr segmented_all( - column_view const& col, - device_span offsets, - data_type const output_dtype, - null_policy null_handling, - std::optional> init, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr segmented_all(column_view const& col, + device_span offsets, + data_type const output_dtype, + null_policy null_handling, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Computes mean of elements of segments in the input column @@ -233,13 +228,12 @@ std::unique_ptr segmented_all( * @param mr Device memory resource used to allocate the returned column's device memory * @return Column of `output_dtype` for the reduction results of the segments */ -std::unique_ptr segmented_mean( - column_view const& col, - device_span offsets, - data_type const output_dtype, - null_policy null_handling, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr segmented_mean(column_view const& col, + device_span offsets, + data_type const output_dtype, + null_policy null_handling, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Computes sum of squares of elements of segments in the input column @@ -262,13 +256,12 @@ std::unique_ptr segmented_mean( * @param mr Device memory resource used to allocate the returned column's device memory * @return Column of `output_dtype` for the reduction results of the segments */ -std::unique_ptr segmented_sum_of_squares( - column_view const& col, - device_span offsets, - data_type const output_dtype, - null_policy null_handling, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = 
rmm::mr::get_current_device_resource()); +std::unique_ptr segmented_sum_of_squares(column_view const& col, + device_span offsets, + data_type const output_dtype, + null_policy null_handling, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Computes the standard deviation of elements of segments in the input column @@ -293,14 +286,13 @@ std::unique_ptr segmented_sum_of_squares( * @param mr Device memory resource used to allocate the returned column's device memory * @return Column of `output_dtype` for the reduction results of the segments */ -std::unique_ptr segmented_standard_deviation( - column_view const& col, - device_span offsets, - data_type const output_dtype, - null_policy null_handling, - size_type ddof, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr segmented_standard_deviation(column_view const& col, + device_span offsets, + data_type const output_dtype, + null_policy null_handling, + size_type ddof, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Computes the variance of elements of segments in the input column @@ -325,14 +317,42 @@ std::unique_ptr segmented_standard_deviation( * @param mr Device memory resource used to allocate the returned column's device memory * @return Column of `output_dtype` for the reduction results of the segments */ -std::unique_ptr segmented_variance( - column_view const& col, - device_span offsets, - data_type const output_dtype, - null_policy null_handling, - size_type ddof, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr segmented_variance(column_view const& col, + device_span offsets, + data_type const output_dtype, + null_policy null_handling, + size_type ddof, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); +/** + * @brief Counts the number of unique values 
within each segment of a column + * + * Unique entries are counted by comparing adjacent values so the column segments + * are expected to be sorted before calling this function otherwise the results + * are undefined. + * + * If any input segment is empty, that segment's result is null. + * + * If `null_handling==null_policy::INCLUDE`, the segment count is the number of + * unique values +1 which includes all the null entries in that segment. + * If `null_handling==null_policy::EXCLUDE`, the segment count does not include nulls. + * + * @throw cudf::logic_error if input column type is a nested type + * + * @param col Input column data + * @param offsets Indices to identify segment boundaries within input `col` + * @param null_handling Specifies how null elements are processed for each segment + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return Column of unique counts per segment + */ +std::unique_ptr segmented_nunique(column_view const& col, + device_span offsets, + null_policy null_handling, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + +} // namespace detail } // namespace reduction } // namespace cudf diff --git a/cpp/include/cudf/search.hpp b/cpp/include/cudf/search.hpp index bd9520df644..fee22786d7a 100644 --- a/cpp/include/cudf/search.hpp +++ b/cpp/include/cudf/search.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -64,7 +64,7 @@ namespace cudf { * @param column_order Vector of column sort order * @param null_precedence Vector of null_precedence enums needles * @param mr Device memory resource used to allocate the returned column's device memory - * @return A non-nullable column of cudf::size_type elements containing the insertion points + * @return A non-nullable column of elements containing the insertion points */ std::unique_ptr lower_bound( table_view const& haystack, @@ -104,7 +104,7 @@ std::unique_ptr lower_bound( * @param column_order Vector of column sort order * @param null_precedence Vector of null_precedence enums needles * @param mr Device memory resource used to allocate the returned column's device memory - * @return A non-nullable column of cudf::size_type elements containing the insertion points + * @return A non-nullable column of elements containing the insertion points */ std::unique_ptr upper_bound( table_view const& haystack, diff --git a/cpp/include/cudf/sorting.hpp b/cpp/include/cudf/sorting.hpp index 922bed3b1ea..6924e77ae9b 100644 --- a/cpp/include/cudf/sorting.hpp +++ b/cpp/include/cudf/sorting.hpp @@ -44,7 +44,7 @@ namespace cudf { * for each column. Size must be equal to `input.num_columns()` or empty. * If empty, all columns will be sorted in `null_order::BEFORE`. * @param mr Device memory resource used to allocate the returned column's device memory - * @return A non-nullable column of `size_type` elements containing the permuted row indices of + * @return A non-nullable column of elements containing the permuted row indices of * `input` if it were sorted */ std::unique_ptr sorted_order( diff --git a/cpp/include/cudf/strings/attributes.hpp b/cpp/include/cudf/strings/attributes.hpp index f0f7c667697..85086e44a26 100644 --- a/cpp/include/cudf/strings/attributes.hpp +++ b/cpp/include/cudf/strings/attributes.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -32,8 +32,8 @@ namespace strings { */ /** - * @brief Returns an integer numeric column containing the length of each string in - * characters. + * @brief Returns a column containing character lengths + * of each string in the given column * * The output column will have the same number of rows as the * specified strings column. Each row value will be the number of @@ -41,17 +41,17 @@ namespace strings { * * Any null string will result in a null entry for that row in the output column. * - * @param strings Strings instance for this operation. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New INT32 column with lengths for each string. + * @param input Strings instance for this operation + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New column with lengths for each string */ std::unique_ptr count_characters( - strings_column_view const& strings, + strings_column_view const& input, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Returns a numeric column containing the length of each string in - * bytes. + * @brief Returns a column containing byte lengths + * of each string in the given column * * The output column will have the same number of rows as the * specified strings column. Each row value will be the number of @@ -59,17 +59,17 @@ std::unique_ptr count_characters( * * Any null string will result in a null entry for that row in the output column. * - * @param strings Strings instance for this operation. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New INT32 column with the number of bytes for each string. 
+ * @param input Strings instance for this operation + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New column with the number of bytes for each string */ std::unique_ptr count_bytes( - strings_column_view const& strings, + strings_column_view const& input, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @brief Creates a numeric column with code point values (integers) for each - * character of each string. + * character of each string * * A code point is the integer value representation of a character. * For example, the code point value for the character 'A' in UTF-8 is 65. @@ -79,12 +79,12 @@ std::unique_ptr count_bytes( * * Any null string is ignored. No null entries will appear in the output column. * - * @param strings Strings instance for this operation. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New INT32 column with code point integer values for each character. 
+ * @param input Strings instance for this operation + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New INT32 column with code point integer values for each character */ std::unique_ptr code_points( - strings_column_view const& strings, + strings_column_view const& input, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of strings_apis group diff --git a/cpp/include/cudf/strings/contains.hpp b/cpp/include/cudf/strings/contains.hpp index aebc4ae7dab..92914bc810f 100644 --- a/cpp/include/cudf/strings/contains.hpp +++ b/cpp/include/cudf/strings/contains.hpp @@ -165,7 +165,7 @@ std::unique_ptr matches_re( * @param strings Strings instance for this operation * @param prog Regex program instance * @param mr Device memory resource used to allocate the returned column's device memory - * @return New INT32 column with counts for each string + * @return New column of match counts for each string */ std::unique_ptr count_re( strings_column_view const& strings, diff --git a/cpp/include/cudf/strings/detail/strings_children.cuh b/cpp/include/cudf/strings/detail/strings_children.cuh index 09e0f3bb079..02a65c01178 100644 --- a/cpp/include/cudf/strings/detail/strings_children.cuh +++ b/cpp/include/cudf/strings/detail/strings_children.cuh @@ -59,7 +59,7 @@ auto make_strings_children(SizeAndExecuteFunction size_and_exec_fn, rmm::mr::device_memory_resource* mr) { auto offsets_column = make_numeric_column( - data_type{type_id::INT32}, strings_count + 1, mask_state::UNALLOCATED, stream, mr); + data_type{type_to_id()}, strings_count + 1, mask_state::UNALLOCATED, stream, mr); auto offsets_view = offsets_column->mutable_view(); auto d_offsets = offsets_view.template data(); size_and_exec_fn.d_offsets = d_offsets; diff --git a/cpp/include/cudf/strings/detail/strings_column_factories.cuh b/cpp/include/cudf/strings/detail/strings_column_factories.cuh index 2939c47e6af..a3a5946fe55 100644 --- 
a/cpp/include/cudf/strings/detail/strings_column_factories.cuh +++ b/cpp/include/cudf/strings/detail/strings_column_factories.cuh @@ -175,7 +175,7 @@ std::unique_ptr make_strings_column(CharIterator chars_begin, // build offsets column -- this is the number of strings + 1 auto offsets_column = make_numeric_column( - data_type{type_id::INT32}, strings_count + 1, mask_state::UNALLOCATED, stream, mr); + data_type{type_to_id()}, strings_count + 1, mask_state::UNALLOCATED, stream, mr); auto offsets_view = offsets_column->mutable_view(); thrust::transform(rmm::exec_policy(stream), offsets_begin, diff --git a/cpp/include/cudf/structs/detail/concatenate.hpp b/cpp/include/cudf/structs/detail/concatenate.hpp index a098703e4b0..82ccca188e2 100644 --- a/cpp/include/cudf/structs/detail/concatenate.hpp +++ b/cpp/include/cudf/structs/detail/concatenate.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -48,10 +48,9 @@ namespace detail { * @param mr Device memory resource used to allocate the returned column's device memory. * @return New column with concatenated results. */ -std::unique_ptr concatenate( - host_span columns, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr concatenate(host_span columns, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace structs diff --git a/cpp/include/cudf/timezone.hpp b/cpp/include/cudf/timezone.hpp new file mode 100644 index 00000000000..56678c73811 --- /dev/null +++ b/cpp/include/cudf/timezone.hpp @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include + +#include +#include +#include + +namespace cudf { +class table; + +// Cycle in which the time offsets repeat in Gregorian calendar +static constexpr int32_t solar_cycle_years = 400; +// Number of future entries in the timezone transition table: +// Two entries per year, over the length of the Gregorian calendar's solar cycle +static constexpr uint32_t solar_cycle_entry_count = 2 * solar_cycle_years; + +/** + * @brief Creates a transition table to convert ORC timestamps to UTC. + * + * Uses system's TZif files. Assumes little-endian platform when parsing these files. + * The transition table starts with the entries from the TZif file. For timestamps after the file's + * last transition, the table includes entries that form a `solar_cycle_years`-year cycle (future + * entries). This portion of the table has `solar_cycle_entry_count` elements, as it assumes two + * transitions per year from Daylight Saving Time. If the timezone does not have DST, the table will + * still include the future entries, which will all have the same offset. + * + * @param tzif_dir The directory where the TZif files are located + * @param timezone_name standard timezone name (for example, "America/Los_Angeles") + * @param mr Device memory resource used to allocate the returned table's device memory. + * + * @return The transition table for the given timezone + */ +std::unique_ptr
make_timezone_transition_table( + std::optional tzif_dir, + std::string_view timezone_name, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +} // namespace cudf diff --git a/cpp/include/cudf/utilities/type_checks.hpp b/cpp/include/cudf/utilities/type_checks.hpp index 4fa712fe7c3..b925fc8ae92 100644 --- a/cpp/include/cudf/utilities/type_checks.hpp +++ b/cpp/include/cudf/utilities/type_checks.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -36,4 +36,15 @@ namespace cudf { */ bool column_types_equal(column_view const& lhs, column_view const& rhs); +/** + * @brief Compare the type IDs of two `column_view`s + * This function returns true if the type of `lhs` equals that of `rhs`. + * - For fixed point types, the scale is ignored. + * + * @param lhs The first `column_view` to compare + * @param rhs The second `column_view` to compare + * @return true if column types match + */ +bool column_types_equivalent(column_view const& lhs, column_view const& rhs); + } // namespace cudf diff --git a/cpp/include/cudf_test/base_fixture.hpp b/cpp/include/cudf_test/base_fixture.hpp index be4d5bccd7b..1477314c592 100644 --- a/cpp/include/cudf_test/base_fixture.hpp +++ b/cpp/include/cudf_test/base_fixture.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -23,8 +23,9 @@ #include #include #include +#include #include -#include +#include #include #include @@ -243,7 +244,10 @@ inline auto make_managed() { return std::make_shared(make_cuda()); + auto const [free, total] = rmm::detail::available_device_memory(); + auto min_alloc = + rmm::detail::align_down(std::min(free, total / 10), rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + return rmm::mr::make_owning_wrapper(make_cuda(), min_alloc); } inline auto make_arena() @@ -308,16 +312,33 @@ inline auto parse_cudf_test_opts(int argc, char** argv) const char* env_rmm_mode = std::getenv("GTEST_CUDF_RMM_MODE"); // Overridden by CLI options const char* env_stream_mode = std::getenv("GTEST_CUDF_STREAM_MODE"); // Overridden by CLI options - auto default_rmm_mode = env_rmm_mode ? env_rmm_mode : "pool"; - auto default_stream_mode = env_stream_mode ? env_stream_mode : "default"; + const char* env_stream_error_mode = + std::getenv("GTEST_CUDF_STREAM_ERROR_MODE"); // Overridden by CLI options + auto default_rmm_mode = env_rmm_mode ? env_rmm_mode : "pool"; + auto default_stream_mode = env_stream_mode ? env_stream_mode : "default"; + auto default_stream_error_mode = env_stream_error_mode ? env_stream_error_mode : "error"; options.allow_unrecognised_options().add_options()( "rmm_mode", "RMM allocation mode", cxxopts::value()->default_value(default_rmm_mode)); + // `new_cudf_default` means that cudf::get_default_stream has been patched, + // so we raise errors anywhere that a CUDA default stream is observed + // instead of cudf::get_default_stream(). This corresponds to compiling + // identify_stream_usage with STREAM_MODE_TESTING=OFF (must do both at the + // same time). + // `new_testing_default` means that cudf::test::get_default_stream has been + // patched, so we raise errors anywhere that _any_ other stream is + // observed. This corresponds to compiling identify_stream_usage with + // STREAM_MODE_TESTING=ON (must do both at the same time). 
options.allow_unrecognised_options().add_options()( "stream_mode", "Whether to use a non-default stream", cxxopts::value()->default_value(default_stream_mode)); + options.allow_unrecognised_options().add_options()( + "stream_error_mode", + "Whether to error or print to stdout when a non-default stream is observed and stream_mode " + "is not \"default\"", + cxxopts::value()->default_value(default_stream_error_mode)); return options.parse(argc, argv); } catch (const cxxopts::OptionException& e) { CUDF_FAIL("Error parsing command line options"); @@ -334,21 +355,24 @@ inline auto parse_cudf_test_opts(int argc, char** argv) * function parses the command line to customize test behavior, like the * allocation mode used for creating the default memory resource. */ -#define CUDF_TEST_PROGRAM_MAIN() \ - int main(int argc, char** argv) \ - { \ - ::testing::InitGoogleTest(&argc, argv); \ - auto const cmd_opts = parse_cudf_test_opts(argc, argv); \ - auto const rmm_mode = cmd_opts["rmm_mode"].as(); \ - auto resource = cudf::test::create_memory_resource(rmm_mode); \ - rmm::mr::set_current_device_resource(resource.get()); \ - \ - auto const stream_mode = cmd_opts["stream_mode"].as(); \ - rmm::cuda_stream const new_default_stream{}; \ - if (stream_mode == "custom") { \ - auto adapter = make_stream_checking_resource_adaptor(resource.get()); \ - rmm::mr::set_current_device_resource(&adapter); \ - } \ - \ - return RUN_ALL_TESTS(); \ +#define CUDF_TEST_PROGRAM_MAIN() \ + int main(int argc, char** argv) \ + { \ + ::testing::InitGoogleTest(&argc, argv); \ + auto const cmd_opts = parse_cudf_test_opts(argc, argv); \ + auto const rmm_mode = cmd_opts["rmm_mode"].as(); \ + auto resource = cudf::test::create_memory_resource(rmm_mode); \ + rmm::mr::set_current_device_resource(resource.get()); \ + \ + auto const stream_mode = cmd_opts["stream_mode"].as(); \ + if ((stream_mode == "new_cudf_default") || (stream_mode == "new_testing_default")) { \ + auto const stream_error_mode = 
cmd_opts["stream_error_mode"].as(); \ + auto const error_on_invalid_stream = (stream_error_mode == "error"); \ + auto const check_default_stream = (stream_mode == "new_cudf_default"); \ + auto adaptor = make_stream_checking_resource_adaptor( \ + resource.get(), error_on_invalid_stream, check_default_stream); \ + rmm::mr::set_current_device_resource(&adaptor); \ + } \ + \ + return RUN_ALL_TESTS(); \ } diff --git a/cpp/include/cudf_test/column_wrapper.hpp b/cpp/include/cudf_test/column_wrapper.hpp index 91773b2c3f1..6341e2e10b0 100644 --- a/cpp/include/cudf_test/column_wrapper.hpp +++ b/cpp/include/cudf_test/column_wrapper.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -732,9 +732,11 @@ class strings_column_wrapper : public detail::column_wrapper { { auto all_valid = thrust::make_constant_iterator(true); auto [chars, offsets] = detail::make_chars_and_offsets(begin, end, all_valid); - auto d_chars = cudf::detail::make_device_uvector_sync(chars, cudf::get_default_stream()); - auto d_offsets = cudf::detail::make_device_uvector_sync(offsets, cudf::get_default_stream()); - wrapped = cudf::make_strings_column(d_chars, d_offsets); + auto d_chars = cudf::detail::make_device_uvector_sync( + chars, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto d_offsets = cudf::detail::make_device_uvector_sync( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + wrapped = cudf::make_strings_column(d_chars, d_offsets); } /** @@ -772,10 +774,13 @@ class strings_column_wrapper : public detail::column_wrapper { size_type num_strings = std::distance(begin, end); auto [chars, offsets] = detail::make_chars_and_offsets(begin, end, v); auto null_mask = detail::make_null_mask_vector(v, v + num_strings); - auto d_chars = 
cudf::detail::make_device_uvector_sync(chars, cudf::get_default_stream()); - auto d_offsets = cudf::detail::make_device_uvector_sync(offsets, cudf::get_default_stream()); - auto d_bitmask = cudf::detail::make_device_uvector_sync(null_mask, cudf::get_default_stream()); - wrapped = cudf::make_strings_column(d_chars, d_offsets, d_bitmask); + auto d_chars = cudf::detail::make_device_uvector_sync( + chars, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto d_offsets = cudf::detail::make_device_uvector_sync( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto d_bitmask = cudf::detail::make_device_uvector_sync( + null_mask, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + wrapped = cudf::make_strings_column(d_chars, d_offsets, d_bitmask); } /** diff --git a/cpp/include/cudf_test/default_stream.hpp b/cpp/include/cudf_test/default_stream.hpp new file mode 100644 index 00000000000..1da97d71f44 --- /dev/null +++ b/cpp/include/cudf_test/default_stream.hpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace cudf { +namespace test { + +/** + * @brief Get the default stream to use for tests. + * + * The standard behavior of this function is to return cudf's default stream + * (cudf::get_default_stream). 
This function is primarily provided as an + * overload target for preload libraries (via LD_PRELOAD) so that the default + * stream used for tests may be modified for tracking purposes. All tests of + * public APIs that accept streams should pass `cudf::test::get_default_stream` + * as the stream argument so that a preload library changing the behavior of + * this function will trigger those tests to run on a different stream than + * `cudf::get_default_stream`. + * + * @return The default stream to use for tests. + */ +rmm::cuda_stream_view const get_default_stream(); + +} // namespace test +} // namespace cudf diff --git a/cpp/include/cudf_test/stream_checking_resource_adapter.hpp b/cpp/include/cudf_test/stream_checking_resource_adaptor.hpp similarity index 69% rename from cpp/include/cudf_test/stream_checking_resource_adapter.hpp rename to cpp/include/cudf_test/stream_checking_resource_adaptor.hpp index 4a22ff148ae..e6108309ae2 100644 --- a/cpp/include/cudf_test/stream_checking_resource_adapter.hpp +++ b/cpp/include/cudf_test/stream_checking_resource_adaptor.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -15,6 +15,8 @@ */ #pragma once +#include + #include /** @@ -33,7 +35,12 @@ class stream_checking_resource_adaptor final : public rmm::mr::device_memory_res * * @param upstream The resource used for allocating/deallocating device memory */ - stream_checking_resource_adaptor(Upstream* upstream) : upstream_{upstream} + stream_checking_resource_adaptor(Upstream* upstream, + bool error_on_invalid_stream, + bool check_default_stream) + : upstream_{upstream}, + error_on_invalid_stream_{error_on_invalid_stream}, + check_default_stream_{check_default_stream} { CUDF_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); } @@ -87,7 +94,7 @@ class stream_checking_resource_adaptor final : public rmm::mr::device_memory_res */ void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override { - verify_non_default_stream(stream); + verify_stream(stream); return upstream_->allocate(bytes, stream); } @@ -102,7 +109,7 @@ class stream_checking_resource_adaptor final : public rmm::mr::device_memory_res */ void do_deallocate(void* ptr, std::size_t bytes, rmm::cuda_stream_view stream) override { - verify_non_default_stream(stream); + verify_stream(stream); upstream_->deallocate(ptr, bytes, stream); } @@ -131,25 +138,44 @@ class stream_checking_resource_adaptor final : public rmm::mr::device_memory_res */ std::pair do_get_mem_info(rmm::cuda_stream_view stream) const override { - verify_non_default_stream(stream); + verify_stream(stream); return upstream_->get_mem_info(stream); } /** - * @brief Throw an error if given one of CUDA's default stream specifiers. + * @brief Throw an error if the provided stream is invalid. + * + * A stream is invalid if: + * - check_default_stream_ is true and this function is passed one of CUDA's + * default stream specifiers, or + * - check_default_stream_ is false and this function is passed any stream + * other than the result of cudf::test::get_default_stream(). 
* - * @throws `std::runtime_error` if provided a default stream + * @throws `std::runtime_error` if provided an invalid stream */ - void verify_non_default_stream(rmm::cuda_stream_view const stream) const + void verify_stream(rmm::cuda_stream_view const stream) const { auto cstream{stream.value()}; - if (cstream == cudaStreamDefault || (cstream == cudaStreamLegacy) || - (cstream == cudaStreamPerThread)) { - throw std::runtime_error("Attempted to perform an operation on a default stream!"); + auto const invalid_stream = + check_default_stream_ ? ((cstream == cudaStreamDefault) || (cstream == cudaStreamLegacy) || + (cstream == cudaStreamPerThread)) + : (cstream != cudf::test::get_default_stream().value()); + + if (invalid_stream) { + if (error_on_invalid_stream_) { + throw std::runtime_error("Attempted to perform an operation on an unexpected stream!"); + } else { + std::cout << "Attempted to perform an operation on an unexpected stream!" << std::endl; + } } } - Upstream* upstream_; // the upstream resource used for satisfying allocation requests + Upstream* upstream_; // the upstream resource used for satisfying allocation requests + bool error_on_invalid_stream_; // If true, throw an exception when the wrong stream is detected. + // If false, simply print to stdout. + bool check_default_stream_; // If true, throw an exception when the default stream is observed. + // If false, throw an exception when anything other than + // cudf::test::get_default_stream() is observed. 
}; /** @@ -160,7 +186,9 @@ class stream_checking_resource_adaptor final : public rmm::mr::device_memory_res * @param upstream Pointer to the upstream resource */ template -stream_checking_resource_adaptor make_stream_checking_resource_adaptor(Upstream* upstream) +stream_checking_resource_adaptor make_stream_checking_resource_adaptor( + Upstream* upstream, bool error_on_invalid_stream, bool check_default_stream) { - return stream_checking_resource_adaptor{upstream}; + return stream_checking_resource_adaptor{ + upstream, error_on_invalid_stream, check_default_stream}; } diff --git a/cpp/include/cudf_test/tdigest_utilities.cuh b/cpp/include/cudf_test/tdigest_utilities.cuh index ce45ad91be1..df1900bfa0c 100644 --- a/cpp/include/cudf_test/tdigest_utilities.cuh +++ b/cpp/include/cudf_test/tdigest_utilities.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,6 +24,9 @@ #include #include +#include +#include + #include #include #include @@ -32,8 +35,6 @@ #include #include -#include - // for use with groupby and reduction aggregation tests. 
namespace cudf { @@ -168,7 +169,8 @@ void tdigest_minmax_compare(cudf::tdigest::tdigest_column_view const& tdv, // verify min/max thrust::host_vector> h_spans; h_spans.push_back({input_values.begin(), static_cast(input_values.size())}); - auto spans = cudf::detail::make_device_uvector_async(h_spans, cudf::get_default_stream()); + auto spans = cudf::detail::make_device_uvector_async( + h_spans, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto expected_min = cudf::make_fixed_width_column( data_type{type_id::FLOAT64}, spans.size(), mask_state::UNALLOCATED); @@ -267,7 +269,8 @@ void tdigest_simple_all_nulls_aggregation(Func op) static_cast(values).type(), tdigest_gen{}, op, values, delta); // NOTE: an empty tdigest column still has 1 row. - auto expected = cudf::tdigest::detail::make_empty_tdigest_column(cudf::get_default_stream()); + auto expected = cudf::tdigest::detail::make_empty_tdigest_column( + cudf::get_default_stream(), rmm::mr::get_current_device_resource()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, *expected); } @@ -558,9 +561,12 @@ template void tdigest_merge_empty(MergeFunc merge_op) { // 3 empty tdigests all in the same group - auto a = cudf::tdigest::detail::make_empty_tdigest_column(cudf::get_default_stream()); - auto b = cudf::tdigest::detail::make_empty_tdigest_column(cudf::get_default_stream()); - auto c = cudf::tdigest::detail::make_empty_tdigest_column(cudf::get_default_stream()); + auto a = cudf::tdigest::detail::make_empty_tdigest_column(cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); + auto b = cudf::tdigest::detail::make_empty_tdigest_column(cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); + auto c = cudf::tdigest::detail::make_empty_tdigest_column(cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); std::vector cols; cols.push_back(*a); cols.push_back(*b); @@ -570,7 +576,8 @@ void tdigest_merge_empty(MergeFunc merge_op) auto const delta = 1000; auto result = 
merge_op(*values, delta); - auto expected = cudf::tdigest::detail::make_empty_tdigest_column(cudf::get_default_stream()); + auto expected = cudf::tdigest::detail::make_empty_tdigest_column( + cudf::get_default_stream(), rmm::mr::get_current_device_resource()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected, *result); } diff --git a/cpp/include/nvtext/detail/tokenize.hpp b/cpp/include/nvtext/detail/tokenize.hpp index 38b49e63590..80a6edc496b 100644 --- a/cpp/include/nvtext/detail/tokenize.hpp +++ b/cpp/include/nvtext/detail/tokenize.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,12 +28,7 @@ namespace detail { * @copydoc nvtext::tokenize(strings_column_view const&,string_scalar * const&,rmm::mr::device_memory_resource*) * - * @param strings Strings column tokenize. - * @param delimiter UTF-8 characters used to separate each string into tokens. - * The default of empty string will separate tokens using whitespace. - * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New strings columns of tokens. + * @param stream CUDA stream used for device memory operations and kernel launches */ std::unique_ptr tokenize(cudf::strings_column_view const& strings, cudf::string_scalar const& delimiter, @@ -44,11 +39,7 @@ std::unique_ptr tokenize(cudf::strings_column_view const& strings, * @copydoc nvtext::tokenize(strings_column_view const&,strings_column_view * const&,rmm::mr::device_memory_resource*) * - * @param strings Strings column to tokenize. - * @param delimiters Strings used to separate individual strings into tokens. - * @param stream CUDA stream used for device memory operations and kernel launches. 
- * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New strings columns of tokens. + * @param stream CUDA stream used for device memory operations and kernel launches */ std::unique_ptr tokenize(cudf::strings_column_view const& strings, cudf::strings_column_view const& delimiters, @@ -59,12 +50,7 @@ std::unique_ptr tokenize(cudf::strings_column_view const& strings, * @copydoc nvtext::count_tokens(strings_column_view const&, string_scalar * const&,rmm::mr::device_memory_resource*) * - * @param strings Strings column to use for this operation. - * @param delimiter Strings used to separate each string into tokens. - * The default of empty string will separate tokens using whitespace. - * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New INT32 column of token counts. + * @param stream CUDA stream used for device memory operations and kernel launches */ std::unique_ptr count_tokens(cudf::strings_column_view const& strings, cudf::string_scalar const& delimiter, @@ -75,11 +61,7 @@ std::unique_ptr count_tokens(cudf::strings_column_view const& stri * @copydoc nvtext::count_tokens(strings_column_view const&,strings_column_view * const&,rmm::mr::device_memory_resource*) * - * @param strings Strings column to use for this operation. - * @param delimiters Strings used to separate each string into tokens. - * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New INT32 column of token counts. 
+ * @param stream CUDA stream used for device memory operations and kernel launches */ std::unique_ptr count_tokens(cudf::strings_column_view const& strings, cudf::strings_column_view const& delimiters, diff --git a/cpp/include/nvtext/tokenize.hpp b/cpp/include/nvtext/tokenize.hpp index 10a9f746d76..a72f7dcfa59 100644 --- a/cpp/include/nvtext/tokenize.hpp +++ b/cpp/include/nvtext/tokenize.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -112,11 +112,11 @@ std::unique_ptr tokenize( * All null row entries are ignored and the output contains all valid rows. * The number of tokens for a null element is set to 0 in the output column. * - * @param strings Strings column to use for this operation. - * @param delimiter Strings used to separate each string into tokens. + * @param strings Strings column to use for this operation + * @param delimiter Strings used to separate each string into tokens; * The default of empty string will separate tokens using whitespace. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New INT32 column of token counts. + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New column of token counts */ std::unique_ptr count_tokens( cudf::strings_column_view const& strings, @@ -141,12 +141,12 @@ std::unique_ptr count_tokens( * All null row entries are ignored and the output contains all valid rows. * The number of tokens for a null element is set to 0 in the output column. * - * @throw cudf::logic_error if the delimiters column is empty or contains nulls. + * @throw cudf::logic_error if the delimiters column is empty or contains nulls * - * @param strings Strings column to use for this operation. 
- * @param delimiters Strings used to separate each string into tokens. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New INT32 column of token counts. + * @param strings Strings column to use for this operation + * @param delimiters Strings used to separate each string into tokens + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New column of token counts */ std::unique_ptr count_tokens( cudf::strings_column_view const& strings, diff --git a/cpp/libcudf_kafka/tests/CMakeLists.txt b/cpp/libcudf_kafka/tests/CMakeLists.txt index afa10f02c16..68a5327b455 100644 --- a/cpp/libcudf_kafka/tests/CMakeLists.txt +++ b/cpp/libcudf_kafka/tests/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2018-2022, NVIDIA CORPORATION. +# Copyright (c) 2018-2023, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. 
You may obtain a copy of the License at @@ -13,7 +13,12 @@ # ============================================================================= # ################################################################################################## -# * compiler function ----------------------------------------------------------------------------- +# enable testing ################################################################################ +# ################################################################################################## +enable_testing() + +include(rapids-test) +rapids_test_init() # This function takes in a test name and test source and handles setting all of the associated # properties and linking to build the test @@ -27,12 +32,12 @@ function(ConfigureTest test_name) target_link_libraries( ${test_name} PRIVATE GTest::gmock GTest::gmock_main GTest::gtest_main cudf_kafka ) - add_test(NAME ${test_name} COMMAND ${test_name}) - install( - TARGETS ${test_name} - COMPONENT testing - DESTINATION bin/gtests/libcudf_kafka - EXCLUDE_FROM_ALL + rapids_test_add( + NAME ${test_name} + COMMAND ${test_name} + GPUS 1 + PERCENT 25 + INSTALL_COMPONENT_SET testing ) endfunction() @@ -40,3 +45,5 @@ endfunction() # * Kafka host tests # ---------------------------------------------------------------------------------- ConfigureTest(KAFKA_HOST_TEST kafka_consumer_tests.cpp) + +rapids_test_install_relocatable(INSTALL_COMPONENT_SET testing DESTINATION bin/gtests/libcudf_kafka) diff --git a/cpp/scripts/sort_ninja_log.py b/cpp/scripts/sort_ninja_log.py index 9cb8afbff9f..3fe503f749e 100755 --- a/cpp/scripts/sort_ninja_log.py +++ b/cpp/scripts/sort_ninja_log.py @@ -1,10 +1,11 @@ # -# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# Copyright (c) 2021-2023, NVIDIA CORPORATION. 
# import argparse import os import sys import xml.etree.ElementTree as ET +from pathlib import Path from xml.dom import minidom parser = argparse.ArgumentParser() @@ -22,52 +23,50 @@ "--msg", type=str, default=None, - help="optional message to include in html output", + help="optional text file to include at the top of the html output", +) +parser.add_argument( + "--cmp_log", + type=str, + default=None, + help="optional baseline ninja_log to compare results", ) args = parser.parse_args() log_file = args.log_file -log_path = os.path.dirname(os.path.abspath(log_file)) - output_fmt = args.fmt +cmp_file = args.cmp_log # build a map of the log entries -entries = {} -with open(log_file) as log: - last = 0 - files = {} - for line in log: - entry = line.split() - if len(entry) > 4: - obj_file = entry[3] - file_size = ( - os.path.getsize(os.path.join(log_path, obj_file)) - if os.path.exists(obj_file) - else 0 - ) - start = int(entry[0]) - end = int(entry[1]) - # logic based on ninjatracing - if end < last: - files = {} - last = end - files.setdefault(entry[4], (entry[3], start, end, file_size)) - - # build entries from files dict - for entry in files.values(): - entries[entry[0]] = (entry[1], entry[2], entry[3]) - -# check file could be loaded and we have entries to report -if len(entries) == 0: - print("Could not parse", log_file) - exit() +def build_log_map(log_file): + entries = {} + log_path = os.path.dirname(os.path.abspath(log_file)) + with open(log_file) as log: + last = 0 + files = {} + for line in log: + entry = line.split() + if len(entry) > 4: + obj_file = entry[3] + file_size = ( + os.path.getsize(os.path.join(log_path, obj_file)) + if os.path.exists(obj_file) + else 0 + ) + start = int(entry[0]) + end = int(entry[1]) + # logic based on ninjatracing + if end < last: + files = {} + last = end + files.setdefault(entry[4], (entry[3], start, end, file_size)) + + # build entries from files dict + for entry in files.values(): + entries[entry[0]] = (entry[1], entry[2], 
entry[3]) + + return entries -# sort the entries by build-time (descending order) -sorted_list = sorted( - list(entries.keys()), - key=lambda k: entries[k][1] - entries[k][0], - reverse=True, -) # output results in XML format def output_xml(entries, sorted_list, args): @@ -147,14 +146,46 @@ def assign_entries_to_threads(entries): return (results, end_time) -# output chart results in HTML format -def output_html(entries, sorted_list, args): +# format the build-time +def format_build_time(input_time): + build_time = abs(input_time) + build_time_str = str(build_time) + " ms" + if build_time > 120000: # 2 minutes + minutes = int(build_time / 60000) + seconds = int(((build_time / 60000) - minutes) * 60) + build_time_str = "{:d}:{:02d} min".format(minutes, seconds) + elif build_time > 1000: + build_time_str = "{:.3f} s".format(build_time / 1000) + if input_time < 0: + build_time_str = "-" + build_time_str + return build_time_str + + +# format file size +def format_file_size(input_size): + file_size = abs(input_size) + file_size_str = "" + if file_size > 1000000: + file_size_str = "{:.3f} MB".format(file_size / 1000000) + elif file_size > 1000: + file_size_str = "{:.3f} KB".format(file_size / 1000) + elif file_size > 0: + file_size_str = str(file_size) + " bytes" + if input_size < 0: + file_size_str = "-" + file_size_str + return file_size_str + + +# Output chart results in HTML format +# Builds a standalone html file with no javascript or styles +def output_html(entries, sorted_list, cmp_entries, args): print("Build Metrics Report") - # Note: Jenkins does not support javascript nor style defined in the html - # https://www.jenkins.io/doc/book/security/configuring-content-security-policy/ print("") if args.msg is not None: - print("

", args.msg, "

") + msg_file = Path(args.msg) + if msg_file.is_file(): + msg = msg_file.read_text() + print("

", msg, "

") # map entries to threads # the end_time is used to scale all the entries to a fixed output width @@ -201,15 +232,8 @@ def output_html(entries, sorted_list, args): # adjust for the cellspacing prev_end = end + int(end_time / 500) - # format the build-time build_time = end - start - build_time_str = str(build_time) + " ms" - if build_time > 120000: # 2 minutes - minutes = int(build_time / 60000) - seconds = int(((build_time / 60000) - minutes) * 60) - build_time_str = "{:d}:{:02d} min".format(minutes, seconds) - elif build_time > 1000: - build_time_str = "{:.3f} s".format(build_time / 1000) + build_time_str = format_build_time(build_time) # assign color and accumulate legend values color = white @@ -248,7 +272,7 @@ def output_html(entries, sorted_list, args): # done with this entry print("") # update the entry with just the computed output info - entries[name] = (build_time_str, color, entry[2]) + entries[name] = (build_time, color, entry[2]) # add a filler column at the end of each row print("
") @@ -259,30 +283,53 @@ def output_html(entries, sorted_list, args): # output detail table in build-time descending order print("") print( - "", - "", - "", - sep="", + "", "", "", sep="" ) + if cmp_entries: + print("", sep="") + print("") + for name in sorted_list: entry = entries[name] - build_time_str = entry[0] + build_time = entry[0] color = entry[1] file_size = entry[2] - # format file size - file_size_str = "" - if file_size > 1000000: - file_size_str = "{:.3f} MB".format(file_size / 1000000) - elif file_size > 1000: - file_size_str = "{:.3f} KB".format(file_size / 1000) - elif file_size > 0: - file_size_str = str(file_size) + " bytes" + build_time_str = format_build_time(build_time) + file_size_str = format_file_size(file_size) # output entry row print("", sep="", end="") print("", sep="", end="") - print("", sep="") + print("", sep="", end="") + # output diff column + cmp_entry = ( + cmp_entries[name] if cmp_entries and name in cmp_entries else None + ) + if cmp_entry: + diff_time = build_time - (cmp_entry[1] - cmp_entry[0]) + diff_time_str = format_build_time(diff_time) + diff_color = white + diff_percent = int((diff_time / build_time) * 100) + if build_time > 60000: + if diff_percent > 20: + diff_color = red + diff_time_str = "" + diff_time_str + "" + elif diff_percent < -20: + diff_color = green + diff_time_str = "" + diff_time_str + "" + elif diff_percent > 0: + diff_color = yellow + print( + "", + sep="", + end="", + ) + print("") print("
FileCompile timeSize
FileCompile timeSizet-cmp
", name, "", build_time_str, "", file_size_str, "
", file_size_str, "", + diff_time_str, + "

") @@ -296,22 +343,62 @@ def output_html(entries, sorted_list, args): print("", summary["green"], "") print("time < 1 second") print("", summary["white"], "") - print("") + print("") + + if cmp_entries: + print("") + print("time increase > 20%") + print("time increase > 0") + print("time decrease > 20%") + print( + "time change < 20%% or build time < 1 minute", + ) + print("
") + + print("") # output results in CSV format -def output_csv(entries, sorted_list, args): - print("time,size,file") +def output_csv(entries, sorted_list, cmp_entries, args): + print("time,size,file", end="") + if cmp_entries: + print(",diff", end="") + print() for name in sorted_list: entry = entries[name] build_time = entry[1] - entry[0] file_size = entry[2] - print(build_time, file_size, name, sep=",") + cmp_entry = ( + cmp_entries[name] if cmp_entries and name in cmp_entries else None + ) + print(build_time, file_size, name, sep=",", end="") + if cmp_entry: + diff_time = build_time - (cmp_entry[1] - cmp_entry[0]) + print(",", diff_time, sep="", end="") + print() + + +# parse log file into map +entries = build_log_map(log_file) +if len(entries) == 0: + print("Could not parse", log_file) + exit() + +# sort the entries by build-time (descending order) +sorted_list = sorted( + list(entries.keys()), + key=lambda k: entries[k][1] - entries[k][0], + reverse=True, +) +# load the comparison build log if available +cmp_entries = build_log_map(cmp_file) if cmp_file else None if output_fmt == "xml": output_xml(entries, sorted_list, args) elif output_fmt == "html": - output_html(entries, sorted_list, args) + output_html(entries, sorted_list, cmp_entries, args) else: - output_csv(entries, sorted_list, args) + output_csv(entries, sorted_list, cmp_entries, args) diff --git a/cpp/src/aggregation/aggregation.cpp b/cpp/src/aggregation/aggregation.cpp index 07c53b3a421..2e6a643484e 100644 --- a/cpp/src/aggregation/aggregation.cpp +++ b/cpp/src/aggregation/aggregation.cpp @@ -605,6 +605,8 @@ template std::unique_ptr make_nunique_aggregation make_nunique_aggregation( null_policy null_handling); +template std::unique_ptr +make_nunique_aggregation(null_policy null_handling); /// Factory to create an NTH_ELEMENT aggregation template diff --git a/cpp/src/binaryop/compiled/binary_ops.hpp b/cpp/src/binaryop/compiled/binary_ops.hpp index c51993409ef..47fd50c5d97 100644 --- 
a/cpp/src/binaryop/compiled/binary_ops.hpp +++ b/cpp/src/binaryop/compiled/binary_ops.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * Copyright (c) 2018-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -32,29 +32,26 @@ class mutable_column_device_view; namespace binops { namespace compiled { -std::unique_ptr string_null_min_max( - scalar const& lhs, - column_view const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr string_null_min_max(scalar const& lhs, + column_view const& rhs, + binary_operator op, + data_type output_type, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); -std::unique_ptr string_null_min_max( - column_view const& lhs, - scalar const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr string_null_min_max(column_view const& lhs, + scalar const& rhs, + binary_operator op, + data_type output_type, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); -std::unique_ptr string_null_min_max( - column_view const& lhs, - column_view const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr string_null_min_max(column_view const& lhs, + column_view const& rhs, + binary_operator op, + data_type output_type, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Performs a binary operation between a string scalar and a string @@ -75,13 +72,12 @@ std::unique_ptr 
string_null_min_max( * @param mr Device memory resource used to allocate the returned column's device memory * @return std::unique_ptr Output column */ -std::unique_ptr binary_operation( - scalar const& lhs, - column_view const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr binary_operation(scalar const& lhs, + column_view const& rhs, + binary_operator op, + data_type output_type, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Performs a binary operation between a string column and a string @@ -102,13 +98,12 @@ std::unique_ptr binary_operation( * @param mr Device memory resource used to allocate the returned column's device memory * @return std::unique_ptr Output column */ -std::unique_ptr binary_operation( - column_view const& lhs, - scalar const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr binary_operation(column_view const& lhs, + scalar const& rhs, + binary_operator op, + data_type output_type, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Performs a binary operation between two string columns. 
@@ -128,13 +123,12 @@ std::unique_ptr binary_operation( * @param mr Device memory resource used to allocate the returned column's device memory * @return std::unique_ptr Output column */ -std::unique_ptr binary_operation( - column_view const& lhs, - column_view const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr binary_operation(column_view const& lhs, + column_view const& rhs, + binary_operator op, + data_type output_type, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); void binary_operation(mutable_column_view& out, scalar const& lhs, diff --git a/cpp/src/binaryop/compiled/struct_binary_ops.cuh b/cpp/src/binaryop/compiled/struct_binary_ops.cuh index 8418493318f..2299df5a9bb 100644 --- a/cpp/src/binaryop/compiled/struct_binary_ops.cuh +++ b/cpp/src/binaryop/compiled/struct_binary_ops.cuh @@ -70,8 +70,8 @@ void apply_struct_binary_op(mutable_column_view& out, column_view const& rhs, bool is_lhs_scalar, bool is_rhs_scalar, - PhysicalElementComparator comparator = {}, - rmm::cuda_stream_view stream = cudf::get_default_stream()) + PhysicalElementComparator comparator, + rmm::cuda_stream_view stream) { auto const compare_orders = std::vector( lhs.size(), @@ -144,8 +144,8 @@ void apply_struct_equality_op(mutable_column_view& out, bool is_lhs_scalar, bool is_rhs_scalar, binary_operator op, - PhysicalEqualityComparator comparator = {}, - rmm::cuda_stream_view stream = cudf::get_default_stream()) + PhysicalEqualityComparator comparator, + rmm::cuda_stream_view stream) { CUDF_EXPECTS(op == binary_operator::EQUAL || op == binary_operator::NOT_EQUAL || op == binary_operator::NULL_EQUALS, diff --git a/cpp/src/copying/concatenate.cu b/cpp/src/copying/concatenate.cu index 5d36d70696c..6d6ef9fd7b0 100644 --- a/cpp/src/copying/concatenate.cu +++ b/cpp/src/copying/concatenate.cu @@ -76,7 +76,8 
@@ auto create_device_views(host_span views, rmm::cuda_stream_vi std::back_inserter(device_views), [](auto const& col) { return *col; }); - auto d_views = make_device_uvector_async(device_views, stream); + auto d_views = + make_device_uvector_async(device_views, stream, rmm::mr::get_current_device_resource()); // Compute the partition offsets auto offsets = thrust::host_vector(views.size() + 1); @@ -87,7 +88,8 @@ auto create_device_views(host_span views, rmm::cuda_stream_vi std::next(offsets.begin()), [](auto const& col) { return col.size(); }, thrust::plus{}); - auto d_offsets = make_device_uvector_async(offsets, stream); + auto d_offsets = + make_device_uvector_async(offsets, stream, rmm::mr::get_current_device_resource()); auto const output_size = offsets.back(); return std::make_tuple( diff --git a/cpp/src/copying/copy.cu b/cpp/src/copying/copy.cu index 0978cf441d8..9ec00612f2f 100644 --- a/cpp/src/copying/copy.cu +++ b/cpp/src/copying/copy.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,6 +29,7 @@ #include #include +#include #include #include @@ -174,7 +175,8 @@ std::unique_ptr scatter_gather_based_if_else(cudf::column_view const& lh gather_map, out_of_bounds_policy::DONT_CHECK, negative_index_policy::NOT_ALLOWED, - stream); + stream, + rmm::mr::get_current_device_resource()); auto result = cudf::detail::scatter( table_view{std::vector{scatter_src_lhs->get_column(0).view()}}, diff --git a/cpp/src/copying/get_element.cu b/cpp/src/copying/get_element.cu index 5e76b4adbbe..cc12aaa1382 100644 --- a/cpp/src/copying/get_element.cu +++ b/cpp/src/copying/get_element.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -37,11 +37,10 @@ namespace { struct get_element_functor { template () && !is_fixed_point()>* p = nullptr> - std::unique_ptr operator()( - column_view const& input, - size_type index, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + std::unique_ptr operator()(column_view const& input, + size_type index, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto s = make_fixed_width_scalar(data_type(type_to_id()), stream, mr); @@ -61,11 +60,10 @@ struct get_element_functor { } template >* p = nullptr> - std::unique_ptr operator()( - column_view const& input, - size_type index, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + std::unique_ptr operator()(column_view const& input, + size_type index, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto device_col = column_device_view::create(input, stream); @@ -86,11 +84,10 @@ struct get_element_functor { } template >* p = nullptr> - std::unique_ptr operator()( - column_view const& input, - size_type index, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + std::unique_ptr operator()(column_view const& input, + size_type index, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto dict_view = dictionary_column_view(input); auto indices_iter = detail::indexalator_factory::make_input_iterator(dict_view.indices()); @@ -122,11 +119,10 @@ struct get_element_functor { } template >* p = nullptr> - std::unique_ptr operator()( - column_view const& input, - size_type index, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + std::unique_ptr operator()(column_view const& input, + 
size_type index, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { bool valid = is_element_valid_sync(input, index, stream); auto const child_col_idx = lists_column_view::child_column_index; @@ -147,11 +143,10 @@ struct get_element_functor { } template ()>* p = nullptr> - std::unique_ptr operator()( - column_view const& input, - size_type index, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + std::unique_ptr operator()(column_view const& input, + size_type index, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { using Type = typename T::rep; @@ -178,11 +173,10 @@ struct get_element_functor { } template >* p = nullptr> - std::unique_ptr operator()( - column_view const& input, - size_type index, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + std::unique_ptr operator()(column_view const& input, + size_type index, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { bool valid = is_element_valid_sync(input, index, stream); auto row_contents = diff --git a/cpp/src/copying/purge_nonempty_nulls.cu b/cpp/src/copying/purge_nonempty_nulls.cu index 5bdf10c8af6..20a8ce986aa 100644 --- a/cpp/src/copying/purge_nonempty_nulls.cu +++ b/cpp/src/copying/purge_nonempty_nulls.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -38,6 +38,8 @@ bool has_nonempty_null_rows(cudf::column_view const& input, rmm::cuda_stream_vie { if (not input.has_nulls()) { return false; } // No nulls => no dirty rows. + if ((input.size() == input.null_count()) && (input.num_children() == 0)) { return false; } + // Cross-reference nullmask and offsets. 
auto const type = input.type().id(); auto const offsets = (type == type_id::STRING) ? (strings_column_view{input}).offsets() diff --git a/cpp/src/copying/scatter.cu b/cpp/src/copying/scatter.cu index dd4912a216e..316f39b616c 100644 --- a/cpp/src/copying/scatter.cu +++ b/cpp/src/copying/scatter.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -253,7 +253,8 @@ struct column_scalar_scatterer_impl { auto scatter_functor = column_scalar_scatterer{}; auto fields_iter_begin = make_counting_transform_iterator(0, [&](auto const& i) { - auto row_slr = get_element(typed_s->view().column(i), 0, stream); + auto row_slr = + get_element(typed_s->view().column(i), 0, stream, rmm::mr::get_current_device_resource()); return type_dispatcher(row_slr->type(), scatter_functor, *row_slr, @@ -392,8 +393,8 @@ std::unique_ptr boolean_mask_scatter(column_view const& input, 0); // The scatter map is actually a table with only one column, which is scatter map. - auto scatter_map = - detail::apply_boolean_mask(table_view{{indices->view()}}, boolean_mask, stream); + auto scatter_map = detail::apply_boolean_mask( + table_view{{indices->view()}}, boolean_mask, stream, rmm::mr::get_current_device_resource()); auto output_table = detail::scatter( table_view{{input}}, scatter_map->get_column(0).view(), table_view{{target}}, stream, mr); diff --git a/cpp/src/io/orc/timezone.cpp b/cpp/src/datetime/timezone.cpp similarity index 79% rename from cpp/src/io/orc/timezone.cpp rename to cpp/src/datetime/timezone.cpp index 810dfe87320..55d68fe4a1a 100644 --- a/cpp/src/io/orc/timezone.cpp +++ b/cpp/src/datetime/timezone.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, NVIDIA CORPORATION. + * Copyright (c) 2018-2023, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,22 +13,24 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "timezone.cuh" +#include +#include +#include #include +#include #include +#include #include namespace cudf { -namespace io { + +namespace { constexpr uint32_t tzif_magic = ('T' << 0) | ('Z' << 8) | ('i' << 16) | ('f' << 24); std::string const tzif_system_directory = "/usr/share/zoneinfo/"; -// Seconds from Jan 1st, 1970 to Jan 1st, 2015 -constexpr int64_t orc_utc_offset = 1420070400; - #pragma pack(push, 1) /** * @brief 32-bit TZif header @@ -127,12 +129,13 @@ struct timezone_file { "Number of transition times is larger than the file size."); } - timezone_file(std::string const& timezone_name) + timezone_file(std::optional tzif_dir, std::string_view timezone_name) { using std::ios_base; // Open the input file - std::string const tz_filename = tzif_system_directory + timezone_name; + auto const tz_filename = + std::filesystem::path{tzif_dir.value_or(tzif_system_directory)} / timezone_name; std::ifstream fin; fin.open(tz_filename, ios_base::in | ios_base::binary | ios_base::ate); CUDF_EXPECTS(fin, "Failed to open the timezone file."); @@ -373,45 +376,62 @@ static int64_t get_transition_time(dst_transition_s const& trans, int year) return trans.time + cuda::std::chrono::duration_cast(duration_D{day}).count(); } -timezone_table build_timezone_transition_table(std::string const& timezone_name, - rmm::cuda_stream_view stream) +} // namespace + +std::unique_ptr make_timezone_transition_table(std::optional tzif_dir, + std::string_view timezone_name, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::make_timezone_transition_table( + tzif_dir, timezone_name, cudf::get_default_stream(), mr); +} + +namespace detail { + +std::unique_ptr
make_timezone_transition_table(std::optional tzif_dir, + std::string_view timezone_name, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { if (timezone_name == "UTC" || timezone_name.empty()) { // Return an empty table for UTC - return {}; + return std::make_unique(); } - timezone_file const tzf(timezone_name); + timezone_file const tzf(tzif_dir, timezone_name); - std::vector ttimes(1); - std::vector offsets(1); + std::vector transition_times(1); + std::vector offsets(1); // One ancient rule entry, one per TZ file entry, 2 entries per year in the future cycle - ttimes.reserve(1 + tzf.timecnt() + cycle_entry_cnt); - offsets.reserve(1 + tzf.timecnt() + cycle_entry_cnt); + transition_times.reserve(1 + tzf.timecnt() + solar_cycle_entry_count); + offsets.reserve(1 + tzf.timecnt() + solar_cycle_entry_count); size_t earliest_std_idx = 0; for (size_t t = 0; t < tzf.timecnt(); t++) { auto const ttime = tzf.transition_times[t]; auto const idx = tzf.ttime_idx[t]; CUDF_EXPECTS(idx < tzf.typecnt(), "Out-of-range type index"); auto const utcoff = tzf.ttype[idx].utcoff; - ttimes.push_back(ttime); + transition_times.push_back(ttime); offsets.push_back(utcoff); - if (!earliest_std_idx && !tzf.ttype[idx].isdst) { earliest_std_idx = ttimes.size() - 1; } + if (!earliest_std_idx && !tzf.ttype[idx].isdst) { + earliest_std_idx = transition_times.size() - 1; + } } if (tzf.timecnt() != 0) { if (!earliest_std_idx) { earliest_std_idx = 1; } - ttimes[0] = ttimes[earliest_std_idx]; - offsets[0] = offsets[earliest_std_idx]; + transition_times[0] = transition_times[earliest_std_idx]; + offsets[0] = offsets[earliest_std_idx]; } else { if (tzf.typecnt() == 0 || tzf.ttype[0].utcoff == 0) { // No transitions, offset is zero; Table would be a no-op. // Return an empty table to speed up parsing. 
- return {}; + return std::make_unique(); } // No transitions to use for the time/offset - use the first offset and apply to all timestamps - ttimes[0] = std::numeric_limits::max(); - offsets[0] = tzf.ttype[0].utcoff; + transition_times[0] = std::numeric_limits::max(); + offsets[0] = tzf.ttype[0].utcoff; } // Generate entries for times after the last transition @@ -440,19 +460,19 @@ timezone_table build_timezone_transition_table(std::string const& timezone_name, // Add entries to fill the transition cycle int64_t year_timestamp = 0; - for (int32_t year = 1970; year < 1970 + cycle_years; ++year) { + for (int32_t year = 1970; year < 1970 + solar_cycle_years; ++year) { auto const dst_start_time = get_transition_time(dst_start, year); auto const dst_end_time = get_transition_time(dst_end, year); // Two entries per year, since there are two transitions - ttimes.push_back(year_timestamp + dst_start_time - future_std_offset); + transition_times.push_back(year_timestamp + dst_start_time - future_std_offset); offsets.push_back(future_dst_offset); - ttimes.push_back(year_timestamp + dst_end_time - future_dst_offset); + transition_times.push_back(year_timestamp + dst_end_time - future_dst_offset); offsets.push_back(future_std_offset); // Swap the newly added transitions if in descending order - if (ttimes.rbegin()[1] > ttimes.rbegin()[0]) { - std::swap(ttimes.rbegin()[0], ttimes.rbegin()[1]); + if (transition_times.rbegin()[1] > transition_times.rbegin()[0]) { + std::swap(transition_times.rbegin()[0], transition_times.rbegin()[1]); std::swap(offsets.rbegin()[0], offsets.rbegin()[1]); } @@ -461,13 +481,33 @@ timezone_table build_timezone_transition_table(std::string const& timezone_name, .count(); } - rmm::device_uvector d_ttimes = cudf::detail::make_device_uvector_async(ttimes, stream); - rmm::device_uvector d_offsets = cudf::detail::make_device_uvector_async(offsets, stream); - auto const gmt_offset = get_gmt_offset(ttimes, offsets, orc_utc_offset); + 
CUDF_EXPECTS(transition_times.size() == offsets.size(), + "Error reading TZif file for timezone " + std::string{timezone_name}); + + std::vector ttimes_typed; + ttimes_typed.reserve(transition_times.size()); + std::transform(transition_times.cbegin(), + transition_times.cend(), + std::back_inserter(ttimes_typed), + [](auto ts) { return timestamp_s{duration_s{ts}}; }); + std::vector offsets_typed; + offsets_typed.reserve(offsets.size()); + std::transform(offsets.cbegin(), offsets.cend(), std::back_inserter(offsets_typed), [](auto ts) { + return duration_s{ts}; + }); + + auto d_ttimes = cudf::detail::make_device_uvector_async(ttimes_typed, stream, mr); + auto d_offsets = cudf::detail::make_device_uvector_async(offsets_typed, stream, mr); + + std::vector> tz_table_columns; + tz_table_columns.emplace_back(std::make_unique(std::move(d_ttimes))); + tz_table_columns.emplace_back(std::make_unique(std::move(d_offsets))); + + // Need to finish copies before transition_times and offsets go out of scope stream.synchronize(); - return {gmt_offset, std::move(d_ttimes), std::move(d_offsets)}; + return std::make_unique(std::move(tz_table_columns)); } -} // namespace io +} // namespace detail } // namespace cudf diff --git a/cpp/src/dictionary/add_keys.cu b/cpp/src/dictionary/add_keys.cu index 486e7d2d24b..d543225d3eb 100644 --- a/cpp/src/dictionary/add_keys.cu +++ b/cpp/src/dictionary/add_keys.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -30,6 +30,8 @@ #include #include +#include + namespace cudf { namespace dictionary { namespace detail { @@ -54,8 +56,8 @@ std::unique_ptr add_keys(dictionary_column_view const& dictionary_column CUDF_EXPECTS(new_keys.type() == old_keys.type(), "Keys must be the same type"); // first, concatenate the keys together // [a,b,c,d,f] + [d,b,e] = [a,b,c,d,f,d,b,e] - auto combined_keys = - cudf::detail::concatenate(std::vector{old_keys, new_keys}, stream); + auto combined_keys = cudf::detail::concatenate( + std::vector{old_keys, new_keys}, stream, rmm::mr::get_current_device_resource()); // Drop duplicates from the combined keys, then sort the result. // sort(distinct([a,b,c,d,f,d,b,e])) = [a,b,c,d,e,f] diff --git a/cpp/src/dictionary/detail/concatenate.cu b/cpp/src/dictionary/detail/concatenate.cu index d4f3a9ca495..98ad108655f 100644 --- a/cpp/src/dictionary/detail/concatenate.cu +++ b/cpp/src/dictionary/detail/concatenate.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -114,7 +115,8 @@ struct compute_children_offsets_fn { [](auto lhs, auto rhs) { return offsets_pair{lhs.first + rhs.first, lhs.second + rhs.second}; }); - return cudf::detail::make_device_uvector_sync(offsets, stream); + return cudf::detail::make_device_uvector_sync( + offsets, stream, rmm::mr::get_current_device_resource()); } private: @@ -219,7 +221,8 @@ std::unique_ptr concatenate(host_span columns, CUDF_EXPECTS(keys.type() == keys_type, "key types of all dictionary columns must match"); return keys; }); - auto all_keys = cudf::detail::concatenate(keys_views, stream); + auto all_keys = + cudf::detail::concatenate(keys_views, stream, rmm::mr::get_current_device_resource()); // sort keys and remove duplicates; // this becomes the keys child for the output dictionary column diff --git a/cpp/src/dictionary/set_keys.cu b/cpp/src/dictionary/set_keys.cu index 075fb6115e3..36f5021d305 100644 --- a/cpp/src/dictionary/set_keys.cu +++ b/cpp/src/dictionary/set_keys.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -181,7 +181,7 @@ std::vector> match_dictionaries( { std::vector keys(input.size()); std::transform(input.begin(), input.end(), keys.begin(), [](auto& col) { return col.keys(); }); - auto new_keys = cudf::detail::concatenate(keys, stream); + auto new_keys = cudf::detail::concatenate(keys, stream, rmm::mr::get_current_device_resource()); auto keys_view = new_keys->view(); std::vector> result(input.size()); std::transform(input.begin(), input.end(), result.begin(), [keys_view, mr, stream](auto& col) { diff --git a/cpp/src/filling/fill.cu b/cpp/src/filling/fill.cu index ecd66f1b0c9..a747cc195ae 100644 --- a/cpp/src/filling/fill.cu +++ b/cpp/src/filling/fill.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -104,11 +104,10 @@ struct out_of_place_fill_range_dispatch { template () or cudf::is_fixed_point())> - std::unique_ptr operator()( - cudf::size_type begin, - cudf::size_type end, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + std::unique_ptr operator()(cudf::size_type begin, + cudf::size_type end, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(input.type() == value.type(), "Data type mismatch."); auto p_ret = std::make_unique(input, stream, mr); diff --git a/cpp/src/filling/sequence.cu b/cpp/src/filling/sequence.cu index 284e7c46347..b4bab369c61 100644 --- a/cpp/src/filling/sequence.cu +++ b/cpp/src/filling/sequence.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -134,11 +134,10 @@ std::unique_ptr sequence(size_type size, return type_dispatcher(init.type(), sequence_functor{}, size, init, step, stream, mr); } -std::unique_ptr sequence( - size_type size, - scalar const& init, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +std::unique_ptr sequence(size_type size, + scalar const& init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(size >= 0, "size must be >= 0"); CUDF_EXPECTS(is_numeric(init.type()), "init scalar type must be numeric"); diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu index 1979108eaa2..df590c0c4b9 100644 --- a/cpp/src/groupby/groupby.cu +++ b/cpp/src/groupby/groupby.cu @@ -37,6 +37,7 @@ #include #include +#include #include @@ -305,7 +306,8 @@ std::pair, std::unique_ptr
> groupby::shift( thrust::make_counting_iterator(values.num_columns()), std::back_inserter(results), [&](size_type i) { - auto grouped_values = helper().grouped_values(values.column(i), stream); + auto grouped_values = + helper().grouped_values(values.column(i), stream, rmm::mr::get_current_device_resource()); return cudf::detail::segmented_shift( grouped_values->view(), group_offsets, offsets[i], fill_values[i].get(), stream, mr); }); diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu index 72ac6255549..6c55b1438ee 100644 --- a/cpp/src/groupby/hash/groupby.cu +++ b/cpp/src/groupby/hash/groupby.cu @@ -481,12 +481,15 @@ void compute_single_pass_aggs(table_view const& keys, // prepare to launch kernel to do the actual aggregation auto d_sparse_table = mutable_table_device_view::create(sparse_table, stream); auto d_values = table_device_view::create(flattened_values, stream); - auto const d_aggs = cudf::detail::make_device_uvector_async(agg_kinds, stream); + auto const d_aggs = cudf::detail::make_device_uvector_async( + agg_kinds, stream, rmm::mr::get_current_device_resource()); auto const skip_key_rows_with_nulls = keys_have_nulls and include_null_keys == null_policy::EXCLUDE; auto row_bitmask = - skip_key_rows_with_nulls ? cudf::detail::bitmask_and(keys, stream).first : rmm::device_buffer{}; + skip_key_rows_with_nulls + ? cudf::detail::bitmask_and(keys, stream, rmm::mr::get_current_device_resource()).first + : rmm::device_buffer{}; thrust::for_each_n(rmm::exec_policy(stream), thrust::make_counting_iterator(0), diff --git a/cpp/src/groupby/sort/functors.hpp b/cpp/src/groupby/sort/functors.hpp index bcc190c745b..be36956b929 100644 --- a/cpp/src/groupby/sort/functors.hpp +++ b/cpp/src/groupby/sort/functors.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -78,7 +78,7 @@ struct store_result_functor { // It's overridden in scan implementation. return sorted_values->view(); else - return (grouped_values = helper.grouped_values(values, stream))->view(); + return (grouped_values = helper.grouped_values(values, stream, mr))->view(); }; /** @@ -90,7 +90,7 @@ struct store_result_functor { column_view get_sorted_values() { return sorted_values ? sorted_values->view() - : (sorted_values = helper.sorted_values(values, stream))->view(); + : (sorted_values = helper.sorted_values(values, stream, mr))->view(); }; protected: diff --git a/cpp/src/groupby/sort/group_m2.cu b/cpp/src/groupby/sort/group_m2.cu index edc8b089120..70b05100fb0 100644 --- a/cpp/src/groupby/sort/group_m2.cu +++ b/cpp/src/groupby/sort/group_m2.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,10 +25,12 @@ #include #include +#include #include #include #include +#include namespace cudf { namespace groupby { @@ -62,15 +64,19 @@ void compute_m2_fn(column_device_view const& values, ResultType* d_result, rmm::cuda_stream_view stream) { - auto const var_iter = cudf::detail::make_counting_transform_iterator( - size_type{0}, - m2_transform{ - values, values_iter, d_means, group_labels.data()}); + auto m2_fn = m2_transform{ + values, values_iter, d_means, group_labels.data()}; + auto const itr = thrust::counting_iterator(0); + // Using a temporary buffer for intermediate transform results instead of + // using the transform-iterator directly in thrust::reduce_by_key + // improves compile-time significantly. 
+ auto m2_vals = rmm::device_uvector(values.size(), stream); + thrust::transform(rmm::exec_policy(stream), itr, itr + values.size(), m2_vals.begin(), m2_fn); thrust::reduce_by_key(rmm::exec_policy(stream), group_labels.begin(), group_labels.end(), - var_iter, + m2_vals.begin(), thrust::make_discard_iterator(), d_result); } diff --git a/cpp/src/groupby/sort/group_nunique.cu b/cpp/src/groupby/sort/group_nunique.cu index cf81253483e..1a5f1691d5b 100644 --- a/cpp/src/groupby/sort/group_nunique.cu +++ b/cpp/src/groupby/sort/group_nunique.cu @@ -94,21 +94,20 @@ std::unique_ptr group_nunique(column_view const& values, auto const d_values_view = column_device_view::create(values, stream); + auto d_result = rmm::device_uvector(group_labels.size(), stream); + auto const comparator_helper = [&](auto const d_equal) { - auto const is_unique_iterator = - thrust::make_transform_iterator(thrust::counting_iterator(0), - is_unique_iterator_fn{nullate::DYNAMIC{values.has_nulls()}, - *d_values_view, - d_equal, - null_handling, - group_offsets.data(), - group_labels.data()}); - thrust::reduce_by_key(rmm::exec_policy(stream), - group_labels.begin(), - group_labels.end(), - is_unique_iterator, - thrust::make_discard_iterator(), - result->mutable_view().begin()); + auto fn = is_unique_iterator_fn{nullate::DYNAMIC{values.has_nulls()}, + *d_values_view, + d_equal, + null_handling, + group_offsets.data(), + group_labels.data()}; + thrust::transform(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(values.size()), + d_result.begin(), + fn); }; if (cudf::detail::has_nested_columns(values_view)) { @@ -121,6 +120,15 @@ std::unique_ptr group_nunique(column_view const& values, comparator_helper(d_equal); } + // calling this with a vector instead of a transform iterator is 10x faster to compile; + // it also helps that we are only calling it once for both conditions + thrust::reduce_by_key(rmm::exec_policy(stream), + group_labels.begin(), + 
group_labels.end(), + d_result.begin(), + thrust::make_discard_iterator(), + result->mutable_view().begin()); + return result; } diff --git a/cpp/src/groupby/sort/group_quantiles.cu b/cpp/src/groupby/sort/group_quantiles.cu index 90ca5a5c90e..a9edcfecbf7 100644 --- a/cpp/src/groupby/sort/group_quantiles.cu +++ b/cpp/src/groupby/sort/group_quantiles.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -156,7 +156,8 @@ std::unique_ptr group_quantiles(column_view const& values, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto dv_quantiles = cudf::detail::make_device_uvector_async(quantiles, stream); + auto dv_quantiles = cudf::detail::make_device_uvector_async( + quantiles, stream, rmm::mr::get_current_device_resource()); auto values_type = cudf::is_dictionary(values.type()) ? dictionary_column_view(values).keys().type() diff --git a/cpp/src/groupby/sort/group_scan_util.cuh b/cpp/src/groupby/sort/group_scan_util.cuh index cb954e614f2..f12efd3cd24 100644 --- a/cpp/src/groupby/sort/group_scan_util.cuh +++ b/cpp/src/groupby/sort/group_scan_util.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -124,7 +124,7 @@ struct group_scan_functor() make_null_replacement_iterator(*values_view, OpType::template identity()), thrust::identity{}); do_scan(input, result_view->begin(), OpType{}); - result->set_null_mask(cudf::detail::copy_bitmask(values, stream)); + result->set_null_mask(cudf::detail::copy_bitmask(values, stream, mr)); } else { auto input = thrust::make_transform_iterator(values_view->begin(), thrust::identity{}); @@ -175,7 +175,7 @@ struct group_scan_functorset_null_mask(cudf::detail::copy_bitmask(values, stream), values.null_count()); + results->set_null_mask(cudf::detail::copy_bitmask(values, stream, mr), values.null_count()); return results; } }; diff --git a/cpp/src/groupby/sort/group_std.cu b/cpp/src/groupby/sort/group_std.cu index a3efc1f172a..8cd2d8baf4e 100644 --- a/cpp/src/groupby/sort/group_std.cu +++ b/cpp/src/groupby/sort/group_std.cu @@ -26,6 +26,7 @@ #include #include +#include #include #include @@ -33,6 +34,7 @@ #include #include #include +#include namespace cudf { namespace groupby { @@ -48,7 +50,7 @@ struct var_transform { size_type const* d_group_labels; size_type ddof; - __device__ ResultType operator()(size_type i) + __device__ ResultType operator()(size_type i) const { if (d_values.is_null(i)) return 0.0; @@ -75,15 +77,19 @@ void reduce_by_key_fn(column_device_view const& values, ResultType* d_result, rmm::cuda_stream_view stream) { - auto var_iter = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), - var_transform{ - values, values_iter, d_means, d_group_sizes, group_labels.data(), ddof}); + auto var_fn = var_transform{ + values, values_iter, d_means, d_group_sizes, group_labels.data(), ddof}; + auto const itr = thrust::make_counting_iterator(0); + // Using a temporary buffer for intermediate transform results instead of + // using the transform-iterator directly in thrust::reduce_by_key + // improves compile-time significantly. 
+ auto vars = rmm::device_uvector(values.size(), stream); + thrust::transform(rmm::exec_policy(stream), itr, itr + values.size(), vars.begin(), var_fn); thrust::reduce_by_key(rmm::exec_policy(stream), group_labels.begin(), group_labels.end(), - var_iter, + vars.begin(), thrust::make_discard_iterator(), d_result); } diff --git a/cpp/src/groupby/sort/scan.cpp b/cpp/src/groupby/sort/scan.cpp index 743ca5e8065..820dc8a3077 100644 --- a/cpp/src/groupby/sort/scan.cpp +++ b/cpp/src/groupby/sort/scan.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -69,7 +69,7 @@ struct scan_result_functor final : store_result_functor { if (grouped_values) return grouped_values->view(); else - return (grouped_values = helper.grouped_values(values, stream))->view(); + return (grouped_values = helper.grouped_values(values, stream, mr))->view(); }; }; @@ -129,8 +129,10 @@ void scan_result_functor::operator()(aggregation const& agg) auto const group_labels_view = column_view(cudf::device_span(group_labels)); auto const gather_map = [&]() { if (is_presorted()) { // assumes both keys and values are sorted, Spark does this. - return cudf::detail::sequence( - group_labels.size(), *cudf::make_fixed_width_scalar(size_type{0}, stream), stream); + return cudf::detail::sequence(group_labels.size(), + *cudf::make_fixed_width_scalar(size_type{0}, stream), + stream, + rmm::mr::get_current_device_resource()); } else { auto sort_order = (rank_agg._method == rank_method::FIRST ? 
cudf::detail::stable_sorted_order : cudf::detail::sorted_order); diff --git a/cpp/src/groupby/sort/sort_helper.cu b/cpp/src/groupby/sort/sort_helper.cu index ebafcd75e6d..5b5a6356d67 100644 --- a/cpp/src/groupby/sort/sort_helper.cu +++ b/cpp/src/groupby/sort/sort_helper.cu @@ -16,6 +16,8 @@ #include "common_utils.cuh" +#include + #include #include #include @@ -144,7 +146,8 @@ sort_groupby_helper::index_vector const& sort_groupby_helper::group_offsets( { if (_group_offsets) return *_group_offsets; - _group_offsets = std::make_unique(num_keys(stream) + 1, stream); + auto const size = num_keys(stream); + _group_offsets = std::make_unique(size + 1, stream); auto const comparator = cudf::experimental::row::equality::self_comparator{_keys, stream}; @@ -154,23 +157,33 @@ sort_groupby_helper::index_vector const& sort_groupby_helper::group_offsets( if (cudf::detail::has_nested_columns(_keys)) { auto const d_key_equal = comparator.equal_to( cudf::nullate::DYNAMIC{cudf::has_nested_nulls(_keys)}, null_equality::EQUAL); - result_end = thrust::unique_copy(rmm::exec_policy(stream), - thrust::counting_iterator(0), - thrust::counting_iterator(num_keys(stream)), - _group_offsets->begin(), - permuted_row_equality_comparator(d_key_equal, sorted_order)); + // Using a temporary buffer for intermediate transform results from the iterator containing + // the comparator speeds up compile-time significantly without much degradation in + // runtime performance over using the comparator directly in thrust::unique_copy. 
+ auto result = rmm::device_uvector(size, stream); + auto const itr = thrust::make_counting_iterator(0); + auto const row_eq = permuted_row_equality_comparator(d_key_equal, sorted_order); + auto const ufn = cudf::detail::unique_copy_fn{ + itr, duplicate_keep_option::KEEP_FIRST, row_eq, size - 1}; + thrust::transform(rmm::exec_policy(stream), itr, itr + size, result.begin(), ufn); + result_end = thrust::copy_if(rmm::exec_policy(stream), + itr, + itr + size, + result.begin(), + _group_offsets->begin(), + thrust::identity{}); } else { auto const d_key_equal = comparator.equal_to( cudf::nullate::DYNAMIC{cudf::has_nested_nulls(_keys)}, null_equality::EQUAL); result_end = thrust::unique_copy(rmm::exec_policy(stream), thrust::counting_iterator(0), - thrust::counting_iterator(num_keys(stream)), + thrust::counting_iterator(size), _group_offsets->begin(), permuted_row_equality_comparator(d_key_equal, sorted_order)); } size_type num_groups = thrust::distance(_group_offsets->begin(), result_end); - _group_offsets->set_element(num_groups, num_keys(stream), stream); + _group_offsets->set_element(num_groups, size, stream); _group_offsets->resize(num_groups + 1, stream); return *_group_offsets; @@ -223,7 +236,8 @@ column_view sort_groupby_helper::keys_bitmask_column(rmm::cuda_stream_view strea { if (_keys_bitmask_column) return _keys_bitmask_column->view(); - auto [row_bitmask, null_count] = cudf::detail::bitmask_and(_keys, stream); + auto [row_bitmask, null_count] = + cudf::detail::bitmask_and(_keys, stream, rmm::mr::get_current_device_resource()); _keys_bitmask_column = make_numeric_column( data_type(type_id::INT8), _keys.num_rows(), std::move(row_bitmask), null_count, stream); diff --git a/cpp/src/hash/unordered_multiset.cuh b/cpp/src/hash/unordered_multiset.cuh index c017fd43079..55036bec6a6 100644 --- a/cpp/src/hash/unordered_multiset.cuh +++ b/cpp/src/hash/unordered_multiset.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. 
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -84,10 +84,10 @@ class unordered_multiset { auto d_column = column_device_view::create(col, stream); auto d_col = *d_column; - auto hash_bins_start = - cudf::detail::make_zeroed_device_uvector_async(2 * d_col.size() + 1, stream); - auto hash_bins_end = - cudf::detail::make_zeroed_device_uvector_async(2 * d_col.size() + 1, stream); + auto hash_bins_start = cudf::detail::make_zeroed_device_uvector_async( + 2 * d_col.size() + 1, stream, rmm::mr::get_current_device_resource()); + auto hash_bins_end = cudf::detail::make_zeroed_device_uvector_async( + 2 * d_col.size() + 1, stream, rmm::mr::get_current_device_resource()); auto hash_data = rmm::device_uvector(d_col.size(), stream); Hasher hasher; diff --git a/cpp/src/interop/to_arrow.cu b/cpp/src/interop/to_arrow.cu index 861b5b0fba4..7f88019beb2 100644 --- a/cpp/src/interop/to_arrow.cu +++ b/cpp/src/interop/to_arrow.cu @@ -215,7 +215,7 @@ std::shared_ptr dispatch_to_arrow::operator()(column_view in arrow::MemoryPool* ar_mr, rmm::cuda_stream_view stream) { - auto bitmask = bools_to_mask(input, stream); + auto bitmask = bools_to_mask(input, stream, rmm::mr::get_current_device_resource()); auto data_buffer = allocate_arrow_buffer(static_cast(bitmask.first->size()), ar_mr); diff --git a/cpp/src/io/avro/avro.cpp b/cpp/src/io/avro/avro.cpp index 48c458109c1..aa0e36d9972 100644 --- a/cpp/src/io/avro/avro.cpp +++ b/cpp/src/io/avro/avro.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -124,9 +124,11 @@ bool container::parse(file_metadata* md, size_t max_num_rows, size_t first_row) md->total_data_size = m_cur - (m_base + md->metadata_size); // Extract columns for (size_t i = 0; i < md->schema.size(); i++) { - type_kind_e kind = md->schema[i].kind; - if (kind > type_null && kind < type_record) { - // Primitive type column + type_kind_e kind = md->schema[i].kind; + logicaltype_kind_e logical_kind = md->schema[i].logical_kind; + + bool is_supported_kind = ((kind > type_null) && (kind < type_record)); + if (is_supported_logical_type(logical_kind) || is_supported_kind) { column_desc col; int parent_idx = md->schema[i].parent_idx; col.schema_data_idx = (int32_t)i; @@ -141,7 +143,9 @@ bool container::parse(file_metadata* md, size_t max_num_rows, size_t first_row) --num_children) { int skip = 1; if (pos == i) { - col.parent_union_idx = md->schema[parent_idx].num_children - num_children; + // parent_idx will always be pointing to our immediate parent + // union at this point. + col.parent_union_idx = parent_idx; } else if (md->schema[pos].kind == type_null) { col.schema_null_idx = pos; break; @@ -152,7 +156,9 @@ bool container::parse(file_metadata* md, size_t max_num_rows, size_t first_row) } while (skip != 0); } } - // Ignore the root or array entries + // We want to "inherit" the column name from our parent union's + // name, as long as we're not dealing with the root (parent_idx == 0) + // or array entries. 
if ((parent_idx != 0 && md->schema[parent_idx].kind != type_array) || col.name.length() == 0) { if (col.name.length() > 0) { col.name.insert(0, 1, '.'); } @@ -179,13 +185,14 @@ enum json_state_e { state_nextsymbol, }; -enum { +enum attrtype_e { attrtype_none = -1, attrtype_type = 0, attrtype_name, attrtype_fields, attrtype_symbols, attrtype_items, + attrtype_logicaltype, }; /** @@ -205,26 +212,40 @@ bool schema_parser::parse(std::vector& schema, const std::string& int depth = 0, parent_idx = -1, entry_idx = -1; json_state_e state = state_attrname; std::string str; - const std::unordered_map typenames = {{"null", type_null}, - {"boolean", type_boolean}, - {"int", type_int}, - {"long", type_long}, - {"float", type_float}, - {"double", type_double}, - {"bytes", type_bytes}, - {"string", type_string}, - {"record", type_record}, - {"enum", type_enum}, - {"array", type_array}}; - const std::unordered_map attrnames = {{"type", attrtype_type}, - {"name", attrtype_name}, - {"fields", attrtype_fields}, - {"symbols", attrtype_symbols}, - {"items", attrtype_items}}; - int cur_attr = attrtype_none; - m_base = json_str.c_str(); - m_cur = m_base; - m_end = m_base + json_str.length(); + const std::unordered_map typenames = { + {"null", type_null}, + {"boolean", type_boolean}, + {"int", type_int}, + {"long", type_long}, + {"float", type_float}, + {"double", type_double}, + {"bytes", type_bytes}, + {"string", type_string}, + {"record", type_record}, + {"enum", type_enum}, + {"array", type_array}, + {"union", type_union}, + {"fixed", type_fixed}, + {"decimal", type_decimal}, + {"date", type_date}, + {"time-millis", type_time_millis}, + {"time-micros", type_time_micros}, + {"timestamp-millis", type_timestamp_millis}, + {"timestamp-micros", type_timestamp_micros}, + {"local-timestamp-millis", type_local_timestamp_millis}, + {"local-timestamp-micros", type_local_timestamp_micros}, + {"duration", type_duration}}; + const std::unordered_map attrnames = { + {"type", attrtype_type}, + 
{"name", attrtype_name}, + {"fields", attrtype_fields}, + {"symbols", attrtype_symbols}, + {"items", attrtype_items}, + {"logicalType", attrtype_logicaltype}}; + attrtype_e cur_attr = attrtype_none; + m_base = json_str.c_str(); + m_cur = m_base; + m_end = m_base + json_str.length(); while (more_data()) { int c = *m_cur++; switch (c) { @@ -250,6 +271,10 @@ bool schema_parser::parse(std::vector& schema, const std::string& auto t = typenames.find(str); if (t == typenames.end()) return false; schema[entry_idx].kind = t->second; + } else if (cur_attr == attrtype_logicaltype) { + auto t = typenames.find(str); + if (t == typenames.end()) return false; + schema[entry_idx].logical_kind = static_cast(t->second); } else if (cur_attr == attrtype_name) { if (entry_idx < 0) return false; schema[entry_idx].name = std::move(str); diff --git a/cpp/src/io/avro/avro.hpp b/cpp/src/io/avro/avro.hpp index 1ca50f04d18..ef294893e4b 100644 --- a/cpp/src/io/avro/avro.hpp +++ b/cpp/src/io/avro/avro.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -42,7 +42,8 @@ struct schema_entry { int32_t parent_idx = -1; // index of parent entry in schema array, negative if no parent int32_t num_children = 0; type_kind_e kind = type_not_set; - std::string name = ""; + logicaltype_kind_e logical_kind = logicaltype_not_set; + std::string name = ""; std::vector symbols; }; diff --git a/cpp/src/io/avro/avro_common.hpp b/cpp/src/io/avro/avro_common.hpp index 229ffa5da04..a3025650ae9 100644 --- a/cpp/src/io/avro/avro_common.hpp +++ b/cpp/src/io/avro/avro_common.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -56,8 +56,75 @@ enum type_kind_e { type_record, type_union, type_array, + type_fixed, + // Logical types + type_decimal, + type_uuid, + type_date, + type_time_millis, + type_time_micros, + type_timestamp_millis, + type_timestamp_micros, + type_local_timestamp_millis, + type_local_timestamp_micros, + type_duration, }; +enum logicaltype_kind_e { + logicaltype_not_set = 0, + // N.B. We intentionally mirror the logicaltype enum values with their + // equivalent type enum value, as this allows us to cast the type + // value directly to a logical type without an intermediate + // mapping step, and vice versa, e.g.: + // + // auto kind = type_date; + // auto logical_kind = static_cast(type_date); + // // logical_kind == logicaltype_kind_e::logicaltype_date + // + // And: + // + // auto logical_kind = logicaltype_date; + // auto kind = static_cast(logical_kind); + // // kind == type_kind_e::type_date + // + logicaltype_decimal = type_decimal, + logicaltype_uuid, + logicaltype_date, + logicaltype_time_millis, + logicaltype_time_micros, + logicaltype_timestamp_millis, + logicaltype_timestamp_micros, + logicaltype_local_timestamp_millis, + logicaltype_local_timestamp_micros, + logicaltype_duration, +}; + +/** + * @brief Determines if the supplied logical type is currently supported. + * + * @param[in] logical_kind Supplies the logicaltype_kind_e enum value. + * + * @return true if the logical type is supported, false otherwise. 
+ */ +inline constexpr bool is_supported_logical_type(logicaltype_kind_e logical_kind) +{ + switch (logical_kind) { + case logicaltype_date: return true; + + case logicaltype_not_set: [[fallthrough]]; + case logicaltype_decimal: [[fallthrough]]; + case logicaltype_uuid: [[fallthrough]]; + case logicaltype_time_millis: [[fallthrough]]; + case logicaltype_time_micros: [[fallthrough]]; + case logicaltype_timestamp_millis: [[fallthrough]]; + case logicaltype_timestamp_micros: [[fallthrough]]; + case logicaltype_local_timestamp_millis: [[fallthrough]]; + case logicaltype_local_timestamp_micros: [[fallthrough]]; + case logicaltype_duration: [[fallthrough]]; + default: return false; + } +} + using cudf::io::detail::string_index_pair; } // namespace avro diff --git a/cpp/src/io/avro/avro_gpu.cu b/cpp/src/io/avro/avro_gpu.cu index 03edb7ed6cb..64c572424e0 100644 --- a/cpp/src/io/avro/avro_gpu.cu +++ b/cpp/src/io/avro/avro_gpu.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -78,8 +78,11 @@ avro_decode_row(schemadesc_s const* schema, uint32_t array_start = 0, array_repeat_count = 0; int array_children = 0; for (uint32_t i = 0; i < schema_len;) { - uint32_t kind = schema[i].kind; - int skip = 0; + type_kind_e kind = schema[i].kind; + logicaltype_kind_e logical_kind = schema[i].logical_kind; + int skip = 0; + + if (is_supported_logical_type(logical_kind)) { kind = static_cast(logical_kind); } if (kind == type_union) { int skip_after; @@ -93,7 +96,11 @@ avro_decode_row(schemadesc_s const* schema, --skip; } if (i >= schema_len || skip_after < 0) break; - kind = schema[i].kind; + kind = schema[i].kind; + logical_kind = schema[i].logical_kind; + if (is_supported_logical_type(logical_kind)) { + kind = static_cast(logical_kind); + } skip = skip_after; } @@ -106,36 +113,38 @@ avro_decode_row(schemadesc_s const* schema, } break; - case type_int: - case type_long: - case type_bytes: - case type_string: - case type_enum: { + case type_int: { int64_t v = avro_decode_zigzag_varint(cur, end); - if (kind == type_int) { - if (dataptr != nullptr && row < max_rows) { - static_cast(dataptr)[row] = static_cast(v); - } - } else if (kind == type_long) { - if (dataptr != nullptr && row < max_rows) { static_cast(dataptr)[row] = v; } - } else { // string or enum - size_t count = 0; - const char* ptr = nullptr; - if (kind == type_enum) { // dictionary - size_t idx = schema[i].count + v; - if (idx < global_dictionary.size()) { - ptr = global_dictionary[idx].first; - count = global_dictionary[idx].second; - } - } else if (v >= 0 && cur + v <= end) { // string - ptr = reinterpret_cast(cur); - count = (size_t)v; - cur += count; - } - if (dataptr != nullptr && row < max_rows) { - static_cast(dataptr)[row].first = ptr; - static_cast(dataptr)[row].second = count; + if (dataptr != nullptr && row < max_rows) { + static_cast(dataptr)[row] = static_cast(v); + } + } break; + + case type_long: { + int64_t v = avro_decode_zigzag_varint(cur, end); + if (dataptr != nullptr 
&& row < max_rows) { static_cast(dataptr)[row] = v; } + } break; + + case type_bytes: [[fallthrough]]; + case type_string: [[fallthrough]]; + case type_enum: { + int64_t v = avro_decode_zigzag_varint(cur, end); + size_t count = 0; + const char* ptr = nullptr; + if (kind == type_enum) { // dictionary + size_t idx = schema[i].count + v; + if (idx < global_dictionary.size()) { + ptr = global_dictionary[idx].first; + count = global_dictionary[idx].second; } + } else if (v >= 0 && cur + v <= end) { // string or bytes + ptr = reinterpret_cast(cur); + count = (size_t)v; + cur += count; + } + if (dataptr != nullptr && row < max_rows) { + static_cast(dataptr)[row].first = ptr; + static_cast(dataptr)[row].second = count; } } break; @@ -190,7 +199,48 @@ avro_decode_row(schemadesc_s const* schema, skip += schema[i].count; // Should always be 1 } } break; + + case type_duration: { + // A duration logical type annotates Avro fixed type of size 12, which + // stores three little-endian unsigned integers that represent durations + // at different granularities of time. The first stores a number in + // months, the second stores a number in days, and the third stores a + // number in milliseconds. + CUDF_UNREACHABLE("avro type 'duration' not yet implemented"); + } break; + + // N.B. These aren't handled yet, see the discussion on + // https://github.com/rapidsai/cudf/pull/12788. The decoding logic + // is correct, though, so there's no harm in having them here. + case type_timestamp_millis: [[fallthrough]]; + case type_timestamp_micros: [[fallthrough]]; + case type_local_timestamp_millis: [[fallthrough]]; + case type_local_timestamp_micros: [[fallthrough]]; + case type_time_millis: [[fallthrough]]; + case type_time_micros: { + // N.B. time-millis is stored as a 32-bit int, however, cudf expects an + // int64 for DURATION_MILLISECONDS. 
From our perspective, the fact + // that time-millis comes from a 32-bit int is hidden from us by + // way of the zig-zag varint encoding, so we can safely treat them + // both as int64_t. Everything else is 64-bit in both avro and + // cudf. + CUDF_UNREACHABLE("avro time/timestamp types not yet implemented"); + // + // When we do implement these, the following decoding logic should + // be correct: + // + // int64_t v = avro_decode_zigzag_varint(cur, end); + // if (dataptr != nullptr && row < max_rows) { static_cast(dataptr)[row] = v; } + } break; + + case type_date: { + int64_t v = avro_decode_zigzag_varint(cur, end); + if (dataptr != nullptr && row < max_rows) { + static_cast(dataptr)[row] = static_cast(v); + } + } break; } + if (array_repeat_count != 0) { array_children--; if (schema[i].kind >= type_record) { array_children += schema[i].count; } diff --git a/cpp/src/io/avro/avro_gpu.hpp b/cpp/src/io/avro/avro_gpu.hpp index 7bfb3a75250..6575d76d8d9 100644 --- a/cpp/src/io/avro/avro_gpu.hpp +++ b/cpp/src/io/avro/avro_gpu.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -30,7 +30,8 @@ namespace gpu { * @brief Struct to describe the avro schema */ struct schemadesc_s { - uint32_t kind; // avro type kind + cudf::io::avro::type_kind_e kind; // avro type kind + cudf::io::avro::logicaltype_kind_e logical_kind; // avro logicaltype kind uint32_t count; // for records/unions: number of following child columns, for nulls: global // null_count, for enums: dictionary ofs void* dataptr; // Ptr to column data, or null if column not selected diff --git a/cpp/src/io/avro/reader_impl.cu b/cpp/src/io/avro/reader_impl.cu index d9da2f083d1..60a1b4263b2 100644 --- a/cpp/src/io/avro/reader_impl.cu +++ b/cpp/src/io/avro/reader_impl.cu @@ -66,15 +66,42 @@ namespace { */ type_id to_type_id(avro::schema_entry const* col) { - switch (col->kind) { + avro::type_kind_e kind; + + // N.B. The switch statement seems a bit ridiculous for a single type, but the + // plan is to incrementally add more types to it as support is added for + // them in the future. + switch (col->logical_kind) { + case avro::logicaltype_date: kind = static_cast(col->logical_kind); break; + case avro::logicaltype_not_set: [[fallthrough]]; + default: kind = col->kind; break; + } + + switch (kind) { case avro::type_boolean: return type_id::BOOL8; case avro::type_int: return type_id::INT32; case avro::type_long: return type_id::INT64; case avro::type_float: return type_id::FLOAT32; case avro::type_double: return type_id::FLOAT64; - case avro::type_bytes: + case avro::type_bytes: [[fallthrough]]; case avro::type_string: return type_id::STRING; + case avro::type_date: return type_id::TIMESTAMP_DAYS; + case avro::type_timestamp_millis: return type_id::TIMESTAMP_MILLISECONDS; + case avro::type_timestamp_micros: return type_id::TIMESTAMP_MICROSECONDS; + case avro::type_local_timestamp_millis: return type_id::TIMESTAMP_MILLISECONDS; + case avro::type_local_timestamp_micros: return type_id::TIMESTAMP_MICROSECONDS; case avro::type_enum: return (!col->symbols.empty()) ? 
type_id::STRING : type_id::INT32; + // The avro time-millis and time-micros types are closest to Arrow's + // TIME32 and TIME64. They're single-day units, i.e. they won't exceed + // 23:59:59.9999 (or .999999 for micros). There's no equivalent cudf + // type for this; type_id::DURATION_MILLISECONDS/MICROSECONDS are close, + // but they're not semantically the same. + case avro::type_time_millis: [[fallthrough]]; + case avro::type_time_micros: [[fallthrough]]; + // There's no cudf equivalent for the avro duration type, which is a fixed + // 12 byte value which stores three little-endian unsigned 32-bit integers + // representing months, days, and milliseconds, respectively. + case avro::type_duration: [[fallthrough]]; default: return type_id::EMPTY; } } @@ -141,6 +168,7 @@ class metadata : public file_metadata { break; } } + if (!column_in_array) { auto col_type = to_type_id(&schema[columns[i].schema_data_idx]); CUDF_EXPECTS(col_type != type_id::EMPTY, "Unsupported data type"); @@ -360,7 +388,9 @@ std::vector decode_data(metadata& meta, int skip_field_cnt = 0; for (size_t i = 0; i < meta.schema.size(); i++) { - type_kind_e kind = meta.schema[i].kind; + type_kind_e kind = meta.schema[i].kind; + logicaltype_kind_e logical_kind = meta.schema[i].logical_kind; + if (skip_field_cnt != 0) { // Exclude union and array members from min_row_data_size skip_field_cnt += meta.schema[i].num_children - 1; @@ -382,7 +412,8 @@ std::vector decode_data(metadata& meta, } } if (kind == type_enum && !meta.schema[i].symbols.size()) { kind = type_int; } - schema_desc[i].kind = kind; + schema_desc[i].kind = kind; + schema_desc[i].logical_kind = logical_kind; schema_desc[i].count = (kind == type_enum) ? 
0 : static_cast(meta.schema[i].num_children); schema_desc[i].dataptr = nullptr; @@ -413,7 +444,8 @@ std::vector decode_data(metadata& meta, } } - auto block_list = cudf::detail::make_device_uvector_async(meta.block_list, stream); + auto block_list = cudf::detail::make_device_uvector_async( + meta.block_list, stream, rmm::mr::get_current_device_resource()); schema_desc.host_to_device(stream); @@ -543,8 +575,10 @@ table_with_metadata read_avro(std::unique_ptr&& source, } } - d_global_dict = cudf::detail::make_device_uvector_async(h_global_dict, stream); - d_global_dict_data = cudf::detail::make_device_uvector_async(h_global_dict_data, stream); + d_global_dict = cudf::detail::make_device_uvector_async( + h_global_dict, stream, rmm::mr::get_current_device_resource()); + d_global_dict_data = cudf::detail::make_device_uvector_async( + h_global_dict_data, stream, rmm::mr::get_current_device_resource()); stream.synchronize(); } diff --git a/cpp/src/io/comp/uncomp.cpp b/cpp/src/io/comp/uncomp.cpp index 6778ddead28..008c7215cca 100644 --- a/cpp/src/io/comp/uncomp.cpp +++ b/cpp/src/io/comp/uncomp.cpp @@ -509,9 +509,10 @@ size_t decompress_zstd(host_span src, rmm::cuda_stream_view stream) { // Init device span of spans (source) - auto const d_src = cudf::detail::make_device_uvector_async(src, stream); - auto hd_srcs = hostdevice_vector>(1, stream); - hd_srcs[0] = d_src; + auto const d_src = + cudf::detail::make_device_uvector_async(src, stream, rmm::mr::get_current_device_resource()); + auto hd_srcs = hostdevice_vector>(1, stream); + hd_srcs[0] = d_src; hd_srcs.host_to_device(stream); // Init device span of spans (temporary destination) diff --git a/cpp/src/io/csv/csv_gpu.cu b/cpp/src/io/csv/csv_gpu.cu index 4f6f8162246..51e3783bac5 100644 --- a/cpp/src/io/csv/csv_gpu.cu +++ b/cpp/src/io/csv/csv_gpu.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -794,8 +794,8 @@ std::vector detect_column_types( const int block_size = csvparse_block_dim; const int grid_size = (row_starts.size() + block_size - 1) / block_size; - auto d_stats = - detail::make_zeroed_device_uvector_async(num_active_columns, stream); + auto d_stats = detail::make_zeroed_device_uvector_async( + num_active_columns, stream, rmm::mr::get_current_device_resource()); data_type_detection<<>>( options, data, column_flags, row_starts, d_stats); diff --git a/cpp/src/io/csv/durations.hpp b/cpp/src/io/csv/durations.hpp index d42ddf3817c..ac925011c58 100644 --- a/cpp/src/io/csv/durations.hpp +++ b/cpp/src/io/csv/durations.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -28,10 +28,9 @@ namespace io { namespace detail { namespace csv { -std::unique_ptr pandas_format_durations( - column_view const& durations, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr pandas_format_durations(column_view const& durations, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace csv } // namespace detail diff --git a/cpp/src/io/csv/reader_impl.cu b/cpp/src/io/csv/reader_impl.cu index 2e38ea7f4ab..9c1ff67d97c 100644 --- a/cpp/src/io/csv/reader_impl.cu +++ b/cpp/src/io/csv/reader_impl.cu @@ -523,13 +523,13 @@ void infer_column_types(parse_options const& parse_opts, }); if (num_inferred_columns == 0) { return; } - auto const column_stats = - cudf::io::csv::gpu::detect_column_types(parse_opts.view(), - data, - make_device_uvector_async(column_flags, stream), - row_offsets, - num_inferred_columns, - stream); + auto const column_stats = cudf::io::csv::gpu::detect_column_types( + parse_opts.view(), + data, + make_device_uvector_async(column_flags, stream, rmm::mr::get_current_device_resource()), + row_offsets, + num_inferred_columns, + stream); stream.synchronize(); auto inf_col_idx = 0; @@ -595,14 +595,15 @@ std::vector decode_data(parse_options const& parse_opts, h_valid[i] = out_buffers[i].null_mask(); } - cudf::io::csv::gpu::decode_row_column_data(parse_opts.view(), - data, - make_device_uvector_async(column_flags, stream), - row_offsets, - make_device_uvector_async(column_types, stream), - make_device_uvector_async(h_data, stream), - make_device_uvector_async(h_valid, stream), - stream); + cudf::io::csv::gpu::decode_row_column_data( + parse_opts.view(), + data, + make_device_uvector_async(column_flags, stream, rmm::mr::get_current_device_resource()), + row_offsets, + make_device_uvector_async(column_types, stream, rmm::mr::get_current_device_resource()), + make_device_uvector_async(h_data, stream, 
rmm::mr::get_current_device_resource()), + make_device_uvector_async(h_valid, stream, rmm::mr::get_current_device_resource()), + stream); return out_buffers; } diff --git a/cpp/src/io/json/experimental/read_json.cpp b/cpp/src/io/json/experimental/read_json.cpp index 70a0b66ebc6..c18b15708ab 100644 --- a/cpp/src/io/json/experimental/read_json.cpp +++ b/cpp/src/io/json/experimental/read_json.cpp @@ -80,7 +80,8 @@ rmm::device_uvector ingest_raw_input(host_span auto const uncomp_data = decompress(compression, buffer); return cudf::detail::make_device_uvector_sync( host_span{reinterpret_cast(uncomp_data.data()), uncomp_data.size()}, - stream); + stream, + rmm::mr::get_current_device_resource()); } } diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu index 16273b35a11..c937315969c 100644 --- a/cpp/src/io/json/json_column.cu +++ b/cpp/src/io/json/json_column.cu @@ -602,8 +602,10 @@ void make_device_json_column(device_span input, col.validity.data()}; } - auto d_ignore_vals = cudf::detail::make_device_uvector_async(ignore_vals, stream); - auto d_columns_data = cudf::detail::make_device_uvector_async(columns_data, stream); + auto d_ignore_vals = cudf::detail::make_device_uvector_async( + ignore_vals, stream, rmm::mr::get_current_device_resource()); + auto d_columns_data = cudf::detail::make_device_uvector_async( + columns_data, stream, rmm::mr::get_current_device_resource()); // 3. 
scatter string offsets to respective columns, set validity bits thrust::for_each_n( @@ -891,9 +893,11 @@ table_with_metadata device_parse_nested_json(device_span d_input, auto gpu_tree = [&]() { // Parse the JSON and get the token stream - const auto [tokens_gpu, token_indices_gpu] = get_token_stream(d_input, options, stream); + const auto [tokens_gpu, token_indices_gpu] = + get_token_stream(d_input, options, stream, rmm::mr::get_current_device_resource()); // gpu tree generation - return get_tree_representation(tokens_gpu, token_indices_gpu, stream); + return get_tree_representation( + tokens_gpu, token_indices_gpu, stream, rmm::mr::get_current_device_resource()); }(); // IILE used to free memory of token data. #ifdef NJP_DEBUG_PRINT auto h_input = cudf::detail::make_host_vector_async(d_input, stream); @@ -913,8 +917,13 @@ table_with_metadata device_parse_nested_json(device_span d_input, return h_node_categories[0] == NC_LIST and h_node_categories[1] == NC_LIST; }(); - auto [gpu_col_id, gpu_row_offsets] = records_orient_tree_traversal( - d_input, gpu_tree, is_array_of_arrays, options.is_enabled_lines(), stream); + auto [gpu_col_id, gpu_row_offsets] = + records_orient_tree_traversal(d_input, + gpu_tree, + is_array_of_arrays, + options.is_enabled_lines(), + stream, + rmm::mr::get_current_device_resource()); device_json_column root_column(stream, mr); root_column.type = json_col_t::ListColumn; diff --git a/cpp/src/io/json/json_gpu.cu b/cpp/src/io/json/json_gpu.cu index 8b6c0f9d528..d1711db0484 100644 --- a/cpp/src/io/json/json_gpu.cu +++ b/cpp/src/io/json/json_gpu.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -578,7 +578,7 @@ std::vector detect_data_types( return d_column_infos; } else { return cudf::detail::make_zeroed_device_uvector_async( - num_columns, stream); + num_columns, stream, rmm::mr::get_current_device_resource()); } }(); diff --git a/cpp/src/io/json/nested_json.hpp b/cpp/src/io/json/nested_json.hpp index 409786d5f1d..f44b7d1ddcc 100644 --- a/cpp/src/io/json/nested_json.hpp +++ b/cpp/src/io/json/nested_json.hpp @@ -191,11 +191,10 @@ void get_stack_context(device_span json_in, * @return A tree representation of the input JSON string as vectors of node type, parent index, * level, begin index, and end index in the input JSON string */ -tree_meta_t get_tree_representation( - device_span tokens, - device_span token_indices, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +tree_meta_t get_tree_representation(device_span tokens, + device_span token_indices, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Traverse the tree representation of the JSON input in records orient format and populate @@ -211,13 +210,12 @@ tree_meta_t get_tree_representation( * @return A tuple of the output column indices and the row offsets within each column for each node */ std::tuple, rmm::device_uvector> -records_orient_tree_traversal( - device_span d_input, - tree_meta_t const& d_tree, - bool is_array_of_arrays, - bool is_enabled_lines, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +records_orient_tree_traversal(device_span d_input, + tree_meta_t const& d_tree, + bool is_array_of_arrays, + bool is_enabled_lines, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Searches for and selects nodes at level `row_array_children_level`. For each selected @@ -258,11 +256,10 @@ reduce_to_column_tree(tree_meta_t& tree, * All processing is done in device memory. 
* */ -table_with_metadata device_parse_nested_json( - device_span input, - cudf::io::json_reader_options const& options, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +table_with_metadata device_parse_nested_json(device_span input, + cudf::io::json_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Parses the given JSON string and generates table from the given input. @@ -273,11 +270,10 @@ table_with_metadata device_parse_nested_json( * @param mr Optional, resource with which to allocate * @return The data parsed from the given JSON input */ -table_with_metadata host_parse_nested_json( - device_span input, - cudf::io::json_reader_options const& options, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +table_with_metadata host_parse_nested_json(device_span input, + cudf::io::json_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail diff --git a/cpp/src/io/json/nested_json_gpu.cu b/cpp/src/io/json/nested_json_gpu.cu index fb58b48d68d..77749b42781 100644 --- a/cpp/src/io/json/nested_json_gpu.cu +++ b/cpp/src/io/json/nested_json_gpu.cu @@ -1169,7 +1169,7 @@ void make_json_column(json_column& root_column, cudf::io::json_reader_options const& options, bool include_quote_char, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + rmm::mr::device_memory_resource* mr) { // Range of encapsulating function that parses to internal columnar data representation CUDF_FUNC_RANGE(); @@ -1597,9 +1597,11 @@ std::pair, std::vector> json_column_to // Move string_offsets and string_lengths to GPU rmm::device_uvector d_string_offsets = - cudf::detail::make_device_uvector_async(json_col.string_offsets, stream); + cudf::detail::make_device_uvector_async( + 
json_col.string_offsets, stream, rmm::mr::get_current_device_resource()); rmm::device_uvector d_string_lengths = - cudf::detail::make_device_uvector_async(json_col.string_lengths, stream); + cudf::detail::make_device_uvector_async( + json_col.string_lengths, stream, rmm::mr::get_current_device_resource()); // Prepare iterator that returns (string_offset, string_length)-tuples auto offset_length_it = diff --git a/cpp/src/io/json/reader_impl.cu b/cpp/src/io/json/reader_impl.cu index 6e1089796de..7ae8deb8055 100644 --- a/cpp/src/io/json/reader_impl.cu +++ b/cpp/src/io/json/reader_impl.cu @@ -340,8 +340,8 @@ rmm::device_uvector upload_data_to_device(json_reader_options const& reade "Error finding the record within the specified byte range.\n"); // Upload the raw data that is within the rows of interest - return cudf::detail::make_device_uvector_async(h_data.subspan(start_offset, bytes_to_upload), - stream); + return cudf::detail::make_device_uvector_async( + h_data.subspan(start_offset, bytes_to_upload), stream, rmm::mr::get_current_device_resource()); } std::pair, col_map_ptr_type> get_column_names_and_map( @@ -512,11 +512,14 @@ table_with_metadata convert_data_to_table(parse_options_view const& parse_opts, h_valid[i] = out_buffers[i].null_mask(); } - auto d_dtypes = cudf::detail::make_device_uvector_async(h_dtypes, stream); - auto d_data = cudf::detail::make_device_uvector_async(h_data, stream); - auto d_valid = cudf::detail::make_device_uvector_async(h_valid, stream); - auto d_valid_counts = - cudf::detail::make_zeroed_device_uvector_async(num_columns, stream); + auto d_dtypes = cudf::detail::make_device_uvector_async( + h_dtypes, stream, rmm::mr::get_current_device_resource()); + auto d_data = cudf::detail::make_device_uvector_async( + h_data, stream, rmm::mr::get_current_device_resource()); + auto d_valid = cudf::detail::make_device_uvector_async( + h_valid, stream, rmm::mr::get_current_device_resource()); + auto d_valid_counts = 
cudf::detail::make_zeroed_device_uvector_async( + num_columns, stream, rmm::mr::get_current_device_resource()); cudf::io::json::gpu::convert_json_to_columns( parse_opts, data, rec_starts, d_dtypes, column_map, d_data, d_valid, d_valid_counts, stream); @@ -530,13 +533,18 @@ table_with_metadata convert_data_to_table(parse_options_view const& parse_opts, auto repl_chars = std::vector{'"', '\\', '\t', '\r', '\b'}; auto repl_offsets = std::vector{0, 1, 2, 3, 4, 5}; - auto target = make_strings_column(cudf::detail::make_device_uvector_async(target_chars, stream), - cudf::detail::make_device_uvector_async(target_offsets, stream), - {}, - 0, - stream); - auto repl = make_strings_column(cudf::detail::make_device_uvector_async(repl_chars, stream), - cudf::detail::make_device_uvector_async(repl_offsets, stream), + auto target = + make_strings_column(cudf::detail::make_device_uvector_async( + target_chars, stream, rmm::mr::get_current_device_resource()), + cudf::detail::make_device_uvector_async( + target_offsets, stream, rmm::mr::get_current_device_resource()), + {}, + 0, + stream); + auto repl = make_strings_column(cudf::detail::make_device_uvector_async( + repl_chars, stream, rmm::mr::get_current_device_resource()), + cudf::detail::make_device_uvector_async( + repl_offsets, stream, rmm::mr::get_current_device_resource()), {}, 0, stream); @@ -617,7 +625,8 @@ table_with_metadata read_json(std::vector>& sources, auto d_data = rmm::device_uvector(0, stream); if (should_load_whole_source(reader_opts)) { - d_data = cudf::detail::make_device_uvector_async(h_data, stream); + d_data = cudf::detail::make_device_uvector_async( + h_data, stream, rmm::mr::get_current_device_resource()); } auto rec_starts = find_record_starts(reader_opts, h_data, d_data, stream); diff --git a/cpp/src/io/json/write_json.cu b/cpp/src/io/json/write_json.cu index b4bcb5548de..9e56b20114c 100644 --- a/cpp/src/io/json/write_json.cu +++ b/cpp/src/io/json/write_json.cu @@ -552,14 +552,16 @@ std::unique_ptr 
make_strings_column_from_host(host_span offsets(host_strings.size() + 1, 0); std::transform_inclusive_scan(host_strings.begin(), host_strings.end(), offsets.begin() + 1, std::plus{}, [](auto& str) { return str.size(); }); - auto d_offsets = cudf::detail::make_device_uvector_sync(offsets, stream); + auto d_offsets = + cudf::detail::make_device_uvector_sync(offsets, stream, rmm::mr::get_current_device_resource()); return cudf::make_strings_column( host_strings.size(), std::move(d_offsets), std::move(d_chars), {}, 0); } diff --git a/cpp/src/io/orc/orc.cpp b/cpp/src/io/orc/orc.cpp index 880990c552f..5445e59297c 100644 --- a/cpp/src/io/orc/orc.cpp +++ b/cpp/src/io/orc/orc.cpp @@ -28,7 +28,7 @@ namespace cudf { namespace io { namespace orc { -uint32_t ProtobufReader::read_field_size(const uint8_t* end) +uint32_t ProtobufReader::read_field_size(uint8_t const* end) { auto const size = get(); CUDF_EXPECTS(size <= static_cast(end - m_cur), "Protobuf parsing out of bounds"); @@ -213,8 +213,7 @@ void ProtobufWriter::put_row_index_entry(int32_t present_blk, TypeKind kind, ColStatsBlob const* stats) { - std::vector positions_data; - ProtobufWriter position_writer(&positions_data); + ProtobufWriter position_writer; auto const positions_size_offset = position_writer.put_uint( encode_field_number(1, ProtofType::FIXEDLEN)); // 1:positions[packed=true] position_writer.put_byte(0xcd); // positions size placeholder @@ -246,19 +245,20 @@ void ProtobufWriter::put_row_index_entry(int32_t present_blk, positions_size += position_writer.put_byte(0); } } + // size of the field 1 - positions_data[positions_size_offset] = static_cast(positions_size); + position_writer.buffer()[positions_size_offset] = static_cast(positions_size); auto const stats_size = (stats == nullptr) ? 
0 : varint_size(encode_field_number(2)) + varint_size(stats->size()) + stats->size(); - auto const entry_size = positions_data.size() + stats_size; + auto const entry_size = position_writer.size() + stats_size; // 1:RowIndex.entry put_uint(encode_field_number(1, ProtofType::FIXEDLEN)); put_uint(entry_size); - put_bytes(positions_data); + put_bytes(position_writer.buffer()); if (stats != nullptr) { put_uint(encode_field_number(2)); // 2: statistics @@ -268,7 +268,7 @@ void ProtobufWriter::put_row_index_entry(int32_t present_blk, } } -size_t ProtobufWriter::write(const PostScript& s) +size_t ProtobufWriter::write(PostScript const& s) { ProtobufFieldWriter w(this); w.field_uint(1, s.footerLength); @@ -280,7 +280,7 @@ size_t ProtobufWriter::write(const PostScript& s) return w.value(); } -size_t ProtobufWriter::write(const FileFooter& s) +size_t ProtobufWriter::write(FileFooter const& s) { ProtobufFieldWriter w(this); w.field_uint(1, s.headerLength); @@ -294,7 +294,7 @@ size_t ProtobufWriter::write(const FileFooter& s) return w.value(); } -size_t ProtobufWriter::write(const StripeInformation& s) +size_t ProtobufWriter::write(StripeInformation const& s) { ProtobufFieldWriter w(this); w.field_uint(1, s.offset); @@ -305,7 +305,7 @@ size_t ProtobufWriter::write(const StripeInformation& s) return w.value(); } -size_t ProtobufWriter::write(const SchemaType& s) +size_t ProtobufWriter::write(SchemaType const& s) { ProtobufFieldWriter w(this); w.field_uint(1, s.kind); @@ -317,7 +317,7 @@ size_t ProtobufWriter::write(const SchemaType& s) return w.value(); } -size_t ProtobufWriter::write(const UserMetadataItem& s) +size_t ProtobufWriter::write(UserMetadataItem const& s) { ProtobufFieldWriter w(this); w.field_blob(1, s.name); @@ -325,7 +325,7 @@ size_t ProtobufWriter::write(const UserMetadataItem& s) return w.value(); } -size_t ProtobufWriter::write(const StripeFooter& s) +size_t ProtobufWriter::write(StripeFooter const& s) { ProtobufFieldWriter w(this); w.field_repeated_struct(1, 
s.streams); @@ -334,7 +334,7 @@ size_t ProtobufWriter::write(const StripeFooter& s) return w.value(); } -size_t ProtobufWriter::write(const Stream& s) +size_t ProtobufWriter::write(Stream const& s) { ProtobufFieldWriter w(this); w.field_uint(1, s.kind); @@ -343,7 +343,7 @@ size_t ProtobufWriter::write(const Stream& s) return w.value(); } -size_t ProtobufWriter::write(const ColumnEncoding& s) +size_t ProtobufWriter::write(ColumnEncoding const& s) { ProtobufFieldWriter w(this); w.field_uint(1, s.kind); @@ -351,14 +351,14 @@ size_t ProtobufWriter::write(const ColumnEncoding& s) return w.value(); } -size_t ProtobufWriter::write(const StripeStatistics& s) +size_t ProtobufWriter::write(StripeStatistics const& s) { ProtobufFieldWriter w(this); w.field_repeated_struct_blob(1, s.colStats); return w.value(); } -size_t ProtobufWriter::write(const Metadata& s) +size_t ProtobufWriter::write(Metadata const& s) { ProtobufFieldWriter w(this); w.field_repeated_struct(1, s.stripeStats); @@ -443,13 +443,13 @@ host_span OrcDecompressor::decompress_blocks(host_spansize(); - const auto max_ps_size = std::min(len, static_cast(256)); + auto const len = source->size(); + auto const max_ps_size = std::min(len, static_cast(256)); // Read uncompressed postscript section (max 255 bytes + 1 byte for length) auto buffer = source->host_read(len - max_ps_size, max_ps_size); - const size_t ps_length = buffer->data()[max_ps_size - 1]; - const uint8_t* ps_data = &buffer->data()[max_ps_size - ps_length - 1]; + size_t const ps_length = buffer->data()[max_ps_size - 1]; + uint8_t const* ps_data = &buffer->data()[max_ps_size - ps_length - 1]; ProtobufReader(ps_data, ps_length).read(ps); CUDF_EXPECTS(ps.footerLength + ps_length < len, "Invalid footer length"); diff --git a/cpp/src/io/orc/orc.hpp b/cpp/src/io/orc/orc.hpp index 44882b71925..21fc04a69ec 100644 --- a/cpp/src/io/orc/orc.hpp +++ b/cpp/src/io/orc/orc.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. 
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -38,6 +38,8 @@ namespace io { namespace orc { static constexpr uint32_t block_header_size = 3; +// Seconds from January 1st, 1970 to January 1st, 2015 +static constexpr int64_t orc_utc_epoch = 1420070400; struct PostScript { uint64_t footerLength = 0; // the length of the footer section in bytes @@ -196,7 +198,7 @@ int constexpr encode_field_number(int field_number) noexcept */ class ProtobufReader { public: - ProtobufReader(const uint8_t* base, size_t len) : m_base(base), m_cur(base), m_end(base + len) {} + ProtobufReader(uint8_t const* base, size_t len) : m_base(base), m_cur(base), m_end(base + len) {} template void read(T& s) @@ -241,40 +243,40 @@ class ProtobufReader { template void function_builder(T& s, size_t maxlen, std::tuple& op); - uint32_t read_field_size(const uint8_t* end); + uint32_t read_field_size(uint8_t const* end); template >* = nullptr> - void read_field(T& value, const uint8_t* end) + void read_field(T& value, uint8_t const* end) { value = get(); } template >* = nullptr> - void read_field(T& value, const uint8_t* end) + void read_field(T& value, uint8_t const* end) { value = static_cast(get()); } template >* = nullptr> - void read_field(T& value, const uint8_t* end) + void read_field(T& value, uint8_t const* end) { auto const size = read_field_size(end); - value.assign(reinterpret_cast(m_cur), size); + value.assign(reinterpret_cast(m_cur), size); m_cur += size; } template >>* = nullptr> - void read_field(T& value, const uint8_t* end) + void read_field(T& value, uint8_t const* end) { auto const size = read_field_size(end); - value.emplace_back(reinterpret_cast(m_cur), size); + value.emplace_back(reinterpret_cast(m_cur), size); m_cur += size; } template > and !std::is_same_v>* = nullptr> - void read_field(T& value, const uint8_t* end) + void read_field(T& value, 
uint8_t const* end) { auto const size = read_field_size(end); value.emplace_back(); @@ -283,7 +285,7 @@ class ProtobufReader { template >>* = nullptr> - void read_field(T& value, const uint8_t* end) + void read_field(T& value, uint8_t const* end) { typename T::value_type contained_value; read_field(contained_value, end); @@ -291,21 +293,21 @@ class ProtobufReader { } template - auto read_field(T& value, const uint8_t* end) -> decltype(read(value, 0)) + auto read_field(T& value, uint8_t const* end) -> decltype(read(value, 0)) { auto const size = read_field_size(end); read(value, size); } template >* = nullptr> - void read_field(T& value, const uint8_t* end) + void read_field(T& value, uint8_t const* end) { memcpy(&value, m_cur, sizeof(T)); m_cur += sizeof(T); } template - void read_packed_field(T& value, const uint8_t* end) + void read_packed_field(T& value, uint8_t const* end) { auto const len = get(); auto const field_end = std::min(m_cur + len, end); @@ -314,7 +316,7 @@ class ProtobufReader { } template - void read_raw_field(T& value, const uint8_t* end) + void read_raw_field(T& value, uint8_t const* end) { auto const size = read_field_size(end); value.emplace_back(m_cur, m_cur + size); @@ -331,7 +333,7 @@ class ProtobufReader { { } - inline void operator()(ProtobufReader* pbr, const uint8_t* end) + inline void operator()(ProtobufReader* pbr, uint8_t const* end) { pbr->read_field(output_value, end); } @@ -347,7 +349,7 @@ class ProtobufReader { { } - inline void operator()(ProtobufReader* pbr, const uint8_t* end) + inline void operator()(ProtobufReader* pbr, uint8_t const* end) { pbr->read_packed_field(output_value, end); } @@ -363,15 +365,15 @@ class ProtobufReader { { } - inline void operator()(ProtobufReader* pbr, const uint8_t* end) + inline void operator()(ProtobufReader* pbr, uint8_t const* end) { pbr->read_raw_field(output_value, end); } }; - const uint8_t* const m_base; - const uint8_t* m_cur; - const uint8_t* const m_end; + uint8_t const* const m_base; + 
uint8_t const* m_cur; + uint8_t const* const m_end; public: /** @@ -477,21 +479,25 @@ inline int64_t ProtobufReader::get() */ class ProtobufWriter { public: - ProtobufWriter() { m_buf = nullptr; } - ProtobufWriter(std::vector* output) { m_buf = output; } + ProtobufWriter() = default; + + ProtobufWriter(std::size_t bytes) : m_buff(bytes) {} + uint32_t put_byte(uint8_t v) { - m_buf->push_back(v); + m_buff.push_back(v); return 1; } + template uint32_t put_bytes(host_span values) { static_assert(sizeof(T) == 1); - m_buf->reserve(m_buf->size() + values.size()); - m_buf->insert(m_buf->end(), values.begin(), values.end()); + m_buff.reserve(m_buff.size() + values.size()); + m_buff.insert(m_buff.end(), values.begin(), values.end()); return values.size(); } + uint32_t put_uint(uint64_t v) { int l = 1; @@ -519,6 +525,7 @@ class ProtobufWriter { int64_t s = (v < 0); return put_uint(((v ^ -s) << 1) + s); } + void put_row_index_entry(int32_t present_blk, int32_t present_ofs, int32_t data_blk, @@ -528,20 +535,26 @@ class ProtobufWriter { TypeKind kind, ColStatsBlob const* stats); + std::size_t size() const { return m_buff.size(); } + uint8_t const* data() { return m_buff.data(); } + + std::vector& buffer() { return m_buff; } + std::vector release() { return std::move(m_buff); } + public: - size_t write(const PostScript&); - size_t write(const FileFooter&); - size_t write(const StripeInformation&); - size_t write(const SchemaType&); - size_t write(const UserMetadataItem&); - size_t write(const StripeFooter&); - size_t write(const Stream&); - size_t write(const ColumnEncoding&); - size_t write(const StripeStatistics&); - size_t write(const Metadata&); + size_t write(PostScript const&); + size_t write(FileFooter const&); + size_t write(StripeInformation const&); + size_t write(SchemaType const&); + size_t write(UserMetadataItem const&); + size_t write(StripeFooter const&); + size_t write(Stream const&); + size_t write(ColumnEncoding const&); + size_t write(StripeStatistics const&); 
+ size_t write(Metadata const&); protected: - std::vector* m_buf; + std::vector m_buff; struct ProtobufFieldWriter; }; @@ -613,7 +626,7 @@ struct column_validity_info { * convenience methods for initializing and accessing metadata. */ class metadata { - using OrcStripeInfo = std::pair; + using OrcStripeInfo = std::pair; public: struct stripe_source_mapping { diff --git a/cpp/src/io/orc/orc_field_writer.hpp b/cpp/src/io/orc/orc_field_writer.hpp index 44d87190844..fdba0d81a32 100644 --- a/cpp/src/io/orc/orc_field_writer.hpp +++ b/cpp/src/io/orc/orc_field_writer.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -54,7 +54,7 @@ struct ProtobufWriter::ProtobufFieldWriter { void field_packed_uint(int field, const std::vector& value) { struct_size += p->put_uint(encode_field_number>(field)); - auto lpos = p->m_buf->size(); + auto lpos = p->m_buff.size(); p->put_byte(0); auto sz = std::accumulate(value.begin(), value.end(), 0, [p = this->p](size_t sum, auto val) { return sum + p->put_uint(val); @@ -62,8 +62,8 @@ struct ProtobufWriter::ProtobufFieldWriter { struct_size += sz + 1; for (; sz > 0x7f; sz >>= 7, struct_size++) - p->m_buf->insert(p->m_buf->begin() + (lpos++), static_cast((sz & 0x7f) | 0x80)); - (*(p->m_buf))[lpos] = static_cast(sz); + p->m_buff.insert(p->m_buff.begin() + (lpos++), static_cast((sz & 0x7f) | 0x80)); + (p->m_buff)[lpos] = static_cast(sz); } /** @@ -84,13 +84,13 @@ struct ProtobufWriter::ProtobufFieldWriter { void field_struct(int field, const T& value) { struct_size += p->put_uint(encode_field_number(field, ProtofType::FIXEDLEN)); - auto lpos = p->m_buf->size(); + auto lpos = p->m_buff.size(); p->put_byte(0); auto sz = p->write(value); struct_size += sz + 1; for (; sz > 0x7f; sz >>= 7, struct_size++) - p->m_buf->insert(p->m_buf->begin() + 
(lpos++), static_cast((sz & 0x7f) | 0x80)); - (*(p->m_buf))[lpos] = static_cast(sz); + p->m_buff.insert(p->m_buff.begin() + (lpos++), static_cast((sz & 0x7f) | 0x80)); + (p->m_buff)[lpos] = static_cast(sz); } /** diff --git a/cpp/src/io/orc/orc_gpu.hpp b/cpp/src/io/orc/orc_gpu.hpp index 43f0565845c..05560a3ca62 100644 --- a/cpp/src/io/orc/orc_gpu.hpp +++ b/cpp/src/io/orc/orc_gpu.hpp @@ -16,7 +16,7 @@ #pragma once -#include "timezone.cuh" +#include #include "orc.hpp" @@ -294,7 +294,7 @@ void DecodeOrcColumnData(ColumnDesc* chunks, uint32_t num_columns, uint32_t num_stripes, size_t first_row, - timezone_table_view tz_table, + table_device_view tz_table, uint32_t num_rowgroups, uint32_t rowidx_stride, size_t level, diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 96eb20e1e66..bcf53159676 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -23,13 +23,13 @@ #include "orc_gpu.hpp" #include "reader_impl.hpp" -#include "timezone.cuh" #include #include #include #include +#include #include #include #include @@ -576,8 +576,8 @@ void scan_null_counts(cudf::detail::hostdevice_2dvector const& prefix_sums_to_update.emplace_back(col_idx, prefix_sums[col_idx]); } } - auto const d_prefix_sums_to_update = - cudf::detail::make_device_uvector_async(prefix_sums_to_update, stream); + auto const d_prefix_sums_to_update = cudf::detail::make_device_uvector_async( + prefix_sums_to_update, stream, rmm::mr::get_current_device_resource()); thrust::for_each(rmm::exec_policy(stream), d_prefix_sums_to_update.begin(), @@ -603,7 +603,7 @@ void scan_null_counts(cudf::detail::hostdevice_2dvector const& void reader::impl::decode_stream_data(cudf::detail::hostdevice_2dvector& chunks, size_t num_dicts, size_t skip_rows, - timezone_table_view tz_table, + table_device_view tz_table, cudf::detail::hostdevice_2dvector& row_groups, size_t row_index_stride, std::vector& out_buffers, @@ -915,11 +915,11 @@ reader::impl::impl(std::vector>&& sources, 
decimal128_columns = options.get_decimal128_columns(); } -timezone_table reader::impl::compute_timezone_table( +std::unique_ptr
reader::impl::compute_timezone_table( const std::vector& selected_stripes, rmm::cuda_stream_view stream) { - if (selected_stripes.empty()) return {}; + if (selected_stripes.empty()) return std::make_unique(); auto const has_timestamp_column = std::any_of( selected_columns.levels.cbegin(), selected_columns.levels.cend(), [&](auto& col_lvl) { @@ -927,10 +927,10 @@ timezone_table reader::impl::compute_timezone_table( return _metadata.get_col_type(col_meta.id).kind == TypeKind::TIMESTAMP; }); }); - if (not has_timestamp_column) return {}; + if (not has_timestamp_column) return std::make_unique(); - return build_timezone_transition_table(selected_stripes[0].stripe_info[0].second->writerTimezone, - stream); + return cudf::detail::make_timezone_transition_table( + {}, selected_stripes[0].stripe_info[0].second->writerTimezone, stream); } table_with_metadata reader::impl::read(size_type skip_rows, @@ -1038,7 +1038,7 @@ table_with_metadata reader::impl::read(size_type skip_rows, selected_columns.levels[level].size(), [&]() { return cudf::detail::make_zeroed_device_uvector_async( - total_num_stripes, stream); + total_num_stripes, stream, rmm::mr::get_current_device_resource()); }); // Tracker for eventually deallocating compressed and uncompressed data @@ -1238,10 +1238,11 @@ table_with_metadata reader::impl::read(size_type skip_rows, } if (not is_level_data_empty) { + auto const tz_table_dview = table_device_view::create(tz_table->view(), stream); decode_stream_data(chunks, num_dict_entries, skip_rows, - tz_table.view(), + *tz_table_dview, row_groups, _metadata.get_row_index_stride(), out_buffers[level], @@ -1270,7 +1271,8 @@ table_with_metadata reader::impl::read(size_type skip_rows, }); if (buff_data.size()) { - auto const dev_buff_data = cudf::detail::make_device_uvector_async(buff_data, stream); + auto const dev_buff_data = cudf::detail::make_device_uvector_async( + buff_data, stream, rmm::mr::get_current_device_resource()); generate_offsets_for_list(dev_buff_data, 
stream); } } diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index 96492e4c2b2..94b0fdc09d2 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -145,7 +145,7 @@ class reader::impl { void decode_stream_data(cudf::detail::hostdevice_2dvector& chunks, size_t num_dicts, size_t skip_rows, - timezone_table_view tz_table, + table_device_view tz_table, cudf::detail::hostdevice_2dvector& row_groups, size_t row_index_stride, std::vector& out_buffers, @@ -210,7 +210,7 @@ class reader::impl { * * @return Timezone table with timestamp offsets */ - timezone_table compute_timezone_table( + std::unique_ptr
compute_timezone_table( const std::vector& selected_stripes, rmm::cuda_stream_view stream); diff --git a/cpp/src/io/orc/stripe_data.cu b/cpp/src/io/orc/stripe_data.cu index d0d077d2611..8e698dd9dff 100644 --- a/cpp/src/io/orc/stripe_data.cu +++ b/cpp/src/io/orc/stripe_data.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -43,9 +43,6 @@ inline __device__ uint8_t is_rlev1(uint8_t encoding_mode) { return encoding_mode inline __device__ uint8_t is_dictionary(uint8_t encoding_mode) { return encoding_mode & 1; } -static __device__ __constant__ int64_t kORCTimeToUTC = - 1420070400; // Seconds from January 1st, 1970 to January 1st, 2015 - struct orc_bytestream_s { const uint8_t* base; uint32_t pos; @@ -101,7 +98,7 @@ struct orc_datadec_state_s { uint32_t max_vals; // max # of non-zero values to decode in this batch uint32_t nrows; // # of rows in current batch (up to block_size) uint32_t buffered_count; // number of buffered values in the secondary data stream - int64_t utc_epoch; // kORCTimeToUTC - gmtOffset + duration_s tz_epoch; // orc_ut_epoch - ut_offset RowGroup index; }; @@ -1374,7 +1371,7 @@ template __global__ void __launch_bounds__(block_size) gpuDecodeOrcColumnData(ColumnDesc* chunks, DictionaryEntry* global_dictionary, - timezone_table_view tz_table, + table_device_view tz_table, device_2dspan row_groups, size_t first_row, uint32_t rowidx_stride, @@ -1446,7 +1443,8 @@ __global__ void __launch_bounds__(block_size) } if (!is_dictionary(s->chunk.encoding_kind)) { s->chunk.dictionary_start = 0; } - s->top.data.utc_epoch = kORCTimeToUTC - tz_table.gmt_offset; + static constexpr duration_s d_orc_utc_epoch = duration_s{orc_utc_epoch}; + s->top.data.tz_epoch = d_orc_utc_epoch - get_ut_offset(tz_table, timestamp_s{d_orc_utc_epoch}); bytestream_init(&s->bs, 
s->chunk.streams[CI_DATA], s->chunk.strm_len[CI_DATA]); bytestream_init(&s->bs2, s->chunk.streams[CI_DATA2], s->chunk.strm_len[CI_DATA2]); @@ -1769,37 +1767,33 @@ __global__ void __launch_bounds__(block_size) break; } case TIMESTAMP: { - int64_t seconds = s->vals.i64[t + vals_skipped] + s->top.data.utc_epoch; - int64_t nanos = secondary_val; - nanos = (nanos >> 3) * kTimestampNanoScale[nanos & 7]; - if (!tz_table.ttimes.empty()) { - seconds += get_gmt_offset(tz_table.ttimes, tz_table.offsets, seconds); - } + auto seconds = s->top.data.tz_epoch + duration_s{s->vals.i64[t + vals_skipped]}; + // Convert to UTC + seconds += get_ut_offset(tz_table, timestamp_s{seconds}); + + duration_ns nanos = duration_ns{(static_cast(secondary_val) >> 3) * + kTimestampNanoScale[secondary_val & 7]}; + // Adjust seconds only for negative timestamps with positive nanoseconds. // Alternative way to represent negative timestamps is with negative nanoseconds // in which case the adjustment in not needed. // Comparing with 999999 instead of zero to match the apache writer. 
- if (seconds < 0 and nanos > 999999) { seconds -= 1; } - - duration_ns d_ns{nanos}; - duration_s d_s{seconds}; + if (seconds.count() < 0 and nanos.count() > 999999) { seconds -= duration_s{1}; } static_cast(data_out)[row] = [&]() { using cuda::std::chrono::duration_cast; switch (s->chunk.timestamp_type_id) { case type_id::TIMESTAMP_SECONDS: - return d_s.count() + duration_cast(d_ns).count(); + return (seconds + duration_cast(nanos)).count(); case type_id::TIMESTAMP_MILLISECONDS: - return duration_cast(d_s).count() + - duration_cast(d_ns).count(); + return (seconds + duration_cast(nanos)).count(); case type_id::TIMESTAMP_MICROSECONDS: - return duration_cast(d_s).count() + - duration_cast(d_ns).count(); + return (seconds + duration_cast(nanos)).count(); case type_id::TIMESTAMP_NANOSECONDS: default: - return duration_cast(d_s).count() + - d_ns.count(); // nanoseconds as output in case of `type_id::EMPTY` and - // `type_id::TIMESTAMP_NANOSECONDS` + // nanoseconds as output in case of `type_id::EMPTY` and + // `type_id::TIMESTAMP_NANOSECONDS` + return (seconds + nanos).count(); } }(); @@ -1887,7 +1881,7 @@ void __host__ DecodeOrcColumnData(ColumnDesc* chunks, uint32_t num_columns, uint32_t num_stripes, size_t first_row, - timezone_table_view tz_table, + table_device_view tz_table, uint32_t num_rowgroups, uint32_t rowidx_stride, size_t level, diff --git a/cpp/src/io/orc/stripe_enc.cu b/cpp/src/io/orc/stripe_enc.cu index 9032e3d2502..427167e2d0f 100644 --- a/cpp/src/io/orc/stripe_enc.cu +++ b/cpp/src/io/orc/stripe_enc.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -49,9 +49,6 @@ constexpr int scratch_buffer_size = 512 * 4; // Workaround replaces zero-length patch lists by a dummy zero patch constexpr bool zero_pll_war = true; -static __device__ __constant__ int64_t kORCTimeToUTC = - 1420070400; // Seconds from January 1st, 1970 to January 1st, 2015 - struct byterle_enc_state_s { uint32_t literal_run; uint32_t repeat_run; @@ -814,7 +811,7 @@ __global__ void __launch_bounds__(block_size) int32_t ts_scale = powers_of_ten[9 - min(s->chunk.scale, 9)]; int64_t seconds = ts / ts_scale; int64_t nanos = (ts - seconds * ts_scale); - s->vals.i64[nz_idx] = seconds - kORCTimeToUTC; + s->vals.i64[nz_idx] = seconds - orc_utc_epoch; if (nanos != 0) { // Trailing zeroes are encoded in the lower 3-bits uint32_t zeroes = 0; diff --git a/cpp/src/io/orc/timezone.cuh b/cpp/src/io/orc/timezone.cuh deleted file mode 100644 index 52736d6451a..00000000000 --- a/cpp/src/io/orc/timezone.cuh +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#pragma once - -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include - -namespace cudf { -namespace io { - -struct timezone_table_view { - int32_t gmt_offset = 0; - cudf::device_span ttimes; - cudf::device_span offsets; -}; - -// Cycle in which the time offsets repeat -static constexpr int32_t cycle_years = 400; -// Number of seconds in 400 years -static constexpr int64_t cycle_seconds = - cuda::std::chrono::duration_cast(duration_D{365 * cycle_years + (100 - 3)}).count(); -// Two entries per year, over the length of the cycle -static constexpr uint32_t cycle_entry_cnt = 2 * cycle_years; - -/** - * @brief Returns the GMT offset for a given date and given timezone table. - * - * @param ttimes Transition times; trailing `cycle_entry_cnt` entries are used for all times - * beyond the one covered by the TZif file - * @param offsets Time offsets in specific intervals; trailing `cycle_entry_cnt` entries are used - * for all times beyond the one covered by the TZif file - * @param count Number of elements in @p ttimes and @p offsets - * @param ts ORC timestamp - * - * @return GMT offset - */ -CUDF_HOST_DEVICE inline int32_t get_gmt_offset_impl(int64_t const* ttimes, - int32_t const* offsets, - size_t count, - int64_t ts) -{ - // Returns start of the range if all elements are larger than the input timestamp - auto last_less_equal_ttime_idx = [&](long begin_idx, long end_idx, int64_t ts) { - auto const first_larger_ttime = - thrust::upper_bound(thrust::seq, ttimes + begin_idx, ttimes + end_idx, ts); - // Element before the first larger element is the last one less of equal - return std::max(first_larger_ttime - ttimes - 1, begin_idx); - }; - - auto const file_entry_cnt = count - cycle_entry_cnt; - // Search in the file entries if the timestamp is in range - if (ts <= ttimes[file_entry_cnt - 1]) { - return offsets[last_less_equal_ttime_idx(0, file_entry_cnt, ts)]; - } else { - // Search in the 400-year cycle 
if outside of the file entries range - return offsets[last_less_equal_ttime_idx( - file_entry_cnt, count, (ts + cycle_seconds) % cycle_seconds)]; - } -} - -/** - * @brief Host `get_gmt_offset` interface. - * - * Implemented in `get_gmt_offset_impl`. - */ -inline __host__ int32_t get_gmt_offset(cudf::host_span ttimes, - cudf::host_span offsets, - int64_t ts) -{ - CUDF_EXPECTS(ttimes.size() == offsets.size(), - "transition times and offsets must have the same length"); - return get_gmt_offset_impl(ttimes.begin(), offsets.begin(), ttimes.size(), ts); -} - -/** - * @brief Device `get_gmt_offset` interface. - * - * Implemented in `get_gmt_offset_impl`. - */ -inline __device__ int32_t get_gmt_offset(cudf::device_span ttimes, - cudf::device_span offsets, - int64_t ts) -{ - return get_gmt_offset_impl(ttimes.begin(), offsets.begin(), ttimes.size(), ts); -} - -class timezone_table { - int32_t gmt_offset = 0; - rmm::device_uvector ttimes; - rmm::device_uvector offsets; - - public: - // Safe to use the default stream, device_uvectors will not change after they are created empty - timezone_table() : ttimes{0, cudf::get_default_stream()}, offsets{0, cudf::get_default_stream()} - { - } - timezone_table(int32_t gmt_offset, - rmm::device_uvector&& ttimes, - rmm::device_uvector&& offsets) - : gmt_offset{gmt_offset}, ttimes{std::move(ttimes)}, offsets{std::move(offsets)} - { - } - [[nodiscard]] timezone_table_view view() const { return {gmt_offset, ttimes, offsets}; } -}; - -/** - * @brief Creates a transition table to convert ORC timestamps to UTC. - * - * Uses system's TZif files. Assumes little-endian platform when parsing these files. 
- * - * @param timezone_name standard timezone name (for example, "US/Pacific") - * @param stream CUDA stream used for device memory operations and kernel launches - * - * @return The transition table for the given timezone - */ -timezone_table build_timezone_transition_table(std::string const& timezone_name, - rmm::cuda_stream_view stream); - -} // namespace io -} // namespace cudf diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index 8d85b001817..bd526f4f4eb 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -55,6 +55,7 @@ #include #include #include +#include #include #include @@ -332,6 +333,136 @@ size_type orc_table_view::num_rows() const noexcept return columns.empty() ? 0 : columns.front().size(); } +orc_streams::orc_stream_offsets orc_streams::compute_offsets( + host_span columns, size_t num_rowgroups) const +{ + std::vector strm_offsets(streams.size()); + size_t non_rle_data_size = 0; + size_t rle_data_size = 0; + for (size_t i = 0; i < streams.size(); ++i) { + const auto& stream = streams[i]; + + auto const is_rle_data = [&]() { + // First stream is an index stream, don't check types, etc. 
+ if (!stream.column_index().has_value()) return true; + + auto const& column = columns[stream.column_index().value()]; + // Dictionary encoded string column - dictionary characters or + // directly encoded string - column characters + if (column.orc_kind() == TypeKind::STRING && + ((stream.kind == DICTIONARY_DATA && column.orc_encoding() == DICTIONARY_V2) || + (stream.kind == DATA && column.orc_encoding() == DIRECT_V2))) + return false; + // Decimal data + if (column.orc_kind() == TypeKind::DECIMAL && stream.kind == DATA) return false; + + // Everything else uses RLE + return true; + }(); + // non-RLE and RLE streams are separated in the buffer that stores encoded data + // The computed offsets do not take the streams of the other type into account + if (is_rle_data) { + strm_offsets[i] = rle_data_size; + rle_data_size += (stream.length + 7) & ~7; + } else { + strm_offsets[i] = non_rle_data_size; + non_rle_data_size += stream.length; + } + } + non_rle_data_size = (non_rle_data_size + 7) & ~7; + + return {std::move(strm_offsets), non_rle_data_size, rle_data_size}; +} + +namespace { +struct string_length_functor { + __device__ inline size_type operator()(int const i) const + { + // we translate from 0 -> num_chunks * 2 because each statistic has a min and max + // string and we need to calculate lengths for both. + if (i >= num_chunks * 2) return 0; + + // min strings are even values, max strings are odd values of i + auto const should_copy_min = i % 2 == 0; + // index of the chunk + auto const idx = i / 2; + auto& str_val = should_copy_min ? stripe_stat_chunks[idx].min_value.str_val + : stripe_stat_chunks[idx].max_value.str_val; + auto const str = stripe_stat_merge[idx].stats_dtype == dtype_string; + return str ? 
str_val.length : 0; + } + + int const num_chunks; + statistics_chunk const* stripe_stat_chunks; + statistics_merge_group const* stripe_stat_merge; +}; + +__global__ void copy_string_data(char* string_pool, + size_type* offsets, + statistics_chunk* chunks, + statistics_merge_group const* groups) +{ + auto const idx = blockIdx.x / 2; + if (groups[idx].stats_dtype == dtype_string) { + // min strings are even values, max strings are odd values of i + auto const should_copy_min = blockIdx.x % 2 == 0; + auto& str_val = should_copy_min ? chunks[idx].min_value.str_val : chunks[idx].max_value.str_val; + auto dst = &string_pool[offsets[blockIdx.x]]; + auto src = str_val.ptr; + + for (int i = threadIdx.x; i < str_val.length; i += blockDim.x) { + dst[i] = src[i]; + } + if (threadIdx.x == 0) { str_val.ptr = dst; } + } +} + +} // namespace + +void persisted_statistics::persist(int num_table_rows, + bool single_write_mode, + intermediate_statistics& intermediate_stats, + rmm::cuda_stream_view stream) +{ + if (not single_write_mode) { + // persist the strings in the chunks into a string pool and update pointers + auto const num_chunks = static_cast(intermediate_stats.stripe_stat_chunks.size()); + // min offset and max offset + 1 for total size + rmm::device_uvector offsets((num_chunks * 2) + 1, stream); + + auto iter = cudf::detail::make_counting_transform_iterator( + 0, + string_length_functor{num_chunks, + intermediate_stats.stripe_stat_chunks.data(), + intermediate_stats.stripe_stat_merge.device_ptr()}); + thrust::exclusive_scan(rmm::exec_policy(stream), iter, iter + offsets.size(), offsets.begin()); + + // pull size back to host + auto const total_string_pool_size = offsets.element(num_chunks * 2, stream); + if (total_string_pool_size > 0) { + rmm::device_uvector string_pool(total_string_pool_size, stream); + + // offsets describes where in the string pool each string goes. 
Going with the simple + // approach for now, but it is possible something fancier with breaking up each thread into + // copying x bytes instead of a single string is the better method since we are dealing in + // min/max strings they almost certainly will not be uniform length. + copy_string_data<<>>( + string_pool.data(), + offsets.data(), + intermediate_stats.stripe_stat_chunks.data(), + intermediate_stats.stripe_stat_merge.device_ptr()); + string_pools.emplace_back(std::move(string_pool)); + } + } + + stripe_stat_chunks.emplace_back(std::move(intermediate_stats.stripe_stat_chunks)); + stripe_stat_merge.emplace_back(std::move(intermediate_stats.stripe_stat_merge)); + stats_dtypes = std::move(intermediate_stats.stats_dtypes); + col_types = std::move(intermediate_stats.col_types); + num_rows = num_table_rows; +} + +namespace { /** * @brief Gathers stripe information. * @@ -418,7 +549,7 @@ void init_dictionaries(orc_table_view& orc_table, [&](auto& col_idx) { auto& str_column = orc_table.column(col_idx); return cudf::detail::make_zeroed_device_uvector_async( - str_column.size(), stream); + str_column.size(), stream, rmm::mr::get_current_device_resource()); }); // Create views of the temporary buffers in device memory @@ -428,7 +559,8 @@ void init_dictionaries(orc_table_view& orc_table, dict_indices.begin(), dict_indices.end(), std::back_inserter(dict_indices_views), [](auto& di) { return device_span{di}; }); - auto d_dict_indices_views = cudf::detail::make_device_uvector_async(dict_indices_views, stream); + auto d_dict_indices_views = cudf::detail::make_device_uvector_async( + dict_indices_views, stream, rmm::mr::get_current_device_resource()); gpu::InitDictionaryIndices(orc_table.d_columns, *dict, @@ -441,15 +573,27 @@ void init_dictionaries(orc_table_view& orc_table, dict->device_to_host(stream, true); } -void writer::impl::build_dictionaries(orc_table_view& orc_table, - host_span stripe_bounds, - hostdevice_2dvector const& dict, - host_span> dict_index, - 
host_span dictionary_enabled, - hostdevice_2dvector& stripe_dict) +/** + * @brief Builds up per-stripe dictionaries for string columns. + * + * @param orc_table Non-owning view of a cuDF table w/ ORC-related info + * @param stripe_bounds List of stripe boundaries + * @param dict List of dictionary chunks [rowgroup][column] + * @param dict_index List of dictionary indices + * @param dictionary_enabled Whether dictionary encoding is enabled for a given column + * @param stripe_dict List of stripe dictionaries + * @param enable_dictionary Whether dictionary is enabled + * @param stream CUDA stream used for device memory operations and kernel launches + */ +void build_dictionaries(orc_table_view& orc_table, + host_span stripe_bounds, + hostdevice_2dvector const& dict, + host_span> dict_index, + host_span dictionary_enabled, + hostdevice_2dvector& stripe_dict, + bool enable_dictionary, + rmm::cuda_stream_view stream) { - const auto num_rowgroups = dict.size().first; - for (size_t dict_idx = 0; dict_idx < orc_table.num_string_columns(); ++dict_idx) { auto& str_column = orc_table.string_column(dict_idx); str_column.attach_stripe_dict(stripe_dict.base_host_ptr(), stripe_dict.base_device_ptr()); @@ -470,7 +614,7 @@ void writer::impl::build_dictionaries(orc_table_view& orc_table, sd.leaf_column = dict[0][dict_idx].leaf_column; } - if (enable_dictionary_) { + if (enable_dictionary) { struct string_column_cost { size_t direct = 0; size_t dictionary = 0; @@ -554,9 +698,20 @@ auto comp_block_alignment(CompressionKind compression_kind) return 1u << nvcomp::compress_output_alignment_bits(to_nvcomp_compression_type(compression_kind)); } -orc_streams writer::impl::create_streams(host_span columns, - file_segmentation const& segmentation, - std::map const& decimal_column_sizes) +/** + * @brief Builds up per-column streams. 
+ * + * @param[in,out] columns List of columns + * @param[in] segmentation stripe and rowgroup ranges + * @param[in] decimal_column_sizes Sizes of encoded decimal columns + * @return List of stream descriptors + */ +orc_streams create_streams(host_span columns, + file_segmentation const& segmentation, + std::map const& decimal_column_sizes, + bool enable_dictionary, + CompressionKind compression_kind, + bool single_write_mode) { // 'column 0' row index stream std::vector streams{{ROW_INDEX, 0}}; // TODO: Separate index and data streams? @@ -599,7 +754,7 @@ orc_streams writer::impl::create_streams(host_span columns, auto add_stream = [&](gpu::StreamIndexType index_type, StreamKind kind, TypeKind type_kind, size_t size) { - auto const max_alignment_padding = uncomp_block_alignment(compression_kind_) - 1; + auto const max_alignment_padding = uncomp_block_alignment(compression_kind) - 1; const auto base = column.index() * gpu::CI_NUM_STREAMS; ids[base + index_type] = streams.size(); streams.push_back(orc::Stream{ @@ -636,7 +791,7 @@ orc_streams writer::impl::create_streams(host_span columns, column.set_orc_encoding(DIRECT); break; case TypeKind::STRING: { - bool enable_dict = enable_dictionary_; + bool enable_dict = enable_dictionary; size_t dict_data_size = 0; size_t dict_strings = 0; size_t dict_lengths_div512 = 0; @@ -711,47 +866,6 @@ orc_streams writer::impl::create_streams(host_span columns, return {std::move(streams), std::move(ids), std::move(types)}; } -orc_streams::orc_stream_offsets orc_streams::compute_offsets( - host_span columns, size_t num_rowgroups) const -{ - std::vector strm_offsets(streams.size()); - size_t non_rle_data_size = 0; - size_t rle_data_size = 0; - for (size_t i = 0; i < streams.size(); ++i) { - const auto& stream = streams[i]; - - auto const is_rle_data = [&]() { - // First stream is an index stream, don't check types, etc. 
- if (!stream.column_index().has_value()) return true; - - auto const& column = columns[stream.column_index().value()]; - // Dictionary encoded string column - dictionary characters or - // directly encoded string - column characters - if (column.orc_kind() == TypeKind::STRING && - ((stream.kind == DICTIONARY_DATA && column.orc_encoding() == DICTIONARY_V2) || - (stream.kind == DATA && column.orc_encoding() == DIRECT_V2))) - return false; - // Decimal data - if (column.orc_kind() == TypeKind::DECIMAL && stream.kind == DATA) return false; - - // Everything else uses RLE - return true; - }(); - // non-RLE and RLE streams are separated in the buffer that stores encoded data - // The computed offsets do not take the streams of the other type into account - if (is_rle_data) { - strm_offsets[i] = rle_data_size; - rle_data_size += (stream.length + 7) & ~7; - } else { - strm_offsets[i] = non_rle_data_size; - non_rle_data_size += stream.length; - } - } - non_rle_data_size = (non_rle_data_size + 7) & ~7; - - return {std::move(strm_offsets), non_rle_data_size, rle_data_size}; -} - std::vector> calculate_aligned_rowgroup_bounds( orc_table_view const& orc_table, file_segmentation const& segmentation, @@ -772,7 +886,8 @@ std::vector> calculate_aligned_rowgroup_bounds( aligned_rgs.count() * sizeof(rowgroup_rows), cudaMemcpyDefault, stream.value())); - auto const d_stripes = cudf::detail::make_device_uvector_async(segmentation.stripes, stream); + auto const d_stripes = cudf::detail::make_device_uvector_async( + segmentation.stripes, stream, rmm::mr::get_current_device_resource()); // One thread per column, per stripe thrust::for_each_n( @@ -1091,11 +1206,23 @@ encoded_data encode_columns(orc_table_view const& orc_table, return {std::move(encoded_data), std::move(chunk_streams)}; } -std::vector writer::impl::gather_stripes( +/** + * @brief Returns stripe information after compacting columns' individual data + * chunks into contiguous data streams. 
+ * + * @param[in] num_index_streams Total number of index streams + * @param[in] segmentation stripe and rowgroup ranges + * @param[in,out] enc_streams List of encoder chunk streams [column][rowgroup] + * @param[in,out] strm_desc List of stream descriptors [stripe][data_stream] + * @param[in] stream CUDA stream used for device memory operations and kernel launches + * @return The stripes' information + */ +std::vector gather_stripes( size_t num_index_streams, file_segmentation const& segmentation, hostdevice_2dvector* enc_streams, - hostdevice_2dvector* strm_desc) + hostdevice_2dvector* strm_desc, + rmm::cuda_stream_view stream) { if (segmentation.num_stripes() == 0) { return {}; } std::vector stripes(segmentation.num_stripes()); @@ -1163,16 +1290,25 @@ hostdevice_vector allocate_and_encode_blobs( return blobs; } -writer::impl::intermediate_statistics writer::impl::gather_statistic_blobs( - statistics_freq const stats_freq, - orc_table_view const& orc_table, - file_segmentation const& segmentation) +/** + * @brief Returns column statistics in an intermediate format. 
+ * + * @param statistics_freq Frequency of statistics to be included in the output file + * @param orc_table Table information to be written + * @param segmentation stripe and rowgroup ranges + * @param stream CUDA stream used for device memory operations and kernel launches + * @return The statistic information + */ +intermediate_statistics gather_statistic_blobs(statistics_freq const stats_freq, + orc_table_view const& orc_table, + file_segmentation const& segmentation, + rmm::cuda_stream_view stream) { auto const num_rowgroup_blobs = segmentation.rowgroups.count(); auto const num_stripe_blobs = segmentation.num_stripes() * orc_table.num_columns(); auto const are_statistics_enabled = stats_freq != statistics_freq::STATISTICS_NONE; if (not are_statistics_enabled or num_rowgroup_blobs + num_stripe_blobs == 0) { - return writer::impl::intermediate_statistics{stream}; + return intermediate_statistics{stream}; } hostdevice_vector stat_desc(orc_table.num_columns(), stream); @@ -1290,8 +1426,17 @@ writer::impl::intermediate_statistics writer::impl::gather_statistic_blobs( std::move(col_types)}; } -writer::impl::encoded_footer_statistics writer::impl::finish_statistic_blobs( - int num_stripes, writer::impl::persisted_statistics& per_chunk_stats) +/** + * @brief Returns column statistics encoded in ORC protobuf format stored in the footer. 
+ * + * @param num_stripes number of stripes in the data + * @param incoming_stats intermediate statistics returned from `gather_statistic_blobs` + * @param stream CUDA stream used for device memory operations and kernel launches + * @return The encoded statistic blobs + */ +encoded_footer_statistics finish_statistic_blobs(int num_stripes, + persisted_statistics& per_chunk_stats, + rmm::cuda_stream_view stream) { auto stripe_size_iter = thrust::make_transform_iterator(per_chunk_stats.stripe_stat_merge.begin(), [](auto const& i) { return i.size(); }); @@ -1381,17 +1526,36 @@ writer::impl::encoded_footer_statistics writer::impl::finish_statistic_blobs( return {std::move(stripe_blobs), std::move(file_blobs)}; } -void writer::impl::write_index_stream(int32_t stripe_id, - int32_t stream_id, - host_span columns, - file_segmentation const& segmentation, - host_2dspan enc_streams, - host_2dspan strm_desc, - host_span comp_res, - std::vector const& rg_stats, - StripeInformation* stripe, - orc_streams* streams, - ProtobufWriter* pbw) +/** + * @brief Writes the specified column's row index stream. 
+ * + * @param[in] stripe_id Stripe's identifier + * @param[in] stream_id Stream identifier (column id + 1) + * @param[in] columns List of columns + * @param[in] segmentation stripe and rowgroup ranges + * @param[in] enc_streams List of encoder chunk streams [column][rowgroup] + * @param[in] strm_desc List of stream descriptors + * @param[in] comp_res Output status for compressed streams + * @param[in] rg_stats row group level statistics + * @param[in,out] stripe Stream's parent stripe + * @param[in,out] streams List of all streams + * @param[in] compression_kind The compression kind + * @param[in] compression_blocksize The block size used for compression + * @param[in] out_sink Sink for writing data + */ +void write_index_stream(int32_t stripe_id, + int32_t stream_id, + host_span columns, + file_segmentation const& segmentation, + host_2dspan enc_streams, + host_2dspan strm_desc, + host_span comp_res, + std::vector const& rg_stats, + StripeInformation* stripe, + orc_streams* streams, + CompressionKind compression_kind, + size_t compression_blocksize, + std::unique_ptr const& out_sink) { row_group_index_info present; row_group_index_info data; @@ -1403,7 +1567,7 @@ void writer::impl::write_index_stream(int32_t stripe_id, row_group_index_info record; if (stream.ids[type] > 0) { record.pos = 0; - if (compression_kind_ != NONE) { + if (compression_kind != NONE) { auto const& ss = strm_desc[stripe_id][stream.ids[type] - (columns.size() + 1)]; record.blk_pos = ss.first_block; record.comp_pos = 0; @@ -1418,10 +1582,10 @@ void writer::impl::write_index_stream(int32_t stripe_id, if (record.pos >= 0) { record.pos += stream.lengths[type]; while ((record.pos >= 0) && (record.blk_pos >= 0) && - (static_cast(record.pos) >= compression_blocksize_) && + (static_cast(record.pos) >= compression_blocksize) && (record.comp_pos + block_header_size + comp_res[record.blk_pos].bytes_written < static_cast(record.comp_size))) { - record.pos -= compression_blocksize_; + record.pos -= 
compression_blocksize; record.comp_pos += block_header_size + comp_res[record.blk_pos].bytes_written; record.blk_pos += 1; } @@ -1443,21 +1607,21 @@ void writer::impl::write_index_stream(int32_t stripe_id, } } - buffer_.resize((compression_kind_ != NONE) ? 3 : 0); + ProtobufWriter pbw((compression_kind != NONE) ? 3 : 0); // Add row index entries auto const& rowgroups_range = segmentation.stripes[stripe_id]; std::for_each(rowgroups_range.cbegin(), rowgroups_range.cend(), [&](auto rowgroup) { - pbw->put_row_index_entry(present.comp_pos, - present.pos, - data.comp_pos, - data.pos, - data2.comp_pos, - data2.pos, - kind, - (rg_stats.empty() or stream_id == 0) - ? nullptr - : (&rg_stats[column_id * segmentation.num_rowgroups() + rowgroup])); + pbw.put_row_index_entry(present.comp_pos, + present.pos, + data.comp_pos, + data.pos, + data2.comp_pos, + data2.pos, + kind, + (rg_stats.empty() or stream_id == 0) + ? nullptr + : (&rg_stats[column_id * segmentation.num_rowgroups() + rowgroup])); if (stream_id != 0) { const auto& strm = enc_streams[column_id][rowgroup]; @@ -1467,23 +1631,40 @@ void writer::impl::write_index_stream(int32_t stripe_id, } }); - (*streams)[stream_id].length = buffer_.size(); - if (compression_kind_ != NONE) { + (*streams)[stream_id].length = pbw.size(); + if (compression_kind != NONE) { uint32_t uncomp_ix_len = (uint32_t)((*streams)[stream_id].length - 3) * 2 + 1; - buffer_[0] = static_cast(uncomp_ix_len >> 0); - buffer_[1] = static_cast(uncomp_ix_len >> 8); - buffer_[2] = static_cast(uncomp_ix_len >> 16); + pbw.buffer()[0] = static_cast(uncomp_ix_len >> 0); + pbw.buffer()[1] = static_cast(uncomp_ix_len >> 8); + pbw.buffer()[2] = static_cast(uncomp_ix_len >> 16); } - out_sink_->host_write(buffer_.data(), buffer_.size()); - stripe->indexLength += buffer_.size(); + out_sink->host_write(pbw.data(), pbw.size()); + stripe->indexLength += pbw.size(); } -std::future writer::impl::write_data_stream(gpu::StripeStream const& strm_desc, - 
gpu::encoder_chunk_streams const& enc_stream, - uint8_t const* compressed_data, - uint8_t* stream_out, - StripeInformation* stripe, - orc_streams* streams) +/** + * @brief Write the specified column's data streams + * + * @param[in] strm_desc Stream's descriptor + * @param[in] enc_stream Chunk's streams + * @param[in] compressed_data Compressed stream data + * @param[in,out] stream_out Temporary host output buffer + * @param[in,out] stripe Stream's parent stripe + * @param[in,out] streams List of all streams + * @param[in] compression_kind The compression kind + * @param[in] out_sink Sink for writing data + * @param[in] stream CUDA stream used for device memory operations and kernel launches + * @return An std::future that should be synchronized to ensure the writing is complete + */ +std::future write_data_stream(gpu::StripeStream const& strm_desc, + gpu::encoder_chunk_streams const& enc_stream, + uint8_t const* compressed_data, + uint8_t* stream_out, + StripeInformation* stripe, + orc_streams* streams, + CompressionKind compression_kind, + std::unique_ptr const& out_sink, + rmm::cuda_stream_view stream) { const auto length = strm_desc.stream_size; (*streams)[enc_stream.ids[strm_desc.stream_type]].length = length; @@ -1491,18 +1672,18 @@ std::future writer::impl::write_data_stream(gpu::StripeStream const& strm_ return std::async(std::launch::deferred, [] {}); } - const auto* stream_in = (compression_kind_ == NONE) ? enc_stream.data_ptrs[strm_desc.stream_type] - : (compressed_data + strm_desc.bfr_offset); + const auto* stream_in = (compression_kind == NONE) ? 
enc_stream.data_ptrs[strm_desc.stream_type] + : (compressed_data + strm_desc.bfr_offset); auto write_task = [&]() { - if (out_sink_->is_device_write_preferred(length)) { - return out_sink_->device_write_async(stream_in, length, stream); + if (out_sink->is_device_write_preferred(length)) { + return out_sink->device_write_async(stream_in, length, stream); } else { CUDF_CUDA_TRY( cudaMemcpyAsync(stream_out, stream_in, length, cudaMemcpyDefault, stream.value())); stream.synchronize(); - out_sink_->host_write(stream_out, length); + out_sink->host_write(stream_out, length); return std::async(std::launch::deferred, [] {}); } }(); @@ -1510,18 +1691,27 @@ std::future writer::impl::write_data_stream(gpu::StripeStream const& strm_ return write_task; } -void writer::impl::add_uncompressed_block_headers(std::vector& v) +/** + * @brief Insert 3-byte uncompressed block headers in a byte vector + * + * @param compression_kind The compression kind + * @param compression_blocksize The block size used for compression + * @param v The destitation byte vector to write, which must include initial 3-byte header + */ +void add_uncompressed_block_headers(CompressionKind compression_kind, + size_t compression_blocksize, + std::vector& v) { - if (compression_kind_ != NONE) { + if (compression_kind != NONE) { size_t uncomp_len = v.size() - 3, pos = 0, block_len; - while (uncomp_len > compression_blocksize_) { - block_len = compression_blocksize_ * 2 + 1; + while (uncomp_len > compression_blocksize) { + block_len = compression_blocksize * 2 + 1; v[pos + 0] = static_cast(block_len >> 0); v[pos + 1] = static_cast(block_len >> 8); v[pos + 2] = static_cast(block_len >> 16); - pos += 3 + compression_blocksize_; + pos += 3 + compression_blocksize; v.insert(v.begin() + pos, 3, 0); - uncomp_len -= compression_blocksize_; + uncomp_len -= compression_blocksize; } block_len = uncomp_len * 2 + 1; v[pos + 0] = static_cast(block_len >> 0); @@ -1530,58 +1720,6 @@ void 
writer::impl::add_uncompressed_block_headers(std::vector& v) } } -writer::impl::impl(std::unique_ptr sink, - orc_writer_options const& options, - SingleWriteMode mode, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) - : _mr(mr), - stream(stream), - max_stripe_size{options.get_stripe_size_bytes(), options.get_stripe_size_rows()}, - row_index_stride{options.get_row_index_stride()}, - compression_kind_(to_orc_compression(options.get_compression())), - compression_blocksize_(compression_block_size(compression_kind_)), - stats_freq_(options.get_statistics_freq()), - single_write_mode(mode == SingleWriteMode::YES), - kv_meta(options.get_key_value_metadata()), - out_sink_(std::move(sink)) -{ - if (options.get_metadata()) { - table_meta = std::make_unique(*options.get_metadata()); - } - init_state(); -} - -writer::impl::impl(std::unique_ptr sink, - chunked_orc_writer_options const& options, - SingleWriteMode mode, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) - : _mr(mr), - stream(stream), - max_stripe_size{options.get_stripe_size_bytes(), options.get_stripe_size_rows()}, - row_index_stride{options.get_row_index_stride()}, - compression_kind_(to_orc_compression(options.get_compression())), - compression_blocksize_(compression_block_size(compression_kind_)), - stats_freq_(options.get_statistics_freq()), - single_write_mode(mode == SingleWriteMode::YES), - kv_meta(options.get_key_value_metadata()), - out_sink_(std::move(sink)) -{ - if (options.get_metadata()) { - table_meta = std::make_unique(*options.get_metadata()); - } - init_state(); -} - -writer::impl::~impl() { close(); } - -void writer::impl::init_state() -{ - // Write file header - out_sink_->host_write(MAGIC, std::strlen(MAGIC)); -} - void pushdown_lists_null_mask(orc_column_view const& col, device_span d_columns, bitmask_type const* parent_pd_mask, @@ -1676,7 +1814,8 @@ pushdown_null_masks init_pushdown_null_masks(orc_table_view& orc_table, } // Attach null masks to 
device column views (async) - auto const d_mask_ptrs = cudf::detail::make_device_uvector_async(mask_ptrs, stream); + auto const d_mask_ptrs = cudf::detail::make_device_uvector_async( + mask_ptrs, stream, rmm::mr::get_current_device_resource()); thrust::for_each_n( rmm::exec_policy(stream), thrust::make_counting_iterator(0ul), @@ -1766,7 +1905,8 @@ orc_table_view make_orc_table_view(table_view const& table, orc_columns.cbegin(), orc_columns.cend(), std::back_inserter(type_kinds), [](auto& orc_column) { return orc_column.orc_kind(); }); - auto const d_type_kinds = cudf::detail::make_device_uvector_async(type_kinds, stream); + auto const d_type_kinds = cudf::detail::make_device_uvector_async( + type_kinds, stream, rmm::mr::get_current_device_resource()); rmm::device_uvector d_orc_columns(orc_columns.size(), stream); using stack_value_type = thrust::pair>; @@ -1816,7 +1956,8 @@ orc_table_view make_orc_table_view(table_view const& table, return {std::move(orc_columns), std::move(d_orc_columns), str_col_indexes, - cudf::detail::make_device_uvector_sync(str_col_indexes, stream)}; + cudf::detail::make_device_uvector_sync( + str_col_indexes, stream, rmm::mr::get_current_device_resource())}; } hostdevice_2dvector calculate_rowgroup_bounds(orc_table_view const& orc_table, @@ -1984,7 +2125,7 @@ string_dictionaries allocate_dictionaries(orc_table_view const& orc_table, std::back_inserter(data), [&](auto& idx) { return cudf::detail::make_zeroed_device_uvector_async( - orc_table.columns[idx].size(), stream); + orc_table.columns[idx].size(), stream, rmm::mr::get_current_device_resource()); }); std::vector> index; std::transform(orc_table.string_column_indices.begin(), @@ -1992,7 +2133,7 @@ string_dictionaries allocate_dictionaries(orc_table_view const& orc_table, std::back_inserter(index), [&](auto& idx) { return cudf::detail::make_zeroed_device_uvector_async( - orc_table.columns[idx].size(), stream); + orc_table.columns[idx].size(), stream, 
rmm::mr::get_current_device_resource()); }); stream.synchronize(); @@ -2007,53 +2148,13 @@ string_dictionaries allocate_dictionaries(orc_table_view const& orc_table, return {std::move(data), std::move(index), - cudf::detail::make_device_uvector_sync(data_ptrs, stream), - cudf::detail::make_device_uvector_sync(index_ptrs, stream), + cudf::detail::make_device_uvector_sync( + data_ptrs, stream, rmm::mr::get_current_device_resource()), + cudf::detail::make_device_uvector_sync( + index_ptrs, stream, rmm::mr::get_current_device_resource()), std::move(is_dict_enabled)}; } -struct string_length_functor { - __device__ inline size_type operator()(int const i) const - { - // we translate from 0 -> num_chunks * 2 because each statistic has a min and max - // string and we need to calculate lengths for both. - if (i >= num_chunks * 2) return 0; - - // min strings are even values, max strings are odd values of i - auto const should_copy_min = i % 2 == 0; - // index of the chunk - auto const idx = i / 2; - auto& str_val = should_copy_min ? stripe_stat_chunks[idx].min_value.str_val - : stripe_stat_chunks[idx].max_value.str_val; - auto const str = stripe_stat_merge[idx].stats_dtype == dtype_string; - return str ? str_val.length : 0; - } - - int const num_chunks; - statistics_chunk const* stripe_stat_chunks; - statistics_merge_group const* stripe_stat_merge; -}; - -__global__ void copy_string_data(char* string_pool, - size_type* offsets, - statistics_chunk* chunks, - statistics_merge_group const* groups) -{ - auto const idx = blockIdx.x / 2; - if (groups[idx].stats_dtype == dtype_string) { - // min strings are even values, max strings are odd values of i - auto const should_copy_min = blockIdx.x % 2 == 0; - auto& str_val = should_copy_min ? 
chunks[idx].min_value.str_val : chunks[idx].max_value.str_val; - auto dst = &string_pool[offsets[blockIdx.x]]; - auto src = str_val.ptr; - - for (int i = threadIdx.x; i < str_val.length; i += blockDim.x) { - dst[i] = src[i]; - } - if (threadIdx.x == 0) { str_val.ptr = dst; } - } -} - size_t max_compression_output_size(CompressionKind compression_kind, uint32_t compression_blocksize) { if (compression_kind == NONE) return 0; @@ -2062,60 +2163,14 @@ size_t max_compression_output_size(CompressionKind compression_kind, uint32_t co compression_blocksize); } -void writer::impl::persisted_statistics::persist(int num_table_rows, - bool single_write_mode, - intermediate_statistics& intermediate_stats, - rmm::cuda_stream_view stream) -{ - if (not single_write_mode) { - // persist the strings in the chunks into a string pool and update pointers - auto const num_chunks = static_cast(intermediate_stats.stripe_stat_chunks.size()); - // min offset and max offset + 1 for total size - rmm::device_uvector offsets((num_chunks * 2) + 1, stream); - - auto iter = cudf::detail::make_counting_transform_iterator( - 0, - string_length_functor{num_chunks, - intermediate_stats.stripe_stat_chunks.data(), - intermediate_stats.stripe_stat_merge.device_ptr()}); - thrust::exclusive_scan(rmm::exec_policy(stream), iter, iter + offsets.size(), offsets.begin()); - - // pull size back to host - auto const total_string_pool_size = offsets.element(num_chunks * 2, stream); - if (total_string_pool_size > 0) { - rmm::device_uvector string_pool(total_string_pool_size, stream); - - // offsets describes where in the string pool each string goes. Going with the simple - // approach for now, but it is possible something fancier with breaking up each thread into - // copying x bytes instead of a single string is the better method since we are dealing in - // min/max strings they almost certainly will not be uniform length. 
- copy_string_data<<>>( - string_pool.data(), - offsets.data(), - intermediate_stats.stripe_stat_chunks.data(), - intermediate_stats.stripe_stat_merge.device_ptr()); - string_pools.emplace_back(std::move(string_pool)); - } - } - - stripe_stat_chunks.emplace_back(std::move(intermediate_stats.stripe_stat_chunks)); - stripe_stat_merge.emplace_back(std::move(intermediate_stats.stripe_stat_merge)); - stats_dtypes = std::move(intermediate_stats.stats_dtypes); - col_types = std::move(intermediate_stats.col_types); - num_rows = num_table_rows; -} - -void writer::impl::write(table_view const& table) +std::unique_ptr make_table_meta(table_view const& input) { - CUDF_EXPECTS(not closed, "Data has already been flushed to out and closed"); - auto const num_rows = table.num_rows(); - - if (not table_meta) { table_meta = std::make_unique(table); } + auto table_meta = std::make_unique(input); // Fill unnamed columns' names in table_meta std::function add_default_name = [&](column_in_metadata& col_meta, std::string default_name) { - if (col_meta.get_name().empty()) col_meta.set_name(default_name); + if (col_meta.get_name().empty()) { col_meta.set_name(default_name); } for (size_type i = 0; i < col_meta.num_children(); ++i) { add_default_name(col_meta.child(i), std::to_string(i)); } @@ -2124,11 +2179,55 @@ void writer::impl::write(table_view const& table) add_default_name(table_meta->column_metadata[i], "_col" + std::to_string(i)); } - auto const d_table = table_device_view::create(table, stream); + return table_meta; +} - auto orc_table = make_orc_table_view(table, *d_table, *table_meta, stream); +/** + * @brief Perform the processing steps needed to convert the input table into the output ORC data + * for writing, such as compression and ORC encoding. 
+ * + * @param input The input table + * @param table_meta The table metadata + * @param max_stripe_size Maximum size of stripes in the output file + * @param row_index_stride The row index stride + * @param enable_dictionary Whether dictionary is enabled + * @param compression_kind The compression kind + * @param compression_blocksize The block size used for compression + * @param stats_freq Column statistics granularity type for parquet/orc writers + * @param single_write_mode Flag to indicate if there is only a single table write + * @param out_sink Sink for writing data + * @param stream CUDA stream used for device memory operations and kernel launches + * @return A tuple of the intermediate results containing the processed data + */ +std::tuple, + hostdevice_2dvector, + encoded_data, + file_segmentation, + hostdevice_2dvector, + std::vector, + orc_table_view, + rmm::device_buffer, + intermediate_statistics, + pinned_buffer> +convert_table_to_orc_data(table_view const& input, + table_input_metadata const& table_meta, + stripe_size_limits max_stripe_size, + size_type row_index_stride, + bool enable_dictionary, + CompressionKind compression_kind, + size_t compression_blocksize, + statistics_freq stats_freq, + bool single_write_mode, + data_sink const& out_sink, + rmm::cuda_stream_view stream) +{ + auto const input_tview = table_device_view::create(input, stream); - auto const pd_masks = init_pushdown_null_masks(orc_table, stream); + auto orc_table = make_orc_table_view(input, *input_tview, table_meta, stream); + + // This is unused but it holds memory buffers for later access thus needs to be kept alive. 
+ [[maybe_unused]] auto const pd_masks = init_pushdown_null_masks(orc_table, stream); auto rowgroup_bounds = calculate_rowgroup_bounds(orc_table, row_index_stride, stream); @@ -2146,7 +2245,7 @@ void writer::impl::write(table_view const& table) } // Decide stripe boundaries based on rowgroups and dict chunks - auto const segmentation = + auto segmentation = calculate_segmentation(orc_table.columns, std::move(rowgroup_bounds), max_stripe_size); // Build stripe-level dictionaries @@ -2158,15 +2257,22 @@ void writer::impl::write(table_view const& table) dict, dictionaries.index, dictionaries.dictionary_enabled, - stripe_dict); + stripe_dict, + enable_dictionary, + stream); } auto dec_chunk_sizes = decimal_chunk_sizes(orc_table, segmentation, stream); - auto const uncompressed_block_align = uncomp_block_alignment(compression_kind_); - auto const compressed_block_align = comp_block_alignment(compression_kind_); - auto streams = - create_streams(orc_table.columns, segmentation, decimal_column_sizes(dec_chunk_sizes.rg_sizes)); + auto const uncompressed_block_align = uncomp_block_alignment(compression_kind); + auto const compressed_block_align = comp_block_alignment(compression_kind); + + auto streams = create_streams(orc_table.columns, + segmentation, + decimal_column_sizes(dec_chunk_sizes.rg_sizes), + enable_dictionary, + compression_kind, + single_write_mode); auto enc_data = encode_columns(orc_table, std::move(dictionaries), std::move(dec_chunk_sizes), @@ -2175,155 +2281,317 @@ void writer::impl::write(table_view const& table) uncompressed_block_align, stream); + auto const num_rows = input.num_rows(); + // Assemble individual disparate column chunks into contiguous data streams size_type const num_index_streams = (orc_table.num_columns() + 1); const auto num_data_streams = streams.size() - num_index_streams; hostdevice_2dvector strm_descs( segmentation.num_stripes(), num_data_streams, stream); - auto stripes = gather_stripes(num_index_streams, segmentation, 
&enc_data.streams, &strm_descs); - - if (num_rows > 0) { - // Allocate intermediate output stream buffer - size_t compressed_bfr_size = 0; - size_t num_compressed_blocks = 0; - - auto const max_compressed_block_size = - max_compression_output_size(compression_kind_, compression_blocksize_); - auto const padded_max_compressed_block_size = - util::round_up_unsafe(max_compressed_block_size, compressed_block_align); - auto const padded_block_header_size = - util::round_up_unsafe(block_header_size, compressed_block_align); - - auto stream_output = [&]() { - size_t max_stream_size = 0; - bool all_device_write = true; - - for (auto& ss : strm_descs.host_view().flat_view()) { - if (!out_sink_->is_device_write_preferred(ss.stream_size)) { all_device_write = false; } - size_t stream_size = ss.stream_size; - if (compression_kind_ != NONE) { - ss.first_block = num_compressed_blocks; - ss.bfr_offset = compressed_bfr_size; - - auto num_blocks = std::max( - (stream_size + compression_blocksize_ - 1) / compression_blocksize_, 1); - stream_size += num_blocks * block_header_size; - num_compressed_blocks += num_blocks; - compressed_bfr_size += - (padded_block_header_size + padded_max_compressed_block_size) * num_blocks; - } - max_stream_size = std::max(max_stream_size, stream_size); - } + auto stripes = + gather_stripes(num_index_streams, segmentation, &enc_data.streams, &strm_descs, stream); + + if (num_rows == 0) { + return {std::move(streams), + hostdevice_vector{}, // comp_results + std::move(strm_descs), + std::move(enc_data), + std::move(segmentation), + std::move(stripe_dict), + std::move(stripes), + std::move(orc_table), + rmm::device_buffer{}, // compressed_data + intermediate_statistics{stream}, + pinned_buffer{nullptr, cudaFreeHost}}; + } - if (all_device_write) { - return pinned_buffer{nullptr, cudaFreeHost}; - } else { - return pinned_buffer{[](size_t size) { - uint8_t* ptr = nullptr; - CUDF_CUDA_TRY(cudaMallocHost(&ptr, size)); - return ptr; - }(max_stream_size), - 
cudaFreeHost}; + // Allocate intermediate output stream buffer + size_t compressed_bfr_size = 0; + size_t num_compressed_blocks = 0; + + auto const max_compressed_block_size = + max_compression_output_size(compression_kind, compression_blocksize); + auto const padded_max_compressed_block_size = + util::round_up_unsafe(max_compressed_block_size, compressed_block_align); + auto const padded_block_header_size = + util::round_up_unsafe(block_header_size, compressed_block_align); + + auto stream_output = [&]() { + size_t max_stream_size = 0; + bool all_device_write = true; + + for (auto& ss : strm_descs.host_view().flat_view()) { + if (!out_sink.is_device_write_preferred(ss.stream_size)) { all_device_write = false; } + size_t stream_size = ss.stream_size; + if (compression_kind != NONE) { + ss.first_block = num_compressed_blocks; + ss.bfr_offset = compressed_bfr_size; + + auto num_blocks = + std::max((stream_size + compression_blocksize - 1) / compression_blocksize, 1); + stream_size += num_blocks * block_header_size; + num_compressed_blocks += num_blocks; + compressed_bfr_size += + (padded_block_header_size + padded_max_compressed_block_size) * num_blocks; } - }(); + max_stream_size = std::max(max_stream_size, stream_size); + } - // Compress the data streams - rmm::device_buffer compressed_data(compressed_bfr_size, stream); - hostdevice_vector comp_results(num_compressed_blocks, stream); - thrust::fill(rmm::exec_policy(stream), - comp_results.d_begin(), - comp_results.d_end(), - compression_result{0, compression_status::FAILURE}); - if (compression_kind_ != NONE) { - strm_descs.host_to_device(stream); - gpu::CompressOrcDataStreams(static_cast(compressed_data.data()), - num_compressed_blocks, - compression_kind_, - compression_blocksize_, - max_compressed_block_size, - compressed_block_align, - strm_descs, - enc_data.streams, - comp_results, - stream); - - // deallocate encoded data as it is not needed anymore - enc_data.data = rmm::device_uvector{0, stream}; - - 
strm_descs.device_to_host(stream); - comp_results.device_to_host(stream, true); + if (all_device_write) { + return pinned_buffer{nullptr, cudaFreeHost}; + } else { + return pinned_buffer{[](size_t size) { + uint8_t* ptr = nullptr; + CUDF_CUDA_TRY(cudaMallocHost(&ptr, size)); + return ptr; + }(max_stream_size), + cudaFreeHost}; } + }(); + + // Compress the data streams + rmm::device_buffer compressed_data(compressed_bfr_size, stream); + hostdevice_vector comp_results(num_compressed_blocks, stream); + thrust::fill(rmm::exec_policy(stream), + comp_results.d_begin(), + comp_results.d_end(), + compression_result{0, compression_status::FAILURE}); + if (compression_kind != NONE) { + strm_descs.host_to_device(stream); + gpu::CompressOrcDataStreams(static_cast(compressed_data.data()), + num_compressed_blocks, + compression_kind, + compression_blocksize, + max_compressed_block_size, + compressed_block_align, + strm_descs, + enc_data.streams, + comp_results, + stream); + + // deallocate encoded data as it is not needed anymore + enc_data.data = rmm::device_uvector{0, stream}; + + strm_descs.device_to_host(stream); + comp_results.device_to_host(stream, true); + } + + auto intermediate_stats = gather_statistic_blobs(stats_freq, orc_table, segmentation, stream); + + return {std::move(streams), + std::move(comp_results), + std::move(strm_descs), + std::move(enc_data), + std::move(segmentation), + std::move(stripe_dict), + std::move(stripes), + std::move(orc_table), + std::move(compressed_data), + std::move(intermediate_stats), + std::move(stream_output)}; +} + +} // namespace + +writer::impl::impl(std::unique_ptr sink, + orc_writer_options const& options, + SingleWriteMode mode, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + : _mr(mr), + stream(stream), + max_stripe_size{options.get_stripe_size_bytes(), options.get_stripe_size_rows()}, + row_index_stride{options.get_row_index_stride()}, + compression_kind_(to_orc_compression(options.get_compression())), 
+ compression_blocksize_(compression_block_size(compression_kind_)), + stats_freq_(options.get_statistics_freq()), + single_write_mode(mode == SingleWriteMode::YES), + kv_meta(options.get_key_value_metadata()), + out_sink_(std::move(sink)) +{ + if (options.get_metadata()) { + table_meta = std::make_unique(*options.get_metadata()); + } + init_state(); +} + +writer::impl::impl(std::unique_ptr sink, + chunked_orc_writer_options const& options, + SingleWriteMode mode, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + : _mr(mr), + stream(stream), + max_stripe_size{options.get_stripe_size_bytes(), options.get_stripe_size_rows()}, + row_index_stride{options.get_row_index_stride()}, + compression_kind_(to_orc_compression(options.get_compression())), + compression_blocksize_(compression_block_size(compression_kind_)), + stats_freq_(options.get_statistics_freq()), + single_write_mode(mode == SingleWriteMode::YES), + kv_meta(options.get_key_value_metadata()), + out_sink_(std::move(sink)) +{ + if (options.get_metadata()) { + table_meta = std::make_unique(*options.get_metadata()); + } + init_state(); +} + +writer::impl::~impl() { close(); } - ProtobufWriter pbw_(&buffer_); +void writer::impl::init_state() +{ + // Write file header + out_sink_->host_write(MAGIC, std::strlen(MAGIC)); +} - auto intermediate_stats = gather_statistic_blobs(stats_freq_, orc_table, segmentation); +void writer::impl::write(table_view const& input) +{ + CUDF_EXPECTS(not closed, "Data has already been flushed to out and closed"); - if (intermediate_stats.stripe_stat_chunks.size() > 0) { - persisted_stripe_statistics.persist( - orc_table.num_rows(), single_write_mode, intermediate_stats, stream); + if (not table_meta) { table_meta = make_table_meta(input); } + + // All kinds of memory allocation and data compressions/encoding are performed here. + // If any error occurs, such as out-of-memory exception, the internal state of the current writer + // is still intact. 
+ // Note that `out_sink_` is intentionally passed by const reference to prevent accidentally + // writing anything to it. + [[maybe_unused]] auto [streams, + comp_results, + strm_descs, + enc_data, + segmentation, + stripe_dict, /* unused, but its data will be accessed via pointer later */ + stripes, + orc_table, + compressed_data, + intermediate_stats, + stream_output] = [&] { + try { + return convert_table_to_orc_data(input, + *table_meta, + max_stripe_size, + row_index_stride, + enable_dictionary_, + compression_kind_, + compression_blocksize_, + stats_freq_, + single_write_mode, + *out_sink_, + stream); + } catch (...) { // catch any exception type + CUDF_LOG_ERROR( + "ORC writer encountered exception during processing. " + "No data has been written to the sink."); + throw; // this throws the same exception } + }(); - // Write stripes - std::vector> write_tasks; - for (size_t stripe_id = 0; stripe_id < stripes.size(); ++stripe_id) { - auto& stripe = stripes[stripe_id]; - - stripe.offset = out_sink_->bytes_written(); - - // Column (skippable) index streams appear at the start of the stripe - for (size_type stream_id = 0; stream_id < num_index_streams; ++stream_id) { - write_index_stream(stripe_id, - stream_id, - orc_table.columns, - segmentation, - enc_data.streams, - strm_descs, - comp_results, - intermediate_stats.rowgroup_blobs, - &stripe, - &streams, - &pbw_); - } + // Compression/encoding were all successful. Now write the intermediate results. + write_orc_data_to_sink(streams, + comp_results, + strm_descs, + enc_data, + segmentation, + stripes, + orc_table, + compressed_data, + intermediate_stats, + stream_output.get()); + + // Update data into the footer. This needs to be called even when num_rows==0. 
+ add_table_to_footer_data(orc_table, stripes); +} - // Column data consisting one or more separate streams - for (auto const& strm_desc : strm_descs[stripe_id]) { - write_tasks.push_back(write_data_stream( - strm_desc, - enc_data.streams[strm_desc.column_id][segmentation.stripes[stripe_id].first], - static_cast(compressed_data.data()), - stream_output.get(), - &stripe, - &streams)); - } +void writer::impl::write_orc_data_to_sink(orc_streams& streams, + hostdevice_vector const& comp_results, + hostdevice_2dvector const& strm_descs, + encoded_data const& enc_data, + file_segmentation const& segmentation, + std::vector& stripes, + orc_table_view const& orc_table, + rmm::device_buffer const& compressed_data, + intermediate_statistics& intermediate_stats, + uint8_t* stream_output) +{ + if (orc_table.num_rows() == 0) { return; } - // Write stripefooter consisting of stream information - StripeFooter sf; - sf.streams = streams; - sf.columns.resize(orc_table.num_columns() + 1); - sf.columns[0].kind = DIRECT; - for (size_t i = 1; i < sf.columns.size(); ++i) { - sf.columns[i].kind = orc_table.column(i - 1).orc_encoding(); - sf.columns[i].dictionarySize = - (sf.columns[i].kind == DICTIONARY_V2) - ? orc_table.column(i - 1).host_stripe_dict(stripe_id)->num_strings - : 0; - if (orc_table.column(i - 1).orc_kind() == TIMESTAMP) { sf.writerTimezone = "UTC"; } - } - buffer_.resize((compression_kind_ != NONE) ? 
3 : 0); - pbw_.write(sf); - stripe.footerLength = buffer_.size(); - if (compression_kind_ != NONE) { - uint32_t uncomp_sf_len = (stripe.footerLength - 3) * 2 + 1; - buffer_[0] = static_cast(uncomp_sf_len >> 0); - buffer_[1] = static_cast(uncomp_sf_len >> 8); - buffer_[2] = static_cast(uncomp_sf_len >> 16); - } - out_sink_->host_write(buffer_.data(), buffer_.size()); + if (intermediate_stats.stripe_stat_chunks.size() > 0) { + persisted_stripe_statistics.persist( + orc_table.num_rows(), single_write_mode, intermediate_stats, stream); + } + + // Write stripes + std::vector> write_tasks; + for (size_t stripe_id = 0; stripe_id < stripes.size(); ++stripe_id) { + auto& stripe = stripes[stripe_id]; + + stripe.offset = out_sink_->bytes_written(); + + // Column (skippable) index streams appear at the start of the stripe + size_type const num_index_streams = (orc_table.num_columns() + 1); + for (size_type stream_id = 0; stream_id < num_index_streams; ++stream_id) { + write_index_stream(stripe_id, + stream_id, + orc_table.columns, + segmentation, + enc_data.streams, + strm_descs, + comp_results, + intermediate_stats.rowgroup_blobs, + &stripe, + &streams, + compression_kind_, + compression_blocksize_, + out_sink_); } - for (auto const& task : write_tasks) { - task.wait(); + + // Column data consisting one or more separate streams + for (auto const& strm_desc : strm_descs[stripe_id]) { + write_tasks.push_back(write_data_stream( + strm_desc, + enc_data.streams[strm_desc.column_id][segmentation.stripes[stripe_id].first], + static_cast(compressed_data.data()), + stream_output, + &stripe, + &streams, + compression_kind_, + out_sink_, + stream)); } + + // Write stripefooter consisting of stream information + StripeFooter sf; + sf.streams = streams; + sf.columns.resize(orc_table.num_columns() + 1); + sf.columns[0].kind = DIRECT; + for (size_t i = 1; i < sf.columns.size(); ++i) { + sf.columns[i].kind = orc_table.column(i - 1).orc_encoding(); + sf.columns[i].dictionarySize = + 
(sf.columns[i].kind == DICTIONARY_V2) + ? orc_table.column(i - 1).host_stripe_dict(stripe_id)->num_strings + : 0; + if (orc_table.column(i - 1).orc_kind() == TIMESTAMP) { sf.writerTimezone = "UTC"; } + } + ProtobufWriter pbw((compression_kind_ != NONE) ? 3 : 0); + pbw.write(sf); + stripe.footerLength = pbw.size(); + if (compression_kind_ != NONE) { + uint32_t uncomp_sf_len = (stripe.footerLength - 3) * 2 + 1; + pbw.buffer()[0] = static_cast(uncomp_sf_len >> 0); + pbw.buffer()[1] = static_cast(uncomp_sf_len >> 8); + pbw.buffer()[2] = static_cast(uncomp_sf_len >> 16); + } + out_sink_->host_write(pbw.data(), pbw.size()); + } + for (auto const& task : write_tasks) { + task.wait(); } +} + +void writer::impl::add_table_to_footer_data(orc_table_view const& orc_table, + std::vector& stripes) +{ if (ff.headerLength == 0) { // First call ff.headerLength = std::strlen(MAGIC); @@ -2369,26 +2637,26 @@ void writer::impl::write(table_view const& table) ff.stripes.insert(ff.stripes.end(), std::make_move_iterator(stripes.begin()), std::make_move_iterator(stripes.end())); - ff.numberOfRows += num_rows; + ff.numberOfRows += orc_table.num_rows(); } void writer::impl::close() { if (closed) { return; } closed = true; - ProtobufWriter pbw_(&buffer_); PostScript ps; - auto const statistics = finish_statistic_blobs(ff.stripes.size(), persisted_stripe_statistics); + auto const statistics = + finish_statistic_blobs(ff.stripes.size(), persisted_stripe_statistics, stream); // File-level statistics if (not statistics.file_level.empty()) { - buffer_.resize(0); - pbw_.put_uint(encode_field_number(1)); - pbw_.put_uint(persisted_stripe_statistics.num_rows); + ProtobufWriter pbw; + pbw.put_uint(encode_field_number(1)); + pbw.put_uint(persisted_stripe_statistics.num_rows); // First entry contains total number of rows ff.statistics.reserve(ff.types.size()); - ff.statistics.emplace_back(std::move(buffer_)); + ff.statistics.emplace_back(pbw.release()); // Add file stats, stored after stripe stats in 
`column_stats` ff.statistics.insert(ff.statistics.end(), std::make_move_iterator(statistics.file_level.begin()), @@ -2400,10 +2668,10 @@ void writer::impl::close() md.stripeStats.resize(ff.stripes.size()); for (size_t stripe_id = 0; stripe_id < ff.stripes.size(); stripe_id++) { md.stripeStats[stripe_id].colStats.resize(ff.types.size()); - buffer_.resize(0); - pbw_.put_uint(encode_field_number(1)); - pbw_.put_uint(ff.stripes[stripe_id].numberOfRows); - md.stripeStats[stripe_id].colStats[0] = std::move(buffer_); + ProtobufWriter pbw; + pbw.put_uint(encode_field_number(1)); + pbw.put_uint(ff.stripes[stripe_id].numberOfRows); + md.stripeStats[stripe_id].colStats[0] = pbw.release(); for (size_t col_idx = 0; col_idx < ff.types.size() - 1; col_idx++) { size_t idx = ff.stripes.size() * col_idx + stripe_id; md.stripeStats[stripe_id].colStats[1 + col_idx] = std::move(statistics.stripe_level[idx]); @@ -2421,27 +2689,28 @@ void writer::impl::close() // Write statistics metadata if (md.stripeStats.size() != 0) { - buffer_.resize((compression_kind_ != NONE) ? 3 : 0); - pbw_.write(md); - add_uncompressed_block_headers(buffer_); - ps.metadataLength = buffer_.size(); - out_sink_->host_write(buffer_.data(), buffer_.size()); + ProtobufWriter pbw((compression_kind_ != NONE) ? 3 : 0); + pbw.write(md); + add_uncompressed_block_headers(compression_kind_, compression_blocksize_, pbw.buffer()); + ps.metadataLength = pbw.size(); + out_sink_->host_write(pbw.data(), pbw.size()); } else { ps.metadataLength = 0; } - buffer_.resize((compression_kind_ != NONE) ? 3 : 0); - pbw_.write(ff); - add_uncompressed_block_headers(buffer_); + ProtobufWriter pbw((compression_kind_ != NONE) ? 
3 : 0); + pbw.write(ff); + add_uncompressed_block_headers(compression_kind_, compression_blocksize_, pbw.buffer()); // Write postscript metadata - ps.footerLength = buffer_.size(); + ps.footerLength = pbw.size(); ps.compression = compression_kind_; ps.compressionBlockSize = compression_blocksize_; ps.version = {0, 12}; ps.magic = MAGIC; - const auto ps_length = static_cast(pbw_.write(ps)); - buffer_.push_back(ps_length); - out_sink_->host_write(buffer_.data(), buffer_.size()); + + const auto ps_length = static_cast(pbw.write(ps)); + pbw.put_byte(ps_length); + out_sink_->host_write(pbw.data(), pbw.size()); out_sink_->flush(); } diff --git a/cpp/src/io/orc/writer_impl.hpp b/cpp/src/io/orc/writer_impl.hpp index dc8aad33af0..27d74e45b46 100644 --- a/cpp/src/io/orc/writer_impl.hpp +++ b/cpp/src/io/orc/writer_impl.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -176,6 +176,72 @@ struct stripe_size_limits { size_type rows; }; +/** + * @brief Statistics data stored between calls to write for chunked writes + * + */ +struct intermediate_statistics { + explicit intermediate_statistics(rmm::cuda_stream_view stream) : stripe_stat_chunks(0, stream) {} + + intermediate_statistics(std::vector rb, + rmm::device_uvector sc, + hostdevice_vector smg, + std::vector sdt, + std::vector sct) + : rowgroup_blobs(std::move(rb)), + stripe_stat_chunks(std::move(sc)), + stripe_stat_merge(std::move(smg)), + stats_dtypes(std::move(sdt)), + col_types(std::move(sct)) + { + } + + // blobs for the rowgroups. Not persisted + std::vector rowgroup_blobs; + + rmm::device_uvector stripe_stat_chunks; + hostdevice_vector stripe_stat_merge; + std::vector stats_dtypes; + std::vector col_types; +}; + +/** + * @brief used for chunked writes to persist data between calls to write. 
+ * + */ +struct persisted_statistics { + void clear() + { + stripe_stat_chunks.clear(); + stripe_stat_merge.clear(); + string_pools.clear(); + stats_dtypes.clear(); + col_types.clear(); + num_rows = 0; + } + + void persist(int num_table_rows, + bool single_write_mode, + intermediate_statistics& intermediate_stats, + rmm::cuda_stream_view stream); + + std::vector> stripe_stat_chunks; + std::vector> stripe_stat_merge; + std::vector> string_pools; + std::vector stats_dtypes; + std::vector col_types; + int num_rows = 0; +}; + +/** + * @brief Protobuf encoded statistics created at file close + * + */ +struct encoded_footer_statistics { + std::vector stripe_level; + std::vector file_level; +}; + /** * @brief Implementation for ORC writer */ @@ -227,7 +293,7 @@ class writer::impl { /** * @brief Writes a single subtable as part of a larger ORC file/table write. * - * @param[in] table The table information to be written + * @param table The table information to be written */ void write(table_view const& table); @@ -238,188 +304,41 @@ class writer::impl { private: /** - * @brief Builds up per-stripe dictionaries for string columns. - * - * @param orc_table Non-owning view of a cuDF table w/ ORC-related info - * @param stripe_bounds List of stripe boundaries - * @param dict List of dictionary chunks [rowgroup][column] - * @param dict_index List of dictionary indices - * @param dictionary_enabled Whether dictionary encoding is enabled for a given column - * @param stripe_dict List of stripe dictionaries - */ - void build_dictionaries(orc_table_view& orc_table, - host_span stripe_bounds, - hostdevice_2dvector const& dict, - host_span> dict_index, - host_span dictionary_enabled, - hostdevice_2dvector& stripe_dict); - - /** - * @brief Builds up per-column streams. 
- * - * @param[in,out] columns List of columns - * @param[in] segmentation stripe and rowgroup ranges - * @param[in] decimal_column_sizes Sizes of encoded decimal columns - * @return List of stream descriptors - */ - orc_streams create_streams(host_span columns, - file_segmentation const& segmentation, - std::map const& decimal_column_sizes); - - /** - * @brief Returns stripe information after compacting columns' individual data - * chunks into contiguous data streams. - * - * @param[in] num_index_streams Total number of index streams - * @param[in] segmentation stripe and rowgroup ranges - * @param[in,out] enc_streams List of encoder chunk streams [column][rowgroup] - * @param[in,out] strm_desc List of stream descriptors [stripe][data_stream] + * @brief Write the intermediate ORC data into the data sink. * - * @return The stripes' information - */ - std::vector gather_stripes( - size_t num_index_streams, - file_segmentation const& segmentation, - hostdevice_2dvector* enc_streams, - hostdevice_2dvector* strm_desc); - - /** - * @brief Statistics data stored between calls to write for chunked writes - * - */ - struct intermediate_statistics { - explicit intermediate_statistics(rmm::cuda_stream_view stream) - : stripe_stat_chunks(0, stream){}; - intermediate_statistics(std::vector rb, - rmm::device_uvector sc, - hostdevice_vector smg, - std::vector sdt, - std::vector sct) - : rowgroup_blobs(std::move(rb)), - stripe_stat_chunks(std::move(sc)), - stripe_stat_merge(std::move(smg)), - stats_dtypes(std::move(sdt)), - col_types(std::move(sct)){}; - - // blobs for the rowgroups. Not persisted - std::vector rowgroup_blobs; - - rmm::device_uvector stripe_stat_chunks; - hostdevice_vector stripe_stat_merge; - std::vector stats_dtypes; - std::vector col_types; - }; - - /** - * @brief used for chunked writes to persist data between calls to write. 
- * - */ - struct persisted_statistics { - void clear() - { - stripe_stat_chunks.clear(); - stripe_stat_merge.clear(); - string_pools.clear(); - stats_dtypes.clear(); - col_types.clear(); - num_rows = 0; - } - - void persist(int num_table_rows, - bool single_write_mode, - intermediate_statistics& intermediate_stats, - rmm::cuda_stream_view stream); - - std::vector> stripe_stat_chunks; - std::vector> stripe_stat_merge; - std::vector> string_pools; - std::vector stats_dtypes; - std::vector col_types; - int num_rows = 0; - }; - - /** - * @brief Protobuf encoded statistics created at file close - * - */ - struct encoded_footer_statistics { - std::vector stripe_level; - std::vector file_level; - }; - - /** - * @brief Returns column statistics in an intermediate format. - * - * @param statistics_freq Frequency of statistics to be included in the output file - * @param orc_table Table information to be written - * @param segmentation stripe and rowgroup ranges - * @return The statistic information - */ - intermediate_statistics gather_statistic_blobs(statistics_freq const statistics_freq, - orc_table_view const& orc_table, - file_segmentation const& segmentation); - - /** - * @brief Returns column statistics encoded in ORC protobuf format stored in the footer. - * - * @param num_stripes number of stripes in the data - * @param incoming_stats intermediate statistics returned from `gather_statistic_blobs` - * @return The encoded statistic blobs - */ - encoded_footer_statistics finish_statistic_blobs( - int num_stripes, writer::impl::persisted_statistics& incoming_stats); - - /** - * @brief Writes the specified column's row index stream. 
- * - * @param[in] stripe_id Stripe's identifier - * @param[in] stream_id Stream identifier (column id + 1) - * @param[in] columns List of columns - * @param[in] segmentation stripe and rowgroup ranges - * @param[in] enc_streams List of encoder chunk streams [column][rowgroup] - * @param[in] strm_desc List of stream descriptors - * @param[in] comp_out Output status for compressed streams - * @param[in] rg_stats row group level statistics - * @param[in,out] stripe Stream's parent stripe - * @param[in,out] streams List of all streams - * @param[in,out] pbw Protobuf writer - */ - void write_index_stream(int32_t stripe_id, - int32_t stream_id, - host_span columns, - file_segmentation const& segmentation, - host_2dspan enc_streams, - host_2dspan strm_desc, - host_span comp_out, - std::vector const& rg_stats, - StripeInformation* stripe, - orc_streams* streams, - ProtobufWriter* pbw); - - /** - * @brief Write the specified column's data streams + * The intermediate data is generated from processing (compressing/encoding) an cuDF input table + * by `process_for_write` called in the `write()` function. 
* - * @param[in] strm_desc Stream's descriptor - * @param[in] enc_stream Chunk's streams - * @param[in] compressed_data Compressed stream data - * @param[in,out] stream_out Temporary host output buffer - * @param[in,out] stripe Stream's parent stripe - * @param[in,out] streams List of all streams - * @return An std::future that should be synchronized to ensure the writing is complete + * @param streams List of stream descriptors + * @param comp_results Status of data compression + * @param strm_descs List of stream descriptors + * @param enc_data ORC per-chunk streams of encoded data + * @param segmentation Description of how the ORC file is segmented into stripes and rowgroups + * @param stripes List of stripe description + * @param orc_table Non-owning view of a cuDF table that includes ORC-related information + * @param compressed_data Compressed stream data + * @param intermediate_stats Statistics data stored between calls to write + * @param stream_output Temporary host output buffer */ - std::future write_data_stream(gpu::StripeStream const& strm_desc, - gpu::encoder_chunk_streams const& enc_stream, - uint8_t const* compressed_data, - uint8_t* stream_out, - StripeInformation* stripe, - orc_streams* streams); + void write_orc_data_to_sink(orc_streams& streams, + hostdevice_vector const& comp_results, + hostdevice_2dvector const& strm_descs, + encoded_data const& enc_data, + file_segmentation const& segmentation, + std::vector& stripes, + orc_table_view const& orc_table, + rmm::device_buffer const& compressed_data, + intermediate_statistics& intermediate_stats, + uint8_t* stream_output); /** - * @brief Insert 3-byte uncompressed block headers in a byte vector + * @brief Add the processed table data into the internal file footer. 
* - * @param byte_vector Raw data (must include initial 3-byte header) + * @param orc_table Non-owning view of a cuDF table that includes ORC-related information + * @param stripes List of stripe description */ - void add_uncompressed_block_headers(std::vector& byte_vector); + void add_table_to_footer_data(orc_table_view const& orc_table, + std::vector& stripes); private: rmm::mr::device_memory_resource* _mr = nullptr; @@ -451,7 +370,6 @@ class writer::impl { // statistics data saved between calls to write before a close writes out the statistics persisted_statistics persisted_stripe_statistics; - std::vector buffer_; std::unique_ptr out_sink_; }; diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu index 5a12acec2a3..e48696fcb9b 100644 --- a/cpp/src/io/parquet/page_enc.cu +++ b/cpp/src/io/parquet/page_enc.cu @@ -282,6 +282,7 @@ __global__ void __launch_bounds__(128) g.col = ck_g->col_desc; g.start_row = fragments[frag_id].start_value_idx; g.num_rows = fragments[frag_id].num_leaf_values; + g.non_leaf_nulls = fragments[frag_id].num_values - g.num_rows; groups[frag_id] = g; } } diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index 5f09ec33811..363192dda2e 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -468,10 +468,12 @@ void decode_page_headers(hostdevice_vector& chunks, host_span const> comp_in_view{comp_in.data() + start_pos, codec.num_pages}; - auto const d_comp_in = cudf::detail::make_device_uvector_async(comp_in_view, stream); + auto const d_comp_in = cudf::detail::make_device_uvector_async( + comp_in_view, stream, rmm::mr::get_current_device_resource()); host_span const> comp_out_view(comp_out.data() + start_pos, codec.num_pages); - auto const d_comp_out = cudf::detail::make_device_uvector_async(comp_out_view, stream); + auto const d_comp_out = cudf::detail::make_device_uvector_async( + comp_out_view, stream, 
rmm::mr::get_current_device_resource()); device_span d_comp_res_view(comp_res.data() + start_pos, codec.num_pages); switch (codec.compression_type) { @@ -523,8 +525,10 @@ void decode_page_headers(hostdevice_vector& chunks, // now copy the uncompressed V2 def and rep level data if (not copy_in.empty()) { - auto const d_copy_in = cudf::detail::make_device_uvector_async(copy_in, stream); - auto const d_copy_out = cudf::detail::make_device_uvector_async(copy_out, stream); + auto const d_copy_in = cudf::detail::make_device_uvector_async( + copy_in, stream, rmm::mr::get_current_device_resource()); + auto const d_copy_out = cudf::detail::make_device_uvector_async( + copy_out, stream, rmm::mr::get_current_device_resource()); gpu_copy_uncompressed_blocks(d_copy_in, d_copy_out, stream); stream.synchronize(); @@ -1531,8 +1535,8 @@ void reader::impl::preprocess_pages(size_t skip_rows, // Build index for string dictionaries since they can't be indexed // directly due to variable-sized elements _chunk_itm_data.str_dict_index = - cudf::detail::make_zeroed_device_uvector_async(total_str_dict_indexes, - _stream); + cudf::detail::make_zeroed_device_uvector_async( + total_str_dict_indexes, _stream, rmm::mr::get_current_device_resource()); // Update chunks with pointers to string dict indices for (size_t c = 0, page_count = 0, str_ofs = 0; c < chunks.size(); c++) { diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 2c9bff33a14..e6e14908f36 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -509,20 +509,15 @@ inline bool is_col_nullable(cudf::detail::LinkedColPtr const& col, column_in_metadata const& col_meta, bool single_write_mode) { - if (single_write_mode) { - return col->nullable(); - } else { - if (col_meta.is_nullability_defined()) { - CUDF_EXPECTS(col_meta.nullable() || !col->nullable(), - "Mismatch in metadata prescribed nullability and input column nullability. 
" - "Metadata for nullable input column cannot prescribe nullability = false"); - return col_meta.nullable(); - } else { - // For chunked write, when not provided nullability, we assume the worst case scenario - // that all columns are nullable. - return true; - } - } + if (col_meta.is_nullability_defined()) { + CUDF_EXPECTS(col_meta.nullable() || !col->nullable(), + "Mismatch in metadata prescribed nullability and input column nullability. " + "Metadata for nullable input column cannot prescribe nullability = false"); + return col_meta.nullable(); + } + // For chunked write, when not provided nullability, we assume the worst case scenario + // that all columns are nullable. + return not single_write_mode or col->nullable(); } /** @@ -858,7 +853,8 @@ parquet_column_view::parquet_column_view(schema_tree_node const& schema_node, _nullability = std::vector(r_nullability.crbegin(), r_nullability.crend()); // TODO(cp): Explore doing this for all columns in a single go outside this ctor. Maybe using // hostdevice_vector. Currently this involves a cudaMemcpyAsync for each column. 
- _d_nullability = cudf::detail::make_device_uvector_async(_nullability, stream); + _d_nullability = cudf::detail::make_device_uvector_async( + _nullability, stream, rmm::mr::get_current_device_resource()); _is_list = (_max_rep_level > 0); @@ -933,7 +929,8 @@ void writer::impl::init_row_group_fragments( device_span part_frag_offset, uint32_t fragment_size) { - auto d_partitions = cudf::detail::make_device_uvector_async(partitions, stream); + auto d_partitions = cudf::detail::make_device_uvector_async( + partitions, stream, rmm::mr::get_current_device_resource()); gpu::InitRowGroupFragments(frag, col_desc, d_partitions, part_frag_offset, fragment_size, stream); frag.device_to_host(stream, true); } @@ -941,7 +938,8 @@ void writer::impl::init_row_group_fragments( void writer::impl::calculate_page_fragments(device_span frag, host_span frag_sizes) { - auto d_frag_sz = cudf::detail::make_device_uvector_async(frag_sizes, stream); + auto d_frag_sz = cudf::detail::make_device_uvector_async( + frag_sizes, stream, rmm::mr::get_current_device_resource()); gpu::CalculatePageFragments(frag, d_frag_sz, stream); } @@ -1512,7 +1510,8 @@ void writer::impl::write(table_view const& table, std::vector co num_frag_in_part.begin(), num_frag_in_part.end(), std::back_inserter(part_frag_offset), 0); part_frag_offset.push_back(part_frag_offset.back() + num_frag_in_part.back()); - auto d_part_frag_offset = cudf::detail::make_device_uvector_async(part_frag_offset, stream); + auto d_part_frag_offset = cudf::detail::make_device_uvector_async( + part_frag_offset, stream, rmm::mr::get_current_device_resource()); cudf::detail::hostdevice_2dvector row_group_fragments( num_columns, num_fragments, stream); diff --git a/cpp/src/io/statistics/column_statistics.cuh b/cpp/src/io/statistics/column_statistics.cuh index 125235ebf2f..0b09cb63d19 100644 --- a/cpp/src/io/statistics/column_statistics.cuh +++ b/cpp/src/io/statistics/column_statistics.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA 
CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -129,7 +129,13 @@ struct calculate_group_statistics_functor { chunk = block_reduce(chunk, storage); - if (t == 0) { s.ck = get_untyped_chunk(chunk); } + if (t == 0) { + // parquet wants total null count in stats, not just count of null leaf values + if constexpr (IO == detail::io_file_format::PARQUET) { + chunk.null_count += s.group.non_leaf_nulls; + } + s.ck = get_untyped_chunk(chunk); + } } template multibyte_split(cudf::io::text::data_chunk_source // must be at least 32 when using warp-reduce on partials // must be at least 1 more than max possible concurrent tiles // best when at least 32 more than max possible concurrent tiles, due to rolling `invalid`s - auto num_tile_states = std::max(32, TILES_PER_CHUNK * concurrency + 32); - auto tile_multistates = scan_tile_state(num_tile_states, stream); - auto tile_offsets = scan_tile_state(num_tile_states, stream); + auto num_tile_states = std::max(32, TILES_PER_CHUNK * concurrency + 32); + auto tile_multistates = + scan_tile_state(num_tile_states, stream, rmm::mr::get_current_device_resource()); + auto tile_offsets = + scan_tile_state(num_tile_states, stream, rmm::mr::get_current_device_resource()); multibyte_split_init_kernel<< #include +#include + namespace cudf { namespace io { namespace detail { @@ -43,7 +45,8 @@ void column_buffer::create(size_type _size, // make_zeroed_device_uvector_async here and instead let it use the // default rmm memory resource. 
_strings = std::make_unique>( - cudf::detail::make_zeroed_device_uvector_async(size, stream)); + cudf::detail::make_zeroed_device_uvector_async( + size, stream, rmm::mr::get_current_device_resource())); break; // list columns store a buffer of int32's as offsets to represent diff --git a/cpp/src/io/utilities/data_sink.cpp b/cpp/src/io/utilities/data_sink.cpp index cba45f693f9..40b70986eca 100644 --- a/cpp/src/io/utilities/data_sink.cpp +++ b/cpp/src/io/utilities/data_sink.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -63,8 +63,8 @@ class file_sink : public data_sink { [[nodiscard]] bool is_device_write_preferred(size_t size) const override { - return !_kvikio_file.closed() || - (_cufile_out != nullptr && _cufile_out->is_cufile_io_preferred(size)); + if (size < _gds_write_preferred_threshold) { return false; } + return supports_device_write(); } std::future device_write_async(void const* gpu_data, @@ -96,6 +96,8 @@ class file_sink : public data_sink { size_t _bytes_written = 0; std::unique_ptr _cufile_out; kvikio::FileHandle _kvikio_file; + // The write size above which GDS is faster then d2h-copy + posix-write + static constexpr size_t _gds_write_preferred_threshold = 128 << 10; // 128KB }; /** diff --git a/cpp/src/io/utilities/datasource.cpp b/cpp/src/io/utilities/datasource.cpp index 71d64900398..e2cea7a56ff 100644 --- a/cpp/src/io/utilities/datasource.cpp +++ b/cpp/src/io/utilities/datasource.cpp @@ -56,8 +56,8 @@ class file_source : public datasource { [[nodiscard]] bool is_device_read_preferred(size_t size) const override { - return !_kvikio_file.closed() || - (_cufile_in != nullptr && _cufile_in->is_cufile_io_preferred(size)); + if (size < _gds_read_preferred_threshold) { return false; } + return supports_device_read(); } std::future 
device_read_async(size_t offset, @@ -98,6 +98,8 @@ class file_source : public datasource { private: std::unique_ptr _cufile_in; kvikio::FileHandle _kvikio_file; + // The read size above which GDS is faster then posix-read + h2d-copy + static constexpr size_t _gds_read_preferred_threshold = 128 << 10; // 128KB }; /** diff --git a/cpp/src/io/utilities/file_io_utilities.hpp b/cpp/src/io/utilities/file_io_utilities.hpp index 38674892966..b55dd3b1583 100644 --- a/cpp/src/io/utilities/file_io_utilities.hpp +++ b/cpp/src/io/utilities/file_io_utilities.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -49,36 +49,10 @@ class file_wrapper { [[nodiscard]] auto desc() const { return fd; } }; -/** - * @brief Base class for cuFile input/output. - * - * Contains the common API for cuFile input and output classes. - */ -class cufile_io_base { - public: - /** - * @brief Returns an estimate of whether the cuFile operation is the optimal option. - * - * @param size Read/write operation size, in bytes. - * @return Whether a cuFile operation with the given size is expected to be faster than a host - * read + H2D copy - */ - static bool is_cufile_io_preferred(size_t size) { return size > op_size_threshold; } - - protected: - /** - * @brief The read/write size above which cuFile is faster then host read + copy - * - * This may not be the optimal threshold for all systems. Derived `is_cufile_io_preferred` - * implementations can use a different logic. - */ - static constexpr size_t op_size_threshold = 128 << 10; -}; - /** * @brief Interface class for cufile input. */ -class cufile_input : public cufile_io_base { +class cufile_input { public: /** * @brief Asynchronously reads into existing device memory. 
@@ -101,7 +75,7 @@ class cufile_input : public cufile_io_base { /** * @brief Interface class for cufile output. */ -class cufile_output : public cufile_io_base { +class cufile_output { public: /** * @brief Asynchronously writes the data from a device buffer into a file. diff --git a/cpp/src/io/utilities/parsing_utils.cu b/cpp/src/io/utilities/parsing_utils.cu index a03789464cc..5c5cbd1c01d 100644 --- a/cpp/src/io/utilities/parsing_utils.cu +++ b/cpp/src/io/utilities/parsing_utils.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -126,7 +126,8 @@ cudf::size_type find_all_from_set(device_span data, cudaOccupancyMaxPotentialBlockSize(&min_grid_size, &block_size, count_and_set_positions)); const int grid_size = divCeil(data.size(), (size_t)block_size); - auto d_count = cudf::detail::make_zeroed_device_uvector_async(1, stream); + auto d_count = cudf::detail::make_zeroed_device_uvector_async( + 1, stream, rmm::mr::get_current_device_resource()); for (char key : keys) { count_and_set_positions<<>>( data.data(), data.size(), result_offset, key, d_count.data(), positions); @@ -143,7 +144,8 @@ cudf::size_type find_all_from_set(host_span data, rmm::cuda_stream_view stream) { rmm::device_buffer d_chunk(std::min(max_chunk_bytes, data.size()), stream); - auto d_count = cudf::detail::make_zeroed_device_uvector_async(1, stream); + auto d_count = cudf::detail::make_zeroed_device_uvector_async( + 1, stream, rmm::mr::get_current_device_resource()); int block_size = 0; // suggested thread count to use int min_grid_size = 0; // minimum block count required diff --git a/cpp/src/io/utilities/trie.cu b/cpp/src/io/utilities/trie.cu index bf03d6a6a89..e2ace7258f7 100644 --- a/cpp/src/io/utilities/trie.cu +++ b/cpp/src/io/utilities/trie.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 
NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -101,7 +101,8 @@ rmm::device_uvector create_serialized_trie(const std::vector>(0, stream, mr)); // Full joins need to return the trivial complement. case join_kind::FULL_JOIN: { - auto ret_flipped = get_trivial_left_join_indices(right, stream); + auto ret_flipped = + get_trivial_left_join_indices(right, stream, rmm::mr::get_current_device_resource()); return std::pair(std::move(ret_flipped.second), std::move(ret_flipped.first)); } default: CUDF_FAIL("Invalid join kind."); break; diff --git a/cpp/src/join/conditional_join.hpp b/cpp/src/join/conditional_join.hpp index 7c329cd8e17..9bc6024ee7e 100644 --- a/cpp/src/join/conditional_join.hpp +++ b/cpp/src/join/conditional_join.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -47,9 +47,9 @@ conditional_join(table_view const& left, table_view const& right, ast::expression const& binary_predicate, join_kind JoinKind, - std::optional output_size = {}, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + std::optional output_size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Computes the size of a join operation between two tables without @@ -63,13 +63,12 @@ conditional_join(table_view const& left, * * @return Join output indices vector pair */ -std::size_t compute_conditional_join_output_size( - table_view const& left, - table_view const& right, - ast::expression const& binary_predicate, - join_kind JoinKind, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::size_t compute_conditional_join_output_size(table_view const& left, + table_view const& right, + ast::expression const& binary_predicate, + join_kind JoinKind, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace cudf diff --git a/cpp/src/join/cross_join.cu b/cpp/src/join/cross_join.cu index 7358726d69d..07057acf37e 100644 --- a/cpp/src/join/cross_join.cu +++ b/cpp/src/join/cross_join.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -37,11 +37,10 @@ namespace detail { * * @param stream CUDA stream used for device memory operations and kernel launches */ -std::unique_ptr cross_join( - cudf::table_view const& left, - cudf::table_view const& right, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +std::unique_ptr cross_join(cudf::table_view const& left, + cudf::table_view const& right, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(0 != left.num_columns(), "Left table is empty"); CUDF_EXPECTS(0 != right.num_columns(), "Right table is empty"); diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu index 7fb35e179e9..d0bdad73614 100644 --- a/cpp/src/join/hash_join.cu +++ b/cpp/src/join/hash_join.cu @@ -284,7 +284,8 @@ hash_join::hash_join(cudf::table_view const& build, cudf::null_equality compare_nulls, rmm::cuda_stream_view stream) : _is_empty{build.num_rows() == 0}, - _composite_bitmask{cudf::detail::bitmask_and(build, stream).first}, + _composite_bitmask{ + cudf::detail::bitmask_and(build, stream, rmm::mr::get_current_device_resource()).first}, _nulls_equal{compare_nulls}, _hash_table{compute_hash_table_size(build.num_rows()), cuco::empty_key{std::numeric_limits::max()}, @@ -298,8 +299,13 @@ hash_join::hash_join(cudf::table_view const& build, "Build column size is too big for hash join"); // need to store off the owning structures for some of the views in _build - _flattened_build_table = structs::detail::flatten_nested_columns( - build, {}, {}, structs::detail::column_nullability::FORCE, stream); + _flattened_build_table = + structs::detail::flatten_nested_columns(build, + {}, + {}, + structs::detail::column_nullability::FORCE, + stream, + rmm::mr::get_current_device_resource()); _build = _flattened_build_table->flattened_columns(); if (_is_empty) { return; } @@ -356,8 +362,13 @@ std::size_t hash_join::inner_join_size(cudf::table_view const& probe, // Return directly if build 
table is empty if (_is_empty) { return 0; } - auto flattened_probe = structs::detail::flatten_nested_columns( - probe, {}, {}, structs::detail::column_nullability::FORCE, stream); + auto flattened_probe = + structs::detail::flatten_nested_columns(probe, + {}, + {}, + structs::detail::column_nullability::FORCE, + stream, + rmm::mr::get_current_device_resource()); auto const flattened_probe_table = flattened_probe->flattened_columns(); auto build_table_ptr = cudf::table_device_view::create(_build, stream); @@ -381,8 +392,13 @@ std::size_t hash_join::left_join_size(cudf::table_view const& probe, // Trivial left join case - exit early if (_is_empty) { return probe.num_rows(); } - auto flattened_probe = structs::detail::flatten_nested_columns( - probe, {}, {}, structs::detail::column_nullability::FORCE, stream); + auto flattened_probe = + structs::detail::flatten_nested_columns(probe, + {}, + {}, + structs::detail::column_nullability::FORCE, + stream, + rmm::mr::get_current_device_resource()); auto const flattened_probe_table = flattened_probe->flattened_columns(); auto build_table_ptr = cudf::table_device_view::create(_build, stream); @@ -407,8 +423,13 @@ std::size_t hash_join::full_join_size(cudf::table_view const& probe, // Trivial left join case - exit early if (_is_empty) { return probe.num_rows(); } - auto flattened_probe = structs::detail::flatten_nested_columns( - probe, {}, {}, structs::detail::column_nullability::FORCE, stream); + auto flattened_probe = + structs::detail::flatten_nested_columns(probe, + {}, + {}, + structs::detail::column_nullability::FORCE, + stream, + rmm::mr::get_current_device_resource()); auto const flattened_probe_table = flattened_probe->flattened_columns(); auto build_table_ptr = cudf::table_device_view::create(_build, stream); @@ -474,8 +495,13 @@ hash_join::compute_hash_join(cudf::table_view const& probe, CUDF_EXPECTS(probe.num_rows() < cudf::detail::MAX_JOIN_SIZE, "Probe column size is too big for hash join"); - auto flattened_probe 
= structs::detail::flatten_nested_columns( - probe, {}, {}, structs::detail::column_nullability::FORCE, stream); + auto flattened_probe = + structs::detail::flatten_nested_columns(probe, + {}, + {}, + structs::detail::column_nullability::FORCE, + stream, + rmm::mr::get_current_device_resource()); auto const flattened_probe_table = flattened_probe->flattened_columns(); CUDF_EXPECTS(_build.num_columns() == flattened_probe_table.num_columns(), diff --git a/cpp/src/join/join_common_utils.cuh b/cpp/src/join/join_common_utils.cuh index 44cddd2720e..bc4c62291b2 100644 --- a/cpp/src/join/join_common_utils.cuh +++ b/cpp/src/join/join_common_utils.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -140,10 +140,9 @@ class pair_equality { */ std::pair>, std::unique_ptr>> -get_trivial_left_join_indices( - table_view const& left, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +get_trivial_left_join_indices(table_view const& left, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Builds the hash table based on the given `build_table`. diff --git a/cpp/src/join/mixed_join.cu b/cpp/src/join/mixed_join.cu index 46e337a3363..8d66cba8f8d 100644 --- a/cpp/src/join/mixed_join.cu +++ b/cpp/src/join/mixed_join.cu @@ -80,7 +80,9 @@ mixed_join( // Left and full joins all return all the row indices from // left with a corresponding NULL from the right. case join_kind::LEFT_JOIN: - case join_kind::FULL_JOIN: return get_trivial_left_join_indices(left_conditional, stream); + case join_kind::FULL_JOIN: + return get_trivial_left_join_indices( + left_conditional, stream, rmm::mr::get_current_device_resource()); // Inner joins return empty output because no matches can exist. 
case join_kind::INNER_JOIN: return std::pair(std::make_unique>(0, stream, mr), @@ -96,7 +98,8 @@ mixed_join( std::make_unique>(0, stream, mr)); // Full joins need to return the trivial complement. case join_kind::FULL_JOIN: { - auto ret_flipped = get_trivial_left_join_indices(right_conditional, stream); + auto ret_flipped = get_trivial_left_join_indices( + right_conditional, stream, rmm::mr::get_current_device_resource()); return std::pair(std::move(ret_flipped.second), std::move(ret_flipped.first)); } default: CUDF_FAIL("Invalid join kind."); break; @@ -136,7 +139,8 @@ mixed_join( // TODO: To add support for nested columns we will need to flatten in many // places. However, this probably isn't worth adding any time soon since we // won't be able to support AST conditions for those types anyway. - auto const row_bitmask = cudf::detail::bitmask_and(build, stream).first; + auto const row_bitmask = + cudf::detail::bitmask_and(build, stream, rmm::mr::get_current_device_resource()).first; build_join_hash_table( build, hash_table, compare_nulls, static_cast(row_bitmask.data()), stream); auto hash_table_view = hash_table.get_device_view(); @@ -384,7 +388,8 @@ compute_mixed_join_output_size(table_view const& left_equality, // TODO: To add support for nested columns we will need to flatten in many // places. However, this probably isn't worth adding any time soon since we // won't be able to support AST conditions for those types anyway. 
- auto const row_bitmask = cudf::detail::bitmask_and(build, stream).first; + auto const row_bitmask = + cudf::detail::bitmask_and(build, stream, rmm::mr::get_current_device_resource()).first; build_join_hash_table( build, hash_table, compare_nulls, static_cast(row_bitmask.data()), stream); auto hash_table_view = hash_table.get_device_view(); diff --git a/cpp/src/join/mixed_join_semi.cu b/cpp/src/join/mixed_join_semi.cu index b32df9316e2..1304c4ae3b0 100644 --- a/cpp/src/join/mixed_join_semi.cu +++ b/cpp/src/join/mixed_join_semi.cu @@ -118,7 +118,9 @@ std::unique_ptr> mixed_join_semi( // Anti and semi return all the row indices from left // with a corresponding NULL from the right. case join_kind::LEFT_ANTI_JOIN: - return get_trivial_left_join_indices(left_conditional, stream).first; + return get_trivial_left_join_indices( + left_conditional, stream, rmm::mr::get_current_device_resource()) + .first; // Inner and left semi joins return empty output because no matches can exist. case join_kind::LEFT_SEMI_JOIN: return std::make_unique>(0, stream, mr); @@ -193,7 +195,8 @@ std::unique_ptr> mixed_join_semi( hash_table.insert(iter, iter + right_num_rows, hash_build, equality_build, stream.value()); } else { thrust::counting_iterator stencil(0); - auto const [row_bitmask, _] = cudf::detail::bitmask_and(build, stream); + auto const [row_bitmask, _] = + cudf::detail::bitmask_and(build, stream, rmm::mr::get_current_device_resource()); row_is_valid pred{static_cast(row_bitmask.data())}; // insert valid rows @@ -431,7 +434,8 @@ compute_mixed_join_output_size_semi(table_view const& left_equality, hash_table.insert(iter, iter + right_num_rows, hash_build, equality_build, stream.value()); } else { thrust::counting_iterator stencil(0); - auto const [row_bitmask, _] = cudf::detail::bitmask_and(build, stream); + auto const [row_bitmask, _] = + cudf::detail::bitmask_and(build, stream, rmm::mr::get_current_device_resource()); row_is_valid pred{static_cast(row_bitmask.data())}; // insert 
valid rows diff --git a/cpp/src/join/semi_join.cu b/cpp/src/join/semi_join.cu index cc523b2ac7f..dcb6835ec09 100644 --- a/cpp/src/join/semi_join.cu +++ b/cpp/src/join/semi_join.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -45,7 +45,7 @@ std::unique_ptr> left_semi_anti_join( cudf::table_view const& right_keys, null_equality compare_nulls, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(0 != left_keys.num_columns(), "Left table is empty"); CUDF_EXPECTS(0 != right_keys.num_columns(), "Right table is empty"); @@ -64,8 +64,12 @@ std::unique_ptr> left_semi_anti_join( // Previously, the gather map was generated directly without this array but by calling to // `map.contains` inside the `thrust::copy_if` kernel. However, that led to increasing register // usage and reducing performance, as reported here: https://github.com/rapidsai/cudf/pull/10511. - auto const flagged = - cudf::detail::contains(right_keys, left_keys, compare_nulls, nan_equality::ALL_EQUAL, stream); + auto const flagged = cudf::detail::contains(right_keys, + left_keys, + compare_nulls, + nan_equality::ALL_EQUAL, + stream, + rmm::mr::get_current_device_resource()); auto const left_num_rows = left_keys.num_rows(); auto gather_map = diff --git a/cpp/src/lists/combine/concatenate_rows.cu b/cpp/src/lists/combine/concatenate_rows.cu index 8b006548391..993d5e3fc78 100644 --- a/cpp/src/lists/combine/concatenate_rows.cu +++ b/cpp/src/lists/combine/concatenate_rows.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -216,7 +216,7 @@ std::unique_ptr concatenate_rows(table_view const& input, // concatenate the input table into one column. std::vector cols(input.num_columns()); std::copy(input.begin(), input.end(), cols.begin()); - auto concat = cudf::detail::concatenate(cols, stream); + auto concat = cudf::detail::concatenate(cols, stream, rmm::mr::get_current_device_resource()); // whether or not we should be generating a null mask at all auto const build_null_mask = concat->has_nulls(); @@ -246,7 +246,8 @@ std::unique_ptr concatenate_rows(table_view const& input, auto const row_index = i % num_rows; return row_null_counts[row_index] != num_columns; }, - stream); + stream, + rmm::mr::get_current_device_resource()); } // NULLIFY_OUTPUT_ROW. Output row is nullfied if any input row is null return cudf::detail::valid_if( @@ -257,7 +258,8 @@ std::unique_ptr concatenate_rows(table_view const& input, auto const row_index = i % num_rows; return row_null_counts[row_index] == 0; }, - stream); + stream, + rmm::mr::get_current_device_resource()); }(); concat->set_null_mask(std::move(null_mask), null_count); } diff --git a/cpp/src/lists/copying/concatenate.cu b/cpp/src/lists/copying/concatenate.cu index 22083f7ce99..8ca26c0ebfb 100644 --- a/cpp/src/lists/copying/concatenate.cu +++ b/cpp/src/lists/copying/concatenate.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -91,10 +91,9 @@ std::unique_ptr merge_offsets(host_span columns /** * @copydoc cudf::lists::detail::concatenate */ -std::unique_ptr concatenate( - host_span columns, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +std::unique_ptr concatenate(host_span columns, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { std::vector lists_columns; lists_columns.reserve(columns.size()); diff --git a/cpp/src/lists/dremel.cu b/cpp/src/lists/dremel.cu index c96a21df905..5136cc8cd37 100644 --- a/cpp/src/lists/dremel.cu +++ b/cpp/src/lists/dremel.cu @@ -266,7 +266,8 @@ dremel_data get_encoding(column_view h_col, max_vals_size += column_ends[l] - column_offsets[l]; } - auto d_nullability = cudf::detail::make_device_uvector_async(nullability, stream); + auto d_nullability = cudf::detail::make_device_uvector_async( + nullability, stream, rmm::mr::get_current_device_resource()); rmm::device_uvector rep_level(max_vals_size, stream); rmm::device_uvector def_level(max_vals_size, stream); diff --git a/cpp/src/lists/extract.cu b/cpp/src/lists/extract.cu index bff63871e29..5d4a20d1cb8 100644 --- a/cpp/src/lists/extract.cu +++ b/cpp/src/lists/extract.cu @@ -101,8 +101,10 @@ std::unique_ptr make_index_child(size_type index, */ std::unique_ptr make_index_offsets(size_type num_lists, rmm::cuda_stream_view stream) { - return cudf::detail::sequence( - num_lists + 1, cudf::scalar_type_t(0, true, stream), stream); + return cudf::detail::sequence(num_lists + 1, + cudf::scalar_type_t(0, true, stream), + stream, + rmm::mr::get_current_device_resource()); } } // namespace diff --git a/cpp/src/lists/reverse.cu b/cpp/src/lists/reverse.cu index c9c88270e10..d606f11bdb9 100644 --- a/cpp/src/lists/reverse.cu +++ b/cpp/src/lists/reverse.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -43,7 +43,8 @@ std::unique_ptr reverse(lists_column_view const& input, auto const child = input.get_sliced_child(stream); // The labels are also a map from each list element to its corresponding zero-based list index. - auto const labels = generate_labels(input, child.size(), stream); + auto const labels = + generate_labels(input, child.size(), stream, rmm::mr::get_current_device_resource()); // The offsets of the output lists column. auto out_offsets = get_normalized_offsets(input, stream, mr); diff --git a/cpp/src/lists/set_operations.cu b/cpp/src/lists/set_operations.cu index a31b7c6e5be..813bac54e08 100644 --- a/cpp/src/lists/set_operations.cu +++ b/cpp/src/lists/set_operations.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -73,16 +73,18 @@ std::unique_ptr have_overlap(lists_column_view const& lhs, // - `reduce_by_key` with keys are rhs_labels and `logical_or` reduction on the existence reults // computed in the previous step. 
- auto const lhs_child = lhs.get_sliced_child(stream); - auto const rhs_child = rhs.get_sliced_child(stream); - auto const lhs_labels = generate_labels(lhs, lhs_child.size(), stream); - auto const rhs_labels = generate_labels(rhs, rhs_child.size(), stream); - auto const lhs_table = table_view{{lhs_labels->view(), lhs_child}}; - auto const rhs_table = table_view{{rhs_labels->view(), rhs_child}}; + auto const lhs_child = lhs.get_sliced_child(stream); + auto const rhs_child = rhs.get_sliced_child(stream); + auto const lhs_labels = + generate_labels(lhs, lhs_child.size(), stream, rmm::mr::get_current_device_resource()); + auto const rhs_labels = + generate_labels(rhs, rhs_child.size(), stream, rmm::mr::get_current_device_resource()); + auto const lhs_table = table_view{{lhs_labels->view(), lhs_child}}; + auto const rhs_table = table_view{{rhs_labels->view(), rhs_child}}; // Check existence for each row of the rhs_table in lhs_table. - auto const contained = - cudf::detail::contains(lhs_table, rhs_table, nulls_equal, nans_equal, stream); + auto const contained = cudf::detail::contains( + lhs_table, rhs_table, nulls_equal, nans_equal, stream, rmm::mr::get_current_device_resource()); auto const num_rows = lhs.size(); @@ -140,20 +142,23 @@ std::unique_ptr intersect_distinct(lists_column_view const& lhs, // - Extract rows of the rhs table using the existence results computed in the previous step. // - Remove duplicate rows, and build the output lists. 
- auto const lhs_child = lhs.get_sliced_child(stream); - auto const rhs_child = rhs.get_sliced_child(stream); - auto const lhs_labels = generate_labels(lhs, lhs_child.size(), stream); - auto const rhs_labels = generate_labels(rhs, rhs_child.size(), stream); - auto const lhs_table = table_view{{lhs_labels->view(), lhs_child}}; - auto const rhs_table = table_view{{rhs_labels->view(), rhs_child}}; + auto const lhs_child = lhs.get_sliced_child(stream); + auto const rhs_child = rhs.get_sliced_child(stream); + auto const lhs_labels = + generate_labels(lhs, lhs_child.size(), stream, rmm::mr::get_current_device_resource()); + auto const rhs_labels = + generate_labels(rhs, rhs_child.size(), stream, rmm::mr::get_current_device_resource()); + auto const lhs_table = table_view{{lhs_labels->view(), lhs_child}}; + auto const rhs_table = table_view{{rhs_labels->view(), rhs_child}}; - auto const contained = - cudf::detail::contains(lhs_table, rhs_table, nulls_equal, nans_equal, stream); + auto const contained = cudf::detail::contains( + lhs_table, rhs_table, nulls_equal, nans_equal, stream, rmm::mr::get_current_device_resource()); auto const intersect_table = cudf::detail::copy_if( rhs_table, [contained = contained.begin()] __device__(auto const idx) { return contained[idx]; }, - stream); + stream, + rmm::mr::get_current_device_resource()); // A stable algorithm is required to ensure that list labels remain contiguous. auto out_table = cudf::detail::stable_distinct(intersect_table->view(), @@ -191,8 +196,11 @@ std::unique_ptr union_distinct(lists_column_view const& lhs, // Algorithm: `return distinct(concatenate_rows(lhs, rhs))`. 
- auto const union_col = lists::detail::concatenate_rows( - table_view{{lhs.parent(), rhs.parent()}}, concatenate_null_policy::NULLIFY_OUTPUT_ROW, stream); + auto const union_col = + lists::detail::concatenate_rows(table_view{{lhs.parent(), rhs.parent()}}, + concatenate_null_policy::NULLIFY_OUTPUT_ROW, + stream, + rmm::mr::get_current_device_resource()); return cudf::lists::detail::distinct( lists_column_view{union_col->view()}, nulls_equal, nans_equal, stream, mr); @@ -215,20 +223,23 @@ std::unique_ptr difference_distinct(lists_column_view const& lhs, // - Extract rows of the lhs table using that difference results. // - Remove duplicate rows, and build the output lists. - auto const lhs_child = lhs.get_sliced_child(stream); - auto const rhs_child = rhs.get_sliced_child(stream); - auto const lhs_labels = generate_labels(lhs, lhs_child.size(), stream); - auto const rhs_labels = generate_labels(rhs, rhs_child.size(), stream); - auto const lhs_table = table_view{{lhs_labels->view(), lhs_child}}; - auto const rhs_table = table_view{{rhs_labels->view(), rhs_child}}; + auto const lhs_child = lhs.get_sliced_child(stream); + auto const rhs_child = rhs.get_sliced_child(stream); + auto const lhs_labels = + generate_labels(lhs, lhs_child.size(), stream, rmm::mr::get_current_device_resource()); + auto const rhs_labels = + generate_labels(rhs, rhs_child.size(), stream, rmm::mr::get_current_device_resource()); + auto const lhs_table = table_view{{lhs_labels->view(), lhs_child}}; + auto const rhs_table = table_view{{rhs_labels->view(), rhs_child}}; - auto const contained = - cudf::detail::contains(rhs_table, lhs_table, nulls_equal, nans_equal, stream); + auto const contained = cudf::detail::contains( + rhs_table, lhs_table, nulls_equal, nans_equal, stream, rmm::mr::get_current_device_resource()); auto const difference_table = cudf::detail::copy_if( lhs_table, [contained = contained.begin()] __device__(auto const idx) { return !contained[idx]; }, - stream); + stream, + 
rmm::mr::get_current_device_resource()); // A stable algorithm is required to ensure that list labels remain contiguous. auto out_table = cudf::detail::stable_distinct(difference_table->view(), diff --git a/cpp/src/lists/stream_compaction/apply_boolean_mask.cu b/cpp/src/lists/stream_compaction/apply_boolean_mask.cu index 5acb1cb8849..0aaa8356304 100644 --- a/cpp/src/lists/stream_compaction/apply_boolean_mask.cu +++ b/cpp/src/lists/stream_compaction/apply_boolean_mask.cu @@ -21,10 +21,10 @@ #include #include #include -#include #include #include #include +#include #include #include @@ -65,12 +65,14 @@ std::unique_ptr apply_boolean_mask(lists_column_view const& input, cudf::detail::slice( boolean_mask.offsets(), {boolean_mask.offset(), boolean_mask.size() + 1}, stream) .front(); - auto const sizes = cudf::reduction::segmented_sum(boolean_mask_sliced_child, - boolean_mask_sliced_offsets, - offset_data_type, - null_policy::EXCLUDE, - std::nullopt, - stream); + auto const sizes = + cudf::reduction::detail::segmented_sum(boolean_mask_sliced_child, + boolean_mask_sliced_offsets, + offset_data_type, + null_policy::EXCLUDE, + std::nullopt, + stream, + rmm::mr::get_current_device_resource()); auto const d_sizes = column_device_view::create(*sizes, stream); auto const sizes_begin = cudf::detail::make_null_replacement_iterator(*d_sizes, offset_type{0}); auto const sizes_end = sizes_begin + sizes->size(); diff --git a/cpp/src/lists/stream_compaction/distinct.cu b/cpp/src/lists/stream_compaction/distinct.cu index d0e4557663e..48d8babb4fa 100644 --- a/cpp/src/lists/stream_compaction/distinct.cu +++ b/cpp/src/lists/stream_compaction/distinct.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -46,8 +46,9 @@ std::unique_ptr distinct(lists_column_view const& input, if (input.is_empty()) { return empty_like(input.parent()); } - auto const child = input.get_sliced_child(stream); - auto const labels = generate_labels(input, child.size(), stream); + auto const child = input.get_sliced_child(stream); + auto const labels = + generate_labels(input, child.size(), stream, rmm::mr::get_current_device_resource()); auto const distinct_table = cudf::detail::stable_distinct(table_view{{labels->view(), child}}, // input table diff --git a/cpp/src/lists/utilities.hpp b/cpp/src/lists/utilities.hpp index 76f8879c4d3..c881e828677 100644 --- a/cpp/src/lists/utilities.hpp +++ b/cpp/src/lists/utilities.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,11 +33,10 @@ namespace cudf::lists::detail { * @param mr Device memory resource used to allocate the returned object * @return A column containing list labels corresponding to each element in the child column */ -std::unique_ptr generate_labels( - lists_column_view const& input, - size_type n_elements, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr generate_labels(lists_column_view const& input, + size_type n_elements, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Reconstruct an offsets column from the input list labels column. 
@@ -61,9 +60,8 @@ std::unique_ptr reconstruct_offsets(column_view const& labels, * @param mr Device memory resource used to allocate the returned object * @return The output offsets column with values start from 0 */ -std::unique_ptr get_normalized_offsets( - lists_column_view const& input, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr get_normalized_offsets(lists_column_view const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace cudf::lists::detail diff --git a/cpp/src/merge/merge.cu b/cpp/src/merge/merge.cu index d9c573e8155..83ee6793efb 100644 --- a/cpp/src/merge/merge.cu +++ b/cpp/src/merge/merge.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -170,8 +170,8 @@ index_vector generate_merged_indices(table_view const& left_table, table_view const& right_table, std::vector const& column_order, std::vector const& null_precedence, - bool nullable = true, - rmm::cuda_stream_view stream = cudf::get_default_stream()) + bool nullable, + rmm::cuda_stream_view stream) { const size_type left_size = left_table.num_rows(); const size_type right_size = right_table.num_rows(); @@ -187,10 +187,12 @@ index_vector generate_merged_indices(table_view const& left_table, auto lhs_device_view = table_device_view::create(left_table, stream); auto rhs_device_view = table_device_view::create(right_table, stream); - auto d_column_order = cudf::detail::make_device_uvector_async(column_order, stream); + auto d_column_order = cudf::detail::make_device_uvector_async( + column_order, stream, rmm::mr::get_current_device_resource()); if (nullable) { - auto d_null_precedence = cudf::detail::make_device_uvector_async(null_precedence, stream); + auto 
d_null_precedence = cudf::detail::make_device_uvector_async( + null_precedence, stream, rmm::mr::get_current_device_resource()); auto ineq_op = detail::row_lexicographic_tagged_comparator( *lhs_device_view, *rhs_device_view, d_column_order.data(), d_null_precedence.data()); @@ -241,7 +243,7 @@ struct column_merger { column_view const& lcol, column_view const& rcol, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const + rmm::mr::device_memory_resource* mr) const { auto lsz = lcol.size(); auto merged_size = lsz + rcol.size(); @@ -410,7 +412,7 @@ table_ptr_type merge(cudf::table_view const& left_table, // extract merged row order according to indices: // auto const merged_indices = generate_merged_indices( - index_left_view, index_right_view, column_order, null_precedence, nullable); + index_left_view, index_right_view, column_order, null_precedence, nullable, stream); // create merged table: // diff --git a/cpp/src/partitioning/partitioning.cu b/cpp/src/partitioning/partitioning.cu index 54dffc85aca..13f46195392 100644 --- a/cpp/src/partitioning/partitioning.cu +++ b/cpp/src/partitioning/partitioning.cu @@ -493,11 +493,11 @@ std::pair, std::vector> hash_partition_table( rmm::device_uvector(grid_size * num_partitions, stream); // Holds the total number of rows in each partition - auto global_partition_sizes = - cudf::detail::make_zeroed_device_uvector_async(num_partitions, stream); + auto global_partition_sizes = cudf::detail::make_zeroed_device_uvector_async( + num_partitions, stream, rmm::mr::get_current_device_resource()); - auto row_partition_offset = - cudf::detail::make_zeroed_device_uvector_async(num_rows, stream); + auto row_partition_offset = cudf::detail::make_zeroed_device_uvector_async( + num_rows, stream, rmm::mr::get_current_device_resource()); auto const row_hasher = experimental::row::hash::row_hasher(table_to_hash, stream); auto const hasher = diff --git 
a/cpp/src/partitioning/round_robin.cu b/cpp/src/partitioning/round_robin.cu index 990992cd8f2..00f64b36e2d 100644 --- a/cpp/src/partitioning/round_robin.cu +++ b/cpp/src/partitioning/round_robin.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -151,9 +151,9 @@ namespace detail { std::pair, std::vector> round_robin_partition( table_view const& input, cudf::size_type num_partitions, - cudf::size_type start_partition = 0, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + cudf::size_type start_partition, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto nrows = input.num_rows(); diff --git a/cpp/src/quantiles/quantile.cu b/cpp/src/quantiles/quantile.cu index 785aa839956..4a9c2e3a902 100644 --- a/cpp/src/quantiles/quantile.cu +++ b/cpp/src/quantiles/quantile.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -85,7 +85,8 @@ struct quantile_functor { auto d_input = column_device_view::create(input, stream); auto d_output = mutable_column_device_view::create(output->mutable_view(), stream); - auto q_device = cudf::detail::make_device_uvector_sync(q, stream); + auto q_device = + cudf::detail::make_device_uvector_sync(q, stream, rmm::mr::get_current_device_resource()); if (!cudf::is_dictionary(input.type())) { auto sorted_data = diff --git a/cpp/src/quantiles/quantiles.cu b/cpp/src/quantiles/quantiles.cu index e71508bab09..c6760e77403 100644 --- a/cpp/src/quantiles/quantiles.cu +++ b/cpp/src/quantiles/quantiles.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -49,7 +49,8 @@ std::unique_ptr
quantiles(table_view const& input, return detail::select_quantile(selector, size, q, interp); }; - auto const q_device = cudf::detail::make_device_uvector_async(q, stream); + auto const q_device = + cudf::detail::make_device_uvector_async(q, stream, rmm::mr::get_current_device_resource()); auto quantile_idx_iter = thrust::make_transform_iterator(q_device.begin(), quantile_idx_lookup); diff --git a/cpp/src/quantiles/tdigest/tdigest_aggregation.cu b/cpp/src/quantiles/tdigest/tdigest_aggregation.cu index e231d515e86..094e554c3d2 100644 --- a/cpp/src/quantiles/tdigest/tdigest_aggregation.cu +++ b/cpp/src/quantiles/tdigest/tdigest_aggregation.cu @@ -1120,7 +1120,8 @@ std::unique_ptr merge_tdigests(tdigest_column_view const& tdv, tdigests.end(), std::back_inserter(tdigest_views), [](std::unique_ptr
const& t) { return t->view(); }); - auto merged = cudf::detail::concatenate(tdigest_views, stream); + auto merged = + cudf::detail::concatenate(tdigest_views, stream, rmm::mr::get_current_device_resource()); // generate cumulative weights auto merged_weights = merged->get_column(1).view(); diff --git a/cpp/src/reductions/all.cu b/cpp/src/reductions/all.cu index 185e14b6e2f..9d32bc4c7f6 100644 --- a/cpp/src/reductions/all.cu +++ b/cpp/src/reductions/all.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,10 +14,11 @@ * limitations under the License. */ -#include +#include "simple.cuh" + #include #include -#include +#include #include #include @@ -55,8 +56,7 @@ struct all_fn { { auto const d_dict = cudf::column_device_view::create(input, stream); auto const iter = [&] { - auto null_iter = - cudf::reduction::op::min{}.template get_null_replacing_element_transformer(); + auto null_iter = op::min{}.template get_null_replacing_element_transformer(); auto pair_iter = cudf::dictionary::detail::make_dictionary_pair_iterator(*d_dict, input.has_nulls()); return thrust::make_transform_iterator(pair_iter, null_iter); @@ -78,7 +78,6 @@ struct all_fn { }; } // namespace -} // namespace detail std::unique_ptr all(column_view const& col, cudf::data_type const output_dtype, @@ -93,15 +92,11 @@ std::unique_ptr all(column_view const& col, return cudf::type_dispatcher( dictionary_column_view(col).keys().type(), detail::all_fn{}, col, stream, mr); } + using reducer = simple::detail::bool_result_element_dispatcher; // dispatch for non-dictionary types - return cudf::type_dispatcher( - col.type(), - simple::detail::bool_result_element_dispatcher{}, - col, - init, - stream, - mr); + return cudf::type_dispatcher(col.type(), reducer{}, col, init, stream, mr); } +} // namespace detail } 
// namespace reduction } // namespace cudf diff --git a/cpp/src/reductions/any.cu b/cpp/src/reductions/any.cu index 871672e5c03..07977d2417f 100644 --- a/cpp/src/reductions/any.cu +++ b/cpp/src/reductions/any.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,10 +14,11 @@ * limitations under the License. */ -#include +#include "simple.cuh" + #include #include -#include +#include #include #include @@ -55,8 +56,7 @@ struct any_fn { { auto const d_dict = cudf::column_device_view::create(input, stream); auto const iter = [&] { - auto null_iter = - cudf::reduction::op::max{}.template get_null_replacing_element_transformer(); + auto null_iter = op::max{}.template get_null_replacing_element_transformer(); auto pair_iter = cudf::dictionary::detail::make_dictionary_pair_iterator(*d_dict, input.has_nulls()); return thrust::make_transform_iterator(pair_iter, null_iter); @@ -78,7 +78,6 @@ struct any_fn { }; } // namespace -} // namespace detail std::unique_ptr any(column_view const& col, cudf::data_type const output_dtype, @@ -93,15 +92,11 @@ std::unique_ptr any(column_view const& col, return cudf::type_dispatcher( dictionary_column_view(col).keys().type(), detail::any_fn{}, col, stream, mr); } + using reducer = simple::detail::bool_result_element_dispatcher; // dispatch for non-dictionary types - return cudf::type_dispatcher( - col.type(), - simple::detail::bool_result_element_dispatcher{}, - col, - init, - stream, - mr); + return cudf::type_dispatcher(col.type(), reducer{}, col, init, stream, mr); } +} // namespace detail } // namespace reduction } // namespace cudf diff --git a/cpp/src/reductions/collect_ops.cu b/cpp/src/reductions/collect_ops.cu index 4d6a32b528a..743eddbffaf 100644 --- a/cpp/src/reductions/collect_ops.cu +++ b/cpp/src/reductions/collect_ops.cu @@ 
-1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,15 +17,15 @@ #include #include #include -#include #include #include +#include #include #include namespace cudf { namespace reduction { - +namespace detail { namespace { /** @@ -49,8 +49,8 @@ std::unique_ptr collect_list(column_view const& col, { if (need_handle_nulls(col, null_handling)) { auto d_view = column_device_view::create(col, stream); - auto filter = detail::validity_accessor(*d_view); - auto null_purged_table = detail::copy_if(table_view{{col}}, filter, stream, mr); + auto filter = cudf::detail::validity_accessor(*d_view); + auto null_purged_table = cudf::detail::copy_if(table_view{{col}}, filter, stream, mr); column* null_purged_col = null_purged_table->release().front().release(); null_purged_col->set_null_mask(rmm::device_buffer{0, stream, mr}, 0); return std::make_unique(std::move(*null_purged_col), true, stream, mr); @@ -86,13 +86,13 @@ std::unique_ptr collect_set(column_view const& col, return std::pair(col, std::unique_ptr(nullptr)); }(); - auto distinct_table = detail::distinct(table_view{{input_as_collect_list}}, - std::vector{0}, - duplicate_keep_option::KEEP_ANY, - nulls_equal, - nans_equal, - stream, - mr); + auto distinct_table = cudf::detail::distinct(table_view{{input_as_collect_list}}, + std::vector{0}, + duplicate_keep_option::KEEP_ANY, + nulls_equal, + nans_equal, + stream, + mr); return std::make_unique(std::move(distinct_table->get_column(0)), true, stream, mr); } @@ -104,15 +104,15 @@ std::unique_ptr merge_sets(lists_column_view const& col, rmm::mr::device_memory_resource* mr) { auto flatten_col = col.get_sliced_child(stream); - auto distinct_table = detail::distinct(table_view{{flatten_col}}, - std::vector{0}, - duplicate_keep_option::KEEP_ANY, - nulls_equal, - nans_equal, - stream, - 
mr); + auto distinct_table = cudf::detail::distinct(table_view{{flatten_col}}, + std::vector{0}, + duplicate_keep_option::KEEP_ANY, + nulls_equal, + nans_equal, + stream, + mr); return std::make_unique(std::move(distinct_table->get_column(0)), true, stream, mr); } - +} // namespace detail } // namespace reduction } // namespace cudf diff --git a/cpp/src/reductions/compound.cuh b/cpp/src/reductions/compound.cuh index 9458ae2d581..3428130d912 100644 --- a/cpp/src/reductions/compound.cuh +++ b/cpp/src/reductions/compound.cuh @@ -16,8 +16,8 @@ #pragma once -#include #include +#include #include #include #include diff --git a/cpp/src/reductions/max.cu b/cpp/src/reductions/max.cu index b57896e5fc0..1cf2b6f53b6 100644 --- a/cpp/src/reductions/max.cu +++ b/cpp/src/reductions/max.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,14 +14,16 @@ * limitations under the License. */ -#include +#include "simple.cuh" + #include -#include +#include #include namespace cudf { namespace reduction { +namespace detail { std::unique_ptr max(column_view const& col, cudf::data_type const output_dtype, @@ -35,14 +37,11 @@ std::unique_ptr max(column_view const& col, auto const dispatch_type = cudf::is_dictionary(col.type()) ? 
cudf::dictionary_column_view(col).indices().type() : col.type(); - return cudf::type_dispatcher( - dispatch_type, - simple::detail::same_element_type_dispatcher{}, - col, - init, - stream, - mr); + + using reducer = simple::detail::same_element_type_dispatcher; + return cudf::type_dispatcher(dispatch_type, reducer{}, col, init, stream, mr); } +} // namespace detail } // namespace reduction } // namespace cudf diff --git a/cpp/src/reductions/mean.cu b/cpp/src/reductions/mean.cu index e4b5f754b9b..e64660932ce 100644 --- a/cpp/src/reductions/mean.cu +++ b/cpp/src/reductions/mean.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,26 +14,30 @@ * limitations under the License. */ -#include +#include "compound.cuh" + #include -#include +#include #include namespace cudf { namespace reduction { +namespace detail { std::unique_ptr mean(column_view const& col, cudf::data_type const output_dtype, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - using reducer = compound::detail::element_type_dispatcher; auto col_type = cudf::is_dictionary(col.type()) ? dictionary_column_view(col).keys().type() : col.type(); + + using reducer = compound::detail::element_type_dispatcher; return cudf::type_dispatcher( col_type, reducer(), col, output_dtype, /* ddof is not used for mean*/ 1, stream, mr); } +} // namespace detail } // namespace reduction } // namespace cudf diff --git a/cpp/src/reductions/min.cu b/cpp/src/reductions/min.cu index ed16cec5ffd..792965e8b99 100644 --- a/cpp/src/reductions/min.cu +++ b/cpp/src/reductions/min.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,13 +14,14 @@ * limitations under the License. */ -#include +#include "simple.cuh" + #include -#include +#include namespace cudf { namespace reduction { - +namespace detail { std::unique_ptr min(column_view const& col, data_type const output_dtype, std::optional> init, @@ -33,14 +34,10 @@ std::unique_ptr min(column_view const& col, auto const dispatch_type = cudf::is_dictionary(col.type()) ? cudf::dictionary_column_view(col).indices().type() : col.type(); - return cudf::type_dispatcher( - dispatch_type, - simple::detail::same_element_type_dispatcher{}, - col, - init, - stream, - mr); -} + using reducer = simple::detail::same_element_type_dispatcher; + return cudf::type_dispatcher(dispatch_type, reducer{}, col, init, stream, mr); +} +} // namespace detail } // namespace reduction } // namespace cudf diff --git a/cpp/src/reductions/nth_element.cu b/cpp/src/reductions/nth_element.cu index 78c469ee767..ef58ec3f42e 100644 --- a/cpp/src/reductions/nth_element.cu +++ b/cpp/src/reductions/nth_element.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include @@ -28,11 +28,13 @@ #include #include -std::unique_ptr cudf::reduction::nth_element(column_view const& col, - size_type n, - null_policy null_handling, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +namespace cudf::reduction::detail { + +std::unique_ptr nth_element(column_view const& col, + size_type n, + null_policy null_handling, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(n >= -col.size() and n < col.size(), "Index out of bounds"); auto wrap_n = [n](size_type size) { return (n < 0 ? size + n : n); }; @@ -60,3 +62,5 @@ std::unique_ptr cudf::reduction::nth_element(column_view const& co return cudf::detail::get_element(col, n, stream, mr); } } + +} // namespace cudf::reduction::detail diff --git a/cpp/src/reductions/product.cu b/cpp/src/reductions/product.cu index 39e031f69d1..2e483813939 100644 --- a/cpp/src/reductions/product.cu +++ b/cpp/src/reductions/product.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,14 +14,16 @@ * limitations under the License. */ -#include +#include "simple.cuh" + #include -#include +#include #include namespace cudf { namespace reduction { +namespace detail { std::unique_ptr product(column_view const& col, cudf::data_type const output_dtype, @@ -31,13 +33,13 @@ std::unique_ptr product(column_view const& col, { return cudf::type_dispatcher( cudf::is_dictionary(col.type()) ? 
dictionary_column_view(col).keys().type() : col.type(), - simple::detail::element_type_dispatcher{}, + simple::detail::element_type_dispatcher{}, col, output_dtype, init, stream, mr); } - +} // namespace detail } // namespace reduction } // namespace cudf diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp index d28cdee1de2..2fef8aa8785 100644 --- a/cpp/src/reductions/reductions.cpp +++ b/cpp/src/reductions/reductions.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,11 +19,11 @@ #include #include #include -#include #include #include #include #include +#include #include #include #include @@ -31,6 +31,7 @@ #include namespace cudf { +namespace reduction { namespace detail { struct reduce_dispatch_functor { column_view const col; @@ -52,89 +53,89 @@ struct reduce_dispatch_functor { std::unique_ptr operator()(reduce_aggregation const& agg) { switch (k) { - case aggregation::SUM: return reduction::sum(col, output_dtype, init, stream, mr); - case aggregation::PRODUCT: return reduction::product(col, output_dtype, init, stream, mr); - case aggregation::MIN: return reduction::min(col, output_dtype, init, stream, mr); - case aggregation::MAX: return reduction::max(col, output_dtype, init, stream, mr); - case aggregation::ANY: return reduction::any(col, output_dtype, init, stream, mr); - case aggregation::ALL: return reduction::all(col, output_dtype, init, stream, mr); - case aggregation::SUM_OF_SQUARES: - return reduction::sum_of_squares(col, output_dtype, stream, mr); - case aggregation::MEAN: return reduction::mean(col, output_dtype, stream, mr); + case aggregation::SUM: return sum(col, output_dtype, init, stream, mr); + case aggregation::PRODUCT: return product(col, output_dtype, init, stream, mr); + case aggregation::MIN: return 
min(col, output_dtype, init, stream, mr); + case aggregation::MAX: return max(col, output_dtype, init, stream, mr); + case aggregation::ANY: return any(col, output_dtype, init, stream, mr); + case aggregation::ALL: return all(col, output_dtype, init, stream, mr); + case aggregation::SUM_OF_SQUARES: return sum_of_squares(col, output_dtype, stream, mr); + case aggregation::MEAN: return mean(col, output_dtype, stream, mr); case aggregation::VARIANCE: { - auto var_agg = static_cast(agg); - return reduction::variance(col, output_dtype, var_agg._ddof, stream, mr); + auto var_agg = static_cast(agg); + return variance(col, output_dtype, var_agg._ddof, stream, mr); } case aggregation::STD: { - auto var_agg = static_cast(agg); - return reduction::standard_deviation(col, output_dtype, var_agg._ddof, stream, mr); + auto var_agg = static_cast(agg); + return standard_deviation(col, output_dtype, var_agg._ddof, stream, mr); } case aggregation::MEDIAN: { - auto current_mr = rmm::mr::get_current_device_resource(); - auto sorted_indices = - sorted_order(table_view{{col}}, {}, {null_order::AFTER}, stream, current_mr); + auto current_mr = rmm::mr::get_current_device_resource(); + auto sorted_indices = cudf::detail::sorted_order( + table_view{{col}}, {}, {null_order::AFTER}, stream, current_mr); auto valid_sorted_indices = - split(*sorted_indices, {col.size() - col.null_count()}, stream)[0]; - auto col_ptr = quantile( + cudf::detail::split(*sorted_indices, {col.size() - col.null_count()}, stream)[0]; + auto col_ptr = cudf::detail::quantile( col, {0.5}, interpolation::LINEAR, valid_sorted_indices, true, stream, current_mr); - return get_element(*col_ptr, 0, stream, mr); + return cudf::detail::get_element(*col_ptr, 0, stream, mr); } case aggregation::QUANTILE: { - auto quantile_agg = static_cast(agg); + auto quantile_agg = static_cast(agg); CUDF_EXPECTS(quantile_agg._quantiles.size() == 1, "Reduction quantile accepts only one quantile value"); - auto current_mr = 
rmm::mr::get_current_device_resource(); - auto sorted_indices = - sorted_order(table_view{{col}}, {}, {null_order::AFTER}, stream, current_mr); + auto current_mr = rmm::mr::get_current_device_resource(); + auto sorted_indices = cudf::detail::sorted_order( + table_view{{col}}, {}, {null_order::AFTER}, stream, current_mr); auto valid_sorted_indices = - split(*sorted_indices, {col.size() - col.null_count()}, stream)[0]; + cudf::detail::split(*sorted_indices, {col.size() - col.null_count()}, stream)[0]; - auto col_ptr = quantile(col, - quantile_agg._quantiles, - quantile_agg._interpolation, - valid_sorted_indices, - true, - stream, - current_mr); - return get_element(*col_ptr, 0, stream, mr); + auto col_ptr = cudf::detail::quantile(col, + quantile_agg._quantiles, + quantile_agg._interpolation, + valid_sorted_indices, + true, + stream, + current_mr); + return cudf::detail::get_element(*col_ptr, 0, stream, mr); } case aggregation::NUNIQUE: { - auto nunique_agg = static_cast(agg); - return make_fixed_width_scalar( - detail::distinct_count(col, nunique_agg._null_handling, nan_policy::NAN_IS_VALID, stream), + auto nunique_agg = static_cast(agg); + return cudf::make_fixed_width_scalar( + cudf::detail::distinct_count( + col, nunique_agg._null_handling, nan_policy::NAN_IS_VALID, stream), stream, mr); } case aggregation::NTH_ELEMENT: { - auto nth_agg = static_cast(agg); - return reduction::nth_element(col, nth_agg._n, nth_agg._null_handling, stream, mr); + auto nth_agg = static_cast(agg); + return nth_element(col, nth_agg._n, nth_agg._null_handling, stream, mr); } case aggregation::COLLECT_LIST: { - auto col_agg = static_cast(agg); - return reduction::collect_list(col, col_agg._null_handling, stream, mr); + auto col_agg = static_cast(agg); + return collect_list(col, col_agg._null_handling, stream, mr); } case aggregation::COLLECT_SET: { - auto col_agg = static_cast(agg); - return reduction::collect_set( + auto col_agg = static_cast(agg); + return collect_set( col, 
col_agg._null_handling, col_agg._nulls_equal, col_agg._nans_equal, stream, mr); } case aggregation::MERGE_LISTS: { - return reduction::merge_lists(col, stream, mr); + return merge_lists(col, stream, mr); } case aggregation::MERGE_SETS: { - auto col_agg = static_cast(agg); - return reduction::merge_sets(col, col_agg._nulls_equal, col_agg._nans_equal, stream, mr); + auto col_agg = static_cast(agg); + return merge_sets(col, col_agg._nulls_equal, col_agg._nans_equal, stream, mr); } case aggregation::TDIGEST: { CUDF_EXPECTS(output_dtype.id() == type_id::STRUCT, "Tdigest aggregations expect output type to be STRUCT"); - auto td_agg = static_cast(agg); + auto td_agg = static_cast(agg); return tdigest::detail::reduce_tdigest(col, td_agg.max_centroids, stream, mr); } case aggregation::MERGE_TDIGEST: { CUDF_EXPECTS(output_dtype.id() == type_id::STRUCT, "Tdigest aggregations expect output type to be STRUCT"); - auto td_agg = static_cast(agg); + auto td_agg = static_cast(agg); return tdigest::detail::reduce_merge_tdigest(col, td_agg.max_centroids, stream, mr); } default: CUDF_FAIL("Unsupported reduction operator"); @@ -142,13 +143,12 @@ struct reduce_dispatch_functor { } }; -std::unique_ptr reduce( - column_view const& col, - reduce_aggregation const& agg, - data_type output_dtype, - std::optional> init, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +std::unique_ptr reduce(column_view const& col, + reduce_aggregation const& agg, + data_type output_dtype, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(!init.has_value() || col.type() == init.value().get().type(), "column and initial value must be the same type"); @@ -162,7 +162,7 @@ std::unique_ptr reduce( // Returns default scalar if input column is empty or all null if (col.size() <= col.null_count()) { if (agg.kind == aggregation::TDIGEST || agg.kind == aggregation::MERGE_TDIGEST) { - return 
tdigest::detail::make_empty_tdigest_scalar(stream); + return tdigest::detail::make_empty_tdigest_scalar(stream, mr); } if (output_dtype.id() == type_id::LIST) { @@ -184,10 +184,11 @@ std::unique_ptr reduce( return result; } - return aggregation_dispatcher( + return cudf::detail::aggregation_dispatcher( agg.kind, reduce_dispatch_functor{col, output_dtype, init, stream, mr}, agg); } } // namespace detail +} // namespace reduction std::unique_ptr reduce(column_view const& col, reduce_aggregation const& agg, @@ -195,7 +196,8 @@ std::unique_ptr reduce(column_view const& col, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::reduce(col, agg, output_dtype, std::nullopt, cudf::get_default_stream(), mr); + return reduction::detail::reduce( + col, agg, output_dtype, std::nullopt, cudf::get_default_stream(), mr); } std::unique_ptr reduce(column_view const& col, @@ -205,6 +207,6 @@ std::unique_ptr reduce(column_view const& col, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::reduce(col, agg, output_dtype, init, cudf::get_default_stream(), mr); + return reduction::detail::reduce(col, agg, output_dtype, init, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/reductions/scan/scan_inclusive.cu b/cpp/src/reductions/scan/scan_inclusive.cu index 04a96666962..f453e7757a7 100644 --- a/cpp/src/reductions/scan/scan_inclusive.cu +++ b/cpp/src/reductions/scan/scan_inclusive.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -246,12 +246,11 @@ struct scan_dispatcher { } // namespace -std::unique_ptr scan_inclusive( - column_view const& input, - scan_aggregation const& agg, - null_policy null_handling, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +std::unique_ptr scan_inclusive(column_view const& input, + scan_aggregation const& agg, + null_policy null_handling, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto output = scan_agg_dispatch(input, agg, null_handling, stream, mr); diff --git a/cpp/src/reductions/segmented/all.cu b/cpp/src/reductions/segmented/all.cu index f75fcd8066c..b81a088155c 100644 --- a/cpp/src/reductions/segmented/all.cu +++ b/cpp/src/reductions/segmented/all.cu @@ -16,10 +16,11 @@ #include "simple.cuh" -#include +#include namespace cudf { namespace reduction { +namespace detail { std::unique_ptr segmented_all( column_view const& col, @@ -33,17 +34,12 @@ std::unique_ptr segmented_all( CUDF_EXPECTS(output_dtype == cudf::data_type(cudf::type_id::BOOL8), "segmented_all() operation requires output type `BOOL8`"); + using reducer = simple::detail::bool_result_column_dispatcher; // A minimum over bool types is used to implement all() return cudf::type_dispatcher( - col.type(), - simple::detail::bool_result_column_dispatcher{}, - col, - offsets, - null_handling, - init, - stream, - mr); + col.type(), reducer{}, col, offsets, null_handling, init, stream, mr); } +} // namespace detail } // namespace reduction } // namespace cudf diff --git a/cpp/src/reductions/segmented/any.cu b/cpp/src/reductions/segmented/any.cu index 6a4fc70d438..9210fbd3c7c 100644 --- a/cpp/src/reductions/segmented/any.cu +++ b/cpp/src/reductions/segmented/any.cu @@ -16,10 +16,11 @@ #include "simple.cuh" -#include +#include namespace cudf { namespace reduction { +namespace detail { std::unique_ptr segmented_any( column_view const& col, @@ -33,17 +34,12 @@ std::unique_ptr segmented_any( 
CUDF_EXPECTS(output_dtype == cudf::data_type(cudf::type_id::BOOL8), "segmented_any() operation requires output type `BOOL8`"); + using reducer = simple::detail::bool_result_column_dispatcher; // A maximum over bool types is used to implement any() return cudf::type_dispatcher( - col.type(), - simple::detail::bool_result_column_dispatcher{}, - col, - offsets, - null_handling, - init, - stream, - mr); + col.type(), reducer{}, col, offsets, null_handling, init, stream, mr); } +} // namespace detail } // namespace reduction } // namespace cudf diff --git a/cpp/src/reductions/segmented/compound.cuh b/cpp/src/reductions/segmented/compound.cuh index e8abd32cf61..395ad4c1dc9 100644 --- a/cpp/src/reductions/segmented/compound.cuh +++ b/cpp/src/reductions/segmented/compound.cuh @@ -21,7 +21,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/src/reductions/segmented/max.cu b/cpp/src/reductions/segmented/max.cu index d72b65301c1..c07c8fb2269 100644 --- a/cpp/src/reductions/segmented/max.cu +++ b/cpp/src/reductions/segmented/max.cu @@ -16,10 +16,11 @@ #include "simple.cuh" -#include +#include namespace cudf { namespace reduction { +namespace detail { std::unique_ptr segmented_max( column_view const& col, @@ -32,16 +33,10 @@ std::unique_ptr segmented_max( { CUDF_EXPECTS(col.type() == output_dtype, "segmented_max() operation requires matching output type"); + using reducer = simple::detail::same_column_type_dispatcher; return cudf::type_dispatcher( - col.type(), - simple::detail::same_column_type_dispatcher{}, - col, - offsets, - null_handling, - init, - stream, - mr); + col.type(), reducer{}, col, offsets, null_handling, init, stream, mr); } - +} // namespace detail } // namespace reduction } // namespace cudf diff --git a/cpp/src/reductions/segmented/mean.cu b/cpp/src/reductions/segmented/mean.cu index b7a5bfa43d6..99f1533a154 100644 --- a/cpp/src/reductions/segmented/mean.cu +++ b/cpp/src/reductions/segmented/mean.cu @@ -16,12 +16,13 @@ #include 
"compound.cuh" -#include +#include #include namespace cudf { namespace reduction { +namespace detail { std::unique_ptr segmented_mean(column_view const& col, device_span offsets, @@ -30,11 +31,12 @@ std::unique_ptr segmented_mean(column_view const& col, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - using reducer = compound::detail::compound_segmented_dispatcher; + using reducer = compound::detail::compound_segmented_dispatcher; constexpr size_type ddof = 1; // ddof for mean calculation return cudf::type_dispatcher( col.type(), reducer{}, col, offsets, output_dtype, null_handling, ddof, stream, mr); } +} // namespace detail } // namespace reduction } // namespace cudf diff --git a/cpp/src/reductions/segmented/min.cu b/cpp/src/reductions/segmented/min.cu index b7fbedf2690..f1597f90267 100644 --- a/cpp/src/reductions/segmented/min.cu +++ b/cpp/src/reductions/segmented/min.cu @@ -16,10 +16,11 @@ #include "simple.cuh" -#include +#include namespace cudf { namespace reduction { +namespace detail { std::unique_ptr segmented_min( column_view const& col, @@ -32,16 +33,10 @@ std::unique_ptr segmented_min( { CUDF_EXPECTS(col.type() == output_dtype, "segmented_min() operation requires matching output type"); + using reducer = simple::detail::same_column_type_dispatcher; return cudf::type_dispatcher( - col.type(), - simple::detail::same_column_type_dispatcher{}, - col, - offsets, - null_handling, - init, - stream, - mr); + col.type(), reducer{}, col, offsets, null_handling, init, stream, mr); } - +} // namespace detail } // namespace reduction } // namespace cudf diff --git a/cpp/src/reductions/segmented/nunique.cu b/cpp/src/reductions/segmented/nunique.cu new file mode 100644 index 00000000000..bd1efb41df8 --- /dev/null +++ b/cpp/src/reductions/segmented/nunique.cu @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "update_validity.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +namespace cudf { +namespace reduction { +namespace detail { +namespace { +template +struct is_unique_fn { + column_device_view const d_col; + ComparatorType row_equal; + null_policy null_handling; + size_type const* offsets; + size_type const* labels; + + __device__ size_type operator()(size_type idx) const + { + if (null_handling == null_policy::EXCLUDE && d_col.is_null(idx)) { return 0; } + return static_cast(offsets[labels[idx]] == idx || (!row_equal(idx, idx - 1))); + } +}; +} // namespace + +std::unique_ptr segmented_nunique(column_view const& col, + device_span offsets, + null_policy null_handling, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + // only support non-nested types + CUDF_EXPECTS(!cudf::is_nested(col.type()), + "segmented reduce nunique only supports non-nested column types"); + + // compute the unique identifiers within each segment + auto const identifiers = [&] { + auto const d_col = column_device_view::create(col, stream); + auto const comparator = + cudf::experimental::row::equality::self_comparator{table_view({col}), stream}; + auto const row_equal = + comparator.equal_to(cudf::nullate::DYNAMIC{col.has_nulls()}, null_equality::EQUAL); + + auto labels = rmm::device_uvector(col.size(), stream); + 
cudf::detail::label_segments( + offsets.begin(), offsets.end(), labels.begin(), labels.end(), stream); + auto fn = is_unique_fn{ + *d_col, row_equal, null_handling, offsets.data(), labels.data()}; + + auto identifiers = rmm::device_uvector(col.size(), stream); + thrust::transform(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(col.size()), + identifiers.begin(), + fn); + return identifiers; + }(); + + auto result = cudf::make_numeric_column(data_type(type_to_id()), + static_cast(offsets.size() - 1), + cudf::mask_state::UNALLOCATED, + stream, + mr); + + // Sum the unique identifiers within each segment + auto add_op = op::sum{}; + cudf::reduction::detail::segmented_reduce(identifiers.begin(), + offsets.begin(), + offsets.end(), + result->mutable_view().data(), + add_op.get_binary_op(), + 0, + stream); + + // Compute the output null mask + // - only empty segments are tagged as null + // - nulls are counted appropriately above per null_handling policy + auto const bitmask_col = null_handling == null_policy::EXCLUDE ? 
col : result->view(); + cudf::reduction::detail::segmented_update_validity( + *result, bitmask_col, offsets, null_policy::EXCLUDE, std::nullopt, stream, mr); + + return result; +} +} // namespace detail +} // namespace reduction +} // namespace cudf diff --git a/cpp/src/reductions/segmented/product.cu b/cpp/src/reductions/segmented/product.cu index d5442126660..ea9c6f484c0 100644 --- a/cpp/src/reductions/segmented/product.cu +++ b/cpp/src/reductions/segmented/product.cu @@ -16,11 +16,11 @@ #include "simple.cuh" -#include +#include namespace cudf { namespace reduction { - +namespace detail { std::unique_ptr segmented_product( column_view const& col, device_span offsets, @@ -30,17 +30,10 @@ std::unique_ptr segmented_product( rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + using reducer = simple::detail::column_type_dispatcher; return cudf::type_dispatcher( - col.type(), - simple::detail::column_type_dispatcher{}, - col, - offsets, - output_dtype, - null_handling, - init, - stream, - mr); + col.type(), reducer{}, col, offsets, output_dtype, null_handling, init, stream, mr); } - +} // namespace detail } // namespace reduction } // namespace cudf diff --git a/cpp/src/reductions/segmented/reductions.cpp b/cpp/src/reductions/segmented/reductions.cpp index 1de55b371b3..cee82560794 100644 --- a/cpp/src/reductions/segmented/reductions.cpp +++ b/cpp/src/reductions/segmented/reductions.cpp @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include @@ -26,6 +26,7 @@ #include namespace cudf { +namespace reduction { namespace detail { struct segmented_reduce_dispatch_functor { column_view const& col; @@ -69,41 +70,34 @@ struct segmented_reduce_dispatch_functor { { switch (k) { case segmented_reduce_aggregation::SUM: - return reduction::segmented_sum( - col, offsets, output_dtype, null_handling, init, stream, mr); + return segmented_sum(col, offsets, output_dtype, null_handling, init, stream, mr); case 
segmented_reduce_aggregation::PRODUCT: - return reduction::segmented_product( - col, offsets, output_dtype, null_handling, init, stream, mr); + return segmented_product(col, offsets, output_dtype, null_handling, init, stream, mr); case segmented_reduce_aggregation::MIN: - return reduction::segmented_min( - col, offsets, output_dtype, null_handling, init, stream, mr); + return segmented_min(col, offsets, output_dtype, null_handling, init, stream, mr); case segmented_reduce_aggregation::MAX: - return reduction::segmented_max( - col, offsets, output_dtype, null_handling, init, stream, mr); + return segmented_max(col, offsets, output_dtype, null_handling, init, stream, mr); case segmented_reduce_aggregation::ANY: - return reduction::segmented_any( - col, offsets, output_dtype, null_handling, init, stream, mr); + return segmented_any(col, offsets, output_dtype, null_handling, init, stream, mr); case segmented_reduce_aggregation::ALL: - return reduction::segmented_all( - col, offsets, output_dtype, null_handling, init, stream, mr); + return segmented_all(col, offsets, output_dtype, null_handling, init, stream, mr); case segmented_reduce_aggregation::SUM_OF_SQUARES: - return reduction::segmented_sum_of_squares( - col, offsets, output_dtype, null_handling, stream, mr); + return segmented_sum_of_squares(col, offsets, output_dtype, null_handling, stream, mr); case segmented_reduce_aggregation::MEAN: - return reduction::segmented_mean(col, offsets, output_dtype, null_handling, stream, mr); - case aggregation::VARIANCE: { - auto var_agg = static_cast(agg); - return reduction::segmented_variance( + return segmented_mean(col, offsets, output_dtype, null_handling, stream, mr); + case segmented_reduce_aggregation::VARIANCE: { + auto var_agg = static_cast(agg); + return segmented_variance( col, offsets, output_dtype, null_handling, var_agg._ddof, stream, mr); } - case aggregation::STD: { - auto var_agg = static_cast(agg); - return reduction::segmented_standard_deviation( + case 
segmented_reduce_aggregation::STD: { + auto var_agg = static_cast(agg); + return segmented_standard_deviation( col, offsets, output_dtype, null_handling, var_agg._ddof, stream, mr); } - default: - CUDF_FAIL("Unsupported aggregation type."); - // TODO: Add support for compound_ops. GH #10432 + case segmented_reduce_aggregation::NUNIQUE: + return segmented_nunique(col, offsets, null_handling, stream, mr); + default: CUDF_FAIL("Unsupported aggregation type."); } } }; @@ -127,13 +121,14 @@ std::unique_ptr segmented_reduce(column_view const& segmented_values, } CUDF_EXPECTS(offsets.size() > 0, "`offsets` should have at least 1 element."); - return aggregation_dispatcher( + return cudf::detail::aggregation_dispatcher( agg.kind, segmented_reduce_dispatch_functor{ segmented_values, offsets, output_dtype, null_handling, init, stream, mr}, agg); } } // namespace detail +} // namespace reduction std::unique_ptr segmented_reduce(column_view const& segmented_values, device_span offsets, @@ -143,14 +138,14 @@ std::unique_ptr segmented_reduce(column_view const& segmented_values, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::segmented_reduce(segmented_values, - offsets, - agg, - output_dtype, - null_handling, - std::nullopt, - cudf::get_default_stream(), - mr); + return reduction::detail::segmented_reduce(segmented_values, + offsets, + agg, + output_dtype, + null_handling, + std::nullopt, + cudf::get_default_stream(), + mr); } std::unique_ptr segmented_reduce(column_view const& segmented_values, @@ -162,14 +157,14 @@ std::unique_ptr segmented_reduce(column_view const& segmented_values, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::segmented_reduce(segmented_values, - offsets, - agg, - output_dtype, - null_handling, - init, - cudf::get_default_stream(), - mr); + return reduction::detail::segmented_reduce(segmented_values, + offsets, + agg, + output_dtype, + null_handling, + init, + cudf::get_default_stream(), + mr); } } // 
namespace cudf diff --git a/cpp/src/reductions/segmented/simple.cuh b/cpp/src/reductions/segmented/simple.cuh index 0c22848fd89..32138f0835b 100644 --- a/cpp/src/reductions/segmented/simple.cuh +++ b/cpp/src/reductions/segmented/simple.cuh @@ -22,11 +22,11 @@ #include #include #include -#include #include #include #include #include +#include #include #include #include @@ -133,8 +133,8 @@ std::unique_ptr simple_segmented_reduction( template || - std::is_same_v)> + CUDF_ENABLE_IF(std::is_same_v || + std::is_same_v)> std::unique_ptr string_segmented_reduction(column_view const& col, device_span offsets, null_policy null_handling, @@ -147,7 +147,7 @@ std::unique_ptr string_segmented_reduction(column_view const& col, auto it = thrust::make_counting_iterator(0); auto const num_segments = static_cast(offsets.size()) - 1; - bool constexpr is_argmin = std::is_same_v; + bool constexpr is_argmin = std::is_same_v; auto string_comparator = cudf::detail::element_argminmax_fn{*device_col, col.has_nulls(), is_argmin}; auto constexpr identity = @@ -178,8 +178,8 @@ std::unique_ptr string_segmented_reduction(column_view const& col, template () && - !std::is_same_v())> + CUDF_ENABLE_IF(!std::is_same_v() && + !std::is_same_v())> std::unique_ptr string_segmented_reduction(column_view const& col, device_span offsets, null_policy null_handling, @@ -215,7 +215,7 @@ std::unique_ptr fixed_point_segmented_reduction( auto result = simple_segmented_reduction(col, offsets, null_handling, init, stream, mr); auto const scale = [&] { - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { // The product aggregation requires updating the scale of the fixed-point output column. // The output scale needs to be the maximum count of all segments multiplied by // the input scale value. 
@@ -245,7 +245,7 @@ std::unique_ptr fixed_point_segmented_reduction( return new_scale; } - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { return numeric::scale_type{col.type().scale() * 2}; } diff --git a/cpp/src/reductions/segmented/std.cu b/cpp/src/reductions/segmented/std.cu index 6af5a9cf9b6..5f5ced63b8f 100644 --- a/cpp/src/reductions/segmented/std.cu +++ b/cpp/src/reductions/segmented/std.cu @@ -16,12 +16,13 @@ #include "compound.cuh" -#include +#include #include namespace cudf { namespace reduction { +namespace detail { std::unique_ptr segmented_standard_deviation(column_view const& col, device_span offsets, @@ -31,11 +32,11 @@ std::unique_ptr segmented_standard_deviation(column_view const& co rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - using reducer = - compound::detail::compound_segmented_dispatcher; + using reducer = compound::detail::compound_segmented_dispatcher; return cudf::type_dispatcher( col.type(), reducer(), col, offsets, output_dtype, null_handling, ddof, stream, mr); } +} // namespace detail } // namespace reduction } // namespace cudf diff --git a/cpp/src/reductions/segmented/sum.cu b/cpp/src/reductions/segmented/sum.cu index 0cb8decdc58..7e84961dee0 100644 --- a/cpp/src/reductions/segmented/sum.cu +++ b/cpp/src/reductions/segmented/sum.cu @@ -16,10 +16,11 @@ #include "simple.cuh" -#include +#include namespace cudf { namespace reduction { +namespace detail { std::unique_ptr segmented_sum( column_view const& col, @@ -30,16 +31,10 @@ std::unique_ptr segmented_sum( rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - return cudf::type_dispatcher(col.type(), - simple::detail::column_type_dispatcher{}, - col, - offsets, - output_dtype, - null_handling, - init, - stream, - mr); + using reducer = simple::detail::column_type_dispatcher; + return cudf::type_dispatcher( + col.type(), reducer{}, col, offsets, output_dtype, null_handling, init, stream, mr); } - +} // namespace detail } // 
namespace reduction } // namespace cudf diff --git a/cpp/src/reductions/segmented/sum_of_squares.cu b/cpp/src/reductions/segmented/sum_of_squares.cu index 1ee4f992b6d..6c3f286fd8d 100644 --- a/cpp/src/reductions/segmented/sum_of_squares.cu +++ b/cpp/src/reductions/segmented/sum_of_squares.cu @@ -16,12 +16,13 @@ #include "simple.cuh" -#include +#include #include namespace cudf { namespace reduction { +namespace detail { std::unique_ptr segmented_sum_of_squares(column_view const& col, device_span offsets, @@ -30,10 +31,11 @@ std::unique_ptr segmented_sum_of_squares(column_view const& col, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - using reducer = simple::detail::column_type_dispatcher; + using reducer = simple::detail::column_type_dispatcher; return cudf::type_dispatcher( col.type(), reducer{}, col, offsets, output_dtype, null_handling, std::nullopt, stream, mr); } +} // namespace detail } // namespace reduction } // namespace cudf diff --git a/cpp/src/reductions/segmented/var.cu b/cpp/src/reductions/segmented/var.cu index 84adf353700..4ac815b542f 100644 --- a/cpp/src/reductions/segmented/var.cu +++ b/cpp/src/reductions/segmented/var.cu @@ -16,12 +16,13 @@ #include "compound.cuh" -#include +#include #include namespace cudf { namespace reduction { +namespace detail { std::unique_ptr segmented_variance(column_view const& col, device_span offsets, @@ -31,10 +32,10 @@ std::unique_ptr segmented_variance(column_view const& col, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - using reducer = compound::detail::compound_segmented_dispatcher; + using reducer = compound::detail::compound_segmented_dispatcher; return cudf::type_dispatcher( col.type(), reducer(), col, offsets, output_dtype, null_handling, ddof, stream, mr); } - +} // namespace detail } // namespace reduction } // namespace cudf diff --git a/cpp/src/reductions/simple.cuh b/cpp/src/reductions/simple.cuh index c7c0d400106..189c17f9b28 100644 --- 
a/cpp/src/reductions/simple.cuh +++ b/cpp/src/reductions/simple.cuh @@ -16,13 +16,13 @@ #pragma once -#include +#include "struct_minmax_util.cuh" #include -#include #include #include #include +#include #include #include #include @@ -117,10 +117,10 @@ std::unique_ptr fixed_point_reduction( auto result = simple_reduction(col, init, stream, mr); auto const scale = [&] { - if (std::is_same_v) { + if (std::is_same_v) { auto const valid_count = static_cast(col.size() - col.null_count()); return numeric::scale_type{col.type().scale() * (valid_count + (init.has_value() ? 1 : 0))}; - } else if (std::is_same_v) { + } else if (std::is_same_v) { return numeric::scale_type{col.type().scale() * 2}; } return numeric::scale_type{col.type().scale()}; @@ -300,8 +300,8 @@ struct same_element_type_dispatcher { public: template && - (std::is_same_v || - std::is_same_v)>* = nullptr> + (std::is_same_v || + std::is_same_v)>* = nullptr> std::unique_ptr operator()(column_view const& input, std::optional> init, rmm::cuda_stream_view stream, diff --git a/cpp/src/reductions/std.cu b/cpp/src/reductions/std.cu index e9ba75f68e6..9df83634667 100644 --- a/cpp/src/reductions/std.cu +++ b/cpp/src/reductions/std.cu @@ -14,14 +14,16 @@ * limitations under the License. */ -#include +#include "compound.cuh" + #include -#include +#include #include namespace cudf { namespace reduction { +namespace detail { std::unique_ptr standard_deviation(column_view const& col, cudf::data_type const output_dtype, @@ -31,8 +33,7 @@ std::unique_ptr standard_deviation(column_view const& col, { // TODO: add cuda version check when the fix is available #if !defined(__CUDACC_DEBUG__) - using reducer = - compound::detail::element_type_dispatcher; + using reducer = compound::detail::element_type_dispatcher; auto col_type = cudf::is_dictionary(col.type()) ? 
dictionary_column_view(col).keys().type() : col.type(); return cudf::type_dispatcher(col_type, reducer(), col, output_dtype, ddof, stream, mr); @@ -43,5 +44,6 @@ std::unique_ptr standard_deviation(column_view const& col, #endif } +} // namespace detail } // namespace reduction } // namespace cudf diff --git a/cpp/src/reductions/struct_minmax_util.cuh b/cpp/src/reductions/struct_minmax_util.cuh index 796d10a3477..7b56646b153 100644 --- a/cpp/src/reductions/struct_minmax_util.cuh +++ b/cpp/src/reductions/struct_minmax_util.cuh @@ -17,10 +17,10 @@ #pragma once #include -#include #include #include #include +#include #include #include #include @@ -102,7 +102,8 @@ class comparison_binop_generator { {}, std::vector{DEFAULT_NULL_ORDER}, cudf::structs::detail::column_nullability::MATCH_INCOMING, - stream)}, + stream, + rmm::mr::get_current_device_resource())}, d_flattened_input_ptr{ table_device_view::create(flattened_input->flattened_columns(), stream)}, is_min_op(is_min_op), @@ -118,7 +119,8 @@ class comparison_binop_generator { // level structs column (which is stored at the first position in the null_orders array) to // achieve this purpose. if (input.has_nulls()) { null_orders.front() = cudf::null_order::AFTER; } - null_orders_dvec = cudf::detail::make_device_uvector_async(null_orders, stream); + null_orders_dvec = cudf::detail::make_device_uvector_async( + null_orders, stream, rmm::mr::get_current_device_resource()); } // else: Don't need to generate nulls order to copy to device memory if we have all null orders // are BEFORE (that happens when we have is_min_op == false). 
@@ -133,10 +135,10 @@ class comparison_binop_generator { template static auto create(column_view const& input, rmm::cuda_stream_view stream) { - return comparison_binop_generator( - input, - stream, - std::is_same_v || std::is_same_v); + return comparison_binop_generator(input, + stream, + std::is_same_v || + std::is_same_v); } template diff --git a/cpp/src/reductions/sum.cu b/cpp/src/reductions/sum.cu index b919d871cc2..85c6b32dbaf 100644 --- a/cpp/src/reductions/sum.cu +++ b/cpp/src/reductions/sum.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,14 +14,16 @@ * limitations under the License. */ -#include +#include "simple.cuh" + #include -#include +#include #include namespace cudf { namespace reduction { +namespace detail { std::unique_ptr sum(column_view const& col, cudf::data_type const output_dtype, @@ -31,7 +33,7 @@ std::unique_ptr sum(column_view const& col, { return cudf::type_dispatcher( cudf::is_dictionary(col.type()) ? dictionary_column_view(col).keys().type() : col.type(), - simple::detail::element_type_dispatcher{}, + simple::detail::element_type_dispatcher{}, col, output_dtype, init, @@ -39,5 +41,6 @@ std::unique_ptr sum(column_view const& col, mr); } +} // namespace detail } // namespace reduction } // namespace cudf diff --git a/cpp/src/reductions/sum_of_squares.cu b/cpp/src/reductions/sum_of_squares.cu index af28ba19c9a..7b85c4e6dc9 100644 --- a/cpp/src/reductions/sum_of_squares.cu +++ b/cpp/src/reductions/sum_of_squares.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,14 +14,16 @@ * limitations under the License. 
*/ -#include +#include "simple.cuh" + #include -#include +#include #include namespace cudf { namespace reduction { +namespace detail { std::unique_ptr sum_of_squares(column_view const& col, cudf::data_type const output_dtype, @@ -30,7 +32,7 @@ std::unique_ptr sum_of_squares(column_view const& col, { return cudf::type_dispatcher( cudf::is_dictionary(col.type()) ? dictionary_column_view(col).keys().type() : col.type(), - simple::detail::element_type_dispatcher{}, + simple::detail::element_type_dispatcher{}, col, output_dtype, std::nullopt, @@ -38,5 +40,6 @@ std::unique_ptr sum_of_squares(column_view const& col, mr); } +} // namespace detail } // namespace reduction } // namespace cudf diff --git a/cpp/src/reductions/var.cu b/cpp/src/reductions/var.cu index 4d86918d6c6..d559531dc59 100644 --- a/cpp/src/reductions/var.cu +++ b/cpp/src/reductions/var.cu @@ -14,14 +14,16 @@ * limitations under the License. */ -#include +#include "compound.cuh" + #include -#include +#include #include namespace cudf { namespace reduction { +namespace detail { std::unique_ptr variance(column_view const& col, cudf::data_type const output_dtype, @@ -31,7 +33,7 @@ std::unique_ptr variance(column_view const& col, { // TODO: add cuda version check when the fix is available #if !defined(__CUDACC_DEBUG__) - using reducer = compound::detail::element_type_dispatcher; + using reducer = compound::detail::element_type_dispatcher; auto col_type = cudf::is_dictionary(col.type()) ? 
dictionary_column_view(col).keys().type() : col.type(); return cudf::type_dispatcher(col_type, reducer(), col, output_dtype, ddof, stream, mr); @@ -42,5 +44,6 @@ std::unique_ptr variance(column_view const& col, #endif } +} // namespace detail } // namespace reduction } // namespace cudf diff --git a/cpp/src/replace/clamp.cu b/cpp/src/replace/clamp.cu index a275683d82c..68b496e0ab8 100644 --- a/cpp/src/replace/clamp.cu +++ b/cpp/src/replace/clamp.cu @@ -213,28 +213,26 @@ std::enable_if_t, std::unique_ptr> } // namespace template -std::unique_ptr clamp( - column_view const& input, - OptionalScalarIterator lo_itr, - ReplaceScalarIterator lo_replace_itr, - OptionalScalarIterator hi_itr, - ReplaceScalarIterator hi_replace_itr, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +std::unique_ptr clamp(column_view const& input, + OptionalScalarIterator lo_itr, + ReplaceScalarIterator lo_replace_itr, + OptionalScalarIterator hi_itr, + ReplaceScalarIterator hi_replace_itr, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { return clamper(input, lo_itr, lo_replace_itr, hi_itr, hi_replace_itr, stream, mr); } struct dispatch_clamp { template - std::unique_ptr operator()( - column_view const& input, - scalar const& lo, - scalar const& lo_replace, - scalar const& hi, - scalar const& hi_replace, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + std::unique_ptr operator()(column_view const& input, + scalar const& lo, + scalar const& lo_replace, + scalar const& hi, + scalar const& hi_replace, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(lo.type() == input.type(), "mismatching types of scalar and input"); @@ -352,14 +350,13 @@ std::unique_ptr dispatch_clamp::operator()( * * @param[in] stream CUDA stream used for device memory operations and kernel launches. 
*/ -std::unique_ptr clamp( - column_view const& input, - scalar const& lo, - scalar const& lo_replace, - scalar const& hi, - scalar const& hi_replace, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +std::unique_ptr clamp(column_view const& input, + scalar const& lo, + scalar const& lo_replace, + scalar const& hi, + scalar const& hi_replace, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(lo.type() == hi.type(), "mismatching types of limit scalars"); CUDF_EXPECTS(lo_replace.type() == hi_replace.type(), "mismatching types of replace scalars"); diff --git a/cpp/src/replace/replace.cu b/cpp/src/replace/replace.cu index 7f184f793de..373e5ee97e2 100644 --- a/cpp/src/replace/replace.cu +++ b/cpp/src/replace/replace.cu @@ -449,7 +449,9 @@ std::unique_ptr replace_kernel_forwarder::operator()({values.keys(), replacements.keys()}), stream); + std::vector({values.keys(), replacements.keys()}), + stream, + rmm::mr::get_current_device_resource()); return cudf::dictionary::detail::add_keys(input, new_keys->view(), stream, mr); }(); auto matched_view = cudf::dictionary_column_view(matched_input->view()); diff --git a/cpp/src/rolling/detail/lead_lag_nested.cuh b/cpp/src/rolling/detail/lead_lag_nested.cuh index 859ed7e5d53..d2fe9fabd1b 100644 --- a/cpp/src/rolling/detail/lead_lag_nested.cuh +++ b/cpp/src/rolling/detail/lead_lag_nested.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -26,6 +26,7 @@ #include #include +#include #include #include @@ -191,7 +192,8 @@ std::unique_ptr compute_lead_lag_for_nested(aggregation::Kind op, scatter_map, out_of_bounds_policy::DONT_CHECK, cudf::detail::negative_index_policy::NOT_ALLOWED, - stream); + stream, + rmm::mr::get_current_device_resource()); // Scatter defaults into locations where LEAD/LAG computed nulls. auto scattered_results = cudf::detail::scatter( diff --git a/cpp/src/rolling/detail/range_window_bounds.hpp b/cpp/src/rolling/detail/range_window_bounds.hpp index 506bd54e5eb..d1de7adba7a 100644 --- a/cpp/src/rolling/detail/range_window_bounds.hpp +++ b/cpp/src/rolling/detail/range_window_bounds.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -146,10 +146,9 @@ RepT range_comparable_value_impl(scalar const& range_scalar, * @return RepType Value of the range scalar */ template -range_rep_type range_comparable_value( - range_window_bounds const& range_bounds, - data_type const& order_by_data_type = data_type{type_to_id()}, - rmm::cuda_stream_view stream = cudf::get_default_stream()) +range_rep_type range_comparable_value(range_window_bounds const& range_bounds, + data_type const& order_by_data_type, + rmm::cuda_stream_view stream) { auto const& range_scalar = range_bounds.range_scalar(); using range_type = cudf::detail::range_type; diff --git a/cpp/src/rolling/grouped_rolling.cu b/cpp/src/rolling/grouped_rolling.cu index 2b4b6373c35..b208e7cd980 100644 --- a/cpp/src/rolling/grouped_rolling.cu +++ b/cpp/src/rolling/grouped_rolling.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -467,8 +467,10 @@ get_null_bounds_for_orderby_column(column_view const& orderby_column, cudf::device_span(group_offsets.data(), num_groups); // When there are no nulls, just copy the input group offsets to the output. - return std::make_tuple(cudf::detail::make_device_uvector_async(group_offsets_span, stream), - cudf::detail::make_device_uvector_async(group_offsets_span, stream)); + return std::make_tuple(cudf::detail::make_device_uvector_async( + group_offsets_span, stream, rmm::mr::get_current_device_resource()), + cudf::detail::make_device_uvector_async( + group_offsets_span, stream, rmm::mr::get_current_device_resource())); } } diff --git a/cpp/src/scalar/scalar.cpp b/cpp/src/scalar/scalar.cpp index 046bfee9e41..403dc8c9189 100644 --- a/cpp/src/scalar/scalar.cpp +++ b/cpp/src/scalar/scalar.cpp @@ -583,7 +583,8 @@ table struct_scalar::init_data(table&& data, auto data_cols = data.release(); // push validity mask down - auto const validity = cudf::detail::create_null_mask(1, mask_state::ALL_NULL, stream); + auto const validity = cudf::detail::create_null_mask( + 1, mask_state::ALL_NULL, stream, rmm::mr::get_current_device_resource()); for (auto& col : data_cols) { col = cudf::structs::detail::superimpose_nulls( static_cast(validity.data()), 1, std::move(col), stream, mr); diff --git a/cpp/src/search/contains_scalar.cu b/cpp/src/search/contains_scalar.cu index 093a1f8f1ed..7c16a1b12ef 100644 --- a/cpp/src/search/contains_scalar.cu +++ b/cpp/src/search/contains_scalar.cu @@ -30,6 +30,7 @@ #include #include +#include namespace cudf { namespace detail { @@ -108,16 +109,24 @@ struct contains_scalar_dispatch { auto const haystack_cdv_ptr = column_device_view::create(haystack, stream); auto const d_comp = comparator.equal_to(nullate::DYNAMIC{has_nulls}); - return thrust::count_if( - rmm::exec_policy(stream), - begin, - end, - [d_comp, check_nulls, d_haystack = *haystack_cdv_ptr] __device__(auto const idx) { - if (check_nulls && 
d_haystack.is_null_nocheck(static_cast(idx))) { - return false; - } - return d_comp(idx, rhs_index_type{0}); // compare haystack[idx] == needle[0]. - }) > 0; + + // Using a temporary buffer for intermediate transform results from the lambda containing + // the comparator speeds up compile-time significantly without much degradation in + // runtime performance over using the comparator in a transform iterator with thrust::count_if. + auto d_results = rmm::device_uvector(haystack.size(), stream); + thrust::transform( + rmm::exec_policy(stream), + begin, + end, + d_results.begin(), + [d_comp, check_nulls, d_haystack = *haystack_cdv_ptr] __device__(auto const idx) { + if (check_nulls && d_haystack.is_null_nocheck(static_cast(idx))) { + return false; + } + return d_comp(idx, rhs_index_type{0}); // compare haystack[idx] == needle[0]. + }); + + return thrust::count(rmm::exec_policy(stream), d_results.begin(), d_results.end(), true) > 0; } }; diff --git a/cpp/src/search/contains_table.cu b/cpp/src/search/contains_table.cu index f770b4598cf..1a2f242ef87 100644 --- a/cpp/src/search/contains_table.cu +++ b/cpp/src/search/contains_table.cu @@ -125,7 +125,10 @@ std::pair build_row_bitmask(table_view // If there are more than one nullable column, we compute `bitmask_and` of their null masks. // Otherwise, we have only one nullable column and can use its null mask directly. if (nullable_columns.size() > 1) { - auto row_bitmask = cudf::detail::bitmask_and(table_view{nullable_columns}, stream).first; + auto row_bitmask = + cudf::detail::bitmask_and( + table_view{nullable_columns}, stream, rmm::mr::get_current_device_resource()) + .first; auto const row_bitmask_ptr = static_cast(row_bitmask.data()); return std::pair(std::move(row_bitmask), row_bitmask_ptr); } @@ -322,13 +325,13 @@ rmm::device_uvector contains_without_lists_or_nans(table_view const& hayst auto const has_any_nulls = haystack_has_nulls || needles_has_nulls; // Flatten the input tables. 
- auto const flatten_nullability = has_any_nulls - ? structs::detail::column_nullability::FORCE - : structs::detail::column_nullability::MATCH_INCOMING; - auto const haystack_flattened_tables = - structs::detail::flatten_nested_columns(haystack, {}, {}, flatten_nullability, stream); - auto const needles_flattened_tables = - structs::detail::flatten_nested_columns(needles, {}, {}, flatten_nullability, stream); + auto const flatten_nullability = has_any_nulls + ? structs::detail::column_nullability::FORCE + : structs::detail::column_nullability::MATCH_INCOMING; + auto const haystack_flattened_tables = structs::detail::flatten_nested_columns( + haystack, {}, {}, flatten_nullability, stream, rmm::mr::get_current_device_resource()); + auto const needles_flattened_tables = structs::detail::flatten_nested_columns( + needles, {}, {}, flatten_nullability, stream, rmm::mr::get_current_device_resource()); auto const haystack_flattened = haystack_flattened_tables->flattened_columns(); auto const needles_flattened = needles_flattened_tables->flattened_columns(); auto const haystack_tdv_ptr = table_device_view::create(haystack_flattened, stream); diff --git a/cpp/src/sort/is_sorted.cu b/cpp/src/sort/is_sorted.cu index 4c5ad1ef0ea..25c594e9e74 100644 --- a/cpp/src/sort/is_sorted.cu +++ b/cpp/src/sort/is_sorted.cu @@ -27,13 +27,15 @@ #include #include +#include #include #include +#include namespace cudf { namespace detail { -auto is_sorted(cudf::table_view const& in, +bool is_sorted(cudf::table_view const& in, std::vector const& column_order, std::vector const& null_precedence, rmm::cuda_stream_view stream) @@ -44,16 +46,25 @@ auto is_sorted(cudf::table_view const& in, if (cudf::detail::has_nested_columns(in)) { auto const device_comparator = comparator.less(has_nested_nulls(in)); - return thrust::is_sorted(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(in.num_rows()), - device_comparator); + // Using a temporary buffer for 
intermediate transform results from the lambda containing + // the comparator speeds up compile-time significantly over using the comparator directly + // in thrust::is_sorted. + auto d_results = rmm::device_uvector(in.num_rows(), stream); + thrust::transform(rmm::exec_policy(stream), + thrust::counting_iterator(0), + thrust::counting_iterator(in.num_rows()), + d_results.begin(), + [device_comparator] __device__(auto idx) -> bool { + return (idx == 0) || device_comparator(idx - 1, idx); + }); + + return thrust::count(rmm::exec_policy(stream), d_results.begin(), d_results.end(), false) == 0; } else { auto const device_comparator = comparator.less(has_nested_nulls(in)); return thrust::is_sorted(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(in.num_rows()), + thrust::counting_iterator(0), + thrust::counting_iterator(in.num_rows()), device_comparator); } } diff --git a/cpp/src/sort/segmented_sort_impl.cuh b/cpp/src/sort/segmented_sort_impl.cuh index a32382b840f..b7347974173 100644 --- a/cpp/src/sort/segmented_sort_impl.cuh +++ b/cpp/src/sort/segmented_sort_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,6 +22,7 @@ #include #include +#include #include @@ -72,8 +73,11 @@ struct column_fast_sort_fn { { // CUB's segmented sort functions cannot accept iterators. // We create a temporary column here for it to use. 
- auto temp_col = - cudf::detail::allocate_like(input, input.size(), mask_allocation_policy::NEVER, stream); + auto temp_col = cudf::detail::allocate_like(input, + input.size(), + mask_allocation_policy::NEVER, + stream, + rmm::mr::get_current_device_resource()); mutable_column_view output_view = temp_col->mutable_view(); // DeviceSegmentedSort is faster than DeviceSegmentedRadixSort at this time diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu index e15d54b4251..cc60b2a12ea 100644 --- a/cpp/src/stream_compaction/distinct.cu +++ b/cpp/src/stream_compaction/distinct.cu @@ -25,6 +25,8 @@ #include #include +#include + #include #include #include @@ -102,7 +104,8 @@ rmm::device_uvector get_distinct_indices(table_view const& input, keep, nulls_equal, nans_equal, - stream); + stream, + rmm::mr::get_current_device_resource()); // Extract the desired output indices from reduction results. auto const map_end = [&] { @@ -145,8 +148,12 @@ std::unique_ptr
distinct(table_view const& input, return empty_like(input); } - auto const gather_map = - get_distinct_indices(input.select(keys), keep, nulls_equal, nans_equal, stream); + auto const gather_map = get_distinct_indices(input.select(keys), + keep, + nulls_equal, + nans_equal, + stream, + rmm::mr::get_current_device_resource()); return detail::gather(input, gather_map, out_of_bounds_policy::DONT_CHECK, diff --git a/cpp/src/stream_compaction/distinct_count.cu b/cpp/src/stream_compaction/distinct_count.cu index 760fcf4bb6b..8c50f8d29e8 100644 --- a/cpp/src/stream_compaction/distinct_count.cu +++ b/cpp/src/stream_compaction/distinct_count.cu @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -150,7 +151,8 @@ cudf::size_type distinct_count(table_view const& keys, // when nulls are equal, insert non-null rows only to improve efficiency if (nulls_equal == null_equality::EQUAL and has_nulls) { thrust::counting_iterator stencil(0); - auto const [row_bitmask, null_count] = cudf::detail::bitmask_or(keys, stream); + auto const [row_bitmask, null_count] = + cudf::detail::bitmask_or(keys, stream, rmm::mr::get_current_device_resource()); row_validity pred{static_cast(row_bitmask.data())}; key_map.insert_if(iter, iter + num_rows, stencil, pred, hash_key, row_equal, stream.value()); @@ -209,6 +211,6 @@ cudf::size_type distinct_count(column_view const& input, cudf::size_type distinct_count(table_view const& input, null_equality nulls_equal) { CUDF_FUNC_RANGE(); - return detail::distinct_count(input, nulls_equal); + return detail::distinct_count(input, nulls_equal, cudf::get_default_stream()); } } // namespace cudf diff --git a/cpp/src/stream_compaction/distinct_reduce.cuh b/cpp/src/stream_compaction/distinct_reduce.cuh index e360d03280a..8ec1fa18205 100644 --- a/cpp/src/stream_compaction/distinct_reduce.cuh +++ b/cpp/src/stream_compaction/distinct_reduce.cuh @@ -82,6 +82,6 @@ rmm::device_uvector hash_reduce_by_row( null_equality nulls_equal, nan_equality 
nans_equal, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::mr::device_memory_resource* mr); } // namespace cudf::detail diff --git a/cpp/src/stream_compaction/stable_distinct.cu b/cpp/src/stream_compaction/stable_distinct.cu index dc80a454777..d45897930b0 100644 --- a/cpp/src/stream_compaction/stable_distinct.cu +++ b/cpp/src/stream_compaction/stable_distinct.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -38,8 +38,12 @@ std::unique_ptr
stable_distinct(table_view const& input, return empty_like(input); } - auto const distinct_indices = - get_distinct_indices(input.select(keys), keep, nulls_equal, nans_equal, stream); + auto const distinct_indices = get_distinct_indices(input.select(keys), + keep, + nulls_equal, + nans_equal, + stream, + rmm::mr::get_current_device_resource()); // Markers to denote which rows to be copied to the output. auto const output_markers = [&] { diff --git a/cpp/src/stream_compaction/unique.cu b/cpp/src/stream_compaction/unique.cu index 511a7b7ae1c..2d81c00e9d9 100644 --- a/cpp/src/stream_compaction/unique.cu +++ b/cpp/src/stream_compaction/unique.cu @@ -67,38 +67,51 @@ std::unique_ptr
unique(table_view const& input, auto comp = cudf::experimental::row::equality::self_comparator(keys_view, stream); - auto const comparator_helper = [&](auto const row_equal) { - // get indices of unique rows - auto result_end = unique_copy(thrust::counting_iterator(0), - thrust::counting_iterator(num_rows), - mutable_view->begin(), - row_equal, - keep, - stream); + size_type const unique_size = [&] { + if (cudf::detail::has_nested_columns(keys_view)) { + // Using a temporary buffer for intermediate transform results from the functor containing + // the comparator speeds up compile-time significantly without much degradation in + // runtime performance over using the comparator directly in thrust::unique_copy. + auto row_equal = + comp.equal_to(nullate::DYNAMIC{has_nested_nulls(keys_view)}, nulls_equal); + auto d_results = rmm::device_uvector(num_rows, stream); + auto itr = thrust::make_counting_iterator(0); + thrust::transform( + rmm::exec_policy(stream), + itr, + itr + num_rows, + d_results.begin(), + unique_copy_fn{itr, keep, row_equal, num_rows - 1}); + auto result_end = thrust::copy_if(rmm::exec_policy(stream), + itr, + itr + num_rows, + d_results.begin(), + mutable_view->begin(), + thrust::identity{}); + return static_cast(thrust::distance(mutable_view->begin(), result_end)); + } else { + // Using thrust::unique_copy with the comparator directly will compile more slowly but + // improves runtime by up to 2x over the transform/copy_if approach above. 
+ auto row_equal = + comp.equal_to(nullate::DYNAMIC{has_nested_nulls(keys_view)}, nulls_equal); + auto result_end = unique_copy(thrust::counting_iterator(0), + thrust::counting_iterator(num_rows), + mutable_view->begin(), + row_equal, + keep, + stream); + return static_cast(thrust::distance(mutable_view->begin(), result_end)); + } + }(); + auto indices_view = cudf::detail::slice(column_view(*unique_indices), 0, unique_size); - auto indices_view = - cudf::detail::slice(column_view(*unique_indices), - 0, - thrust::distance(mutable_view->begin(), result_end)); - - // gather unique rows and return - return detail::gather(input, - indices_view, - out_of_bounds_policy::DONT_CHECK, - detail::negative_index_policy::NOT_ALLOWED, - stream, - mr); - }; - - if (cudf::detail::has_nested_columns(keys_view)) { - auto row_equal = - comp.equal_to(nullate::DYNAMIC{has_nested_nulls(keys_view)}, nulls_equal); - return comparator_helper(row_equal); - } else { - auto row_equal = - comp.equal_to(nullate::DYNAMIC{has_nested_nulls(keys_view)}, nulls_equal); - return comparator_helper(row_equal); - } + // gather unique rows and return + return detail::gather(input, + indices_view, + out_of_bounds_policy::DONT_CHECK, + detail::negative_index_policy::NOT_ALLOWED, + stream, + mr); } } // namespace detail diff --git a/cpp/src/stream_compaction/unique_count.cu b/cpp/src/stream_compaction/unique_count.cu index c7c10438d7a..ac9924311c2 100644 --- a/cpp/src/stream_compaction/unique_count.cu +++ b/cpp/src/stream_compaction/unique_count.cu @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -37,6 +38,7 @@ #include #include #include +#include #include #include @@ -75,14 +77,23 @@ cudf::size_type unique_count(table_view const& keys, if (cudf::detail::has_nested_columns(keys)) { auto const comp = row_comp.equal_to(nullate::DYNAMIC{has_nested_nulls(keys)}, nulls_equal); - return thrust::count_if( - rmm::exec_policy(stream), - thrust::counting_iterator(0), - 
thrust::counting_iterator(keys.num_rows()), - [comp] __device__(cudf::size_type i) { return (i == 0 or not comp(i, i - 1)); }); + // Using a temporary buffer for intermediate transform results from the lambda containing + // the comparator speeds up compile-time significantly without much degradation in + // runtime performance over using the comparator directly in thrust::count_if. + auto d_results = rmm::device_uvector(keys.num_rows(), stream); + thrust::transform(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(keys.num_rows()), + d_results.begin(), + [comp] __device__(auto i) { return (i == 0 or not comp(i, i - 1)); }); + + return static_cast( + thrust::count(rmm::exec_policy(stream), d_results.begin(), d_results.end(), true)); } else { auto const comp = row_comp.equal_to(nullate::DYNAMIC{has_nested_nulls(keys)}, nulls_equal); + // Using thrust::copy_if with the comparator directly will compile more slowly but + // improves runtime by up to 2x over the transform/count approach above. return thrust::count_if( rmm::exec_policy(stream), thrust::counting_iterator(0), @@ -144,7 +155,7 @@ cudf::size_type unique_count(column_view const& input, cudf::size_type unique_count(table_view const& input, null_equality nulls_equal) { CUDF_FUNC_RANGE(); - return detail::unique_count(input, nulls_equal); + return detail::unique_count(input, nulls_equal, cudf::get_default_stream()); } } // namespace cudf diff --git a/cpp/src/strings/attributes.cu b/cpp/src/strings/attributes.cu index 66288c7d14d..3a1b7044b56 100644 --- a/cpp/src/strings/attributes.cu +++ b/cpp/src/strings/attributes.cu @@ -60,16 +60,16 @@ constexpr size_type AVG_CHAR_BYTES_THRESHOLD = 64; /** * @brief Returns a numeric column containing lengths of each string in - * based on the provided unary function. + * based on the provided unary function * * Any null string will result in a null entry for that row in the output column. 
* - * @tparam UnaryFunction Device function that returns an integer given a string_view. - * @param strings Strings instance for this operation. - * @param ufn Function returns an integer for each string. - * @param stream CUDA stream used for device memory operations and kernel launches. + * @tparam UnaryFunction Device function that returns an integer given a string_view + * @param strings Strings instance for this operation + * @param ufn Function returns an integer for each string + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory - * @return New INT32 column with lengths for each string. + * @return New column with lengths for each string */ template std::unique_ptr counts_fn(strings_column_view const& strings, @@ -78,7 +78,7 @@ std::unique_ptr counts_fn(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { // create output column - auto results = make_numeric_column(data_type{type_id::INT32}, + auto results = make_numeric_column(data_type{type_to_id()}, strings.size(), cudf::detail::copy_bitmask(strings.parent(), stream, mr), strings.null_count(), @@ -176,12 +176,12 @@ std::unique_ptr count_characters(strings_column_view const& input, return count_characters_parallel(input, stream, mr); } -std::unique_ptr count_bytes(strings_column_view const& strings, +std::unique_ptr count_bytes(strings_column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { auto ufn = [] __device__(string_view const& d_str) { return d_str.size_bytes(); }; - return counts_fn(strings, ufn, stream, mr); + return counts_fn(input, ufn, stream, mr); } } // namespace detail @@ -214,19 +214,19 @@ struct code_points_fn { namespace detail { // -std::unique_ptr code_points(strings_column_view const& strings, +std::unique_ptr code_points(strings_column_view const& input, rmm::cuda_stream_view stream, 
rmm::mr::device_memory_resource* mr) { - auto strings_column = column_device_view::create(strings.parent(), stream); + auto strings_column = column_device_view::create(input.parent(), stream); auto d_column = *strings_column; // create offsets vector to account for each string's character length - rmm::device_uvector offsets(strings.size() + 1, stream); + rmm::device_uvector offsets(input.size() + 1, stream); thrust::transform_inclusive_scan( rmm::exec_policy(stream), thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings.size()), + thrust::make_counting_iterator(input.size()), offsets.begin() + 1, [d_column] __device__(size_type idx) { size_type length = 0; @@ -248,7 +248,7 @@ std::unique_ptr code_points(strings_column_view const& strings, // now set the ranges from each strings' character values thrust::for_each_n(rmm::exec_policy(stream), thrust::make_counting_iterator(0), - strings.size(), + input.size(), code_points_fn{d_column, offsets.data(), d_results}); results->set_null_count(0); @@ -259,25 +259,25 @@ std::unique_ptr code_points(strings_column_view const& strings, // external APIS -std::unique_ptr count_characters(strings_column_view const& strings, +std::unique_ptr count_characters(strings_column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::count_characters(strings, cudf::get_default_stream(), mr); + return detail::count_characters(input, cudf::get_default_stream(), mr); } -std::unique_ptr count_bytes(strings_column_view const& strings, +std::unique_ptr count_bytes(strings_column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::count_bytes(strings, cudf::get_default_stream(), mr); + return detail::count_bytes(input, cudf::get_default_stream(), mr); } -std::unique_ptr code_points(strings_column_view const& strings, +std::unique_ptr code_points(strings_column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return 
detail::code_points(strings, cudf::get_default_stream(), mr); + return detail::code_points(input, cudf::get_default_stream(), mr); } } // namespace strings diff --git a/cpp/src/strings/convert/convert_datetime.cu b/cpp/src/strings/convert/convert_datetime.cu index 177fcab03f9..8d273eff4bb 100644 --- a/cpp/src/strings/convert/convert_datetime.cu +++ b/cpp/src/strings/convert/convert_datetime.cu @@ -160,7 +160,8 @@ struct format_compiler { } // copy format_items to device memory - d_items = cudf::detail::make_device_uvector_async(items, stream); + d_items = cudf::detail::make_device_uvector_async( + items, stream, rmm::mr::get_current_device_resource()); } device_span format_items() { return device_span(d_items); } diff --git a/cpp/src/strings/copying/concatenate.cu b/cpp/src/strings/copying/concatenate.cu index e3ee59c631f..92b71d128e1 100644 --- a/cpp/src/strings/copying/concatenate.cu +++ b/cpp/src/strings/copying/concatenate.cu @@ -85,7 +85,8 @@ auto create_strings_device_views(host_span views, rmm::cuda_s return static_cast(col.size()); }); thrust::inclusive_scan(thrust::host, offset_it, input_offsets.end(), offset_it); - auto d_input_offsets = cudf::detail::make_device_uvector_async(input_offsets, stream); + auto d_input_offsets = cudf::detail::make_device_uvector_async( + input_offsets, stream, rmm::mr::get_current_device_resource()); auto const output_size = input_offsets.back(); // Compute the partition offsets and size of chars column diff --git a/cpp/src/strings/count_matches.cu b/cpp/src/strings/count_matches.cu index 9d29bbb8c96..1fde3a54089 100644 --- a/cpp/src/strings/count_matches.cu +++ b/cpp/src/strings/count_matches.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -63,7 +63,7 @@ std::unique_ptr count_matches(column_device_view const& d_strings, assert(output_size >= d_strings.size() and "Unexpected output size"); auto results = make_numeric_column( - data_type{type_id::INT32}, output_size, mask_state::UNALLOCATED, stream, mr); + data_type{type_to_id()}, output_size, mask_state::UNALLOCATED, stream, mr); if (d_strings.size() == 0) return results; diff --git a/cpp/src/strings/filter_chars.cu b/cpp/src/strings/filter_chars.cu index 8a6a4d44b1e..3e38b5fa775 100644 --- a/cpp/src/strings/filter_chars.cu +++ b/cpp/src/strings/filter_chars.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -132,7 +132,8 @@ std::unique_ptr filter_characters( characters_to_filter.begin(), characters_to_filter.end(), htable.begin(), [](auto entry) { return char_range{entry.first, entry.second}; }); - rmm::device_uvector table = cudf::detail::make_device_uvector_async(htable, stream); + rmm::device_uvector table = + cudf::detail::make_device_uvector_async(htable, stream, rmm::mr::get_current_device_resource()); auto d_strings = column_device_view::create(strings.parent(), stream); diff --git a/cpp/src/strings/json/json_path.cu b/cpp/src/strings/json/json_path.cu index c6ea47ec0f3..128d450cbe8 100644 --- a/cpp/src/strings/json/json_path.cu +++ b/cpp/src/strings/json/json_path.cu @@ -673,11 +673,10 @@ std::pair>, int> build_comma } while (op.type != path_operator_type::END); auto const is_empty = h_operators.size() == 1 && h_operators[0].type == path_operator_type::END; - return is_empty - ? std::pair(thrust::nullopt, 0) - : std::pair( - thrust::make_optional(cudf::detail::make_device_uvector_sync(h_operators, stream)), - max_stack_depth); + return is_empty ? 
std::pair(thrust::nullopt, 0) + : std::pair(thrust::make_optional(cudf::detail::make_device_uvector_sync( + h_operators, stream, rmm::mr::get_current_device_resource())), + max_stack_depth); } #define PARSE_TRY(_x) \ diff --git a/cpp/src/strings/replace/backref_re.cu b/cpp/src/strings/replace/backref_re.cu index 383337c9088..d25af8c8931 100644 --- a/cpp/src/strings/replace/backref_re.cu +++ b/cpp/src/strings/replace/backref_re.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -118,9 +118,9 @@ std::unique_ptr replace_with_backrefs(strings_column_view const& input, // parse the repl string for back-ref indicators auto group_count = std::min(99, d_prog->group_counts()); // group count should NOT exceed 99 - auto const parse_result = parse_backrefs(replacement, group_count); - rmm::device_uvector backrefs = - cudf::detail::make_device_uvector_async(parse_result.second, stream); + auto const parse_result = parse_backrefs(replacement, group_count); + rmm::device_uvector backrefs = cudf::detail::make_device_uvector_async( + parse_result.second, stream, rmm::mr::get_current_device_resource()); string_scalar repl_scalar(parse_result.first, true, stream); string_view const d_repl_template = repl_scalar.value(); diff --git a/cpp/src/strings/replace/multi.cu b/cpp/src/strings/replace/multi.cu new file mode 100644 index 00000000000..92ace4e7bc7 --- /dev/null +++ b/cpp/src/strings/replace/multi.cu @@ -0,0 +1,500 @@ +/* + * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace cudf { +namespace strings { +namespace detail { +namespace { + +/** + * @brief Threshold to decide on using string or character-parallel functions. + * + * If the average byte length of a string in a column exceeds this value then + * the character-parallel function is used. + * Otherwise, a regular string-parallel function is used. + * + * This value was found using the replace-multi benchmark results using an + * RTX A6000. + */ +constexpr size_type AVG_CHAR_BYTES_THRESHOLD = 256; + +/** + * @brief Type used for holding the target position (first) and the + * target index (second). + */ +using target_pair = thrust::pair; + +/** + * @brief Helper functions for performing character-parallel replace + */ +struct replace_multi_parallel_fn { + __device__ char const* get_base_ptr() const + { + return d_strings.child(strings_column_view::chars_column_index).data(); + } + + __device__ size_type const* get_offsets_ptr() const + { + return d_strings.child(strings_column_view::offsets_column_index).data() + + d_strings.offset(); + } + + __device__ string_view const get_string(size_type idx) const + { + return d_strings.element(idx); + } + + __device__ string_view const get_replacement_string(size_type idx) const + { + return d_replacements.size() == 1 ? 
d_replacements[0] : d_replacements[idx]; + } + + __device__ bool is_valid(size_type idx) const { return d_strings.is_valid(idx); } + + /** + * @brief Returns the index of the target string found at the given byte position + * in the input strings column + * + * @param idx Index of the byte position in the chars column + * @param chars_bytes Number of bytes in the chars column + */ + __device__ thrust::optional has_target(size_type idx, size_type chars_bytes) const + { + auto const d_offsets = get_offsets_ptr(); + auto const d_chars = get_base_ptr() + d_offsets[0] + idx; + size_type str_idx = -1; + for (std::size_t t = 0; t < d_targets.size(); ++t) { + auto const d_tgt = d_targets[t]; + if (!d_tgt.empty() && (idx + d_tgt.size_bytes() <= chars_bytes) && + (d_tgt.compare(d_chars, d_tgt.size_bytes()) == 0)) { + if (str_idx < 0) { + auto const idx_itr = + thrust::upper_bound(thrust::seq, d_offsets, d_offsets + d_strings.size(), idx); + str_idx = thrust::distance(d_offsets, idx_itr) - 1; + } + auto const d_str = get_string(str_idx - d_offsets[0]); + if ((d_chars + d_tgt.size_bytes()) <= (d_str.data() + d_str.size_bytes())) { return t; } + } + } + return thrust::nullopt; + } + + /** + * @brief Count the number of strings that will be produced by the replace + * + * This includes segments of the string that are not replaced as well as those + * that are replaced. 
+ * + * @param idx Index of the row in d_strings to be processed + * @param d_positions Positions of the targets found in the chars column + * @param d_targets_offsets Offsets identify which target positions go with the current string + * @return Number of substrings resulting from the replace operations on this row + */ + __device__ size_type count_strings(size_type idx, + target_pair const* d_positions, + size_type const* d_targets_offsets) const + { + if (!is_valid(idx)) { return 0; } + + auto const d_str = get_string(idx); + auto const d_str_end = d_str.data() + d_str.size_bytes(); + auto const base_ptr = get_base_ptr(); + auto const targets_positions = cudf::device_span( + d_positions + d_targets_offsets[idx], d_targets_offsets[idx + 1] - d_targets_offsets[idx]); + + size_type count = 1; // always at least one string + auto str_ptr = d_str.data(); + for (auto d_pair : targets_positions) { + auto const d_pos = d_pair.first; + auto const d_tgt = d_targets[d_pair.second]; + auto const tgt_ptr = base_ptr + d_pos; + if (str_ptr <= tgt_ptr && tgt_ptr < d_str_end) { + auto const keep_size = static_cast(thrust::distance(str_ptr, tgt_ptr)); + if (keep_size > 0) { count++; } // don't bother counting empty strings + + auto const d_repl = get_replacement_string(d_pair.second); + if (!d_repl.empty()) { count++; } + + str_ptr += keep_size + d_tgt.size_bytes(); + } + } + + return count; + } + + /** + * @brief Retrieve the strings for each row + * + * This will return string segments as string_index_pair objects for + * parts of the string that are not replaced interlaced with the + * appropriate replacement string where replacement targets are found. + * + * This function is called only once to produce both the string_index_pair objects + * and the output row size in bytes. 
+ * + * @param idx Index of the row in d_strings + * @param d_offsets Offsets to identify where to store the results of the replace for this string + * @param d_positions The target positions found in the chars column + * @param d_targets_offsets The offsets to identify which target positions go with this string + * @param d_all_strings The output of all the produced string segments + * @return The size in bytes of the output string for this row + */ + __device__ size_type get_strings(size_type idx, + size_type const* d_offsets, + target_pair const* d_positions, + size_type const* d_targets_offsets, + string_index_pair* d_all_strings) const + { + if (!is_valid(idx)) { return 0; } + + auto const d_output = d_all_strings + d_offsets[idx]; + auto const d_str = get_string(idx); + auto const d_str_end = d_str.data() + d_str.size_bytes(); + auto const base_ptr = get_base_ptr(); + + auto const targets_positions = cudf::device_span( + d_positions + d_targets_offsets[idx], d_targets_offsets[idx + 1] - d_targets_offsets[idx]); + + size_type output_idx = 0; + size_type output_size = 0; + auto str_ptr = d_str.data(); + for (auto d_pair : targets_positions) { + auto const d_pos = d_pair.first; + auto const d_tgt = d_targets[d_pair.second]; + auto const tgt_ptr = base_ptr + d_pos; + if (str_ptr <= tgt_ptr && tgt_ptr < d_str_end) { + auto const keep_size = static_cast(thrust::distance(str_ptr, tgt_ptr)); + if (keep_size > 0) { d_output[output_idx++] = string_index_pair{str_ptr, keep_size}; } + output_size += keep_size; + + auto const d_repl = get_replacement_string(d_pair.second); + if (!d_repl.empty()) { + d_output[output_idx++] = string_index_pair{d_repl.data(), d_repl.size_bytes()}; + } + output_size += d_repl.size_bytes(); + + str_ptr += keep_size + d_tgt.size_bytes(); + } + } + // include any leftover parts of the string + if (str_ptr <= d_str_end) { + auto const left_size = static_cast(thrust::distance(str_ptr, d_str_end)); + d_output[output_idx] = 
string_index_pair{str_ptr, left_size}; + output_size += left_size; + } + return output_size; + } + + replace_multi_parallel_fn(column_device_view const& d_strings, + device_span d_targets, + device_span d_replacements) + : d_strings(d_strings), d_targets{d_targets}, d_replacements{d_replacements} + { + } + + protected: + column_device_view d_strings; + device_span d_targets; + device_span d_replacements; +}; + +/** + * @brief Used by the copy-if function to produce target_pair objects + * + * Using an inplace lambda caused a runtime crash in thrust::copy_if + * (this happens sometimes when passing device lambdas to thrust algorithms) + */ +struct pair_generator { + __device__ target_pair operator()(int idx) const + { + auto pos = fn.has_target(idx, chars_bytes); + return target_pair{idx, pos.value_or(-1)}; + } + replace_multi_parallel_fn fn; + size_type chars_bytes; +}; + +struct copy_if_fn { + __device__ bool operator()(target_pair pos) { return pos.second >= 0; } +}; + +std::unique_ptr replace_character_parallel(strings_column_view const& input, + strings_column_view const& targets, + strings_column_view const& repls, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto d_strings = column_device_view::create(input.parent(), stream); + + auto const strings_count = input.size(); + auto const chars_bytes = + cudf::detail::get_value(input.offsets(), input.offset() + strings_count, stream) - + cudf::detail::get_value(input.offsets(), input.offset(), stream); + + auto d_targets = + create_string_vector_from_column(targets, stream, rmm::mr::get_current_device_resource()); + auto d_replacements = + create_string_vector_from_column(repls, stream, rmm::mr::get_current_device_resource()); + + replace_multi_parallel_fn fn{*d_strings, d_targets, d_replacements}; + + // count the number of targets in the entire column + auto const target_count = thrust::count_if(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + 
thrust::make_counting_iterator(chars_bytes), + [fn, chars_bytes] __device__(size_type idx) { + return fn.has_target(idx, chars_bytes).has_value(); + }); + // Create a vector of every target position in the chars column. + // These may include overlapping targets which will be resolved later. + auto targets_positions = rmm::device_uvector(target_count, stream); + auto d_positions = targets_positions.data(); + + auto const copy_itr = + cudf::detail::make_counting_transform_iterator(0, pair_generator{fn, chars_bytes}); + auto const copy_end = thrust::copy_if( + rmm::exec_policy(stream), copy_itr, copy_itr + chars_bytes, d_positions, copy_if_fn{}); + + // create a vector of offsets to each string's set of target positions + auto const targets_offsets = [&] { + auto string_indices = rmm::device_uvector(target_count, stream); + + auto const pos_itr = cudf::detail::make_counting_transform_iterator( + 0, [d_positions] __device__(auto idx) -> size_type { return d_positions[idx].first; }); + auto pos_count = std::distance(d_positions, copy_end); + + thrust::upper_bound(rmm::exec_policy(stream), + input.offsets_begin(), + input.offsets_end(), + pos_itr, + pos_itr + pos_count, + string_indices.begin()); + + // compute offsets per string + auto targets_offsets = rmm::device_uvector(strings_count + 1, stream); + auto d_targets_offsets = targets_offsets.data(); + + // memset to zero-out the target counts for any null-entries or strings with no targets + thrust::uninitialized_fill( + rmm::exec_policy(stream), targets_offsets.begin(), targets_offsets.end(), 0); + + // next, count the number of targets per string + auto d_string_indices = string_indices.data(); + thrust::for_each_n(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + target_count, + [d_string_indices, d_targets_offsets] __device__(size_type idx) { + auto const str_idx = d_string_indices[idx] - 1; + atomicAdd(d_targets_offsets + str_idx, 1); + }); + // finally, convert the counts into offsets + 
thrust::exclusive_scan(rmm::exec_policy(stream), + targets_offsets.begin(), + targets_offsets.end(), + targets_offsets.begin()); + return targets_offsets; + }(); + auto const d_targets_offsets = targets_offsets.data(); + + // compute the number of string segments produced by replace in each string + auto counts = rmm::device_uvector(strings_count, stream); + thrust::transform(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(strings_count), + counts.begin(), + [fn, d_positions, d_targets_offsets] __device__(size_type idx) -> size_type { + return fn.count_strings(idx, d_positions, d_targets_offsets); + }); + + // create offsets from the counts + auto offsets = + std::get<0>(cudf::detail::make_offsets_child_column(counts.begin(), counts.end(), stream, mr)); + auto const total_strings = + cudf::detail::get_value(offsets->view(), strings_count, stream); + auto const d_strings_offsets = offsets->view().data(); + + // build a vector of all the positions for all the strings + auto indices = rmm::device_uvector(total_strings, stream); + auto d_indices = indices.data(); + auto d_sizes = counts.data(); // reusing this vector to hold output sizes now + thrust::for_each_n( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + strings_count, + [fn, d_strings_offsets, d_positions, d_targets_offsets, d_indices, d_sizes] __device__( + size_type idx) { + d_sizes[idx] = + fn.get_strings(idx, d_strings_offsets, d_positions, d_targets_offsets, d_indices); + }); + + // use this utility to gather the string parts into a contiguous chars column + auto chars = make_strings_column(indices.begin(), indices.end(), stream, mr); + + // create offsets from the sizes + offsets = + std::get<0>(cudf::detail::make_offsets_child_column(counts.begin(), counts.end(), stream, mr)); + + // build the strings columns from the chars and offsets + return make_strings_column(strings_count, + std::move(offsets), + 
std::move(chars->release().children.back()), + input.null_count(), + copy_bitmask(input.parent(), stream, mr)); +} + +/** + * @brief Function logic for the replace_string_parallel + * + * Performs the multi-replace operation with a thread per string. + * This performs best on smaller strings. @see AVG_CHAR_BYTES_THRESHOLD + */ +struct replace_multi_fn { + column_device_view const d_strings; + column_device_view const d_targets; + column_device_view const d_repls; + int32_t* d_offsets{}; + char* d_chars{}; + + __device__ void operator()(size_type idx) + { + if (d_strings.is_null(idx)) { + if (!d_chars) { d_offsets[idx] = 0; } + return; + } + auto const d_str = d_strings.element(idx); + char const* in_ptr = d_str.data(); + + size_type bytes = d_str.size_bytes(); + size_type spos = 0; + size_type lpos = 0; + char* out_ptr = d_chars ? d_chars + d_offsets[idx] : nullptr; + + // check each character against each target + while (spos < d_str.size_bytes()) { + for (int tgt_idx = 0; tgt_idx < d_targets.size(); ++tgt_idx) { + auto const d_tgt = d_targets.element(tgt_idx); + if ((d_tgt.size_bytes() <= (d_str.size_bytes() - spos)) && // check fit + (d_tgt.compare(in_ptr + spos, d_tgt.size_bytes()) == 0)) // and match + { + auto const d_repl = (d_repls.size() == 1) ? 
d_repls.element(0) + : d_repls.element(tgt_idx); + bytes += d_repl.size_bytes() - d_tgt.size_bytes(); + if (out_ptr) { + out_ptr = copy_and_increment(out_ptr, in_ptr + lpos, spos - lpos); + out_ptr = copy_string(out_ptr, d_repl); + lpos = spos + d_tgt.size_bytes(); + } + spos += d_tgt.size_bytes() - 1; + break; + } + } + ++spos; + } + if (out_ptr) // copy remainder + memcpy(out_ptr, in_ptr + lpos, d_str.size_bytes() - lpos); + else + d_offsets[idx] = bytes; + } +}; + +std::unique_ptr replace_string_parallel(strings_column_view const& input, + strings_column_view const& targets, + strings_column_view const& repls, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto d_strings = column_device_view::create(input.parent(), stream); + auto d_targets = column_device_view::create(targets.parent(), stream); + auto d_replacements = column_device_view::create(repls.parent(), stream); + + auto children = cudf::strings::detail::make_strings_children( + replace_multi_fn{*d_strings, *d_targets, *d_replacements}, input.size(), stream, mr); + + return make_strings_column(input.size(), + std::move(children.first), + std::move(children.second), + input.null_count(), + cudf::detail::copy_bitmask(input.parent(), stream, mr)); +} + +} // namespace + +std::unique_ptr replace(strings_column_view const& input, + strings_column_view const& targets, + strings_column_view const& repls, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + if (input.is_empty()) { return make_empty_column(type_id::STRING); } + CUDF_EXPECTS(((targets.size() > 0) && (targets.null_count() == 0)), + "Parameters targets must not be empty and must not have nulls"); + CUDF_EXPECTS(((repls.size() > 0) && (repls.null_count() == 0)), + "Parameters repls must not be empty and must not have nulls"); + if (repls.size() > 1) + CUDF_EXPECTS(repls.size() == targets.size(), "Sizes for targets and repls must match"); + + return (input.size() == input.null_count() || + 
((input.chars_size() / (input.size() - input.null_count())) < AVG_CHAR_BYTES_THRESHOLD)) + ? replace_string_parallel(input, targets, repls, stream, mr) + : replace_character_parallel(input, targets, repls, stream, mr); +} + +} // namespace detail + +// external API + +std::unique_ptr replace(strings_column_view const& strings, + strings_column_view const& targets, + strings_column_view const& repls, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::replace(strings, targets, repls, cudf::get_default_stream(), mr); +} + +} // namespace strings +} // namespace cudf diff --git a/cpp/src/strings/replace/multi_re.cu b/cpp/src/strings/replace/multi_re.cu index f3bc7fc82ec..50b2dc27671 100644 --- a/cpp/src/strings/replace/multi_re.cu +++ b/cpp/src/strings/replace/multi_re.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -169,7 +169,8 @@ std::unique_ptr replace_re(strings_column_view const& input, prog->set_working_memory(d_buffer, size); return *prog; }); - auto d_progs = cudf::detail::make_device_uvector_async(progs, stream); + auto d_progs = + cudf::detail::make_device_uvector_async(progs, stream, rmm::mr::get_current_device_resource()); auto const d_strings = column_device_view::create(input.parent(), stream); auto const d_repls = column_device_view::create(replacements.parent(), stream); diff --git a/cpp/src/strings/replace/replace.cu b/cpp/src/strings/replace/replace.cu index d1a377a4bda..3fc969a4c1f 100644 --- a/cpp/src/strings/replace/replace.cu +++ b/cpp/src/strings/replace/replace.cu @@ -704,92 +704,6 @@ std::unique_ptr replace_slice(strings_column_view const& strings, cudf::detail::copy_bitmask(strings.parent(), stream, mr)); } -namespace { -/** - * @brief Function logic for the replace_multi API. 
- * - * This will perform the multi-replace operation on each string. - */ -struct replace_multi_fn { - column_device_view const d_strings; - column_device_view const d_targets; - column_device_view const d_repls; - int32_t* d_offsets{}; - char* d_chars{}; - - __device__ void operator()(size_type idx) - { - if (d_strings.is_null(idx)) { - if (!d_chars) d_offsets[idx] = 0; - return; - } - auto const d_str = d_strings.element(idx); - char const* in_ptr = d_str.data(); - - size_type bytes = d_str.size_bytes(); - size_type spos = 0; - size_type lpos = 0; - char* out_ptr = d_chars ? d_chars + d_offsets[idx] : nullptr; - - // check each character against each target - while (spos < d_str.size_bytes()) { - for (int tgt_idx = 0; tgt_idx < d_targets.size(); ++tgt_idx) { - auto const d_tgt = d_targets.element(tgt_idx); - if ((d_tgt.size_bytes() <= (d_str.size_bytes() - spos)) && // check fit - (d_tgt.compare(in_ptr + spos, d_tgt.size_bytes()) == 0)) // and match - { - auto const d_repl = (d_repls.size() == 1) ? 
d_repls.element(0) - : d_repls.element(tgt_idx); - bytes += d_repl.size_bytes() - d_tgt.size_bytes(); - if (out_ptr) { - out_ptr = copy_and_increment(out_ptr, in_ptr + lpos, spos - lpos); - out_ptr = copy_string(out_ptr, d_repl); - lpos = spos + d_tgt.size_bytes(); - } - spos += d_tgt.size_bytes() - 1; - break; - } - } - ++spos; - } - if (out_ptr) // copy remainder - memcpy(out_ptr, in_ptr + lpos, d_str.size_bytes() - lpos); - else - d_offsets[idx] = bytes; - } -}; - -} // namespace - -std::unique_ptr replace(strings_column_view const& strings, - strings_column_view const& targets, - strings_column_view const& repls, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - if (strings.is_empty()) return make_empty_column(type_id::STRING); - CUDF_EXPECTS(((targets.size() > 0) && (targets.null_count() == 0)), - "Parameters targets must not be empty and must not have nulls"); - CUDF_EXPECTS(((repls.size() > 0) && (repls.null_count() == 0)), - "Parameters repls must not be empty and must not have nulls"); - if (repls.size() > 1) - CUDF_EXPECTS(repls.size() == targets.size(), "Sizes for targets and repls must match"); - - auto d_strings = column_device_view::create(strings.parent(), stream); - auto d_targets = column_device_view::create(targets.parent(), stream); - auto d_repls = column_device_view::create(repls.parent(), stream); - - // this utility calls the given functor to build the offsets and chars columns - auto children = cudf::strings::detail::make_strings_children( - replace_multi_fn{*d_strings, *d_targets, *d_repls}, strings.size(), stream, mr); - - return make_strings_column(strings.size(), - std::move(children.first), - std::move(children.second), - strings.null_count(), - cudf::detail::copy_bitmask(strings.parent(), stream, mr)); -} - std::unique_ptr replace_nulls(strings_column_view const& strings, string_scalar const& repl, rmm::cuda_stream_view stream, @@ -854,14 +768,5 @@ std::unique_ptr replace_slice(strings_column_view const& 
strings, return detail::replace_slice(strings, repl, start, stop, cudf::get_default_stream(), mr); } -std::unique_ptr replace(strings_column_view const& strings, - strings_column_view const& targets, - strings_column_view const& repls, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::replace(strings, targets, repls, cudf::get_default_stream(), mr); -} - } // namespace strings } // namespace cudf diff --git a/cpp/src/strings/translate.cu b/cpp/src/strings/translate.cu index 7f134059ded..e7b637c52f3 100644 --- a/cpp/src/strings/translate.cu +++ b/cpp/src/strings/translate.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -106,7 +106,7 @@ std::unique_ptr translate(strings_column_view const& strings, }); // copy translate table to device memory rmm::device_uvector table = - cudf::detail::make_device_uvector_async(htable, stream); + cudf::detail::make_device_uvector_async(htable, stream, rmm::mr::get_current_device_resource()); auto d_strings = column_device_view::create(strings.parent(), stream); diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index 8a63a6f6411..6997de18be5 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -25,6 +25,8 @@ #include #include +#include + #include namespace cudf { @@ -268,7 +270,8 @@ auto list_lex_preprocess(table_view table, rmm::cuda_stream_view stream) dremel_device_views.push_back(dremel_data.back()); } } - auto d_dremel_device_views = detail::make_device_uvector_sync(dremel_device_views, stream); + auto d_dremel_device_views = detail::make_device_uvector_sync( + dremel_device_views, stream, rmm::mr::get_current_device_resource()); return std::make_tuple(std::move(dremel_data), std::move(d_dremel_device_views)); } @@ -333,7 +336,7 @@ void 
check_shape_compatibility(table_view const& lhs, table_view const& rhs) CUDF_EXPECTS(lhs.num_columns() == rhs.num_columns(), "Cannot compare tables with different number of columns"); for (size_type i = 0; i < lhs.num_columns(); ++i) { - CUDF_EXPECTS(column_types_equal(lhs.column(i), rhs.column(i)), + CUDF_EXPECTS(column_types_equivalent(lhs.column(i), rhs.column(i)), "Cannot compare tables with different column types"); } } @@ -355,10 +358,13 @@ std::shared_ptr preprocessed_table::create( auto [verticalized_lhs, new_column_order, new_null_precedence, verticalized_col_depths] = decompose_structs(t, column_order, null_precedence); - auto d_t = table_device_view::create(verticalized_lhs, stream); - auto d_column_order = detail::make_device_uvector_async(new_column_order, stream); - auto d_null_precedence = detail::make_device_uvector_async(new_null_precedence, stream); - auto d_depths = detail::make_device_uvector_async(verticalized_col_depths, stream); + auto d_t = table_device_view::create(verticalized_lhs, stream); + auto d_column_order = detail::make_device_uvector_async( + new_column_order, stream, rmm::mr::get_current_device_resource()); + auto d_null_precedence = detail::make_device_uvector_async( + new_null_precedence, stream, rmm::mr::get_current_device_resource()); + auto d_depths = detail::make_device_uvector_async( + verticalized_col_depths, stream, rmm::mr::get_current_device_resource()); if (detail::has_nested_columns(t)) { auto [dremel_data, d_dremel_device_view] = list_lex_preprocess(verticalized_lhs, stream); @@ -397,9 +403,10 @@ std::shared_ptr preprocessed_table::create(table_view const& { check_eq_compatibility(t); - auto [null_pushed_table, nullable_data] = structs::detail::push_down_nulls(t, stream); - auto struct_offset_removed_table = remove_struct_child_offsets(null_pushed_table); - auto verticalized_t = std::get<0>(decompose_structs(struct_offset_removed_table)); + auto [null_pushed_table, nullable_data] = + 
structs::detail::push_down_nulls(t, stream, rmm::mr::get_current_device_resource()); + auto struct_offset_removed_table = remove_struct_child_offsets(null_pushed_table); + auto verticalized_t = std::get<0>(decompose_structs(struct_offset_removed_table)); auto d_t = table_device_view_owner(table_device_view::create(verticalized_t, stream)); return std::shared_ptr(new preprocessed_table( diff --git a/cpp/src/text/generate_ngrams.cu b/cpp/src/text/generate_ngrams.cu index 1d5a738f8ce..8039729d749 100644 --- a/cpp/src/text/generate_ngrams.cu +++ b/cpp/src/text/generate_ngrams.cu @@ -108,7 +108,8 @@ std::unique_ptr generate_ngrams(cudf::strings_column_view const& s if (d_strings.is_null(idx)) return false; return !d_strings.element(idx).empty(); }, - stream) + stream, + rmm::mr::get_current_device_resource()) ->release(); strings_count = table_offsets.front()->size() - 1; auto result = std::move(table_offsets.front()); diff --git a/cpp/src/transform/row_bit_count.cu b/cpp/src/transform/row_bit_count.cu index 634fdd70831..b982a010e6e 100644 --- a/cpp/src/transform/row_bit_count.cu +++ b/cpp/src/transform/row_bit_count.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -500,7 +500,8 @@ std::unique_ptr row_bit_count(table_view const& t, auto d_cols = contiguous_copy_column_device_views(cols, stream); // move stack info to the gpu - rmm::device_uvector d_info = cudf::detail::make_device_uvector_async(info, stream); + rmm::device_uvector d_info = + cudf::detail::make_device_uvector_async(info, stream, rmm::mr::get_current_device_resource()); // each thread needs to maintain a stack of row spans of size max_branch_depth. 
we will use // shared memory to do this rather than allocating a potentially gigantic temporary buffer diff --git a/cpp/src/unary/cast_ops.cu b/cpp/src/unary/cast_ops.cu index b569ce04c31..6e19fc2ca3f 100644 --- a/cpp/src/unary/cast_ops.cu +++ b/cpp/src/unary/cast_ops.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -328,7 +328,7 @@ struct dispatch_unary_cast_to { auto output = std::make_unique(cudf::data_type{type.id(), input.type().scale()}, size, rmm::device_buffer{size * cudf::size_of(type), stream}, - copy_bitmask(input, stream), + copy_bitmask(input, stream, mr), input.null_count()); mutable_column_view output_mutable = *output; diff --git a/cpp/src/utilities/type_checks.cpp b/cpp/src/utilities/type_checks.cpp index d297148de45..d6f5c65593a 100644 --- a/cpp/src/utilities/type_checks.cpp +++ b/cpp/src/utilities/type_checks.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -69,4 +69,10 @@ bool column_types_equal(column_view const& lhs, column_view const& rhs) return type_dispatcher(lhs.type(), columns_equal_fn{}, lhs, rhs); } +bool column_types_equivalent(column_view const& lhs, column_view const& rhs) +{ + if (lhs.type().id() != rhs.type().id()) { return false; } + return type_dispatcher(lhs.type(), columns_equal_fn{}, lhs, rhs); +} + } // namespace cudf diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 7c021a73eb5..bd4077aff4e 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -13,12 +13,32 @@ # ============================================================================= # ################################################################################################## -# * compiler function ----------------------------------------------------------------------------- +# enable testing ################################################################################ +# ################################################################################################## +enable_testing() + +include(rapids-test) +rapids_test_init() # This function takes in a test name and test source and handles setting all of the associated # properties and linking to build the test function(ConfigureTest CMAKE_TEST_NAME) - add_executable(${CMAKE_TEST_NAME} ${ARGN}) + set(options) + set(one_value GPUS PERCENT) + set(multi_value) + cmake_parse_arguments(_CUDF_TEST "${options}" "${one_value}" "${multi_value}" ${ARGN}) + if(NOT DEFINED _CUDF_TEST_GPUS AND NOT DEFINED _CUDF_TEST_PERCENT) + set(_CUDF_TEST_GPUS 1) + set(_CUDF_TEST_PERCENT 15) + endif() + if(NOT DEFINED _CUDF_TEST_GPUS) + set(_CUDF_TEST_GPUS 1) + endif() + if(NOT DEFINED _CUDF_TEST_PERCENT) + set(_CUDF_TEST_PERCENT 100) + endif() + + add_executable(${CMAKE_TEST_NAME} ${_CUDF_TEST_UNPARSED_ARGUMENTS}) set_target_properties( ${CMAKE_TEST_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "$" @@ -35,12 +55,12 @@ function(ConfigureTest CMAKE_TEST_NAME) 
${CMAKE_TEST_NAME} PRIVATE cudftestutil GTest::gmock_main GTest::gtest_main $ ) - add_test(NAME ${CMAKE_TEST_NAME} COMMAND ${CMAKE_TEST_NAME}) - install( - TARGETS ${CMAKE_TEST_NAME} - COMPONENT testing - DESTINATION bin/gtests/libcudf - EXCLUDE_FROM_ALL + rapids_test_add( + NAME ${CMAKE_TEST_NAME} + COMMAND ${CMAKE_TEST_NAME} + GPUS ${_CUDF_TEST_GPUS} + PERCENT ${_CUDF_TEST_PERCENT} + INSTALL_COMPONENT_SET testing ) endfunction() @@ -112,6 +132,8 @@ ConfigureTest( groupby/sum_tests.cpp groupby/tdigest_tests.cu groupby/var_tests.cpp + GPUS 1 + PERCENT 100 ) # ################################################################################################## @@ -138,6 +160,8 @@ ConfigureTest(HASHING_TEST hashing/hash_test.cpp) ConfigureTest( PARTITIONING_TEST partitioning/hash_partition_test.cpp partitioning/round_robin_test.cpp partitioning/partition_test.cpp + GPUS 1 + PERCENT 70 ) # ################################################################################################## @@ -149,6 +173,8 @@ ConfigureTest(HASH_MAP_TEST hash_map/map_test.cu) ConfigureTest( QUANTILES_TEST quantiles/percentile_approx_test.cpp quantiles/quantile_test.cpp quantiles/quantiles_test.cpp + GPUS 1 + PERCENT 70 ) # ################################################################################################## @@ -162,6 +188,8 @@ ConfigureTest( reductions/segmented_reduction_tests.cpp reductions/list_rank_test.cpp reductions/tdigest_tests.cu + GPUS 1 + PERCENT 70 ) # ################################################################################################## @@ -221,17 +249,41 @@ ConfigureTest( # * io tests -------------------------------------------------------------------------------------- ConfigureTest(DECOMPRESSION_TEST io/comp/decomp_test.cpp) -ConfigureTest(CSV_TEST io/csv_test.cpp) -ConfigureTest(FILE_IO_TEST io/file_io_test.cpp) -ConfigureTest(ORC_TEST io/orc_test.cpp) -ConfigureTest(PARQUET_TEST io/parquet_test.cpp io/parquet_chunked_reader_test.cpp) 
-ConfigureTest(JSON_TEST io/json_test.cpp io/json_chunked_reader.cpp) +ConfigureTest( + CSV_TEST io/csv_test.cpp + GPUS 1 + PERCENT 30 +) +ConfigureTest( + FILE_IO_TEST io/file_io_test.cpp + GPUS 1 + PERCENT 30 +) +ConfigureTest( + ORC_TEST io/orc_test.cpp + GPUS 1 + PERCENT 30 +) +ConfigureTest( + PARQUET_TEST io/parquet_test.cpp io/parquet_chunked_reader_test.cpp + GPUS 1 + PERCENT 30 +) +ConfigureTest( + JSON_TEST io/json_test.cpp io/json_chunked_reader.cpp + GPUS 1 + PERCENT 30 +) ConfigureTest(JSON_WRITER_TEST io/json_writer.cpp) ConfigureTest(JSON_TYPE_CAST_TEST io/json_type_cast_test.cu) ConfigureTest(NESTED_JSON_TEST io/nested_json_test.cpp io/json_tree.cpp) ConfigureTest(ARROW_IO_SOURCE_TEST io/arrow_io_source_test.cpp) ConfigureTest(MULTIBYTE_SPLIT_TEST io/text/multibyte_split_test.cpp) -ConfigureTest(DATA_CHUNK_SOURCE_TEST io/text/data_chunk_source_test.cpp) +ConfigureTest( + DATA_CHUNK_SOURCE_TEST io/text/data_chunk_source_test.cpp + GPUS 1 + PERCENT 30 +) target_link_libraries(DATA_CHUNK_SOURCE_TEST PRIVATE ZLIB::ZLIB) ConfigureTest(LOGICAL_STACK_TEST io/fst/logical_stack_test.cu) ConfigureTest(FST_TEST io/fst/fst_test.cu) @@ -245,6 +297,8 @@ endif() ConfigureTest( SORT_TEST sort/segmented_sort_tests.cpp sort/sort_test.cpp sort/stable_sort_tests.cpp sort/rank_test.cpp + GPUS 1 + PERCENT 70 ) # ################################################################################################## @@ -275,6 +329,8 @@ ConfigureTest( copying/split_tests.cpp copying/utility_tests.cpp copying/reverse_tests.cpp + GPUS 1 + PERCENT 70 ) # ################################################################################################## @@ -318,7 +374,11 @@ ConfigureTest(DEVICE_ATOMICS_TEST device_atomics/device_atomics_test.cu) # ################################################################################################## # * transpose tests ------------------------------------------------------------------------------- -ConfigureTest(TRANSPOSE_TEST 
transpose/transpose_test.cpp) +ConfigureTest( + TRANSPOSE_TEST transpose/transpose_test.cpp + GPUS 1 + PERCENT 70 +) # ################################################################################################## # * table tests ----------------------------------------------------------------------------------- @@ -359,6 +419,8 @@ ConfigureTest( rolling/range_rolling_window_test.cpp rolling/range_window_bounds_test.cpp rolling/rolling_test.cpp + GPUS 1 + PERCENT 70 ) # ################################################################################################## @@ -504,6 +566,8 @@ ConfigureTest( lists/sort_lists_tests.cpp lists/stream_compaction/apply_boolean_mask_tests.cpp lists/stream_compaction/distinct_tests.cpp + GPUS 1 + PERCENT 70 ) # ################################################################################################## @@ -519,12 +583,11 @@ ConfigureTest( # tests by manually invoking the executable, so we'll have to manually pass this environment # variable in that setup. 
set_tests_properties( - STREAM_IDENTIFICATION_TEST PROPERTIES ENVIRONMENT - LD_PRELOAD=$ + STREAM_IDENTIFICATION_TEST + PROPERTIES ENVIRONMENT LD_PRELOAD=$ ) # ################################################################################################## -# enable testing ################################################################################ +# Install tests #################################################################################### # ################################################################################################## - -enable_testing() +rapids_test_install_relocatable(INSTALL_COMPONENT_SET testing DESTINATION bin/gtests/libcudf) diff --git a/cpp/tests/bitmask/bitmask_tests.cpp b/cpp/tests/bitmask/bitmask_tests.cpp index 00ec7bd218b..7805828ad55 100644 --- a/cpp/tests/bitmask/bitmask_tests.cpp +++ b/cpp/tests/bitmask/bitmask_tests.cpp @@ -87,7 +87,7 @@ rmm::device_uvector make_mask(cudf::size_type size, bool fil { if (!fill_valid) { return cudf::detail::make_zeroed_device_uvector_sync( - size, cudf::get_default_stream()); + size, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); } else { auto ret = rmm::device_uvector(size, cudf::get_default_stream()); CUDF_CUDA_TRY(cudaMemsetAsync(ret.data(), diff --git a/cpp/tests/bitmask/valid_if_tests.cu b/cpp/tests/bitmask/valid_if_tests.cu index cdc453be8e4..cb086cda179 100644 --- a/cpp/tests/bitmask/valid_if_tests.cu +++ b/cpp/tests/bitmask/valid_if_tests.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -43,7 +43,8 @@ TEST_F(ValidIfTest, EmptyRange) auto actual = cudf::detail::valid_if(thrust::make_counting_iterator(0), thrust::make_counting_iterator(0), odds_valid{}, - cudf::get_default_stream()); + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); auto const& buffer = actual.first; EXPECT_EQ(0u, buffer.size()); EXPECT_EQ(nullptr, buffer.data()); @@ -55,7 +56,8 @@ TEST_F(ValidIfTest, InvalidRange) EXPECT_THROW(cudf::detail::valid_if(thrust::make_counting_iterator(1), thrust::make_counting_iterator(0), odds_valid{}, - cudf::get_default_stream()), + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()), cudf::logic_error); } @@ -66,7 +68,8 @@ TEST_F(ValidIfTest, OddsValid) auto actual = cudf::detail::valid_if(thrust::make_counting_iterator(0), thrust::make_counting_iterator(10000), odds_valid{}, - cudf::get_default_stream()); + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); CUDF_TEST_EXPECT_EQUAL_BUFFERS(expected.data(), actual.first.data(), expected.size()); EXPECT_EQ(5000, actual.second); } @@ -78,7 +81,8 @@ TEST_F(ValidIfTest, AllValid) auto actual = cudf::detail::valid_if(thrust::make_counting_iterator(0), thrust::make_counting_iterator(10000), all_valid{}, - cudf::get_default_stream()); + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); CUDF_TEST_EXPECT_EQUAL_BUFFERS(expected.data(), actual.first.data(), expected.size()); EXPECT_EQ(0, actual.second); } @@ -90,7 +94,8 @@ TEST_F(ValidIfTest, AllNull) auto actual = cudf::detail::valid_if(thrust::make_counting_iterator(0), thrust::make_counting_iterator(10000), all_null{}, - cudf::get_default_stream()); + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); CUDF_TEST_EXPECT_EQUAL_BUFFERS(expected.data(), actual.first.data(), expected.size()); EXPECT_EQ(10000, actual.second); } diff --git a/cpp/tests/copying/detail_gather_tests.cu b/cpp/tests/copying/detail_gather_tests.cu index bf2937ae8ab..aae511413ef 100644 
--- a/cpp/tests/copying/detail_gather_tests.cu +++ b/cpp/tests/copying/detail_gather_tests.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -58,7 +58,12 @@ TYPED_TEST(GatherTest, GatherDetailDeviceVectorTest) // test with device vector iterators { std::unique_ptr result = - cudf::detail::gather(source_table, gather_map.begin(), gather_map.end()); + cudf::detail::gather(source_table, + gather_map.begin(), + gather_map.end(), + cudf::out_of_bounds_policy::DONT_CHECK, + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); for (auto i = 0; i < source_table.num_columns(); ++i) { CUDF_TEST_EXPECT_COLUMNS_EQUAL(source_table.column(i), result->view().column(i)); @@ -70,7 +75,12 @@ TYPED_TEST(GatherTest, GatherDetailDeviceVectorTest) // test with raw pointers { std::unique_ptr result = - cudf::detail::gather(source_table, gather_map.data(), gather_map.data() + gather_map.size()); + cudf::detail::gather(source_table, + gather_map.begin(), + gather_map.data() + gather_map.size(), + cudf::out_of_bounds_policy::DONT_CHECK, + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); for (auto i = 0; i < source_table.num_columns(); ++i) { CUDF_TEST_EXPECT_COLUMNS_EQUAL(source_table.column(i), result->view().column(i)); @@ -97,7 +107,8 @@ TYPED_TEST(GatherTest, GatherDetailInvalidIndexTest) gather_map, cudf::out_of_bounds_policy::NULLIFY, cudf::detail::negative_index_policy::NOT_ALLOWED, - cudf::get_default_stream()); + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); auto expect_data = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i % 2) ? 
0 : i; }); diff --git a/cpp/tests/copying/gather_str_tests.cpp b/cpp/tests/copying/gather_str_tests.cpp index 3db2ce399cc..7810566fbf1 100644 --- a/cpp/tests/copying/gather_str_tests.cpp +++ b/cpp/tests/copying/gather_str_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,6 +25,8 @@ #include #include +#include + class GatherTestStr : public cudf::test::BaseFixture { }; @@ -87,7 +89,8 @@ TEST_F(GatherTestStr, Gather) gather_map, cudf::out_of_bounds_policy::NULLIFY, cudf::detail::negative_index_policy::NOT_ALLOWED, - cudf::get_default_stream()); + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); std::vector h_expected; std::vector expected_validity; @@ -118,7 +121,8 @@ TEST_F(GatherTestStr, GatherDontCheckOutOfBounds) gather_map, cudf::out_of_bounds_policy::DONT_CHECK, cudf::detail::negative_index_policy::NOT_ALLOWED, - cudf::get_default_stream()); + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); std::vector h_expected; for (auto itr = h_map.begin(); itr != h_map.end(); ++itr) { @@ -137,7 +141,8 @@ TEST_F(GatherTestStr, GatherEmptyMapStringsColumn) gather_map, cudf::out_of_bounds_policy::NULLIFY, cudf::detail::negative_index_policy::NOT_ALLOWED, - cudf::get_default_stream()); + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); cudf::test::expect_column_empty(results->get_column(0).view()); } @@ -151,6 +156,7 @@ TEST_F(GatherTestStr, GatherZeroSizeStringsColumn) gather_map, cudf::out_of_bounds_policy::NULLIFY, cudf::detail::negative_index_policy::NOT_ALLOWED, - cudf::get_default_stream()); + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, results->get_column(0).view()); } diff --git a/cpp/tests/copying/get_value_tests.cpp 
b/cpp/tests/copying/get_value_tests.cpp index 1c51eab1f94..a35bbab0176 100644 --- a/cpp/tests/copying/get_value_tests.cpp +++ b/cpp/tests/copying/get_value_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -812,7 +812,7 @@ TYPED_TEST(StructGetValueTestTyped, mixed_types_valid) // col fields cudf::test::fixed_width_column_wrapper f1{1, 2, 3}; cudf::test::strings_column_wrapper f2{"aa", "bbb", "c"}; - cudf::test::dictionary_column_wrapper f3{42, 42, 24}; + cudf::test::dictionary_column_wrapper f3{42, 42, 24}; LCW f4{LCW{8, 8, 8}, LCW{9, 9}, LCW{10}}; cudf::test::structs_column_wrapper col{f1, f2, f3, f4}; @@ -824,7 +824,7 @@ TYPED_TEST(StructGetValueTestTyped, mixed_types_valid) // expect fields cudf::test::fixed_width_column_wrapper ef1{3}; cudf::test::strings_column_wrapper ef2{"c"}; - cudf::test::dictionary_column_wrapper ef3{24}; + cudf::test::dictionary_column_wrapper ef3{24}; LCW ef4{LCW{10}}; cudf::table_view expect_data{{ef1, ef2, ef3, ef4}}; diff --git a/cpp/tests/device_atomics/device_atomics_test.cu b/cpp/tests/device_atomics/device_atomics_test.cu index 43874b84114..5694513647b 100644 --- a/cpp/tests/device_atomics/device_atomics_test.cu +++ b/cpp/tests/device_atomics/device_atomics_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -141,9 +141,10 @@ struct AtomicsTest : public cudf::test::BaseFixture { result_init[4] = result_init[1]; result_init[5] = result_init[2]; - auto dev_data = cudf::detail::make_device_uvector_sync(v, cudf::get_default_stream()); - auto dev_result = - cudf::detail::make_device_uvector_sync(result_init, cudf::get_default_stream()); + auto dev_data = cudf::detail::make_device_uvector_sync( + v, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto dev_result = cudf::detail::make_device_uvector_sync( + result_init, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); if (block_size == 0) { block_size = vec_size; } @@ -294,8 +295,10 @@ struct AtomicsBitwiseOpTest : public cudf::test::BaseFixture { exact[2] = std::accumulate( v.begin(), v.end(), identity[2], [](T acc, uint64_t i) { return acc ^ T(i); }); - auto dev_result = cudf::detail::make_device_uvector_sync(identity, cudf::get_default_stream()); - auto dev_data = cudf::detail::make_device_uvector_sync(v, cudf::get_default_stream()); + auto dev_result = cudf::detail::make_device_uvector_sync( + identity, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto dev_data = cudf::detail::make_device_uvector_sync( + v, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); if (block_size == 0) { block_size = vec_size; } diff --git a/cpp/tests/error/error_handling_test.cu b/cpp/tests/error/error_handling_test.cu index eb4a3e895f9..5b842322681 100644 --- a/cpp/tests/error/error_handling_test.cu +++ b/cpp/tests/error/error_handling_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * Copyright (c) 2018-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -15,6 +15,8 @@ */ #include +#include +#include #include #include @@ -125,10 +127,14 @@ int main(int argc, char** argv) ::testing::InitGoogleTest(&argc, argv); auto const cmd_opts = parse_cudf_test_opts(argc, argv); auto const stream_mode = cmd_opts["stream_mode"].as(); - if (stream_mode == "custom") { - auto resource = rmm::mr::get_current_device_resource(); - auto adapter = make_stream_checking_resource_adaptor(resource); - rmm::mr::set_current_device_resource(&adapter); + if ((stream_mode == "new_cudf_default") || (stream_mode == "new_testing_default")) { + auto resource = rmm::mr::get_current_device_resource(); + auto const stream_error_mode = cmd_opts["stream_error_mode"].as(); + auto const error_on_invalid_stream = (stream_error_mode == "error"); + auto const check_default_stream = (stream_mode == "new_cudf_default"); + auto adaptor = make_stream_checking_resource_adaptor( + resource, error_on_invalid_stream, check_default_stream); + rmm::mr::set_current_device_resource(&adaptor); } return RUN_ALL_TESTS(); } diff --git a/cpp/tests/filling/sequence_tests.cpp b/cpp/tests/filling/sequence_tests.cpp index 383a69affa1..1f55cdf3df0 100644 --- a/cpp/tests/filling/sequence_tests.cpp +++ b/cpp/tests/filling/sequence_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,8 +14,6 @@ * limitations under the License. 
*/ -#include - #include #include #include @@ -23,11 +21,7 @@ #include #include -#include -#include - -using namespace cudf; -using namespace cudf::test; +#include template class SequenceTypedTestFixture : public cudf::test::BaseFixture { @@ -44,13 +38,13 @@ TYPED_TEST(SequenceTypedTestFixture, Incrementing) { using T = TypeParam; - numeric_scalar init(0); - numeric_scalar step(1); + cudf::numeric_scalar init(0); + cudf::numeric_scalar step(1); - size_type num_els = 10; + cudf::size_type num_els = 10; T expected[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; - fixed_width_column_wrapper expected_w(expected, expected + num_els); + cudf::test::fixed_width_column_wrapper expected_w(expected, expected + num_els); auto result = cudf::sequence(num_els, init, step); @@ -61,13 +55,13 @@ TYPED_TEST(SequenceTypedTestFixture, Decrementing) { using T = TypeParam; - numeric_scalar init(0); - numeric_scalar step(-5); + cudf::numeric_scalar init(0); + cudf::numeric_scalar step(-5); - size_type num_els = 10; + cudf::size_type num_els = 10; T expected[] = {0, -5, -10, -15, -20, -25, -30, -35, -40, -45}; - fixed_width_column_wrapper expected_w(expected, expected + num_els); + cudf::test::fixed_width_column_wrapper expected_w(expected, expected + num_els); auto result = cudf::sequence(num_els, init, step); @@ -78,13 +72,13 @@ TYPED_TEST(SequenceTypedTestFixture, EmptyOutput) { using T = TypeParam; - numeric_scalar init(0); - numeric_scalar step(-5); + cudf::numeric_scalar init(0); + cudf::numeric_scalar step(-5); - size_type num_els = 0; + cudf::size_type num_els = 0; T expected[] = {}; - fixed_width_column_wrapper expected_w(expected, expected + num_els); + cudf::test::fixed_width_column_wrapper expected_w(expected, expected + num_els); auto result = cudf::sequence(num_els, init, step); @@ -93,31 +87,31 @@ TYPED_TEST(SequenceTypedTestFixture, EmptyOutput) TEST_F(SequenceTestFixture, BadTypes) { - string_scalar string_init("zero"); - string_scalar string_step("???"); + cudf::string_scalar 
string_init("zero"); + cudf::string_scalar string_step("???"); EXPECT_THROW(cudf::sequence(10, string_init, string_step), cudf::logic_error); - numeric_scalar bool_init(true); - numeric_scalar bool_step(false); + cudf::numeric_scalar bool_init(true); + cudf::numeric_scalar bool_step(false); EXPECT_THROW(cudf::sequence(10, bool_init, bool_step), cudf::logic_error); - timestamp_scalar ts_init(duration_s{10}, true); - timestamp_scalar ts_step(duration_s{10}, true); + cudf::timestamp_scalar ts_init(cudf::duration_s{10}, true); + cudf::timestamp_scalar ts_step(cudf::duration_s{10}, true); EXPECT_THROW(cudf::sequence(10, ts_init, ts_step), cudf::logic_error); } TEST_F(SequenceTestFixture, MismatchedInputs) { - numeric_scalar init(0); - numeric_scalar step(-5); + cudf::numeric_scalar init(0); + cudf::numeric_scalar step(-5); EXPECT_THROW(cudf::sequence(10, init, step), cudf::logic_error); - numeric_scalar init2(0); - numeric_scalar step2(-5); + cudf::numeric_scalar init2(0); + cudf::numeric_scalar step2(-5); EXPECT_THROW(cudf::sequence(10, init2, step2), cudf::logic_error); - numeric_scalar init3(0); - numeric_scalar step3(-5); + cudf::numeric_scalar init3(0); + cudf::numeric_scalar step3(-5); EXPECT_THROW(cudf::sequence(10, init3, step3), cudf::logic_error); } @@ -125,12 +119,12 @@ TYPED_TEST(SequenceTypedTestFixture, DefaultStep) { using T = TypeParam; - numeric_scalar init(0); + cudf::numeric_scalar init(0); - size_type num_els = 10; + cudf::size_type num_els = 10; T expected[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; - fixed_width_column_wrapper expected_w(expected, expected + num_els); + cudf::test::fixed_width_column_wrapper expected_w(expected, expected + num_els); auto result = cudf::sequence(num_els, init); @@ -140,11 +134,11 @@ TYPED_TEST(SequenceTypedTestFixture, DefaultStep) TEST_F(SequenceTestFixture, DateSequenceBasic) { // Timestamp generated using https://www.epochconverter.com/ - timestamp_scalar init(1629852896L, true); // 2021-08-25 00:54:56 GMT - size_type 
size{5}; - size_type months{1}; + cudf::timestamp_scalar init(1629852896L, true); // 2021-08-25 00:54:56 GMT + cudf::size_type size{5}; + cudf::size_type months{1}; - fixed_width_column_wrapper expected{ + cudf::test::fixed_width_column_wrapper expected{ 1629852896L, // 2021-08-25 00:54:56 GMT 1632531296L, // 2021-09-25 00:54:56 GMT 1635123296L, // 2021-10-25 00:54:56 GMT @@ -160,11 +154,11 @@ TEST_F(SequenceTestFixture, DateSequenceBasic) TEST_F(SequenceTestFixture, DateSequenceLeapYear) { // Timestamp generated using https://www.epochconverter.com/ - timestamp_scalar init(951876379L, true); // 2000-02-29 02:06:19 GMT - size_type size{5}; - size_type months{12}; + cudf::timestamp_scalar init(951876379L, true); // 2000-02-29 02:06:19 GMT + cudf::size_type size{5}; + cudf::size_type months{12}; - fixed_width_column_wrapper expected{ + cudf::test::fixed_width_column_wrapper expected{ 951876379L, // 2000-02-29 02:06:19 GMT Leap Year 983412379L, // 2001-02-28 02:06:19 GMT 1014948379L, // 2002-02-28 02:06:19 GMT @@ -179,9 +173,9 @@ TEST_F(SequenceTestFixture, DateSequenceLeapYear) TEST_F(SequenceTestFixture, DateSequenceBadTypes) { - numeric_scalar init(951876379, true); - size_type size = 5; - size_type months = 12; + cudf::numeric_scalar init(951876379, true); + cudf::size_type size = 5; + cudf::size_type months = 12; EXPECT_THROW(calendrical_month_sequence(size, init, months), cudf::logic_error); } diff --git a/cpp/tests/fixed_point/fixed_point_tests.cu b/cpp/tests/fixed_point/fixed_point_tests.cu index ab9970dc370..9631e433a5e 100644 --- a/cpp/tests/fixed_point/fixed_point_tests.cu +++ b/cpp/tests/fixed_point/fixed_point_tests.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -83,7 +83,8 @@ TEST_F(FixedPointTest, DecimalXXThrustOnDevice) using decimal32 = fixed_point; std::vector vec1(1000, decimal32{1, scale_type{-2}}); - auto d_vec1 = cudf::detail::make_device_uvector_sync(vec1, cudf::get_default_stream()); + auto d_vec1 = cudf::detail::make_device_uvector_sync( + vec1, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const sum = thrust::reduce(rmm::exec_policy(cudf::get_default_stream()), std::cbegin(d_vec1), @@ -96,7 +97,8 @@ TEST_F(FixedPointTest, DecimalXXThrustOnDevice) // change inclusive scan to run on device (avoid copying to host) thrust::inclusive_scan(std::cbegin(vec1), std::cend(vec1), std::begin(vec1)); - d_vec1 = cudf::detail::make_device_uvector_sync(vec1, cudf::get_default_stream()); + d_vec1 = cudf::detail::make_device_uvector_sync( + vec1, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); std::vector vec2(1000); std::iota(std::begin(vec2), std::end(vec2), 1); diff --git a/cpp/tests/groupby/tdigest_tests.cu b/cpp/tests/groupby/tdigest_tests.cu index d7446d4dabb..4052201b064 100644 --- a/cpp/tests/groupby/tdigest_tests.cu +++ b/cpp/tests/groupby/tdigest_tests.cu @@ -471,13 +471,16 @@ TEST_F(TDigestMergeTest, EmptyGroups) cudf::test::fixed_width_column_wrapper keys{0, 0, 0, 0, 0, 0, 0}; int const delta = 1000; - auto a = cudf::tdigest::detail::make_empty_tdigest_column(cudf::get_default_stream()); + auto a = cudf::tdigest::detail::make_empty_tdigest_column(cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); auto b = cudf::type_dispatcher( static_cast(values_b).type(), tdigest_gen_grouped{}, keys, values_b, delta); - auto c = cudf::tdigest::detail::make_empty_tdigest_column(cudf::get_default_stream()); + auto c = cudf::tdigest::detail::make_empty_tdigest_column(cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); auto d = cudf::type_dispatcher( static_cast(values_d).type(), tdigest_gen_grouped{}, keys, values_d, delta); - auto e = 
cudf::tdigest::detail::make_empty_tdigest_column(cudf::get_default_stream()); + auto e = cudf::tdigest::detail::make_empty_tdigest_column(cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); std::vector cols; cols.push_back(*a); diff --git a/cpp/tests/interop/from_arrow_test.cpp b/cpp/tests/interop/from_arrow_test.cpp index d2b159fc208..3f4d5bcf20f 100644 --- a/cpp/tests/interop/from_arrow_test.cpp +++ b/cpp/tests/interop/from_arrow_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -264,7 +264,7 @@ TEST_F(FromArrowTest, DictionaryIndicesType) auto arrow_table = arrow::Table::Make(schema, {array1, array2, array3}); std::vector> columns; - auto col = cudf::test::fixed_width_column_wrapper({1, 2, 5, 2, 7}, {1, 0, 1, 1, 1}); + auto col = cudf::test::fixed_width_column_wrapper({1, 2, 5, 2, 7}, {1, 0, 1, 1, 1}); columns.emplace_back(std::move(cudf::dictionary::encode(col))); columns.emplace_back(std::move(cudf::dictionary::encode(col))); columns.emplace_back(std::move(cudf::dictionary::encode(col))); diff --git a/cpp/tests/io/json_tree.cpp b/cpp/tests/io/json_tree.cpp index c6b181fe8a1..0ae0360c4d9 100644 --- a/cpp/tests/io/json_tree.cpp +++ b/cpp/tests/io/json_tree.cpp @@ -586,11 +586,12 @@ TEST_F(JsonTest, TreeRepresentation) cudf::io::json_reader_options const options{}; // Parse the JSON and get the token stream - const auto [tokens_gpu, token_indices_gpu] = - cudf::io::json::detail::get_token_stream(d_input, options, stream); + const auto [tokens_gpu, token_indices_gpu] = cudf::io::json::detail::get_token_stream( + d_input, options, stream, rmm::mr::get_current_device_resource()); // Get the JSON's tree representation - auto gpu_tree = cuio_json::detail::get_tree_representation(tokens_gpu, token_indices_gpu, stream); + auto gpu_tree = 
cuio_json::detail::get_tree_representation( + tokens_gpu, token_indices_gpu, stream, rmm::mr::get_current_device_resource()); // host tree generation auto cpu_tree = get_tree_representation_cpu(tokens_gpu, token_indices_gpu, options, stream); compare_trees(cpu_tree, gpu_tree); @@ -672,11 +673,12 @@ TEST_F(JsonTest, TreeRepresentation2) cudf::io::json_reader_options const options{}; // Parse the JSON and get the token stream - const auto [tokens_gpu, token_indices_gpu] = - cudf::io::json::detail::get_token_stream(d_input, options, stream); + const auto [tokens_gpu, token_indices_gpu] = cudf::io::json::detail::get_token_stream( + d_input, options, stream, rmm::mr::get_current_device_resource()); // Get the JSON's tree representation - auto gpu_tree = cuio_json::detail::get_tree_representation(tokens_gpu, token_indices_gpu, stream); + auto gpu_tree = cuio_json::detail::get_tree_representation( + tokens_gpu, token_indices_gpu, stream, rmm::mr::get_current_device_resource()); // host tree generation auto cpu_tree = get_tree_representation_cpu(tokens_gpu, token_indices_gpu, options, stream); compare_trees(cpu_tree, gpu_tree); @@ -745,11 +747,12 @@ TEST_F(JsonTest, TreeRepresentation3) options.enable_lines(true); // Parse the JSON and get the token stream - const auto [tokens_gpu, token_indices_gpu] = - cudf::io::json::detail::get_token_stream(d_input, options, stream); + const auto [tokens_gpu, token_indices_gpu] = cudf::io::json::detail::get_token_stream( + d_input, options, stream, rmm::mr::get_current_device_resource()); // Get the JSON's tree representation - auto gpu_tree = cuio_json::detail::get_tree_representation(tokens_gpu, token_indices_gpu, stream); + auto gpu_tree = cuio_json::detail::get_tree_representation( + tokens_gpu, token_indices_gpu, stream, rmm::mr::get_current_device_resource()); // host tree generation auto cpu_tree = get_tree_representation_cpu(tokens_gpu, token_indices_gpu, options, stream); compare_trees(cpu_tree, gpu_tree); @@ -769,12 +772,13 
@@ TEST_F(JsonTest, TreeRepresentationError) cudf::io::json_reader_options const options{}; // Parse the JSON and get the token stream - const auto [tokens_gpu, token_indices_gpu] = - cudf::io::json::detail::get_token_stream(d_input, options, stream); + const auto [tokens_gpu, token_indices_gpu] = cudf::io::json::detail::get_token_stream( + d_input, options, stream, rmm::mr::get_current_device_resource()); // Get the JSON's tree representation // This JSON is invalid and will raise an exception. - EXPECT_THROW(cuio_json::detail::get_tree_representation(tokens_gpu, token_indices_gpu, stream), + EXPECT_THROW(cuio_json::detail::get_tree_representation( + tokens_gpu, token_indices_gpu, stream, rmm::mr::get_current_device_resource()), cudf::logic_error); } @@ -851,8 +855,8 @@ TEST_P(JsonTreeTraversalTest, CPUvsGPUTraversal) static_cast(d_scalar.size())}; // Parse the JSON and get the token stream - const auto [tokens_gpu, token_indices_gpu] = - cudf::io::json::detail::get_token_stream(d_input, options, stream); + const auto [tokens_gpu, token_indices_gpu] = cudf::io::json::detail::get_token_stream( + d_input, options, stream, rmm::mr::get_current_device_resource()); // host tree generation auto cpu_tree = get_tree_representation_cpu(tokens_gpu, token_indices_gpu, options, stream); bool const is_array_of_arrays = @@ -864,15 +868,21 @@ TEST_P(JsonTreeTraversalTest, CPUvsGPUTraversal) auto [cpu_col_id, cpu_row_offsets] = records_orient_tree_traversal_cpu(input, cpu_tree, is_array_of_arrays, json_lines, stream); // gpu tree generation - auto gpu_tree = cuio_json::detail::get_tree_representation(tokens_gpu, token_indices_gpu, stream); + auto gpu_tree = cuio_json::detail::get_tree_representation( + tokens_gpu, token_indices_gpu, stream, rmm::mr::get_current_device_resource()); // Print tree representation if (std::getenv("NJP_DEBUG_DUMP") != nullptr) { printf("BEFORE traversal (gpu_tree):\n"); print_tree(gpu_tree); } // gpu tree traversal - auto [gpu_col_id, gpu_row_offsets] 
= cuio_json::detail::records_orient_tree_traversal( - d_input, gpu_tree, is_array_of_arrays, json_lines, stream); + auto [gpu_col_id, gpu_row_offsets] = + cuio_json::detail::records_orient_tree_traversal(d_input, + gpu_tree, + is_array_of_arrays, + json_lines, + stream, + rmm::mr::get_current_device_resource()); // Print tree representation if (std::getenv("NJP_DEBUG_DUMP") != nullptr) { printf("AFTER traversal (gpu_tree):\n"); diff --git a/cpp/tests/io/nested_json_test.cpp b/cpp/tests/io/nested_json_test.cpp index 3c01bd4de25..5b797a00ca1 100644 --- a/cpp/tests/io/nested_json_test.cpp +++ b/cpp/tests/io/nested_json_test.cpp @@ -262,8 +262,8 @@ TEST_F(JsonTest, TokenStream) cudf::device_span{d_scalar.data(), static_cast(d_scalar.size())}; // Parse the JSON and get the token stream - auto [d_tokens_gpu, d_token_indices_gpu] = - cuio_json::detail::get_token_stream(d_input, default_options, stream); + auto [d_tokens_gpu, d_token_indices_gpu] = cuio_json::detail::get_token_stream( + d_input, default_options, stream, rmm::mr::get_current_device_resource()); // Copy back the number of tokens that were written thrust::host_vector const tokens_gpu = cudf::detail::make_host_vector_async(d_tokens_gpu, stream); @@ -398,8 +398,8 @@ TEST_F(JsonTest, TokenStream2) cudf::device_span{d_scalar.data(), static_cast(d_scalar.size())}; // Parse the JSON and get the token stream - auto [d_tokens_gpu, d_token_indices_gpu] = - cuio_json::detail::get_token_stream(d_input, default_options, stream); + auto [d_tokens_gpu, d_token_indices_gpu] = cuio_json::detail::get_token_stream( + d_input, default_options, stream, rmm::mr::get_current_device_resource()); // Copy back the number of tokens that were written thrust::host_vector const tokens_gpu = cudf::detail::make_host_vector_async(d_tokens_gpu, stream); @@ -470,7 +470,9 @@ TEST_P(JsonParserTest, ExtractColumn) std::string const input = R"( [{"a":0.0, "b":1.0}, {"a":0.1, "b":1.1}, {"a":0.2, "b":1.2}] )"; auto const d_input = 
cudf::detail::make_device_uvector_async( - cudf::host_span{input.c_str(), input.size()}, stream); + cudf::host_span{input.c_str(), input.size()}, + stream, + rmm::mr::get_current_device_resource()); // Get the JSON's tree representation auto const cudf_table = json_parser(d_input, default_options, stream, mr); @@ -508,7 +510,9 @@ TEST_P(JsonParserTest, UTF_JSON) {"a":1,"b":null,"c":null}, {"a":1,"b":Infinity,"c":[null], "d": {"year":-600,"author": "Kaniyan"}}])"; auto const d_ascii_pass = cudf::detail::make_device_uvector_sync( - cudf::host_span{ascii_pass.c_str(), ascii_pass.size()}, stream); + cudf::host_span{ascii_pass.c_str(), ascii_pass.size()}, + stream, + rmm::mr::get_current_device_resource()); CUDF_EXPECT_NO_THROW(json_parser(d_ascii_pass, default_options, stream, mr)); @@ -521,7 +525,9 @@ TEST_P(JsonParserTest, UTF_JSON) {"a":1,"b":null,"c":null}, {"a":1,"b":Infinity,"c":[null], "d": {"year":-600,"author": "filip ʒakotɛ"}}])"; auto const d_utf_failed = cudf::detail::make_device_uvector_sync( - cudf::host_span{utf_failed.c_str(), utf_failed.size()}, stream); + cudf::host_span{utf_failed.c_str(), utf_failed.size()}, + stream, + rmm::mr::get_current_device_resource()); CUDF_EXPECT_NO_THROW(json_parser(d_utf_failed, default_options, stream, mr)); // utf-8 string that passes parsing. 
@@ -534,7 +540,9 @@ TEST_P(JsonParserTest, UTF_JSON) {"a":1,"b":Infinity,"c":[null], "d": {"year":-600,"author": "Kaniyan"}}, {"a":1,"b":NaN,"c":[null, null], "d": {"year": 2, "author": "filip ʒakotɛ"}}])"; auto const d_utf_pass = cudf::detail::make_device_uvector_sync( - cudf::host_span{utf_pass.c_str(), utf_pass.size()}, stream); + cudf::host_span{utf_pass.c_str(), utf_pass.size()}, + stream, + rmm::mr::get_current_device_resource()); CUDF_EXPECT_NO_THROW(json_parser(d_utf_pass, default_options, stream, mr)); } @@ -555,7 +563,9 @@ TEST_P(JsonParserTest, ExtractColumnWithQuotes) std::string const input = R"( [{"a":"0.0", "b":1.0}, {"b":1.1}, {"b":2.1, "a":"2.0"}] )"; auto const d_input = cudf::detail::make_device_uvector_async( - cudf::host_span{input.c_str(), input.size()}, stream); + cudf::host_span{input.c_str(), input.size()}, + stream, + rmm::mr::get_current_device_resource()); // Get the JSON's tree representation auto const cudf_table = json_parser(d_input, options, stream, mr); @@ -599,14 +609,18 @@ TEST_P(JsonParserTest, ExpectFailMixStructAndList) // libcudf does not currently support a mix of lists and structs. 
for (auto const& input : inputs_fail) { auto const d_input = cudf::detail::make_device_uvector_async( - cudf::host_span{input.c_str(), input.size()}, stream); + cudf::host_span{input.c_str(), input.size()}, + stream, + rmm::mr::get_current_device_resource()); EXPECT_THROW(auto const cudf_table = json_parser(d_input, options, stream, mr), cudf::logic_error); } for (auto const& input : inputs_succeed) { auto const d_input = cudf::detail::make_device_uvector_async( - cudf::host_span{input.c_str(), input.size()}, stream); + cudf::host_span{input.c_str(), input.size()}, + stream, + rmm::mr::get_current_device_resource()); CUDF_EXPECT_NO_THROW(auto const cudf_table = json_parser(d_input, options, stream, mr)); } } @@ -626,8 +640,10 @@ TEST_P(JsonParserTest, EmptyString) cudf::io::json_reader_options default_options{}; std::string const input = R"([])"; - auto const d_input = cudf::detail::make_device_uvector_sync( - cudf::host_span{input.c_str(), input.size()}, stream); + auto const d_input = + cudf::detail::make_device_uvector_sync(cudf::host_span{input.c_str(), input.size()}, + stream, + rmm::mr::get_current_device_resource()); // Get the JSON's tree representation auto const cudf_table = json_parser(d_input, default_options, stream, mr); diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index b682ecbbae9..8a16fd9a05a 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -1128,7 +1128,8 @@ TEST_F(ParquetWriterTest, BufferSource) auto const d_input = cudf::detail::make_device_uvector_sync( cudf::host_span{reinterpret_cast(out_buffer.data()), out_buffer.size()}, - cudf::get_default_stream()); + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); auto const d_buffer = cudf::device_span( reinterpret_cast(d_input.data()), d_input.size()); cudf::io::parquet_reader_options in_opts = @@ -4279,6 +4280,9 @@ TEST_F(ParquetWriterTest, CheckColumnOffsetIndexNulls) auto const ci = read_column_index(source, 
chunk); auto const stats = parse_statistics(chunk); + // should be half nulls, except no nulls in column 0 + EXPECT_EQ(stats.null_count, c == 0 ? 0 : num_rows / 2); + // schema indexing starts at 1 auto const ptype = fmd.schema[c + 1].type; auto const ctype = fmd.schema[c + 1].converted_type; @@ -4364,6 +4368,9 @@ TEST_F(ParquetWriterTest, CheckColumnOffsetIndexNullColumn) auto const ci = read_column_index(source, chunk); auto const stats = parse_statistics(chunk); + // there should be no nulls except column 1 which is all nulls + EXPECT_EQ(stats.null_count, c == 1 ? num_rows : 0); + // schema indexing starts at 1 auto const ptype = fmd.schema[c + 1].type; auto const ctype = fmd.schema[c + 1].converted_type; @@ -4465,6 +4472,133 @@ TEST_F(ParquetWriterTest, CheckColumnOffsetIndexStruct) } } +TEST_F(ParquetWriterTest, CheckColumnIndexListWithNulls) +{ + using cudf::test::iterators::null_at; + using cudf::test::iterators::nulls_at; + using lcw = cudf::test::lists_column_wrapper; + + // 4 nulls + // [NULL, 2, NULL] + // [] + // [4, 5] + // NULL + lcw col0{{{{1, 2, 3}, nulls_at({0, 2})}, {}, {4, 5}, {}}, null_at(3)}; + + // 4 nulls + // [[1, 2, 3], [], [4, 5], [], [0, 6, 0]] + // [[7, 8]] + // [] + // [[]] + lcw col1{{{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, {{7, 8}}, lcw{}, lcw{lcw{}}}; + + // 4 nulls + // [[1, 2, 3], [], [4, 5], NULL, [0, 6, 0]] + // [[7, 8]] + // [] + // [[]] + lcw col2{{{{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, null_at(3)}, {{7, 8}}, lcw{}, lcw{lcw{}}}; + + // 6 nulls + // [[1, 2, 3], [], [4, 5], NULL, [NULL, 6, NULL]] + // [[7, 8]] + // [] + // [[]] + using dlcw = cudf::test::lists_column_wrapper; + dlcw col3{{{{1., 2., 3.}, {}, {4., 5.}, {}, {{0., 6., 0.}, nulls_at({0, 2})}}, null_at(3)}, + {{7., 8.}}, + dlcw{}, + dlcw{dlcw{}}}; + + // 4 nulls + // [[1, 2, 3], [], [4, 5], NULL, [0, 6, 0]] + // [[7, 8]] + // [] + // NULL + using ui16lcw = cudf::test::lists_column_wrapper; + cudf::test::lists_column_wrapper col4{ + {{{{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 
0}}, null_at(3)}, {{7, 8}}, ui16lcw{}, ui16lcw{ui16lcw{}}}, + null_at(3)}; + + // 6 nulls + // [[1, 2, 3], [], [4, 5], NULL, [NULL, 6, NULL]] + // [[7, 8]] + // [] + // NULL + lcw col5{{{{{1, 2, 3}, {}, {4, 5}, {}, {{0, 6, 0}, nulls_at({0, 2})}}, null_at(3)}, + {{7, 8}}, + lcw{}, + lcw{lcw{}}}, + null_at(3)}; + + // 4 nulls + using strlcw = cudf::test::lists_column_wrapper; + cudf::test::lists_column_wrapper col6{ + {{"Monday", "Monday", "Friday"}, {}, {"Monday", "Friday"}, {}, {"Sunday", "Funday"}}, + {{"bee", "sting"}}, + strlcw{}, + strlcw{strlcw{}}}; + + // 11 nulls + // [[[NULL,2,NULL,4]], [[NULL,6,NULL], [8,9]]] + // [NULL, [[13],[14,15,16]], NULL] + // [NULL, [], NULL, [[]]] + // NULL + lcw col7{{ + {{{{1, 2, 3, 4}, nulls_at({0, 2})}}, {{{5, 6, 7}, nulls_at({0, 2})}, {8, 9}}}, + {{{{10, 11}, {12}}, {{13}, {14, 15, 16}}, {{17, 18}}}, nulls_at({0, 2})}, + {{lcw{lcw{}}, lcw{}, lcw{}, lcw{lcw{}}}, nulls_at({0, 2})}, + lcw{lcw{lcw{}}}, + }, + null_at(3)}; + + table_view expected({col0, col1, col2, col3, col4, col5, col6, col7}); + + int64_t const expected_null_counts[] = {4, 4, 4, 6, 4, 6, 4, 11}; + + auto const filepath = temp_env->get_temp_filepath("ColumnIndexListWithNulls.parquet"); + auto out_opts = cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) + .compression(cudf::io::compression_type::NONE); + + cudf::io::write_parquet(out_opts); + + auto const source = cudf::io::datasource::create(filepath); + cudf::io::parquet::FileMetaData fmd; + + read_footer(source, &fmd); + + for (size_t r = 0; r < fmd.row_groups.size(); r++) { + auto const& rg = fmd.row_groups[r]; + for (size_t c = 0; c < rg.columns.size(); c++) { + auto const& chunk = rg.columns[c]; + + // loop over offsets, read each page header, make sure it's a data page and that + // the first row index is correct + auto const oi = read_offset_index(source, chunk); + + int64_t num_vals = 0; + for (size_t o = 0; o < 
oi.page_locations.size(); o++) { + auto const& page_loc = oi.page_locations[o]; + auto const ph = read_page_header(source, page_loc); + EXPECT_EQ(ph.type, cudf::io::parquet::PageType::DATA_PAGE); + // last column has 2 values per row + EXPECT_EQ(page_loc.first_row_index * (c == rg.columns.size() - 1 ? 2 : 1), num_vals); + num_vals += ph.data_page_header.num_values; + } + + // check null counts in column chunk stats and page indexes + auto const ci = read_column_index(source, chunk); + auto const stats = parse_statistics(chunk); + EXPECT_EQ(stats.null_count, expected_null_counts[c]); + + // should only be one page + EXPECT_FALSE(ci.null_pages[0]); + EXPECT_EQ(ci.null_counts[0], expected_null_counts[c]); + } + } +} + TEST_F(ParquetWriterTest, CheckColumnIndexTruncation) { const char* coldata[] = { @@ -5210,4 +5344,51 @@ TYPED_TEST(ParquetReaderSourceTest, BufferSourceArrayTypes) } } +TEST_F(ParquetWriterTest, UserNullability) +{ + auto weight_col = cudf::test::fixed_width_column_wrapper{{57.5, 51.1, 15.3}}; + auto ages_col = cudf::test::fixed_width_column_wrapper{{30, 27, 5}}; + auto struct_col = cudf::test::structs_column_wrapper{weight_col, ages_col}; + + auto expected = table_view({struct_col}); + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_nullability(false); + expected_metadata.column_metadata[0].child(0).set_nullability(true); + + auto filepath = temp_env->get_temp_filepath("SingleWriteNullable.parquet"); + cudf::io::parquet_writer_options write_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .metadata(&expected_metadata); + cudf::io::write_parquet(write_opts); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); + + EXPECT_FALSE(result.tbl->view().column(0).nullable()); + 
EXPECT_TRUE(result.tbl->view().column(0).child(0).nullable()); + EXPECT_FALSE(result.tbl->view().column(0).child(1).nullable()); +} + +TEST_F(ParquetWriterTest, UserNullabilityInvalid) +{ + auto valids = + cudf::detail::make_counting_transform_iterator(0, [&](int index) { return index % 2; }); + auto col = cudf::test::fixed_width_column_wrapper{{57.5, 51.1, 15.3}, valids}; + auto expected = table_view({col}); + + auto filepath = temp_env->get_temp_filepath("SingleWriteNullableInvalid.parquet"); + cudf::io::parquet_writer_options write_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); + // Should work without the nullability option + EXPECT_NO_THROW(cudf::io::write_parquet(write_opts)); + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_nullability(false); + write_opts.set_metadata(&expected_metadata); + // Can't write a column with nulls as not nullable + EXPECT_THROW(cudf::io::write_parquet(write_opts), cudf::logic_error); +} + CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/io/type_inference_test.cu b/cpp/tests/io/type_inference_test.cu index ea6eb9b93ef..81c6563cd2d 100644 --- a/cpp/tests/io/type_inference_test.cu +++ b/cpp/tests/io/type_inference_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -51,12 +51,12 @@ TEST_F(TypeInference, Basic) auto d_data = cudf::make_string_scalar(data); auto& d_string_scalar = static_cast(*d_data); - auto const string_offset = std::vector{1, 4, 7}; - auto const string_length = std::vector{2, 2, 1}; - auto const d_string_offset = - cudf::detail::make_device_uvector_async(string_offset, cudf::get_default_stream()); - auto const d_string_length = - cudf::detail::make_device_uvector_async(string_length, cudf::get_default_stream()); + auto const string_offset = std::vector{1, 4, 7}; + auto const string_length = std::vector{2, 2, 1}; + auto const d_string_offset = cudf::detail::make_device_uvector_async( + string_offset, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto const d_string_length = cudf::detail::make_device_uvector_async( + string_length, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto d_col_strings = thrust::make_zip_iterator(thrust::make_tuple(d_string_offset.begin(), d_string_length.begin())); @@ -84,12 +84,12 @@ TEST_F(TypeInference, Null) auto d_data = cudf::make_string_scalar(data); auto& d_string_scalar = static_cast(*d_data); - auto const string_offset = std::vector{1, 1, 4}; - auto const string_length = std::vector{0, 2, 1}; - auto const d_string_offset = - cudf::detail::make_device_uvector_async(string_offset, cudf::get_default_stream()); - auto const d_string_length = - cudf::detail::make_device_uvector_async(string_length, cudf::get_default_stream()); + auto const string_offset = std::vector{1, 1, 4}; + auto const string_length = std::vector{0, 2, 1}; + auto const d_string_offset = cudf::detail::make_device_uvector_async( + string_offset, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto const d_string_length = cudf::detail::make_device_uvector_async( + string_length, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto d_col_strings = thrust::make_zip_iterator(thrust::make_tuple(d_string_offset.begin(), 
d_string_length.begin())); @@ -117,12 +117,12 @@ TEST_F(TypeInference, AllNull) auto d_data = cudf::make_string_scalar(data); auto& d_string_scalar = static_cast(*d_data); - auto const string_offset = std::vector{1, 1, 1}; - auto const string_length = std::vector{0, 0, 4}; - auto const d_string_offset = - cudf::detail::make_device_uvector_async(string_offset, cudf::get_default_stream()); - auto const d_string_length = - cudf::detail::make_device_uvector_async(string_length, cudf::get_default_stream()); + auto const string_offset = std::vector{1, 1, 1}; + auto const string_length = std::vector{0, 0, 4}; + auto const d_string_offset = cudf::detail::make_device_uvector_async( + string_offset, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto const d_string_length = cudf::detail::make_device_uvector_async( + string_length, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto d_col_strings = thrust::make_zip_iterator(thrust::make_tuple(d_string_offset.begin(), d_string_length.begin())); @@ -150,12 +150,12 @@ TEST_F(TypeInference, String) auto d_data = cudf::make_string_scalar(data); auto& d_string_scalar = static_cast(*d_data); - auto const string_offset = std::vector{1, 8, 12}; - auto const string_length = std::vector{6, 3, 4}; - auto const d_string_offset = - cudf::detail::make_device_uvector_async(string_offset, cudf::get_default_stream()); - auto const d_string_length = - cudf::detail::make_device_uvector_async(string_length, cudf::get_default_stream()); + auto const string_offset = std::vector{1, 8, 12}; + auto const string_length = std::vector{6, 3, 4}; + auto const d_string_offset = cudf::detail::make_device_uvector_async( + string_offset, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto const d_string_length = cudf::detail::make_device_uvector_async( + string_length, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto d_col_strings = 
thrust::make_zip_iterator(thrust::make_tuple(d_string_offset.begin(), d_string_length.begin())); @@ -183,12 +183,12 @@ TEST_F(TypeInference, Bool) auto d_data = cudf::make_string_scalar(data); auto& d_string_scalar = static_cast(*d_data); - auto const string_offset = std::vector{1, 6, 12}; - auto const string_length = std::vector{4, 5, 5}; - auto const d_string_offset = - cudf::detail::make_device_uvector_async(string_offset, cudf::get_default_stream()); - auto const d_string_length = - cudf::detail::make_device_uvector_async(string_length, cudf::get_default_stream()); + auto const string_offset = std::vector{1, 6, 12}; + auto const string_length = std::vector{4, 5, 5}; + auto const d_string_offset = cudf::detail::make_device_uvector_async( + string_offset, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto const d_string_length = cudf::detail::make_device_uvector_async( + string_length, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto d_col_strings = thrust::make_zip_iterator(thrust::make_tuple(d_string_offset.begin(), d_string_length.begin())); @@ -216,12 +216,12 @@ TEST_F(TypeInference, Timestamp) auto d_data = cudf::make_string_scalar(data); auto& d_string_scalar = static_cast(*d_data); - auto const string_offset = std::vector{1, 10}; - auto const string_length = std::vector{8, 9}; - auto const d_string_offset = - cudf::detail::make_device_uvector_async(string_offset, cudf::get_default_stream()); - auto const d_string_length = - cudf::detail::make_device_uvector_async(string_length, cudf::get_default_stream()); + auto const string_offset = std::vector{1, 10}; + auto const string_length = std::vector{8, 9}; + auto const d_string_offset = cudf::detail::make_device_uvector_async( + string_offset, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto const d_string_length = cudf::detail::make_device_uvector_async( + string_length, cudf::get_default_stream(), 
rmm::mr::get_current_device_resource()); auto d_col_strings = thrust::make_zip_iterator(thrust::make_tuple(d_string_offset.begin(), d_string_length.begin())); @@ -250,12 +250,12 @@ TEST_F(TypeInference, InvalidInput) auto d_data = cudf::make_string_scalar(data); auto& d_string_scalar = static_cast(*d_data); - auto const string_offset = std::vector{1, 3, 5, 7, 9}; - auto const string_length = std::vector{1, 1, 1, 1, 1}; - auto const d_string_offset = - cudf::detail::make_device_uvector_async(string_offset, cudf::get_default_stream()); - auto const d_string_length = - cudf::detail::make_device_uvector_async(string_length, cudf::get_default_stream()); + auto const string_offset = std::vector{1, 3, 5, 7, 9}; + auto const string_length = std::vector{1, 1, 1, 1, 1}; + auto const d_string_offset = cudf::detail::make_device_uvector_async( + string_offset, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto const d_string_length = cudf::detail::make_device_uvector_async( + string_length, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto d_col_strings = thrust::make_zip_iterator(thrust::make_tuple(d_string_offset.begin(), d_string_length.begin())); diff --git a/cpp/tests/iterator/iterator_tests.cuh b/cpp/tests/iterator/iterator_tests.cuh index 894e117ba40..882de994e67 100644 --- a/cpp/tests/iterator/iterator_tests.cuh +++ b/cpp/tests/iterator/iterator_tests.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -87,8 +87,8 @@ struct IteratorTest : public cudf::test::BaseFixture { { InputIterator d_in_last = d_in + num_items; EXPECT_EQ(thrust::distance(d_in, d_in_last), num_items); - auto dev_expected = - cudf::detail::make_device_uvector_sync(expected, cudf::get_default_stream()); + auto dev_expected = cudf::detail::make_device_uvector_sync( + expected, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); // using a temporary vector and calling transform and all_of separately is // equivalent to thrust::equal but compiles ~3x faster diff --git a/cpp/tests/iterator/value_iterator_test.cuh b/cpp/tests/iterator/value_iterator_test.cuh index fa931d34a0e..8252ce88f39 100644 --- a/cpp/tests/iterator/value_iterator_test.cuh +++ b/cpp/tests/iterator/value_iterator_test.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,7 +25,8 @@ template void non_null_iterator(IteratorTest& testFixture) { auto host_array = cudf::test::make_type_param_vector({0, 6, 0, -14, 13, 64, -13, -20, 45}); - auto dev_array = cudf::detail::make_device_uvector_sync(host_array, cudf::get_default_stream()); + auto dev_array = cudf::detail::make_device_uvector_sync( + host_array, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); // calculate the expected value by CPU. thrust::host_vector replaced_array(host_array); diff --git a/cpp/tests/iterator/value_iterator_test_strings.cu b/cpp/tests/iterator/value_iterator_test_strings.cu index 8b4080fa493..d0e62c09a03 100644 --- a/cpp/tests/iterator/value_iterator_test_strings.cu +++ b/cpp/tests/iterator/value_iterator_test_strings.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,7 +30,8 @@ auto strings_to_string_views(std::vector& input_strings) std::vector offsets; std::tie(chars, offsets) = cudf::test::detail::make_chars_and_offsets( input_strings.begin(), input_strings.end(), all_valid); - auto dev_chars = cudf::detail::make_device_uvector_sync(chars, cudf::get_default_stream()); + auto dev_chars = cudf::detail::make_device_uvector_sync( + chars, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); // calculate the expected value by CPU. (but contains device pointers) thrust::host_vector replaced_array(input_strings.size()); @@ -51,8 +52,9 @@ TEST_F(StringIteratorTest, string_view_null_iterator) using T = cudf::string_view; std::string zero("zero"); // the char data has to be in GPU - auto initmsg = cudf::detail::make_device_uvector_sync(zero, cudf::get_default_stream()); - T init = T{initmsg.data(), int(initmsg.size())}; + auto initmsg = cudf::detail::make_device_uvector_sync( + zero, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + T init = T{initmsg.data(), int(initmsg.size())}; // data and valid arrays std::vector host_values( @@ -86,8 +88,9 @@ TEST_F(StringIteratorTest, string_view_no_null_iterator) // T init = T{"", 0}; std::string zero("zero"); // the char data has to be in GPU - auto initmsg = cudf::detail::make_device_uvector_sync(zero, cudf::get_default_stream()); - T init = T{initmsg.data(), int(initmsg.size())}; + auto initmsg = cudf::detail::make_device_uvector_sync( + zero, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + T init = T{initmsg.data(), int(initmsg.size())}; // data array std::vector host_values( @@ -110,8 +113,9 @@ TEST_F(StringIteratorTest, string_scalar_iterator) // T init = T{"", 0}; std::string zero("zero"); // the char data has to be in GPU - auto initmsg = cudf::detail::make_device_uvector_sync(zero, 
cudf::get_default_stream()); - T init = T{initmsg.data(), int(initmsg.size())}; + auto initmsg = cudf::detail::make_device_uvector_sync( + zero, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + T init = T{initmsg.data(), int(initmsg.size())}; // data array std::vector host_values(100, zero); diff --git a/cpp/tests/join/join_tests.cpp b/cpp/tests/join/join_tests.cpp index 31500319592..404ff7d8380 100644 --- a/cpp/tests/join/join_tests.cpp +++ b/cpp/tests/join/join_tests.cpp @@ -1865,8 +1865,8 @@ TEST_F(JoinTest, Repro_StructsWithoutNullsPushedDown) // Note: Join result might not have nulls pushed down, since it's an output of gather(). // Must superimpose parent nulls before comparisons. - auto [superimposed_results, _] = - cudf::structs::detail::push_down_nulls(*result, cudf::get_default_stream()); + auto [superimposed_results, _] = cudf::structs::detail::push_down_nulls( + *result, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const expected = [] { auto fact_ints = ints{0}; diff --git a/cpp/tests/partitioning/hash_partition_test.cpp b/cpp/tests/partitioning/hash_partition_test.cpp index 9d206c5397d..a1508b5b973 100644 --- a/cpp/tests/partitioning/hash_partition_test.cpp +++ b/cpp/tests/partitioning/hash_partition_test.cpp @@ -308,8 +308,8 @@ void run_fixed_width_test(size_t cols, // Make a table view of the partition numbers constexpr cudf::data_type dtype{cudf::type_id::INT32}; - auto d_partitions = - cudf::detail::make_device_uvector_sync(partitions, cudf::get_default_stream()); + auto d_partitions = cudf::detail::make_device_uvector_sync( + partitions, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); cudf::column_view partitions_col(dtype, rows, d_partitions.data()); cudf::table_view partitions_table({partitions_col}); diff --git a/cpp/tests/quantiles/percentile_approx_test.cpp b/cpp/tests/quantiles/percentile_approx_test.cpp index 5809501fe2f..819b342ff8f 100644 --- 
a/cpp/tests/quantiles/percentile_approx_test.cpp +++ b/cpp/tests/quantiles/percentile_approx_test.cpp @@ -373,7 +373,8 @@ struct PercentileApproxTest : public cudf::test::BaseFixture { TEST_F(PercentileApproxTest, EmptyInput) { - auto empty_ = cudf::tdigest::detail::make_empty_tdigest_column(cudf::get_default_stream()); + auto empty_ = cudf::tdigest::detail::make_empty_tdigest_column( + cudf::get_default_stream(), rmm::mr::get_current_device_resource()); cudf::test::fixed_width_column_wrapper percentiles{0.0, 0.25, 0.3}; std::vector input; diff --git a/cpp/tests/reductions/segmented_reduction_tests.cpp b/cpp/tests/reductions/segmented_reduction_tests.cpp index 74c5e7fb504..77fdad09c0b 100644 --- a/cpp/tests/reductions/segmented_reduction_tests.cpp +++ b/cpp/tests/reductions/segmented_reduction_tests.cpp @@ -49,9 +49,9 @@ TYPED_TEST(SegmentedReductionTest, SumExcludeNulls) // output nullmask: {1, 1, 1, 0, 0, 0} auto const input = cudf::test::fixed_width_column_wrapper{ {1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}}; - auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const expect = cudf::test::fixed_width_column_wrapper{{6, 4, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 0, 0}}; @@ -97,9 +97,9 @@ TYPED_TEST(SegmentedReductionTest, ProductExcludeNulls) // output nullmask: {1, 1, 1, 0, 0, 0} auto const input = cudf::test::fixed_width_column_wrapper{ {1, 3, 5, XXX, 3, 5, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 1, 1, 1, 0, 0, 0}}; - auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; 
+ auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const expect = cudf::test::fixed_width_column_wrapper{{15, 15, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 0, 0}}; @@ -147,9 +147,9 @@ TYPED_TEST(SegmentedReductionTest, MaxExcludeNulls) // output nullmask: {1, 1, 1, 0, 0, 0} auto const input = cudf::test::fixed_width_column_wrapper{ {1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}}; - auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const expect = cudf::test::fixed_width_column_wrapper{{3, 3, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 0, 0}}; @@ -195,9 +195,9 @@ TYPED_TEST(SegmentedReductionTest, MinExcludeNulls) // output nullmask: {1, 1, 1, 0, 0, 0} auto const input = cudf::test::fixed_width_column_wrapper{ {1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}}; - auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const expect = cudf::test::fixed_width_column_wrapper{{1, 1, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 0, 0}}; @@ -244,43 +244,32 @@ TYPED_TEST(SegmentedReductionTest, AnyExcludeNulls) auto const input = cudf::test::fixed_width_column_wrapper{ {0, 0, 0, 0, XXX, 0, 0, 1, 0, 1, XXX, 0, 0, 1, XXX, XXX, XXX}, {1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0}}; - auto const offsets = std::vector{0, 3, 6, 9, 12, 12, 13, 
14, 15, 17}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 3, 6, 9, 12, 12, 13, 14, 15, 17}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const expect = cudf::test::fixed_width_column_wrapper{ {false, false, true, true, bool{XXX}, false, true, bool{XXX}, bool{XXX}}, {true, true, true, true, false, true, true, false, false}}; - auto res = - cudf::segmented_reduce(input, - d_offsets, - *cudf::make_any_aggregation(), - cudf::data_type{cudf::type_id::BOOL8}, - cudf::null_policy::EXCLUDE); + auto const agg = cudf::make_any_aggregation(); + auto const output_type = cudf::data_type{cudf::type_id::BOOL8}; + auto const policy = cudf::null_policy::EXCLUDE; + + auto res = cudf::segmented_reduce(input, d_offsets, *agg, output_type, policy); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); // Test with initial value - auto const init_scalar = cudf::make_fixed_width_scalar(1); + auto const init_scalar = cudf::make_fixed_width_scalar(0); auto const init_expect = cudf::test::fixed_width_column_wrapper{ - {true, true, true, true, true, true, true, true, true}, + {false, false, true, true, false, false, true, false, false}, {true, true, true, true, true, true, true, true, true}}; - res = cudf::segmented_reduce(input, - d_offsets, - *cudf::make_any_aggregation(), - cudf::data_type{cudf::type_id::BOOL8}, - cudf::null_policy::EXCLUDE, - *init_scalar); + res = cudf::segmented_reduce(input, d_offsets, *agg, output_type, policy, *init_scalar); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, init_expect); // Test with null initial value init_scalar->set_valid_async(false); - res = cudf::segmented_reduce(input, - d_offsets, - *cudf::make_any_aggregation(), - cudf::data_type{cudf::type_id::BOOL8}, - cudf::null_policy::EXCLUDE, - *init_scalar); + res = cudf::segmented_reduce(input, d_offsets, *agg, 
output_type, policy, *init_scalar); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); } @@ -295,9 +284,9 @@ TYPED_TEST(SegmentedReductionTest, AllExcludeNulls) auto const input = cudf::test::fixed_width_column_wrapper{ {1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX, 1, 0, 3, 1, XXX, 0, 0}, {1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1}}; - auto const offsets = std::vector{0, 3, 6, 6, 7, 8, 10, 13, 16, 17}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 3, 6, 6, 7, 8, 10, 13, 16, 17}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const expect = cudf::test::fixed_width_column_wrapper{ {true, true, bool{XXX}, true, bool{XXX}, bool{XXX}, false, false, false}, {true, true, false, true, false, false, true, true, true}}; @@ -346,9 +335,9 @@ TYPED_TEST(SegmentedReductionTest, SumIncludeNulls) // output nullmask: {1, 0, 1, 0, 0, 0} auto const input = cudf::test::fixed_width_column_wrapper{ {1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}}; - auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const expect = cudf::test::fixed_width_column_wrapper{{6, XXX, 1, XXX, XXX, XXX}, {1, 0, 1, 0, 0, 0}}; @@ -397,9 +386,9 @@ TYPED_TEST(SegmentedReductionTest, ProductIncludeNulls) // output nullmask: {1, 0, 1, 0, 0, 0} auto const input = cudf::test::fixed_width_column_wrapper{ {1, 3, 5, XXX, 3, 5, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 1, 1, 1, 0, 0, 0}}; - auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto const d_offsets = - 
cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const expect = cudf::test::fixed_width_column_wrapper{{15, XXX, 1, XXX, XXX, XXX}, {1, 0, 1, 0, 0, 0}}; @@ -450,9 +439,9 @@ TYPED_TEST(SegmentedReductionTest, MaxIncludeNulls) // output nullmask: {1, 0, 1, 0, 0, 0} auto const input = cudf::test::fixed_width_column_wrapper{ {1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}}; - auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const expect = cudf::test::fixed_width_column_wrapper{{3, XXX, 1, XXX, XXX, XXX}, {1, 0, 1, 0, 0, 0}}; @@ -501,9 +490,9 @@ TYPED_TEST(SegmentedReductionTest, MinIncludeNulls) // output nullmask: {1, 0, 1, 0, 0, 0} auto const input = cudf::test::fixed_width_column_wrapper{ {1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, {1, 1, 1, 1, 0, 1, 1, 0, 0}}; - auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const expect = cudf::test::fixed_width_column_wrapper{{1, XXX, 1, XXX, XXX, XXX}, {1, 0, 1, 0, 0, 0}}; @@ -553,9 +542,9 @@ TYPED_TEST(SegmentedReductionTest, AnyIncludeNulls) auto const input = cudf::test::fixed_width_column_wrapper{ {0, 0, 0, 0, XXX, 0, 0, 1, 0, 1, XXX, 
0, 0, 1, XXX, XXX, XXX}, {1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0}}; - auto const offsets = std::vector{0, 3, 6, 9, 12, 12, 13, 14, 15, 17}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 3, 6, 9, 12, 12, 13, 14, 15, 17}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const expect = cudf::test::fixed_width_column_wrapper{ {false, bool{XXX}, true, bool{XXX}, bool{XXX}, false, true, bool{XXX}, bool{XXX}}, {true, false, true, false, false, true, true, false, false}}; @@ -616,9 +605,9 @@ TYPED_TEST(SegmentedReductionTest, AllIncludeNulls) auto const input = cudf::test::fixed_width_column_wrapper{ {1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX, 1, 0, 3, 1, XXX, 0, 0}, {1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1}}; - auto const offsets = std::vector{0, 3, 6, 6, 7, 8, 10, 13, 16, 17}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 3, 6, 6, 7, 8, 10, 13, 16, 17}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const expect = cudf::test::fixed_width_column_wrapper{ {true, bool{XXX}, bool{XXX}, true, bool{XXX}, bool{XXX}, false, bool{XXX}, false}, {true, false, false, true, false, false, true, false, true}}; @@ -681,9 +670,9 @@ TEST_F(SegmentedReductionTestUntyped, PartialSegmentReduction) auto const input = cudf::test::fixed_width_column_wrapper{ {1, 2, 3, 4, 5, 6, 7}, {true, true, true, true, true, true, true}}; - auto const offsets = std::vector{1, 3, 4}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{1, 3, 4}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + 
offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const expect = cudf::test::fixed_width_column_wrapper{{5, 4}, {true, true}}; auto res = @@ -731,10 +720,10 @@ TEST_F(SegmentedReductionTestUntyped, NonNullableInput) // outputs: {1, 5, 4} // output nullmask: {1, 1, 1} - auto const input = cudf::test::fixed_width_column_wrapper{1, 2, 3, 4, 5, 6, 7}; - auto const offsets = std::vector{0, 1, 1, 3, 7}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const input = cudf::test::fixed_width_column_wrapper{1, 2, 3, 4, 5, 6, 7}; + auto const offsets = std::vector{0, 1, 1, 3, 7}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const expect = cudf::test::fixed_width_column_wrapper{{1, XXX, 5, 22}, {true, false, true, true}}; @@ -778,9 +767,9 @@ TEST_F(SegmentedReductionTestUntyped, Mean) { auto const input = cudf::test::fixed_width_column_wrapper{10, 20, 30, 40, 50, 60, 70, 80, 90}; - auto const offsets = std::vector{0, 1, 1, 4, 9}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 1, 1, 4, 9}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const agg = cudf::make_mean_aggregation(); auto const output_type = cudf::data_type{cudf::type_id::FLOAT32}; @@ -797,9 +786,9 @@ TEST_F(SegmentedReductionTestUntyped, MeanNulls) { auto const input = cudf::test::fixed_width_column_wrapper( {10, 20, 30, 40, 50, 60, 0, 80, 90}, {1, 1, 1, 1, 1, 1, 0, 1, 1}); - auto const offsets = std::vector{0, 1, 1, 4, 9}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 1, 1, 4, 9}; + auto const d_offsets = 
cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const agg = cudf::make_mean_aggregation(); auto const output_type = cudf::data_type{cudf::type_id::FLOAT64}; @@ -817,9 +806,9 @@ TEST_F(SegmentedReductionTestUntyped, SumOfSquares) { auto const input = cudf::test::fixed_width_column_wrapper{10, 20, 30, 40, 50, 60, 70, 80, 90}; - auto const offsets = std::vector{0, 1, 1, 4, 9}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 1, 1, 4, 9}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const agg = cudf::make_sum_of_squares_aggregation(); auto const output_type = cudf::data_type{cudf::type_id::INT32}; @@ -837,9 +826,9 @@ TEST_F(SegmentedReductionTestUntyped, SumOfSquaresNulls) { auto const input = cudf::test::fixed_width_column_wrapper( {10, 20, 30, 40, 50, 60, 0, 80, 90}, {1, 1, 1, 1, 1, 1, 0, 1, 1}); - auto const offsets = std::vector{0, 1, 1, 4, 9}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 1, 1, 4, 9}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const agg = cudf::make_sum_of_squares_aggregation(); auto const output_type = cudf::data_type{cudf::type_id::INT64}; @@ -859,9 +848,9 @@ TEST_F(SegmentedReductionTestUntyped, StandardDeviation) constexpr float NaN{std::numeric_limits::quiet_NaN()}; auto const input = cudf::test::fixed_width_column_wrapper{10, 20, 30, 40, 50, 60, 70, 80, 90}; - auto const offsets = std::vector{0, 1, 1, 4, 9}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 1, 1, 4, 9}; + auto 
const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const agg = cudf::make_std_aggregation(); auto const output_type = cudf::data_type{cudf::type_id::FLOAT32}; @@ -879,9 +868,9 @@ TEST_F(SegmentedReductionTestUntyped, StandardDeviationNulls) constexpr double NaN{std::numeric_limits::quiet_NaN()}; auto const input = cudf::test::fixed_width_column_wrapper( {10, 0, 20, 30, 54, 63, 0, 72, 81}, {1, 0, 1, 1, 1, 1, 0, 1, 1}); - auto const offsets = std::vector{0, 1, 1, 4, 9}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 1, 1, 4, 9}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const agg = cudf::make_std_aggregation(); auto const output_type = cudf::data_type{cudf::type_id::FLOAT64}; @@ -901,9 +890,9 @@ TEST_F(SegmentedReductionTestUntyped, Variance) constexpr float NaN{std::numeric_limits::quiet_NaN()}; auto const input = cudf::test::fixed_width_column_wrapper{10, 20, 30, 40, 50, 60, 70, 80, 90}; - auto const offsets = std::vector{0, 1, 1, 4, 9}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 1, 1, 4, 9}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const agg = cudf::make_variance_aggregation(); auto const output_type = cudf::data_type{cudf::type_id::FLOAT32}; @@ -921,9 +910,9 @@ TEST_F(SegmentedReductionTestUntyped, VarianceNulls) constexpr double NaN{std::numeric_limits::quiet_NaN()}; auto const input = cudf::test::fixed_width_column_wrapper( {10, 0, 20, 30, 54, 63, 0, 72, 81}, {1, 0, 1, 1, 1, 1, 0, 1, 1}); - auto const offsets = std::vector{0, 1, 1, 4, 9}; - auto const d_offsets = 
- cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 1, 1, 4, 9}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const agg = cudf::make_variance_aggregation(); auto const output_type = cudf::data_type{cudf::type_id::FLOAT64}; @@ -938,13 +927,55 @@ TEST_F(SegmentedReductionTestUntyped, VarianceNulls) CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); } +TEST_F(SegmentedReductionTestUntyped, NUnique) +{ + auto const input = + cudf::test::fixed_width_column_wrapper({10, 15, 20, 30, 60, 60, 70, 70, 80}); + auto const offsets = std::vector{0, 1, 1, 2, 4, 9}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto const agg = cudf::make_nunique_aggregation(); + auto const output_type = cudf::data_type{cudf::type_id::INT32}; + + auto expected = + cudf::test::fixed_width_column_wrapper{{1, 0, 1, 2, 3}, {1, 0, 1, 1, 1}}; + auto result = + cudf::segmented_reduce(input, d_offsets, *agg, output_type, cudf::null_policy::EXCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); + + result = cudf::segmented_reduce(input, d_offsets, *agg, output_type, cudf::null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); +} + +TEST_F(SegmentedReductionTestUntyped, NUniqueNulls) +{ + auto const input = cudf::test::fixed_width_column_wrapper( + {10, 0, 20, 30, 60, 60, 70, 70, 0}, {1, 0, 1, 1, 1, 1, 1, 1, 0}); + auto const offsets = std::vector{0, 1, 1, 2, 4, 9}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto const agg = cudf::make_nunique_aggregation(); + auto const output_type = cudf::data_type{cudf::type_id::INT32}; + + auto expected = + cudf::test::fixed_width_column_wrapper{{1, 0, 0, 
2, 2}, {1, 0, 0, 1, 1}}; + auto result = + cudf::segmented_reduce(input, d_offsets, *agg, output_type, cudf::null_policy::EXCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); + + expected = + cudf::test::fixed_width_column_wrapper{{1, 0, 1, 2, 3}, {1, 0, 1, 1, 1}}; + result = cudf::segmented_reduce(input, d_offsets, *agg, output_type, cudf::null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); +} + TEST_F(SegmentedReductionTestUntyped, Errors) { auto const input = cudf::test::fixed_width_column_wrapper( {10, 0, 20, 30, 54, 63, 0, 72, 81}, {1, 0, 1, 1, 1, 1, 0, 1, 1}); - auto const offsets = std::vector{0, 1, 1, 4, 9}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 1, 1, 4, 9}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const null_policy = cudf::null_policy::EXCLUDE; auto const output_type = cudf::data_type{cudf::type_id::TIMESTAMP_DAYS}; auto const str_input = @@ -1010,10 +1041,10 @@ TEST_F(SegmentedReductionTestUntyped, Errors) TEST_F(SegmentedReductionTestUntyped, ReduceEmptyColumn) { - auto const input = cudf::test::fixed_width_column_wrapper{}; - auto const offsets = std::vector{0}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const input = cudf::test::fixed_width_column_wrapper{}; + auto const offsets = std::vector{0}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const expect = cudf::test::fixed_width_column_wrapper{}; auto res = @@ -1047,10 +1078,10 @@ TEST_F(SegmentedReductionTestUntyped, ReduceEmptyColumn) TEST_F(SegmentedReductionTestUntyped, EmptyInputWithOffsets) { - auto const input = cudf::test::fixed_width_column_wrapper{}; - auto const 
offsets = std::vector{0, 0, 0, 0, 0, 0}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const input = cudf::test::fixed_width_column_wrapper{}; + auto const offsets = std::vector{0, 0, 0, 0, 0, 0}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const expect = cudf::test::fixed_width_column_wrapper{{XXX, XXX, XXX, XXX, XXX}, {0, 0, 0, 0, 0}}; @@ -1098,9 +1129,9 @@ TYPED_TEST(SegmentedReductionFixedPointTest, MaxWithNulls) { using RepType = cudf::device_storage_type_t; - auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const agg = cudf::make_max_aggregation(); for (auto scale : {-2, 0, 5}) { @@ -1126,9 +1157,9 @@ TYPED_TEST(SegmentedReductionFixedPointTest, MinWithNulls) { using RepType = cudf::device_storage_type_t; - auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const agg = cudf::make_min_aggregation(); for (auto scale : {-2, 0, 5}) { @@ -1154,9 +1185,9 @@ TYPED_TEST(SegmentedReductionFixedPointTest, MaxNonNullableInput) { using RepType = cudf::device_storage_type_t; - auto const offsets = std::vector{0, 3, 4, 4}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 3, 4, 
4}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const agg = cudf::make_max_aggregation(); for (auto scale : {-2, 0, 5}) { @@ -1179,9 +1210,9 @@ TYPED_TEST(SegmentedReductionFixedPointTest, MinNonNullableInput) { using RepType = cudf::device_storage_type_t; - auto const offsets = std::vector{0, 3, 4, 4}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 3, 4, 4}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const agg = cudf::make_min_aggregation(); for (auto scale : {-2, 0, 5}) { @@ -1204,9 +1235,9 @@ TYPED_TEST(SegmentedReductionFixedPointTest, Sum) { using RepType = cudf::device_storage_type_t; - auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const agg = cudf::make_sum_aggregation(); for (auto scale : {-2, 0, 5}) { @@ -1242,9 +1273,9 @@ TYPED_TEST(SegmentedReductionFixedPointTest, Product) { using RepType = cudf::device_storage_type_t; - auto const offsets = std::vector{0, 3, 6, 7, 8, 12, 12}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 3, 6, 7, 8, 12, 12}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const agg = cudf::make_product_aggregation(); for (auto scale : {-2, 0, 5}) { @@ -1279,9 +1310,9 @@ TYPED_TEST(SegmentedReductionFixedPointTest, 
SumOfSquares) { using RepType = cudf::device_storage_type_t; - auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const agg = cudf::make_sum_of_squares_aggregation(); for (auto scale : {-2, 0, 5}) { @@ -1442,10 +1473,10 @@ TEST_F(SegmentedReductionStringTest, MinExcludeNulls) TEST_F(SegmentedReductionStringTest, EmptyInputWithOffsets) { - auto const input = cudf::test::strings_column_wrapper{}; - auto const offsets = std::vector{0, 0, 0, 0}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const input = cudf::test::strings_column_wrapper{}; + auto const offsets = std::vector{0, 0, 0, 0}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const expect = cudf::test::strings_column_wrapper({XXX, XXX, XXX}, {0, 0, 0}); auto result = diff --git a/cpp/tests/rolling/range_window_bounds_test.cpp b/cpp/tests/rolling/range_window_bounds_test.cpp index 1b753fb6040..c70e0a78100 100644 --- a/cpp/tests/rolling/range_window_bounds_test.cpp +++ b/cpp/tests/rolling/range_window_bounds_test.cpp @@ -21,6 +21,7 @@ #include #include +#include #include #include @@ -57,34 +58,43 @@ TYPED_TEST(TimestampRangeWindowBoundsTest, BoundsConstruction) using OrderByType = TypeParam; using range_type = cudf::detail::range_type; using rep_type = cudf::detail::range_rep_type; + auto const dtype = cudf::data_type{cudf::type_to_id()}; static_assert(cudf::is_duration()); auto range_3 = cudf::range_window_bounds::get(cudf::duration_scalar{3, true}); EXPECT_FALSE(range_3.is_unbounded() && "range_window_bounds constructed from 
scalar cannot be unbounded."); - EXPECT_EQ(cudf::detail::range_comparable_value(range_3), rep_type{3}); + EXPECT_EQ( + cudf::detail::range_comparable_value(range_3, dtype, cudf::get_default_stream()), + rep_type{3}); auto range_unbounded = cudf::range_window_bounds::unbounded(cudf::data_type{cudf::type_to_id()}); EXPECT_TRUE(range_unbounded.is_unbounded() && "range_window_bounds::unbounded() must return an unbounded range."); - EXPECT_EQ(cudf::detail::range_comparable_value(range_unbounded), rep_type{}); + EXPECT_EQ(cudf::detail::range_comparable_value( + range_unbounded, dtype, cudf::get_default_stream()), + rep_type{}); } TYPED_TEST(TimestampRangeWindowBoundsTest, WrongRangeType) { using OrderByType = TypeParam; + auto const dtype = cudf::data_type{cudf::type_to_id()}; using wrong_range_type = std::conditional_t, cudf::duration_ns, cudf::duration_D>; auto range_3 = cudf::range_window_bounds::get(cudf::duration_scalar{3, true}); - EXPECT_THROW(cudf::detail::range_comparable_value(range_3), cudf::logic_error); + EXPECT_THROW( + cudf::detail::range_comparable_value(range_3, dtype, cudf::get_default_stream()), + cudf::logic_error); auto range_unbounded = cudf::range_window_bounds::unbounded(cudf::data_type{cudf::type_to_id()}); - EXPECT_THROW(cudf::detail::range_comparable_value(range_unbounded), + EXPECT_THROW(cudf::detail::range_comparable_value( + range_unbounded, dtype, cudf::get_default_stream()), cudf::logic_error); } @@ -112,33 +122,42 @@ TYPED_TEST(NumericRangeWindowBoundsTest, BoundsConstruction) using OrderByType = TypeParam; using range_type = cudf::detail::range_type; using rep_type = cudf::detail::range_rep_type; + auto const dtype = cudf::data_type{cudf::type_to_id()}; static_assert(std::is_integral_v); auto range_3 = cudf::range_window_bounds::get(cudf::numeric_scalar{3, true}); EXPECT_FALSE(range_3.is_unbounded() && "range_window_bounds constructed from scalar cannot be unbounded."); - EXPECT_EQ(cudf::detail::range_comparable_value(range_3), 
rep_type{3}); + EXPECT_EQ( + cudf::detail::range_comparable_value(range_3, dtype, cudf::get_default_stream()), + rep_type{3}); auto range_unbounded = cudf::range_window_bounds::unbounded(cudf::data_type{cudf::type_to_id()}); EXPECT_TRUE(range_unbounded.is_unbounded() && "range_window_bounds::unbounded() must return an unbounded range."); - EXPECT_EQ(cudf::detail::range_comparable_value(range_unbounded), rep_type{}); + EXPECT_EQ(cudf::detail::range_comparable_value( + range_unbounded, dtype, cudf::get_default_stream()), + rep_type{}); } TYPED_TEST(NumericRangeWindowBoundsTest, WrongRangeType) { using OrderByType = TypeParam; + auto const dtype = cudf::data_type{cudf::type_to_id()}; using wrong_range_type = std::conditional_t, int16_t, int32_t>; auto range_3 = cudf::range_window_bounds::get(cudf::numeric_scalar{3, true}); - EXPECT_THROW(cudf::detail::range_comparable_value(range_3), cudf::logic_error); + EXPECT_THROW( + cudf::detail::range_comparable_value(range_3, dtype, cudf::get_default_stream()), + cudf::logic_error); auto range_unbounded = cudf::range_window_bounds::unbounded(cudf::data_type{cudf::type_to_id()}); - EXPECT_THROW(cudf::detail::range_comparable_value(range_unbounded), + EXPECT_THROW(cudf::detail::range_comparable_value( + range_unbounded, dtype, cudf::get_default_stream()), cudf::logic_error); } @@ -150,8 +169,9 @@ TYPED_TEST_SUITE(DecimalRangeBoundsTest, cudf::test::FixedPointTypes); TYPED_TEST(DecimalRangeBoundsTest, BoundsConstruction) { - using DecimalT = TypeParam; - using Rep = cudf::detail::range_rep_type; + using DecimalT = TypeParam; + using Rep = cudf::detail::range_rep_type; + auto const dtype = cudf::data_type{cudf::type_to_id()}; // Interval type must match the decimal type. 
static_assert(std::is_same_v, DecimalT>); @@ -160,7 +180,9 @@ TYPED_TEST(DecimalRangeBoundsTest, BoundsConstruction) cudf::fixed_point_scalar{Rep{3}, numeric::scale_type{0}}); EXPECT_FALSE(range_3.is_unbounded() && "range_window_bounds constructed from scalar cannot be unbounded."); - EXPECT_EQ(cudf::detail::range_comparable_value(range_3), Rep{3}); + EXPECT_EQ( + cudf::detail::range_comparable_value(range_3, dtype, cudf::get_default_stream()), + Rep{3}); auto const range_unbounded = cudf::range_window_bounds::unbounded(cudf::data_type{cudf::type_to_id()}); @@ -183,8 +205,8 @@ TYPED_TEST(DecimalRangeBoundsTest, Rescale) for (auto const range_scale : {-2, -1, 0, 1, 2}) { auto const decimal_range_bounds = cudf::range_window_bounds::get( cudf::fixed_point_scalar{RepT{20}, numeric::scale_type{range_scale}}); - auto const rescaled_range_rep = - cudf::detail::range_comparable_value(decimal_range_bounds, order_by_data_type); + auto const rescaled_range_rep = cudf::detail::range_comparable_value( + decimal_range_bounds, order_by_data_type, cudf::get_default_stream()); EXPECT_EQ(rescaled_range_rep, RepT{20} * pow10[range_scale - order_by_scale]); } @@ -192,8 +214,8 @@ TYPED_TEST(DecimalRangeBoundsTest, Rescale) { auto const decimal_range_bounds = cudf::range_window_bounds::get( cudf::fixed_point_scalar{RepT{200}, numeric::scale_type{-3}}); - EXPECT_THROW( - cudf::detail::range_comparable_value(decimal_range_bounds, order_by_data_type), - cudf::logic_error); + EXPECT_THROW(cudf::detail::range_comparable_value( + decimal_range_bounds, order_by_data_type, cudf::get_default_stream()), + cudf::logic_error); } } diff --git a/cpp/tests/scalar/scalar_device_view_test.cu b/cpp/tests/scalar/scalar_device_view_test.cu index c7365d63e1c..9e0f68573a5 100644 --- a/cpp/tests/scalar/scalar_device_view_test.cu +++ b/cpp/tests/scalar/scalar_device_view_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -130,7 +130,8 @@ TEST_F(StringScalarDeviceViewTest, Value) auto scalar_device_view = cudf::get_scalar_device_view(s); rmm::device_scalar result{cudf::get_default_stream()}; - auto value_v = cudf::detail::make_device_uvector_sync(value, cudf::get_default_stream()); + auto value_v = cudf::detail::make_device_uvector_sync( + value, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); test_string_value<<<1, 1, 0, cudf::get_default_stream().value()>>>( scalar_device_view, value_v.data(), value.size(), result.data()); diff --git a/cpp/tests/strings/contains_tests.cpp b/cpp/tests/strings/contains_tests.cpp index 5331c4c34d8..316f24e4167 100644 --- a/cpp/tests/strings/contains_tests.cpp +++ b/cpp/tests/strings/contains_tests.cpp @@ -298,9 +298,11 @@ TEST_F(StringsContainsTests, HexTest) std::vector offsets( {thrust::make_counting_iterator(0), thrust::make_counting_iterator(0) + count + 1}); - auto d_chars = cudf::detail::make_device_uvector_sync(ascii_chars, cudf::get_default_stream()); - auto d_offsets = cudf::detail::make_device_uvector_sync(offsets, cudf::get_default_stream()); - auto input = cudf::make_strings_column(d_chars, d_offsets); + auto d_chars = cudf::detail::make_device_uvector_sync( + ascii_chars, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto d_offsets = cudf::detail::make_device_uvector_sync( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto input = cudf::make_strings_column(d_chars, d_offsets); auto strings_view = cudf::strings_column_view(input->view()); for (auto ch : ascii_chars) { diff --git a/cpp/tests/strings/factories_test.cu b/cpp/tests/strings/factories_test.cu index e3df8db721d..77857049e7a 100644 --- a/cpp/tests/strings/factories_test.cu +++ b/cpp/tests/strings/factories_test.cu @@ -78,7 +78,8 @@ TEST_F(StringsFactoriesTest, 
CreateColumnFromPair) } h_offsets[idx + 1] = offset; } - auto d_strings = cudf::detail::make_device_uvector_sync(strings, cudf::get_default_stream()); + auto d_strings = cudf::detail::make_device_uvector_sync( + strings, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); CUDF_CUDA_TRY(cudaMemcpy(d_buffer.data(), h_buffer.data(), memsize, cudaMemcpyDefault)); auto column = cudf::make_strings_column(d_strings); EXPECT_EQ(column->type(), cudf::data_type{cudf::type_id::STRING}); @@ -143,10 +144,13 @@ TEST_F(StringsFactoriesTest, CreateColumnFromOffsets) } std::vector h_nulls{h_null_mask}; - auto d_buffer = cudf::detail::make_device_uvector_sync(h_buffer, cudf::get_default_stream()); - auto d_offsets = cudf::detail::make_device_uvector_sync(h_offsets, cudf::get_default_stream()); - auto d_nulls = cudf::detail::make_device_uvector_sync(h_nulls, cudf::get_default_stream()); - auto column = cudf::make_strings_column(d_buffer, d_offsets, d_nulls, null_count); + auto d_buffer = cudf::detail::make_device_uvector_sync( + h_buffer, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto d_offsets = cudf::detail::make_device_uvector_sync( + h_offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto d_nulls = cudf::detail::make_device_uvector_sync( + h_nulls, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto column = cudf::make_strings_column(d_buffer, d_offsets, d_nulls, null_count); EXPECT_EQ(column->type(), cudf::data_type{cudf::type_id::STRING}); EXPECT_EQ(column->null_count(), null_count); EXPECT_EQ(2, column->num_children()); @@ -184,8 +188,8 @@ TEST_F(StringsFactoriesTest, CreateScalar) TEST_F(StringsFactoriesTest, EmptyStringsColumn) { rmm::device_uvector d_chars{0, cudf::get_default_stream()}; - auto d_offsets = - cudf::detail::make_zeroed_device_uvector_sync(1, cudf::get_default_stream()); + auto d_offsets = cudf::detail::make_zeroed_device_uvector_sync( + 1, 
cudf::get_default_stream(), rmm::mr::get_current_device_resource()); rmm::device_uvector d_nulls{0, cudf::get_default_stream()}; auto results = cudf::make_strings_column(d_chars, d_offsets, d_nulls, 0); diff --git a/cpp/tests/strings/integers_tests.cpp b/cpp/tests/strings/integers_tests.cpp index 04e6886a08a..79e96ff5121 100644 --- a/cpp/tests/strings/integers_tests.cpp +++ b/cpp/tests/strings/integers_tests.cpp @@ -297,8 +297,9 @@ TYPED_TEST(StringsIntegerConvertTest, FromToInteger) std::iota(h_integers.begin(), h_integers.end(), -(TypeParam)(h_integers.size() / 2)); h_integers.push_back(std::numeric_limits::min()); h_integers.push_back(std::numeric_limits::max()); - auto d_integers = cudf::detail::make_device_uvector_sync(h_integers, cudf::get_default_stream()); - auto integers = cudf::make_numeric_column(cudf::data_type{cudf::type_to_id()}, + auto d_integers = cudf::detail::make_device_uvector_sync( + h_integers, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto integers = cudf::make_numeric_column(cudf::data_type{cudf::type_to_id()}, (cudf::size_type)d_integers.size()); auto integers_view = integers->mutable_view(); CUDF_CUDA_TRY(cudaMemcpy(integers_view.data(), diff --git a/cpp/tests/strings/replace_tests.cpp b/cpp/tests/strings/replace_tests.cpp index 32e097838c0..85185b2deab 100644 --- a/cpp/tests/strings/replace_tests.cpp +++ b/cpp/tests/strings/replace_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -290,28 +290,22 @@ TEST_F(StringsReplaceTest, ReplaceSlice) TEST_F(StringsReplaceTest, ReplaceSliceError) { - std::vector h_strings{"Héllo", "thesé", nullptr, "are not", "important", ""}; - cudf::test::strings_column_wrapper strings( - h_strings.begin(), - h_strings.end(), - thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; })); - auto strings_view = cudf::strings_column_view(strings); - EXPECT_THROW(cudf::strings::replace_slice(strings_view, cudf::string_scalar(""), 4, 1), - cudf::logic_error); + cudf::test::strings_column_wrapper input({"Héllo", "thesé", "are not", "important", ""}); + EXPECT_THROW( + cudf::strings::replace_slice(cudf::strings_column_view(input), cudf::string_scalar(""), 4, 1), + cudf::logic_error); } TEST_F(StringsReplaceTest, ReplaceMulti) { - auto strings = build_corpus(); - auto strings_view = cudf::strings_column_view(strings); + auto input = build_corpus(); + auto strings_view = cudf::strings_column_view(input); - std::vector h_targets{"the ", "a ", "to "}; - cudf::test::strings_column_wrapper targets(h_targets.begin(), h_targets.end()); + cudf::test::strings_column_wrapper targets({"the ", "a ", "to "}); auto targets_view = cudf::strings_column_view(targets); { - std::vector h_repls{"_ ", "A ", "2 "}; - cudf::test::strings_column_wrapper repls(h_repls.begin(), h_repls.end()); + cudf::test::strings_column_wrapper repls({"_ ", "A ", "2 "}); auto repls_view = cudf::strings_column_view(repls); auto results = cudf::strings::replace(strings_view, targets_view, repls_view); @@ -331,8 +325,7 @@ TEST_F(StringsReplaceTest, ReplaceMulti) } { - std::vector h_repls{"* "}; - cudf::test::strings_column_wrapper repls(h_repls.begin(), h_repls.end()); + cudf::test::strings_column_wrapper repls({"* "}); auto repls_view = cudf::strings_column_view(repls); auto results = cudf::strings::replace(strings_view, targets_view, repls_view); @@ -352,6 +345,129 @@ TEST_F(StringsReplaceTest, ReplaceMulti) } } 
+TEST_F(StringsReplaceTest, ReplaceMultiLong) +{ + // The length of the strings are to trigger the code path governed by the AVG_CHAR_BYTES_THRESHOLD + // setting in the multi.cu. + auto input = cudf::test::strings_column_wrapper( + {"This string needs to be very long to trigger the long-replace internal functions. " + "This string needs to be very long to trigger the long-replace internal functions. " + "This string needs to be very long to trigger the long-replace internal functions. " + "This string needs to be very long to trigger the long-replace internal functions.", + "012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012" + "345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345" + "678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678" + "901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901" + "2345678901234567890123456789", + "012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012" + "345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345" + "678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678" + "901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901" + "2345678901234567890123456789", + "Test string for overlap check: bananaápple bananá ápplebananá banápple ápple bananá " + "Test string for overlap check: bananaápple bananá ápplebananá banápple ápple bananá " + "Test string for overlap check: bananaápple bananá ápplebananá banápple ápple bananá " + "Test string for overlap check: bananaápple bananá ápplebananá banápple ápple bananá " + "Test string for overlap check: bananaápple bananá ápplebananá banápple ápple bananá", + "", + ""}, + {1, 1, 1, 1, 0, 1}); + auto strings_view = cudf::strings_column_view(input); + + auto targets = 
cudf::test::strings_column_wrapper({"78901", "bananá", "ápple", "78"}); + auto targets_view = cudf::strings_column_view(targets); + + { + cudf::test::strings_column_wrapper repls({"x", "PEAR", "avocado", "$$"}); + auto repls_view = cudf::strings_column_view(repls); + + auto results = cudf::strings::replace(strings_view, targets_view, repls_view); + + cudf::test::strings_column_wrapper expected( + {"This string needs to be very long to trigger the long-replace internal functions. " + "This string needs to be very long to trigger the long-replace internal functions. " + "This string needs to be very long to trigger the long-replace internal functions. " + "This string needs to be very long to trigger the long-replace internal functions.", + "0123456x23456x23456x23456x23456x23456x23456x23456x23456x23456x23456x23456x23456x23456x23456" + "x23456x23456x23456x23456x23456x23456x23456x23456x23456x23456x23456x23456x23456x23456x23456x" + "23456x23456x23456x23456x23456x23456x23456x23456x23456x23456$$9", + "0123456x23456x23456x23456x23456x23456x23456x23456x23456x23456x23456x23456x23456x23456x23456" + "x23456x23456x23456x23456x23456x23456x23456x23456x23456x23456x23456x23456x23456x23456x23456x" + "23456x23456x23456x23456x23456x23456x23456x23456x23456x23456$$9", + "Test string for overlap check: bananaavocado PEAR avocadoPEAR banavocado avocado PEAR " + "Test string for overlap check: bananaavocado PEAR avocadoPEAR banavocado avocado PEAR " + "Test string for overlap check: bananaavocado PEAR avocadoPEAR banavocado avocado PEAR " + "Test string for overlap check: bananaavocado PEAR avocadoPEAR banavocado avocado PEAR " + "Test string for overlap check: bananaavocado PEAR avocadoPEAR banavocado avocado PEAR", + "", + ""}, + {1, 1, 1, 1, 0, 1}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); + } + + { + cudf::test::strings_column_wrapper repls({"*"}); + auto repls_view = cudf::strings_column_view(repls); + + auto results = cudf::strings::replace(strings_view, targets_view, 
repls_view); + + cudf::test::strings_column_wrapper expected( + {"This string needs to be very long to trigger the long-replace internal functions. " + "This string needs to be very long to trigger the long-replace internal functions. " + "This string needs to be very long to trigger the long-replace internal functions. " + "This string needs to be very long to trigger the long-replace internal functions.", + "0123456*23456*23456*23456*23456*23456*23456*23456*23456*23456*23456*23456*23456*23456*" + "23456*23456*23456*23456*23456*23456*23456*23456*23456*23456*23456*23456*23456*23456*" + "23456*23456*23456*23456*23456*23456*23456*23456*23456*23456*23456*23456*9", + "0123456*23456*23456*23456*23456*23456*23456*23456*23456*23456*23456*23456*23456*23456*" + "23456*23456*23456*23456*23456*23456*23456*23456*23456*23456*23456*23456*23456*23456*" + "23456*23456*23456*23456*23456*23456*23456*23456*23456*23456*23456*23456*9", + "Test string for overlap check: banana* * ** ban* * * Test string for overlap check: " + "banana* * ** ban* * * Test string for overlap check: banana* * ** ban* * * Test string for " + "overlap check: banana* * ** ban* * * Test string for overlap check: banana* * ** ban* * *", + "", + ""}, + {1, 1, 1, 1, 0, 1}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); + } + + { + targets = + cudf::test::strings_column_wrapper({"01234567890123456789012345678901234567890123456789012345" + "6789012345678901234567890123456789012" + "34567890123456789012345678901234567890123456789012345678" + "9012345678901234567890123456789012345" + "67890123456789012345678901234567890123456789012345678901" + "2345678901234567890123456789012345678" + "90123456789012345678901234567890123456789012345678901234" + "5678901234567890123456789012345678901" + "2345678901234567890123456789", + "78"}); + targets_view = cudf::strings_column_view(targets); + auto repls = cudf::test::strings_column_wrapper({""}); + auto repls_view = cudf::strings_column_view(repls); + + auto results = 
cudf::strings::replace(strings_view, targets_view, repls_view); + + cudf::test::strings_column_wrapper expected( + {"This string needs to be very long to trigger the long-replace internal functions. " + "This string needs to be very long to trigger the long-replace internal functions. " + "This string needs to be very long to trigger the long-replace internal functions. " + "This string needs to be very long to trigger the long-replace internal functions.", + "", + "", + "Test string for overlap check: bananaápple bananá ápplebananá banápple ápple bananá " + "Test string for overlap check: bananaápple bananá ápplebananá banápple ápple bananá " + "Test string for overlap check: bananaápple bananá ápplebananá banápple ápple bananá " + "Test string for overlap check: bananaápple bananá ápplebananá banápple ápple bananá " + "Test string for overlap check: bananaápple bananá ápplebananá banápple ápple bananá", + "", + ""}, + {1, 1, 1, 1, 0, 1}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); + } +} + TEST_F(StringsReplaceTest, EmptyStringsColumn) { cudf::column_view zero_size_strings_column( diff --git a/cpp/tests/structs/utilities_tests.cpp b/cpp/tests/structs/utilities_tests.cpp index e92b96553c0..327fede6126 100644 --- a/cpp/tests/structs/utilities_tests.cpp +++ b/cpp/tests/structs/utilities_tests.cpp @@ -53,9 +53,14 @@ TYPED_TEST(TypedStructUtilitiesTest, ListsAtTopLevel) auto lists_col = lists{{0, 1}, {22, 33}, {44, 55, 66}}; auto nums_col = nums{{0, 1, 2}, cudf::test::iterators::null_at(6)}; - auto table = cudf::table_view{{lists_col, nums_col}}; - auto flattened_table = cudf::structs::detail::flatten_nested_columns( - table, {}, {}, cudf::structs::detail::column_nullability::FORCE, cudf::get_default_stream()); + auto table = cudf::table_view{{lists_col, nums_col}}; + auto flattened_table = + cudf::structs::detail::flatten_nested_columns(table, + {}, + {}, + cudf::structs::detail::column_nullability::FORCE, + cudf::get_default_stream(), + 
rmm::mr::get_current_device_resource()); CUDF_TEST_EXPECT_TABLES_EQUAL(table, flattened_table->flattened_columns()); } @@ -76,7 +81,8 @@ TYPED_TEST(TypedStructUtilitiesTest, NestedListsUnsupported) {}, {}, cudf::structs::detail::column_nullability::FORCE, - cudf::get_default_stream()), + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()), cudf::logic_error); } @@ -90,9 +96,14 @@ TYPED_TEST(TypedStructUtilitiesTest, NoStructs) {"", "1", "22", "333", "4444", "55555", "666666"}, cudf::test::iterators::null_at(1)}; auto nuther_nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, cudf::test::iterators::null_at(6)}; - auto table = cudf::table_view{{nums_col, strings_col, nuther_nums_col}}; - auto flattened_table = cudf::structs::detail::flatten_nested_columns( - table, {}, {}, cudf::structs::detail::column_nullability::FORCE, cudf::get_default_stream()); + auto table = cudf::table_view{{nums_col, strings_col, nuther_nums_col}}; + auto flattened_table = + cudf::structs::detail::flatten_nested_columns(table, + {}, + {}, + cudf::structs::detail::column_nullability::FORCE, + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); CUDF_TEST_EXPECT_TABLES_EQUAL(table, flattened_table->flattened_columns()); } @@ -118,8 +129,13 @@ TYPED_TEST(TypedStructUtilitiesTest, SingleLevelStruct) auto expected = cudf::table_view{ {expected_nums_col_1, expected_structs_col, expected_nums_col_2, expected_strings_col}}; - auto flattened_table = cudf::structs::detail::flatten_nested_columns( - table, {}, {}, cudf::structs::detail::column_nullability::FORCE, cudf::get_default_stream()); + auto flattened_table = + cudf::structs::detail::flatten_nested_columns(table, + {}, + {}, + cudf::structs::detail::column_nullability::FORCE, + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, flattened_table->flattened_columns()); } @@ -146,8 +162,13 @@ TYPED_TEST(TypedStructUtilitiesTest, SingleLevelStructWithNulls) auto 
expected = cudf::table_view{ {expected_nums_col_1, expected_structs_col, expected_nums_col_2, expected_strings_col}}; - auto flattened_table = cudf::structs::detail::flatten_nested_columns( - table, {}, {}, cudf::structs::detail::column_nullability::FORCE, cudf::get_default_stream()); + auto flattened_table = + cudf::structs::detail::flatten_nested_columns(table, + {}, + {}, + cudf::structs::detail::column_nullability::FORCE, + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, flattened_table->flattened_columns()); } @@ -185,8 +206,13 @@ TYPED_TEST(TypedStructUtilitiesTest, StructOfStruct) expected_nums_col_3, expected_strings_col}}; - auto flattened_table = cudf::structs::detail::flatten_nested_columns( - table, {}, {}, cudf::structs::detail::column_nullability::FORCE, cudf::get_default_stream()); + auto flattened_table = + cudf::structs::detail::flatten_nested_columns(table, + {}, + {}, + cudf::structs::detail::column_nullability::FORCE, + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, flattened_table->flattened_columns()); } @@ -225,8 +251,13 @@ TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtLeafLevel) expected_nums_col_3, expected_strings_col}}; - auto flattened_table = cudf::structs::detail::flatten_nested_columns( - table, {}, {}, cudf::structs::detail::column_nullability::FORCE, cudf::get_default_stream()); + auto flattened_table = + cudf::structs::detail::flatten_nested_columns(table, + {}, + {}, + cudf::structs::detail::column_nullability::FORCE, + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, flattened_table->flattened_columns()); } @@ -266,8 +297,13 @@ TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtTopLevel) expected_nums_col_3, expected_strings_col}}; - auto flattened_table = cudf::structs::detail::flatten_nested_columns( - table, {}, 
{}, cudf::structs::detail::column_nullability::FORCE, cudf::get_default_stream()); + auto flattened_table = + cudf::structs::detail::flatten_nested_columns(table, + {}, + {}, + cudf::structs::detail::column_nullability::FORCE, + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, flattened_table->flattened_columns()); } @@ -307,8 +343,13 @@ TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtAllLevels) expected_nums_col_3, expected_strings_col}}; - auto flattened_table = cudf::structs::detail::flatten_nested_columns( - table, {}, {}, cudf::structs::detail::column_nullability::FORCE, cudf::get_default_stream()); + auto flattened_table = + cudf::structs::detail::flatten_nested_columns(table, + {}, + {}, + cudf::structs::detail::column_nullability::FORCE, + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, flattened_table->flattened_columns()); } @@ -330,7 +371,8 @@ TYPED_TEST(TypedStructUtilitiesTest, ListsAreUnsupported) {}, {}, cudf::structs::detail::column_nullability::FORCE, - cudf::get_default_stream()), + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()), cudf::logic_error); } @@ -346,8 +388,8 @@ TYPED_TEST_SUITE(TypedSuperimposeTest, cudf::test::FixedWidthTypes); void test_non_struct_columns(cudf::column_view const& input) { // push_down_nulls() on non-struct columns should return the input column, unchanged. 
- auto [superimposed, backing_data] = - cudf::structs::detail::push_down_nulls(input, cudf::get_default_stream()); + auto [superimposed, backing_data] = cudf::structs::detail::push_down_nulls( + input, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(input, superimposed); EXPECT_TRUE(backing_data.new_null_masks.empty()); @@ -410,8 +452,8 @@ TYPED_TEST(TypedSuperimposeTest, BasicStruct) CUDF_TEST_EXPECT_COLUMNS_EQUAL(structs_view.child(1), make_lists_member(cudf::test::iterators::nulls_at({4, 5}))); - auto [output, backing_data] = - cudf::structs::detail::push_down_nulls(structs_view, cudf::get_default_stream()); + auto [output, backing_data] = cudf::structs::detail::push_down_nulls( + structs_view, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); // After push_down_nulls(), the struct nulls (i.e. at index-0) should have been pushed // down to the children. All members should have nulls at row-index 0. @@ -436,8 +478,8 @@ TYPED_TEST(TypedSuperimposeTest, NonNullableParentStruct) cudf::test::iterators::no_nulls()} .release(); - auto [output, backing_data] = - cudf::structs::detail::push_down_nulls(structs_input->view(), cudf::get_default_stream()); + auto [output, backing_data] = cudf::structs::detail::push_down_nulls( + structs_input->view(), cudf::get_default_stream(), rmm::mr::get_current_device_resource()); // After push_down_nulls(), none of the child structs should have changed, // because the parent had no nulls to begin with. 
@@ -471,8 +513,8 @@ TYPED_TEST(TypedSuperimposeTest, NestedStruct_ChildNullable_ParentNonNullable) auto structs_of_structs = cudf::test::structs_column_wrapper{std::move(outer_struct_members)}.release(); - auto [output, backing_data] = - cudf::structs::detail::push_down_nulls(structs_of_structs->view(), cudf::get_default_stream()); + auto [output, backing_data] = cudf::structs::detail::push_down_nulls( + structs_of_structs->view(), cudf::get_default_stream(), rmm::mr::get_current_device_resource()); // After push_down_nulls(), outer-struct column should not have pushed nulls to child // structs. But the child struct column must push its nulls to its own children. @@ -514,8 +556,8 @@ TYPED_TEST(TypedSuperimposeTest, NestedStruct_ChildNullable_ParentNullable) cudf::detail::set_null_mask( structs_of_structs_view.null_mask(), 1, 2, false, cudf::get_default_stream()); - auto [output, backing_data] = - cudf::structs::detail::push_down_nulls(structs_of_structs->view(), cudf::get_default_stream()); + auto [output, backing_data] = cudf::structs::detail::push_down_nulls( + structs_of_structs->view(), cudf::get_default_stream(), rmm::mr::get_current_device_resource()); // After push_down_nulls(), outer-struct column should not have pushed nulls to child // structs. But the child struct column must push its nulls to its own children. 
@@ -570,8 +612,8 @@ TYPED_TEST(TypedSuperimposeTest, Struct_Sliced) // nums_member: 11011 // lists_member: 00111 - auto [output, backing_data] = - cudf::structs::detail::push_down_nulls(sliced_structs, cudf::get_default_stream()); + auto [output, backing_data] = cudf::structs::detail::push_down_nulls( + sliced_structs, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); // After push_down_nulls(), the null masks should be: // STRUCT: 11110 @@ -623,8 +665,8 @@ TYPED_TEST(TypedSuperimposeTest, NestedStruct_Sliced) // nums_member: 11010 // lists_member: 00110 - auto [output, backing_data] = - cudf::structs::detail::push_down_nulls(sliced_structs, cudf::get_default_stream()); + auto [output, backing_data] = cudf::structs::detail::push_down_nulls( + sliced_structs, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); // After push_down_nulls(), the null masks will be: // STRUCT: 11101 diff --git a/cpp/tests/table/table_view_tests.cu b/cpp/tests/table/table_view_tests.cu index 0542d007ca0..5127f69162f 100644 --- a/cpp/tests/table/table_view_tests.cu +++ b/cpp/tests/table/table_view_tests.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -47,8 +47,8 @@ void row_comparison(cudf::table_view input1, auto device_table_1 = cudf::table_device_view::create(input1, stream); auto device_table_2 = cudf::table_device_view::create(input2, stream); - auto d_column_order = - cudf::detail::make_device_uvector_sync(column_order, cudf::get_default_stream()); + auto d_column_order = cudf::detail::make_device_uvector_sync( + column_order, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto comparator = cudf::row_lexicographic_comparator( cudf::nullate::NO{}, *device_table_1, *device_table_2, d_column_order.data()); diff --git a/cpp/tests/types/type_dispatcher_test.cu b/cpp/tests/types/type_dispatcher_test.cu index 911911851f2..a27d8931ee6 100644 --- a/cpp/tests/types/type_dispatcher_test.cu +++ b/cpp/tests/types/type_dispatcher_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -69,7 +69,8 @@ __global__ void dispatch_test_kernel(cudf::type_id id, bool* d_result) TYPED_TEST(TypedDispatcherTest, DeviceDispatch) { - auto result = cudf::detail::make_zeroed_device_uvector_sync(1, cudf::get_default_stream()); + auto result = cudf::detail::make_zeroed_device_uvector_sync( + 1, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); dispatch_test_kernel<<<1, 1, 0, cudf::get_default_stream().value()>>>( cudf::type_to_id(), result.data()); CUDF_CUDA_TRY(cudaDeviceSynchronize()); @@ -130,7 +131,8 @@ __global__ void double_dispatch_test_kernel(cudf::type_id id1, cudf::type_id id2 TYPED_TEST(TypedDoubleDispatcherTest, DeviceDoubleDispatch) { - auto result = cudf::detail::make_zeroed_device_uvector_sync(1, cudf::get_default_stream()); + auto result = cudf::detail::make_zeroed_device_uvector_sync( + 1, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); double_dispatch_test_kernel<<<1, 1, 0, cudf::get_default_stream().value()>>>( cudf::type_to_id(), cudf::type_to_id(), result.data()); CUDF_CUDA_TRY(cudaDeviceSynchronize()); diff --git a/cpp/tests/utilities/column_utilities.cu b/cpp/tests/utilities/column_utilities.cu index 6c441539621..133ca99b31f 100644 --- a/cpp/tests/utilities/column_utilities.cu +++ b/cpp/tests/utilities/column_utilities.cu @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -371,55 +372,56 @@ struct column_property_comparator { } }; +template class corresponding_rows_unequal { public: - corresponding_rows_unequal(table_device_view d_lhs, - table_device_view d_rhs, - column_device_view lhs_row_indices_, + corresponding_rows_unequal(column_device_view lhs_row_indices_, column_device_view rhs_row_indices_, - size_type /*fp_ulps*/) - : comp(cudf::nullate::YES{}, d_lhs, d_rhs, cudf::null_equality::EQUAL), - lhs_row_indices(lhs_row_indices_), - rhs_row_indices(rhs_row_indices_) + 
size_type /*fp_ulps*/, + DeviceComparator comp_, + column_device_view /*lhs*/, + column_device_view /*rhs*/) + : lhs_row_indices(lhs_row_indices_), rhs_row_indices(rhs_row_indices_), comp(comp_) { } - cudf::row_equality_comparator comp; - __device__ bool operator()(size_type index) { - return !comp(lhs_row_indices.element(index), - rhs_row_indices.element(index)); + using cudf::experimental::row::lhs_index_type; + using cudf::experimental::row::rhs_index_type; + + return !comp(lhs_index_type{lhs_row_indices.element(index)}, + rhs_index_type{rhs_row_indices.element(index)}); } column_device_view lhs_row_indices; column_device_view rhs_row_indices; + DeviceComparator comp; }; +template class corresponding_rows_not_equivalent { - table_device_view d_lhs; - table_device_view d_rhs; - column_device_view lhs_row_indices; column_device_view rhs_row_indices; - size_type const fp_ulps; + DeviceComparator comp; + column_device_view lhs; + column_device_view rhs; public: - corresponding_rows_not_equivalent(table_device_view d_lhs, - table_device_view d_rhs, - column_device_view lhs_row_indices_, + corresponding_rows_not_equivalent(column_device_view lhs_row_indices_, column_device_view rhs_row_indices_, - size_type fp_ulps_) - : d_lhs(d_lhs), - d_rhs(d_rhs), - comp(cudf::nullate::YES{}, d_lhs, d_rhs, null_equality::EQUAL), - lhs_row_indices(lhs_row_indices_), + size_type fp_ulps_, + DeviceComparator comp_, + column_device_view lhs_, + column_device_view rhs_) + : lhs_row_indices(lhs_row_indices_), rhs_row_indices(rhs_row_indices_), - fp_ulps(fp_ulps_) + fp_ulps(fp_ulps_), + comp(comp_), + lhs(lhs_), + rhs(rhs_) { - CUDF_EXPECTS(d_lhs.num_columns() == 1 and d_rhs.num_columns() == 1, - "Unsupported number of columns"); } struct typed_element_not_equivalent { @@ -459,23 +461,17 @@ class corresponding_rows_not_equivalent { } }; - cudf::row_equality_comparator comp; - __device__ bool operator()(size_type index) { + using cudf::experimental::row::lhs_index_type; + using 
cudf::experimental::row::rhs_index_type; + auto const lhs_index = lhs_row_indices.element(index); auto const rhs_index = rhs_row_indices.element(index); - if (not comp(lhs_index, rhs_index)) { - auto lhs_col = this->d_lhs.column(0); - auto rhs_col = this->d_rhs.column(0); - return type_dispatcher(lhs_col.type(), - typed_element_not_equivalent{}, - lhs_col, - rhs_col, - lhs_index, - rhs_index, - fp_ulps); + if (not comp(lhs_index_type{lhs_index}, rhs_index_type{rhs_index})) { + return type_dispatcher( + lhs.type(), typed_element_not_equivalent{}, lhs, rhs, lhs_index, rhs_index, fp_ulps); } return false; } @@ -536,25 +532,46 @@ struct column_comparator_impl { size_type fp_ulps, int depth) { - auto d_lhs = cudf::table_device_view::create(table_view{{lhs}}); - auto d_rhs = cudf::table_device_view::create(table_view{{rhs}}); - auto d_lhs_row_indices = cudf::column_device_view::create(lhs_row_indices); auto d_rhs_row_indices = cudf::column_device_view::create(rhs_row_indices); - using ComparatorType = std::conditional_t; + auto d_lhs = cudf::column_device_view::create(lhs); + auto d_rhs = cudf::column_device_view::create(rhs); + + auto lhs_tview = table_view{{lhs}}; + auto rhs_tview = table_view{{rhs}}; + + auto const comparator = cudf::experimental::row::equality::two_table_comparator{ + lhs_tview, rhs_tview, cudf::get_default_stream()}; + auto const has_nulls = cudf::has_nulls(lhs_tview) or cudf::has_nulls(rhs_tview); + + auto const device_comparator = comparator.equal_to(cudf::nullate::DYNAMIC{has_nulls}); + + using ComparatorType = + std::conditional_t, + corresponding_rows_not_equivalent>; auto differences = rmm::device_uvector( - lhs.size(), cudf::get_default_stream()); // worst case: everything different + lhs_row_indices.size(), cudf::get_default_stream()); // worst case: everything different auto input_iter = thrust::make_counting_iterator(0); - auto diff_iter = thrust::copy_if( + + auto diff_map = rmm::device_uvector(lhs_row_indices.size(), 
cudf::get_default_stream()); + + thrust::transform( rmm::exec_policy(cudf::get_default_stream()), input_iter, input_iter + lhs_row_indices.size(), - differences.begin(), - ComparatorType(*d_lhs, *d_rhs, *d_lhs_row_indices, *d_rhs_row_indices, fp_ulps)); + diff_map.begin(), + ComparatorType( + *d_lhs_row_indices, *d_rhs_row_indices, fp_ulps, device_comparator, *d_lhs, *d_rhs)); + + auto diff_iter = thrust::copy_if(rmm::exec_policy(cudf::get_default_stream()), + input_iter, + input_iter + lhs_row_indices.size(), + diff_map.begin(), + differences.begin(), + thrust::identity{}); differences.resize(thrust::distance(differences.begin(), diff_iter), cudf::get_default_stream()); // shrink back down diff --git a/cpp/tests/utilities/default_stream.cpp b/cpp/tests/utilities/default_stream.cpp new file mode 100644 index 00000000000..52752f78bb9 --- /dev/null +++ b/cpp/tests/utilities/default_stream.cpp @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include + +namespace cudf { +namespace test { + +rmm::cuda_stream_view const get_default_stream() { return cudf::get_default_stream(); } + +} // namespace test +} // namespace cudf diff --git a/cpp/tests/utilities/identify_stream_usage.cpp b/cpp/tests/utilities/identify_stream_usage.cpp index 87301a7d49d..a4d19a8f552 100644 --- a/cpp/tests/utilities/identify_stream_usage.cpp +++ b/cpp/tests/utilities/identify_stream_usage.cpp @@ -19,18 +19,58 @@ #include +#include #include #include #include #include #include +#include #include -/** - * @brief Print a backtrace and raise an error if stream is a default stream. - */ -void check_stream_and_error(cudaStream_t stream) +// We control whether to override cudf::test::get_default_stream or +// cudf::get_default_stream with a compile-time flag. Thesee are the two valid +// options: +// 1. STREAM_MODE_TESTING=OFF: In this mode, cudf::get_default_stream will +// return a custom stream and stream_is_invalid will return true if any CUDA +// API is called using any of CUDA's default stream constants +// (cudaStreamLegacy, cudaStreamDefault, or cudaStreamPerThread). This check +// is sufficient to ensure that cudf is using cudf::get_default_stream +// everywhere internally rather than implicitly using stream 0, +// cudaStreamDefault, cudaStreamLegacy, thrust execution policies, etc. It +// is not sufficient to guarantee a stream-ordered API because it will not +// identify places in the code that use cudf::get_default_stream instead of +// properly forwarding along a user-provided stream. +// 2. STREAM_MODE_TESTING=ON: In this mode, cudf::test::get_default_stream +// returns a custom stream and stream_is_invalid returns true if any CUDA +// API is called using any stream other than cudf::test::get_default_stream. +// This is a necessary and sufficient condition to ensure that libcudf is +// properly passing streams through all of its (tested) APIs. 
+ +namespace cudf { + +#ifdef STREAM_MODE_TESTING +namespace test { +#endif + +rmm::cuda_stream_view const get_default_stream() { + static rmm::cuda_stream stream{}; + return {stream}; +} + +#ifdef STREAM_MODE_TESTING +} // namespace test +#endif + +} // namespace cudf + +bool stream_is_invalid(cudaStream_t stream) +{ +#ifdef STREAM_MODE_TESTING + // In this mode the _only_ valid stream is the one returned by cudf::test::get_default_stream. + return (stream != cudf::test::get_default_stream().value()); +#else // We explicitly list the possibilities rather than using // `cudf::get_default_stream().value()` for two reasons: // 1. There is no guarantee that `thrust::device` and the default value of @@ -39,8 +79,17 @@ void check_stream_and_error(cudaStream_t stream) // 2. Using the cudf default stream would require linking against cudf, which // adds unnecessary complexity to the build process (especially in CI) // when this simple approach is sufficient. - if (stream == cudaStreamDefault || (stream == cudaStreamLegacy) || - (stream == cudaStreamPerThread)) { + return (stream == cudaStreamDefault) || (stream == cudaStreamLegacy) || + (stream == cudaStreamPerThread); +#endif +} + +/** + * @brief Print a backtrace and raise an error if stream is a default stream. + */ +void check_stream_and_error(cudaStream_t stream) +{ + if (stream_is_invalid(stream)) { #ifdef __GNUC__ // If we're on the wrong stream, print the stack trace from the current frame. // Adapted from from https://panthema.net/2008/0901-stacktrace-demangled/ @@ -109,7 +158,12 @@ void check_stream_and_error(cudaStream_t stream) #else std::cout << "Backtraces are only when built with a GNU compiler." << std::endl; #endif // __GNUC__ - throw std::runtime_error("Found unexpected default stream!"); + char const* env_stream_error_mode{std::getenv("GTEST_CUDF_STREAM_ERROR_MODE")}; + if (env_stream_error_mode && !strcmp(env_stream_error_mode, "print")) { + std::cout << "Found unexpected stream!" 
<< std::endl; + } else { + throw std::runtime_error("Found unexpected stream!"); + } } } @@ -289,23 +343,6 @@ DEFINE_OVERLOAD(cudaMallocFromPoolAsync, ARG(void** ptr, size_t size, cudaMemPool_t memPool, cudaStream_t stream), ARG(ptr, size, memPool, stream)); -namespace cudf { - -/** - * @brief Get the current default stream - * - * Overload the default function to return a new stream here. - * - * @return The current default stream. - */ -rmm::cuda_stream_view const get_default_stream() -{ - static rmm::cuda_stream stream{}; - return {stream}; -} - -} // namespace cudf - /** * @brief Function to collect all the original CUDA symbols corresponding to overloaded functions. * diff --git a/cpp/tests/utilities/tdigest_utilities.cu b/cpp/tests/utilities/tdigest_utilities.cu index 15998e32bd0..d2e95812894 100644 --- a/cpp/tests/utilities/tdigest_utilities.cu +++ b/cpp/tests/utilities/tdigest_utilities.cu @@ -64,12 +64,12 @@ void tdigest_sample_compare(cudf::tdigest::tdigest_column_view const& tdv, }); } - auto d_expected_src = - cudf::detail::make_device_uvector_async(h_expected_src, cudf::get_default_stream()); - auto d_expected_mean = - cudf::detail::make_device_uvector_async(h_expected_mean, cudf::get_default_stream()); - auto d_expected_weight = - cudf::detail::make_device_uvector_async(h_expected_weight, cudf::get_default_stream()); + auto d_expected_src = cudf::detail::make_device_uvector_async( + h_expected_src, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto d_expected_mean = cudf::detail::make_device_uvector_async( + h_expected_mean, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto d_expected_weight = cudf::detail::make_device_uvector_async( + h_expected_weight, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto iter = thrust::make_counting_iterator(0); thrust::for_each( diff --git a/cpp/tests/utilities_tests/span_tests.cu b/cpp/tests/utilities_tests/span_tests.cu index 
a043e723eda..66f9fbfc0d6 100644 --- a/cpp/tests/utilities_tests/span_tests.cu +++ b/cpp/tests/utilities_tests/span_tests.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -238,8 +238,8 @@ __global__ void simple_device_kernel(device_span result) { result[0] = tru TEST(SpanTest, CanUseDeviceSpan) { - auto d_message = - cudf::detail::make_zeroed_device_uvector_async(1, cudf::get_default_stream()); + auto d_message = cudf::detail::make_zeroed_device_uvector_async( + 1, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto d_span = device_span(d_message.data(), d_message.size()); diff --git a/cpp/tests/utilities_tests/type_check_tests.cpp b/cpp/tests/utilities_tests/type_check_tests.cpp index 84a2d15d477..f65c3652dc9 100644 --- a/cpp/tests/utilities_tests/type_check_tests.cpp +++ b/cpp/tests/utilities_tests/type_check_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -147,6 +147,7 @@ TEST_F(ColumnTypeCheckTest, DifferentFixedWidth) fixed_point_column_wrapper rhs5({10000}, numeric::scale_type{0}); EXPECT_FALSE(column_types_equal(lhs5, rhs5)); + EXPECT_TRUE(column_types_equivalent(lhs5, rhs5)); // Different rep, same scale fixed_point_column_wrapper lhs6({10000}, numeric::scale_type{-1}); diff --git a/dependencies.yaml b/dependencies.yaml index 7b623d58425..1bd664fc57d 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -8,31 +8,43 @@ files: includes: - build_all - build_cpp + - build_wheels - build_python + - build_python_common - cudatoolkit - develop - docs - notebooks - py_version - - run + - run_common + - run_cudf + - run_dask_cudf + - run_cudf_kafka + - run_custreamz - test_cpp - - test_python + - test_python_common + - test_python_cudf + - test_python_dask_cudf test_cpp: output: none includes: - cudatoolkit - - libidentify_stream_usage_build - test_cpp + - libarrow_run test_python: output: none includes: - cudatoolkit - py_version - - test_python + - test_python_common + - test_python_cudf + - test_python_dask_cudf + - pyarrow_run test_java: output: none includes: - build_all + - libarrow_run - cudatoolkit - test_java test_notebooks: @@ -51,6 +63,103 @@ files: - cudatoolkit - docs - py_version + py_build_cudf: + output: pyproject + pyproject_dir: python/cudf + extras: + table: build-system + includes: + - build_all + - build_python + - build_python_common + - build_wheels + py_run_cudf: + output: pyproject + pyproject_dir: python/cudf + extras: + table: project + includes: + - run_common + - run_cudf + - pyarrow_run + py_test_cudf: + output: pyproject + pyproject_dir: python/cudf + extras: + table: project.optional-dependencies + key: test + includes: + - test_python_common + - test_python_cudf + py_build_dask_cudf: + output: pyproject + pyproject_dir: python/dask_cudf + extras: + table: build-system + includes: + - build_wheels + py_run_dask_cudf: + output: pyproject + pyproject_dir: python/dask_cudf + extras: + 
table: project + includes: + - run_common + - run_dask_cudf + py_test_dask_cudf: + output: pyproject + pyproject_dir: python/dask_cudf + extras: + table: project.optional-dependencies + key: test + includes: + - test_python_common + - test_python_dask_cudf + py_build_cudf_kafka: + output: pyproject + pyproject_dir: python/cudf_kafka + extras: + table: build-system + includes: + - build_wheels + - build_python_common + py_run_cudf_kafka: + output: pyproject + pyproject_dir: python/cudf_kafka + extras: + table: project + includes: + - run_cudf_kafka + py_test_cudf_kafka: + output: pyproject + pyproject_dir: python/cudf_kafka + extras: + table: project.optional-dependencies + key: test + includes: + - test_python_common + py_build_custreamz: + output: pyproject + pyproject_dir: python/custreamz + extras: + table: build-system + includes: + - build_wheels + py_run_custreamz: + output: pyproject + pyproject_dir: python/custreamz + extras: + table: project + includes: + - run_custreamz + py_test_custreamz: + output: pyproject + pyproject_dir: python/custreamz + extras: + table: project.optional-dependencies + key: test + includes: + - test_python_common channels: - rapidsai - rapidsai-nightly @@ -61,29 +170,28 @@ channels: dependencies: build_all: common: - - output_types: [conda, requirements] + - output_types: [conda, requirements, pyproject] packages: - &cmake_ver cmake>=3.23.1,!=3.25.0 - - dlpack>=0.5,<0.6.0a0 - ninja - output_types: conda packages: - - libarrow=10 - c-compiler - cxx-compiler + - dlpack>=0.5,<0.6.0a0 specific: - output_types: conda matrices: - matrix: arch: x86_64 packages: - - &gcc_amd64 gcc_linux-64=11.* - - &sysroot_amd64 sysroot_linux-64==2.17 + - gcc_linux-64=11.* + - sysroot_linux-64==2.17 - matrix: arch: aarch64 packages: - - &gcc_aarch64 gcc_linux-aarch64=11.* - - &sysroot_aarch64 sysroot_linux-aarch64==2.17 + - gcc_linux-aarch64=11.* + - sysroot_linux-aarch64==2.17 - output_types: conda matrices: - matrix: @@ -100,24 +208,56 @@ dependencies: 
common: - output_types: [conda, requirements] packages: - - librmm=23.04.* + - librmm==23.4.* - output_types: conda packages: - fmt>=9.1.0,<10 + - &gtest gtest==1.10.0.* + - &gmock gmock==1.10.0.* # Hard pin the patch version used during the build. This must be kept + # in sync with the version pinned in get_arrow.cmake. + - libarrow==10.0.1.* - librdkafka=1.7.0 - spdlog>=1.11.0,<1.12 - build_python: + build_wheels: common: - - output_types: [conda, requirements] + - output_types: pyproject + packages: + - wheel + - setuptools + build_python_common: + common: + - output_types: [conda, requirements, pyproject] packages: - - cuda-python>=11.7.1,<12.0 - cython>=0.29,<0.30 - - pyarrow=10 - - rmm=23.04.* + # Hard pin the patch version used during the build. This must be kept + # in sync with the version pinned in get_arrow.cmake. + - pyarrow==10.0.1.* + - numpy>=1.21 + build_python: + common: + - output_types: [conda, requirements, pyproject] packages: - scikit-build>=0.13.1 + - rmm==23.4.* - output_types: conda packages: - - protobuf>=4.21.6,<4.22 + - &protobuf protobuf>=4.21.6,<4.22 + - output_types: pyproject + packages: + - protoc-wheel + libarrow_run: + common: + - output_types: [conda, requirements] + packages: + # Allow runtime version to float up to minor version + - libarrow==10.* + pyarrow_run: + common: + - output_types: [conda, requirements, pyproject] + packages: + # Allow runtime version to float up to minor version + - pyarrow==10.* cudatoolkit: specific: - output_types: conda @@ -161,66 +301,6 @@ dependencies: - sphinx-copybutton - sphinx-markdown-tables - sphinxcontrib-websupport - libidentify_stream_usage_build: - common: - - output_types: conda - packages: - - *cmake_ver - specific: - - output_types: conda - matrices: - - matrix: - arch: x86_64 - packages: - - *gcc_amd64 - - *sysroot_amd64 - - matrix: - arch: aarch64 - packages: - - *gcc_aarch64 - - *sysroot_aarch64 - - output_types: conda - matrices: - - matrix: - arch: x86_64 - cuda: "11.2" - 
packages: - - nvcc_linux-64=11.2 - - matrix: - arch: aarch64 - cuda: "11.2" - packages: - - nvcc_linux-aarch64=11.2 - - matrix: - arch: x86_64 - cuda: "11.4" - packages: - - nvcc_linux-64=11.4 - - matrix: - arch: aarch64 - cuda: "11.4" - packages: - - nvcc_linux-aarch64=11.4 - - matrix: - arch: x86_64 - cuda: "11.5" - packages: - - nvcc_linux-64=11.5 - - matrix: - arch: aarch64 - cuda: "11.5" - packages: - - nvcc_linux-aarch64=11.5 - - matrix: - arch: x86_64 - cuda: "11.8" - packages: - - nvcc_linux-64=11.8 - - matrix: - arch: aarch64 - cuda: "11.8" - packages: - - nvcc_linux-aarch64=11.8 notebooks: common: - output_types: [conda, requirements] @@ -247,22 +327,25 @@ dependencies: - matrix: packages: - python>=3.8,<3.11 - run: + run_common: common: - - output_types: [conda, requirements] + - output_types: [conda, requirements, pyproject] packages: - - cachetools - - dask>=2023.1.1 - - distributed>=2023.1.1 - fsspec>=0.6.0 - - numba>=0.56.2 - numpy>=1.21 + - pandas>=1.3,<1.6.0dev0 + run_cudf: + common: + - output_types: [conda, requirements, pyproject] + packages: + - cachetools + - cuda-python>=11.7.1,<12.0 + - &numba numba>=0.56.4,<0.57 - nvtx>=0.2.1 - packaging - - pandas>=1.3,<1.6.0dev0 - - python-confluent-kafka=1.7.0 - - streamz + - rmm==23.4.* - typing_extensions + - *protobuf - output_types: conda packages: - cubinlinker @@ -271,7 +354,6 @@ dependencies: - pip: - git+https://github.com/python-streamz/streamz.git@master - ptxcompiler - - rmm=23.04.* - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -280,7 +362,11 @@ dependencies: - cubinlinker-cu11 - git+https://github.com/python-streamz/streamz.git@master - ptxcompiler-cu11 - - rmm-cu11=23.04.* + - output_types: pyproject + packages: + - cubinlinker + - &cupy_pip cupy-cuda11x>=9.5.0,<12.0.0a0 + - ptxcompiler specific: - output_types: requirements matrices: @@ -292,7 +378,41 @@ dependencies: arch: aarch64 packages: - 
cupy-cuda11x -f https://pip.cupy.dev/aarch64 # TODO: Verify that this works. + run_dask_cudf: + common: + - output_types: [conda, requirements, pyproject] + packages: + - dask>=2023.1.1 + - distributed>=2023.1.1 + - output_types: pyproject + packages: + - &cudf cudf==23.4.* + - *cupy_pip + run_cudf_kafka: + common: + - output_types: conda + packages: + - python-confluent-kafka==1.7.0 + - output_types: [requirements, pyproject] + packages: + - *cudf + - confluent-kafka==1.7.0 + run_custreamz: + common: + - output_types: [conda, requirements, pyproject] + packages: + - streamz + - output_types: [requirements, pyproject] + packages: + - *cudf + - cudf_kafka==23.4.* test_cpp: + common: + - output_types: conda + packages: + - *cmake_ver + - *gtest + - *gmock specific: - output_types: conda matrices: @@ -320,27 +440,39 @@ dependencies: cuda: "11.8" packages: - cuda-nvtx=11.8 - test_python: + test_python_common: common: - - output_types: [conda, requirements] + - output_types: [conda, requirements, pyproject] + packages: + - pytest + - pytest-cov + - pytest-xdist + test_python_cudf: + common: + - output_types: [conda, requirements, pyproject] packages: - - aiobotocore>=2.2.0 - - boto3>=1.21.21 - - botocore>=1.24.21 - - dask-cuda=23.04.* - fastavro>=0.22.9 - hypothesis - mimesis>=4.1.0 - - moto>=4.0.8 - pyorc - - pytest - pytest-benchmark - pytest-cases - - pytest-cov - - pytest-xdist - python-snappy>=0.6.0 - - s3fs>=2022.3.0 - scipy + - output_types: conda + packages: + - aiobotocore>=2.2.0 + - boto3>=1.21.21 + - botocore>=1.24.21 + - msgpack-python + - moto>=4.0.8 + - s3fs>=2022.3.0 + - output_types: pyproject + packages: + - msgpack + - &tokenizers tokenizers==0.13.1 + - &transformers transformers==4.24.0 + - tzdata specific: - output_types: conda matrices: @@ -352,7 +484,13 @@ dependencies: - pytorch<1.12.0 # We only install these on x86_64 to avoid pulling pytorch as a # dependency of transformers. 
- - tokenizers==0.13.1 - - transformers==4.24.0 + - *tokenizers + - *transformers - matrix: packages: + test_python_dask_cudf: + common: + - output_types: [conda, requirements, pyproject] + packages: + - dask-cuda==23.4.* + - *numba diff --git a/docs/cudf/source/api_docs/general_functions.rst b/docs/cudf/source/api_docs/general_functions.rst index 112df2fdf9f..5c28b4e7e85 100644 --- a/docs/cudf/source/api_docs/general_functions.rst +++ b/docs/cudf/source/api_docs/general_functions.rst @@ -27,6 +27,7 @@ Top-level conversions cudf.to_numeric cudf.from_dlpack + cudf.from_pandas Top-level dealing with datetimelike ----------------------------------- diff --git a/docs/dask_cudf/Makefile b/docs/dask_cudf/Makefile new file mode 100644 index 00000000000..d0c3cbf1020 --- /dev/null +++ b/docs/dask_cudf/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/dask_cudf/make.bat b/docs/dask_cudf/make.bat new file mode 100644 index 00000000000..747ffb7b303 --- /dev/null +++ b/docs/dask_cudf/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. 
Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/dask_cudf/source/_static/RAPIDS-logo-purple.png b/docs/dask_cudf/source/_static/RAPIDS-logo-purple.png new file mode 100644 index 00000000000..d884e01374d Binary files /dev/null and b/docs/dask_cudf/source/_static/RAPIDS-logo-purple.png differ diff --git a/docs/dask_cudf/source/api.rst b/docs/dask_cudf/source/api.rst new file mode 100644 index 00000000000..893f5dd7434 --- /dev/null +++ b/docs/dask_cudf/source/api.rst @@ -0,0 +1,79 @@ +=============== + API reference +=============== + +This page provides a list of all publicly accessible modules, methods, +and classes in the ``dask_cudf`` namespace. + + +Creating and storing DataFrames +=============================== + +:doc:`Like Dask `, Dask-cuDF supports creation +of DataFrames from a variety of storage formats. For on-disk data that +are not supported directly in Dask-cuDF, we recommend using Dask's +data reading facilities, followed by calling +:func:`.from_dask_dataframe` to obtain a Dask-cuDF object. + +.. automodule:: dask_cudf + :members: + from_cudf, + from_dask_dataframe, + read_csv, + read_json, + read_orc, + to_orc, + read_text, + read_parquet + +.. warning:: + + FIXME: where should the following live? + + .. autofunction:: dask_cudf.concat + + .. 
autofunction:: dask_cudf.from_delayed + +Grouping +======== + +As discussed in the :doc:`Dask documentation for groupby +`, ``groupby``, ``join``, and ``merge``, and +similar operations that require matching up rows of a DataFrame become +significantly more challenging in a parallel setting than they are in +serial. Dask-cuDF has the same challenges, however for certain groupby +operations, we can take advantage of functionality in cuDF that allows +us to compute multiple aggregations at once. There are therefore two +interfaces to grouping in Dask-cuDF, the general +:meth:`DataFrame.groupby` which returns a +:class:`.CudfDataFrameGroupBy` object, and a specialized +:func:`.groupby_agg`. Generally speaking, you should not need to call +:func:`.groupby_agg` directly, since Dask-cuDF will arrange to call it +if possible. + +.. autoclass:: dask_cudf.groupby.CudfDataFrameGroupBy + :members: + :inherited-members: + :show-inheritance: + +.. autofunction:: dask_cudf.groupby_agg + + +DataFrames and Series +===================== + +The core distributed objects provided by Dask-cuDF are the +:class:`.DataFrame` and :class:`.Series`. These inherit respectively +from :class:`dask.dataframe.DataFrame` and +:class:`dask.dataframe.Series`, and so the API is essentially +identical. The full API is provided below. + +.. autoclass:: dask_cudf.DataFrame + :members: + :inherited-members: + :show-inheritance: + +.. autoclass:: dask_cudf.Series + :members: + :inherited-members: + :show-inheritance: diff --git a/docs/dask_cudf/source/conf.py b/docs/dask_cudf/source/conf.py new file mode 100644 index 00000000000..1341e7fd9e7 --- /dev/null +++ b/docs/dask_cudf/source/conf.py @@ -0,0 +1,82 @@ +# Copyright (c) 2018-2023, NVIDIA CORPORATION. + +# Configuration file for the Sphinx documentation builder. 
+# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +project = "dask-cudf" +copyright = "2018-2023, NVIDIA Corporation" +author = "NVIDIA Corporation" +version = "23.04" +release = "23.04.00" + +language = "en" + + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = [ + "sphinx.ext.intersphinx", + "sphinx.ext.autodoc", + "sphinx_copybutton", + "numpydoc", + "IPython.sphinxext.ipython_console_highlighting", + "IPython.sphinxext.ipython_directive", + "myst_nb", +] + +templates_path = ["_templates"] +exclude_patterns = [] + +copybutton_prompt_text = ">>> " + +# Enable automatic generation of systematic, namespaced labels for sections +myst_heading_anchors = 2 + + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = "pydata_sphinx_theme" +html_logo = "_static/RAPIDS-logo-purple.png" +htmlhelp_basename = "dask-cudfdoc" +html_use_modindex = True + +html_static_path = ["_static"] + +pygments_style = "sphinx" + +html_theme_options = { + "external_links": [], + "github_url": "https://github.com/rapidsai/cudf", + "twitter_url": "https://twitter.com/rapidsai", + "show_toc_level": 1, + "navbar_align": "right", +} +include_pandas_compat = True + +intersphinx_mapping = { + "python": ("https://docs.python.org/3", None), + "cupy": ("https://docs.cupy.dev/en/stable/", None), + "numpy": ("https://numpy.org/doc/stable", None), + "pyarrow": ("https://arrow.apache.org/docs/", None), + "cudf": ("https://docs.rapids.ai/api/cudf/stable/", None), + "dask": 
("https://docs.dask.org/en/stable/", None), + "pandas": ("https://pandas.pydata.org/docs/", None), +} + +numpydoc_show_inherited_class_members = True +numpydoc_class_members_toctree = False +numpydoc_attributes_as_param_list = False + + +def setup(app): + app.add_css_file("https://docs.rapids.ai/assets/css/custom.css") + app.add_js_file( + "https://docs.rapids.ai/assets/js/custom.js", loading_method="defer" + ) diff --git a/docs/dask_cudf/source/index.rst b/docs/dask_cudf/source/index.rst new file mode 100644 index 00000000000..0442ab0929a --- /dev/null +++ b/docs/dask_cudf/source/index.rst @@ -0,0 +1,112 @@ +.. dask-cudf documentation coordinating file, created by + sphinx-quickstart on Mon Feb 6 18:48:11 2023. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to dask-cudf's documentation! +===================================== + +Dask-cuDF is an extension library for the `Dask `__ +parallel computing framework that provides a `cuDF +`__-backed distributed +dataframe with the same API as `Dask dataframes +`__. + +If you are familiar with Dask and `pandas `__ or +`cuDF `__, then Dask-cuDF +should feel familiar to you. If not, we recommend starting with `10 +minutes to Dask +`__ followed +by `10 minutes to cuDF and Dask-cuDF +`__. + +When running on multi-GPU systems, `Dask-CUDA +`__ is recommended to +simplify the setup of the cluster, taking advantage of all features of +the GPU and networking hardware. + +Using Dask-cuDF +--------------- + +When installed, Dask-cuDF registers itself as a dataframe backend for +Dask. This means that in many cases, using cuDF-backed dataframes requires +only small changes to an existing workflow. The minimal change is to +select cuDF as the dataframe backend in :doc:`Dask's +configuration `. To do so, we must set the option +``dataframe.backend`` to ``cudf``. 
From Python, this can be achieved +like so:: + + import dask + + dask.config.set({"dataframe.backend": "cudf"}) + +Alternatively, you can set ``DASK_DATAFRAME__BACKEND=cudf`` in the +environment before running your code. + +Dataframe creation from on-disk formats +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If your workflow creates Dask dataframes from on-disk formats +(for example using :func:`dask.dataframe.read_parquet`), then setting +the backend may well be enough to migrate your workflow. + +For example, consider reading a dataframe from parquet:: + + import dask.dataframe as dd + + # By default, we obtain a pandas-backed dataframe + df = dd.read_parquet("data.parquet", ...) + + +To obtain a cuDF-backed dataframe, we must set the +``dataframe.backend`` configuration option:: + + import dask + import dask.dataframe as dd + + dask.config.set({"dataframe.backend": "cudf"}) + # This gives us a cuDF-backed dataframe + df = dd.read_parquet("data.parquet", ...) + +This code will use cuDF's GPU-accelerated :func:`parquet reader +` to read partitions of the data. + +Dataframe creation from in-memory formats +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If you already have a dataframe in memory and want to convert it to a +cuDF-backend one, there are two options depending on whether the +dataframe is already a Dask one or not. If you have a Dask dataframe, +then you can call :func:`dask.dataframe.to_backend` passing ``"cudf"`` +as the backend; if you have a pandas dataframe then you can either +call :func:`dask.dataframe.from_pandas` followed by +:func:`~dask.dataframe.to_backend` or first convert the dataframe with +:func:`cudf.from_pandas` and then parallelise this with +:func:`dask_cudf.from_cudf`. + +API Reference +------------- + +Generally speaking, Dask-cuDF tries to offer exactly the same API as +Dask itself. 
There are, however, some minor differences mostly because +cuDF does not :doc:`perfectly mirror ` +the pandas API, or because cuDF provides additional configuration +flags (these mostly occur in data reading and writing interfaces). + +As a result, straightforward workflows can be migrated without too +much trouble, but more complex ones that utilise more features may +need a bit of tweaking. The API documentation describes details of the +differences and all functionality that Dask-cuDF supports. + +.. toctree:: + :maxdepth: 2 + + api + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index 84183819854..7d93438d72e 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -2914,6 +2914,41 @@ public final ColumnVector stringReplace(Scalar target, Scalar replace) { replace.getScalarHandle())); } + /** + * Returns a new strings column where target strings within each string are replaced with + * corresponding replacement strings. For each string in the column, the list of targets + * is searched within that string. If a target string is found, it is replaced by the + * corresponding entry in the repls column. All occurrences found in each string are replaced. + * The repls argument can optionally contain a single string. In this case, all matching + * target substrings will be replaced by that single string. + * + * Example: + * cv = ["hello", "goodbye"] + * targets = ["e","o"] + * repls = ["EE","OO"] + * r1 = cv.stringReplace(targets, repls) + * r1 is now ["hEEllO", "gOOOOdbyEE"] + * + * targets = ["e", "o"] + * repls = ["_"] + * r2 = cv.stringReplace(targets, repls) + * r2 is now ["h_ll_", "g__dby_"] + * + * @param targets Strings to search for in each string. + * @param repls Corresponding replacement strings for target strings. 
+ * @return A new java column vector containing the replaced strings. + */ + public final ColumnVector stringReplace(ColumnView targets, ColumnView repls) { + assert type.equals(DType.STRING) : "column type must be a String"; + assert targets != null : "target list may not be null"; + assert targets.getType().equals(DType.STRING) : "target list must be a string column"; + assert repls != null : "replacement list may not be null"; + assert repls.getType().equals(DType.STRING) : "replacement list must be a string column"; + + return new ColumnVector(stringReplaceMulti(getNativeView(), targets.getNativeView(), + repls.getNativeView())); + } + /** * For each string, replaces any character sequence matching the given pattern using the * replacement string scalar. @@ -4170,6 +4205,14 @@ private static native long substringColumn(long columnView, long startColumn, lo */ private static native long stringReplace(long columnView, long target, long repl) throws CudfException; + /** + * Native method to replace target strings by corresponding repl strings. + * @param inputCV native handle of the cudf::column_view being operated on. + * @param targetsCV handle of column containing the strings being searched. + * @param replsCV handle of column containing the strings to replace (can optionally contain a single string). + */ + private static native long stringReplaceMulti(long inputCV, long targetsCV, long replsCV) throws CudfException; + /** * Native method for replacing each regular expression pattern match with the specified * replacement string. diff --git a/java/src/main/native/src/ColumnVectorJni.cpp b/java/src/main/native/src/ColumnVectorJni.cpp index 6dc7de13560..1d22d8a5d79 100644 --- a/java/src/main/native/src/ColumnVectorJni.cpp +++ b/java/src/main/native/src/ColumnVectorJni.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -32,6 +32,7 @@ #include #include #include +#include #include "cudf_jni_apis.hpp" #include "dtype_utils.hpp" @@ -296,7 +297,8 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_concatenate(JNIEnv *env cudf::jni::native_jpointerArray{env, column_handles}.get_dereferenced(); auto const is_lists_column = columns[0].type().id() == cudf::type_id::LIST; return release_as_jlong( - is_lists_column ? cudf::lists::detail::concatenate(columns, cudf::get_default_stream()) : + is_lists_column ? cudf::lists::detail::concatenate(columns, cudf::get_default_stream(), + rmm::mr::get_current_device_resource()) : cudf::concatenate(columns)); } CATCH_STD(env, 0); diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index f2c361c5e8c..1213ab305fe 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -1546,6 +1546,26 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringReplace(JNIEnv *env CATCH_STD(env, 0); } +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringReplaceMulti(JNIEnv *env, jclass, + jlong inputs_cv, + jlong targets_cv, + jlong repls_cv) { + JNI_NULL_CHECK(env, inputs_cv, "column is null", 0); + JNI_NULL_CHECK(env, targets_cv, "targets string column view is null", 0); + JNI_NULL_CHECK(env, repls_cv, "repls string column view is null", 0); + try { + cudf::jni::auto_set_device(env); + cudf::column_view *cv = reinterpret_cast(inputs_cv); + cudf::strings_column_view scv(*cv); + cudf::column_view *cvtargets = reinterpret_cast(targets_cv); + cudf::strings_column_view scvtargets(*cvtargets); + cudf::column_view *cvrepls = reinterpret_cast(repls_cv); + cudf::strings_column_view scvrepls(*cvrepls); + return release_as_jlong(cudf::strings::replace(scv, scvtargets, scvrepls)); + } + CATCH_STD(env, 0); +} + JNIEXPORT jlong 
JNICALL Java_ai_rapids_cudf_ColumnView_mapLookupForKeys(JNIEnv *env, jclass, jlong map_column_view, jlong lookup_keys) { diff --git a/java/src/main/native/src/ColumnViewJni.cu b/java/src/main/native/src/ColumnViewJni.cu index 7e0b0f9330d..9a96374688a 100644 --- a/java/src/main/native/src/ColumnViewJni.cu +++ b/java/src/main/native/src/ColumnViewJni.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -55,7 +56,7 @@ new_column_with_boolean_column_as_validity(cudf::column_view const &exemplar, auto [null_mask, null_count] = cudf::detail::valid_if( validity_begin, validity_end, [] __device__(auto optional_bool) { return optional_bool.value_or(false); }, - cudf::get_default_stream()); + cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const exemplar_without_null_mask = cudf::column_view{ exemplar.type(), exemplar.size(), @@ -152,8 +153,9 @@ void post_process_list_overlap(cudf::column_view const &lhs, cudf::column_view c }); // Create a new nullmask from the validity data. 
- auto [new_null_mask, new_null_count] = cudf::detail::valid_if( - validity.begin(), validity.end(), thrust::identity{}, cudf::get_default_stream()); + auto [new_null_mask, new_null_count] = + cudf::detail::valid_if(validity.begin(), validity.end(), thrust::identity{}, + cudf::get_default_stream(), rmm::mr::get_current_device_resource()); if (new_null_count > 0) { // If the `overlap_result` column is nullable, perform `bitmask_and` of its nullmask and the @@ -162,7 +164,8 @@ void post_process_list_overlap(cudf::column_view const &lhs, cudf::column_view c auto [null_mask, null_count] = cudf::detail::bitmask_and( std::vector{ overlap_cv.null_mask(), static_cast(new_null_mask.data())}, - std::vector{0, 0}, overlap_cv.size(), stream); + std::vector{0, 0}, overlap_cv.size(), stream, + rmm::mr::get_current_device_resource()); overlap_result->set_null_mask(std::move(null_mask), null_count); } else { // Just set the output nullmask as the new nullmask. @@ -187,13 +190,14 @@ std::unique_ptr lists_distinct_by_key(cudf::lists_column_view cons // Use `cudf::duplicate_keep_option::KEEP_LAST` so this will produce the desired behavior when // being called in `create_map` in spark-rapids. // Other options comparing nulls and NaNs are set as all-equal. 
- auto out_columns = cudf::detail::stable_distinct( - table_view{{column_view{cudf::device_span{labels}}, - child.child(0), child.child(1)}}, // input table - std::vector{0, 1}, // key columns - cudf::duplicate_keep_option::KEEP_LAST, cudf::null_equality::EQUAL, - cudf::nan_equality::ALL_EQUAL, stream) - ->release(); + auto out_columns = + cudf::detail::stable_distinct( + table_view{{column_view{cudf::device_span{labels}}, child.child(0), + child.child(1)}}, // input table + std::vector{0, 1}, // key columns + cudf::duplicate_keep_option::KEEP_LAST, cudf::null_equality::EQUAL, + cudf::nan_equality::ALL_EQUAL, stream, rmm::mr::get_current_device_resource()) + ->release(); auto const out_labels = out_columns.front()->view(); // Assemble a structs column of . @@ -211,9 +215,10 @@ std::unique_ptr lists_distinct_by_key(cudf::lists_column_view cons cudf::detail::labels_to_offsets(labels_begin, labels_begin + out_labels.size(), offsets_begin, offsets_begin + out_offsets->size(), stream); - return cudf::make_lists_column(input.size(), std::move(out_offsets), std::move(out_structs), - input.null_count(), - cudf::detail::copy_bitmask(input.parent(), stream), stream); + return cudf::make_lists_column( + input.size(), std::move(out_offsets), std::move(out_structs), input.null_count(), + cudf::detail::copy_bitmask(input.parent(), stream, rmm::mr::get_current_device_resource()), + stream); } } // namespace cudf::jni diff --git a/java/src/main/native/src/maps_column_view.cu b/java/src/main/native/src/maps_column_view.cu index 23254c0d501..1af7689f972 100644 --- a/java/src/main/native/src/maps_column_view.cu +++ b/java/src/main/native/src/maps_column_view.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -55,7 +55,8 @@ std::unique_ptr get_values_for_impl(maps_column_view const &maps_view, CUDF_EXPECTS(lookup_keys.type().id() == keys_.child().type().id(), "Lookup keys must have the same type as the keys of the map column."); auto key_indices = - lists::detail::index_of(keys_, lookup_keys, lists::duplicate_find_option::FIND_LAST, stream); + lists::detail::index_of(keys_, lookup_keys, lists::duplicate_find_option::FIND_LAST, stream, + rmm::mr::get_current_device_resource()); auto constexpr absent_offset = size_type{-1}; auto constexpr nullity_offset = std::numeric_limits::min(); thrust::replace(rmm::exec_policy(stream), key_indices->mutable_view().template begin(), @@ -86,7 +87,8 @@ std::unique_ptr contains_impl(maps_column_view const &maps_view, KeyT co auto const keys = maps_view.keys(); CUDF_EXPECTS(lookup_keys.type().id() == keys.child().type().id(), "Lookup keys must have the same type as the keys of the map column."); - auto const contains = lists::detail::contains(keys, lookup_keys, stream); + auto const contains = + lists::detail::contains(keys, lookup_keys, stream, rmm::mr::get_current_device_resource()); // Replace nulls with BOOL8{false}; auto const scalar_false = numeric_scalar{false, true, stream}; return detail::replace_nulls(contains->view(), scalar_false, stream, mr); diff --git a/java/src/main/native/src/row_conversion.cu b/java/src/main/native/src/row_conversion.cu index 5cf7658106f..84f84f8b46f 100644 --- a/java/src/main/native/src/row_conversion.cu +++ b/java/src/main/native/src/row_conversion.cu @@ -226,7 +226,8 @@ build_string_row_offsets(table_view const &tbl, size_type fixed_width_and_validi std::copy_if(offsets_iter, offsets_iter + tbl.num_columns(), std::back_inserter(offsets_iterators), [](auto const &offset_ptr) { return offset_ptr != nullptr; }); - return make_device_uvector_async(offsets_iterators, stream); + return make_device_uvector_async(offsets_iterators, stream, + rmm::mr::get_current_device_resource()); }(); auto const 
num_columns = static_cast(d_offsets_iterators.size()); @@ -1256,7 +1257,7 @@ static std::unique_ptr fixed_width_convert_to_rows( // Allocate and set the offsets row for the byte array std::unique_ptr offsets = - cudf::detail::sequence(num_rows + 1, zero, scalar_size_per_row, stream); + cudf::detail::sequence(num_rows + 1, zero, scalar_size_per_row, stream, mr); std::unique_ptr data = make_numeric_column(data_type(type_id::INT8), static_cast(total_allocation), @@ -1539,7 +1540,9 @@ batch_data build_batches(size_type num_rows, RowSize row_sizes, bool all_fixed_w last_row_end = row_end; } - return {std::move(batch_row_offsets), make_device_uvector_async(batch_row_boundaries, stream), + return {std::move(batch_row_offsets), + make_device_uvector_async(batch_row_boundaries, stream, + rmm::mr::get_current_device_resource()), std::move(batch_row_boundaries), std::move(row_batches)}; } @@ -1750,8 +1753,10 @@ std::vector> convert_to_rows( return table_view(cols); }; - auto dev_col_sizes = make_device_uvector_async(column_info.column_sizes, stream); - auto dev_col_starts = make_device_uvector_async(column_info.column_starts, stream); + auto dev_col_sizes = make_device_uvector_async(column_info.column_sizes, stream, + rmm::mr::get_current_device_resource()); + auto dev_col_starts = make_device_uvector_async(column_info.column_starts, stream, + rmm::mr::get_current_device_resource()); // Get the pointers to the input columnar data ready auto const data_begin = thrust::make_transform_iterator(tbl.begin(), [](auto const &c) { @@ -1764,8 +1769,10 @@ std::vector> convert_to_rows( thrust::make_transform_iterator(tbl.begin(), [](auto const &c) { return c.null_mask(); }); std::vector input_nm(nm_begin, nm_begin + tbl.num_columns()); - auto dev_input_data = make_device_uvector_async(input_data, stream); - auto dev_input_nm = make_device_uvector_async(input_nm, stream); + auto dev_input_data = + make_device_uvector_async(input_data, stream, rmm::mr::get_current_device_resource()); + 
auto dev_input_nm = + make_device_uvector_async(input_nm, stream, rmm::mr::get_current_device_resource()); // the first batch always exists unless we were sent an empty table auto const first_batch_size = batch_info.row_batches[0].row_count; @@ -1811,7 +1818,8 @@ std::vector> convert_to_rows( auto validity_tile_infos = detail::build_validity_tile_infos( tbl.num_columns(), num_rows, shmem_limit_per_tile, batch_info.row_batches); - auto dev_validity_tile_infos = make_device_uvector_async(validity_tile_infos, stream); + auto dev_validity_tile_infos = make_device_uvector_async(validity_tile_infos, stream, + rmm::mr::get_current_device_resource()); auto const validity_offset = column_info.column_starts.back(); @@ -1847,9 +1855,10 @@ std::vector> convert_to_rows( std::vector variable_width_input_data( variable_data_begin, variable_data_begin + variable_width_table.num_columns()); - auto dev_variable_input_data = make_device_uvector_async(variable_width_input_data, stream); - auto dev_variable_col_output_offsets = - make_device_uvector_async(column_info.variable_width_column_starts, stream); + auto dev_variable_input_data = make_device_uvector_async( + variable_width_input_data, stream, rmm::mr::get_current_device_resource()); + auto dev_variable_col_output_offsets = make_device_uvector_async( + column_info.variable_width_column_starts, stream, rmm::mr::get_current_device_resource()); for (uint i = 0; i < batch_info.row_batches.size(); i++) { auto const batch_row_offset = batch_info.batch_row_boundaries[i]; @@ -2076,8 +2085,10 @@ std::unique_ptr
convert_from_rows(lists_column_view const &input, // Ideally we would check that the offsets are all the same, etc. but for now this is probably // fine CUDF_EXPECTS(size_per_row * num_rows <= child.size(), "The layout of the data appears to be off"); - auto dev_col_starts = make_device_uvector_async(column_info.column_starts, stream); - auto dev_col_sizes = make_device_uvector_async(column_info.column_sizes, stream); + auto dev_col_starts = make_device_uvector_async(column_info.column_starts, stream, + rmm::mr::get_current_device_resource()); + auto dev_col_sizes = make_device_uvector_async(column_info.column_sizes, stream, + rmm::mr::get_current_device_resource()); // Allocate the columns we are going to write into std::vector> output_columns; @@ -2118,16 +2129,20 @@ std::unique_ptr
convert_from_rows(lists_column_view const &input, } } - auto dev_string_row_offsets = make_device_uvector_async(string_row_offsets, stream); - auto dev_string_lengths = make_device_uvector_async(string_lengths, stream); + auto dev_string_row_offsets = + make_device_uvector_async(string_row_offsets, stream, rmm::mr::get_current_device_resource()); + auto dev_string_lengths = + make_device_uvector_async(string_lengths, stream, rmm::mr::get_current_device_resource()); // build the row_batches from the passed in list column std::vector row_batches; row_batches.push_back( {detail::row_batch{child.size(), num_rows, device_uvector(0, stream)}}); - auto dev_output_data = make_device_uvector_async(output_data, stream); - auto dev_output_nm = make_device_uvector_async(output_nm, stream); + auto dev_output_data = + make_device_uvector_async(output_data, stream, rmm::mr::get_current_device_resource()); + auto dev_output_nm = + make_device_uvector_async(output_nm, stream, rmm::mr::get_current_device_resource()); // only ever get a single batch when going from rows, so boundaries are 0, num_rows constexpr auto num_batches = 2; @@ -2164,7 +2179,8 @@ std::unique_ptr
convert_from_rows(lists_column_view const &input, auto validity_tile_infos = detail::build_validity_tile_infos(schema.size(), num_rows, shmem_limit_per_tile, row_batches); - auto dev_validity_tile_infos = make_device_uvector_async(validity_tile_infos, stream); + auto dev_validity_tile_infos = make_device_uvector_async(validity_tile_infos, stream, + rmm::mr::get_current_device_resource()); dim3 const validity_blocks(validity_tile_infos.size()); @@ -2221,8 +2237,10 @@ std::unique_ptr
convert_from_rows(lists_column_view const &input, string_col_offsets.push_back(std::move(output_string_offsets)); string_data_cols.push_back(std::move(string_data)); } - auto dev_string_col_offsets = make_device_uvector_async(string_col_offset_ptrs, stream); - auto dev_string_data_cols = make_device_uvector_async(string_data_col_ptrs, stream); + auto dev_string_col_offsets = make_device_uvector_async(string_col_offset_ptrs, stream, + rmm::mr::get_current_device_resource()); + auto dev_string_data_cols = make_device_uvector_async(string_data_col_ptrs, stream, + rmm::mr::get_current_device_resource()); dim3 const string_blocks( std::min(std::max(MIN_STRING_BLOCKS, num_rows / NUM_STRING_ROWS_PER_BLOCK_FROM_ROWS), @@ -2274,8 +2292,10 @@ std::unique_ptr
convert_from_rows_fixed_width_optimized( // fine CUDF_EXPECTS(size_per_row * num_rows == child.size(), "The layout of the data appears to be off"); - auto dev_column_start = make_device_uvector_async(column_start, stream); - auto dev_column_size = make_device_uvector_async(column_size, stream); + auto dev_column_start = + make_device_uvector_async(column_start, stream, rmm::mr::get_current_device_resource()); + auto dev_column_size = + make_device_uvector_async(column_size, stream, rmm::mr::get_current_device_resource()); // Allocate the columns we are going to write into std::vector> output_columns; diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 7848807dab8..8e19c543ee5 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -5146,6 +5146,27 @@ void teststringReplaceThrowsException() { }); } + @Test + void teststringReplaceMulti() { + try (ColumnVector v = ColumnVector.fromStrings("Héllo", "thésssé", null, "", "ARé", "sssstrings"); + ColumnVector e_allParameters = ColumnVector.fromStrings("Hello", "theSse", null, "", "ARe", "SStrings"); + ColumnVector targets = ColumnVector.fromStrings("ss", "é"); + ColumnVector repls = ColumnVector.fromStrings("S", "e"); + ColumnVector replace_allParameters = v.stringReplace(targets, repls)) { + assertColumnsAreEqual(e_allParameters, replace_allParameters); + } + } + + @Test + void teststringReplaceMultiThrowsException() { + assertThrows(AssertionError.class, () -> { + try (ColumnVector testStrings = ColumnVector.fromStrings("Héllo", "thésé", null, "", "ARé", "strings"); + ColumnVector targets = ColumnVector.fromInts(0, 1); + ColumnVector repls = null; + ColumnVector result = testStrings.stringReplace(targets,repls)){} + }); + } + @Test void testReplaceRegex() { try (ColumnVector v = ColumnVector.fromStrings("title and Title with title", "nothing", null, "Title"); diff 
--git a/python/cudf/MANIFEST.in b/python/cudf/MANIFEST.in deleted file mode 100644 index 4d3155158f8..00000000000 --- a/python/cudf/MANIFEST.in +++ /dev/null @@ -1,16 +0,0 @@ -# Cython files -recursive-include cudf *.pxd -recursive-include cudf *.pyx - -# Typing files -recursive-include cudf *.pyi - -# C++ files -recursive-include cudf *.hpp -recursive-include udf_cpp *.hpp -recursive-include udf_cpp *.cuh - -# Build files. Don't use a recursive include on '.' in case the repo is dirty -include . CMakeLists.txt -recursive-include cudf CMakeLists.txt -recursive-include cmake * diff --git a/python/cudf/benchmarks/API/bench_dataframe.py b/python/cudf/benchmarks/API/bench_dataframe.py index 42bfa854396..28777b23583 100644 --- a/python/cudf/benchmarks/API/bench_dataframe.py +++ b/python/cudf/benchmarks/API/bench_dataframe.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. """Benchmarks of DataFrame methods.""" @@ -104,6 +104,30 @@ def bench_groupby_agg(benchmark, dataframe, agg, num_key_cols, as_index, sort): benchmark(dataframe.groupby(by=by, as_index=as_index, sort=sort).agg, agg) +@benchmark_with_object(cls="dataframe", dtype="int", nulls=False, cols=6) +@pytest.mark.parametrize( + "num_key_cols", + [2, 3, 4], +) +@pytest.mark.parametrize("use_frac", [True, False]) +@pytest.mark.parametrize("replace", [True, False]) +@pytest.mark.parametrize("target_sample_frac", [0.1, 0.5, 1]) +def bench_groupby_sample( + benchmark, dataframe, num_key_cols, use_frac, replace, target_sample_frac +): + grouper = dataframe.groupby(by=list(dataframe.columns[:num_key_cols])) + if use_frac: + kwargs = {"frac": target_sample_frac, "replace": replace} + else: + minsize = grouper.size().min() + target_size = numpy.round( + target_sample_frac * minsize, decimals=0 + ).astype(int) + kwargs = {"n": target_size, "replace": replace} + + benchmark(grouper.sample, **kwargs) + + @benchmark_with_object(cls="dataframe", dtype="int") 
@pytest.mark.parametrize("num_cols_to_sort", [1]) def bench_sort_values(benchmark, dataframe, num_cols_to_sort): diff --git a/python/cudf/cudf/_lib/cpp/io/types.pxd b/python/cudf/cudf/_lib/cpp/io/types.pxd index 21809ef7bd9..b2b0a77c45f 100644 --- a/python/cudf/cudf/_lib/cpp/io/types.pxd +++ b/python/cudf/cudf/_lib/cpp/io/types.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. from libc.stdint cimport uint8_t from libcpp cimport bool @@ -74,6 +74,7 @@ cdef extern from "cudf/io/types.hpp" \ column_in_metadata& set_decimal_precision(uint8_t precision) column_in_metadata& child(size_type i) column_in_metadata& set_output_as_binary(bool binary) + string get_name() cdef cppclass table_input_metadata: table_input_metadata() except + diff --git a/python/cudf/cudf/_lib/cpp/sorting.pxd b/python/cudf/cudf/_lib/cpp/sorting.pxd index c6c42c327ac..b210ddf81dd 100644 --- a/python/cudf/cudf/_lib/cpp/sorting.pxd +++ b/python/cudf/cudf/_lib/cpp/sorting.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. from libcpp cimport bool from libcpp.memory cimport unique_ptr @@ -38,3 +38,10 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil: const table_view& table, vector[libcudf_types.order] column_order, vector[libcudf_types.null_order] null_precedence) except + + + cdef unique_ptr[table] segmented_sort_by_key( + const table_view& values, + const table_view& keys, + const column_view& segment_offsets, + vector[libcudf_types.order] column_order, + vector[libcudf_types.null_order] null_precedence) except + diff --git a/python/cudf/cudf/_lib/cpp/types.pxd b/python/cudf/cudf/_lib/cpp/types.pxd index b1a257feedf..e4106ffb99d 100644 --- a/python/cudf/cudf/_lib/cpp/types.pxd +++ b/python/cudf/cudf/_lib/cpp/types.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. 
from libc.stdint cimport int32_t, uint32_t @@ -47,8 +47,10 @@ cdef extern from "cudf/types.hpp" namespace "cudf" nogil: UNEQUAL "cudf::null_equality::UNEQUAL" ctypedef enum nan_equality "cudf::nan_equality": + # These names differ from the C++ names due to Cython warnings if + # "UNEQUAL" is declared by both null_equality and nan_equality. ALL_EQUAL "cudf::nan_equality::ALL_EQUAL" - UNEQUAL "cudf::nan_equality::UNEQUAL" + NANS_UNEQUAL "cudf::nan_equality::UNEQUAL" ctypedef enum type_id "cudf::type_id": EMPTY "cudf::type_id::EMPTY" diff --git a/python/cudf/cudf/_lib/interop.pyx b/python/cudf/cudf/_lib/interop.pyx index 92840561563..c5d8c48fa2c 100644 --- a/python/cudf/cudf/_lib/interop.pyx +++ b/python/cudf/cudf/_lib/interop.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. from cpython cimport pycapsule from libcpp.memory cimport shared_ptr, unique_ptr @@ -70,7 +70,7 @@ def to_dlpack(list source_columns): ) -cdef void dlmanaged_tensor_pycapsule_deleter(object pycap_obj): +cdef void dlmanaged_tensor_pycapsule_deleter(object pycap_obj) noexcept: cdef DLManagedTensor* dlpack_tensor = 0 try: dlpack_tensor = pycapsule.PyCapsule_GetPointer( diff --git a/python/cudf/cudf/_lib/lists.pyx b/python/cudf/cudf/_lib/lists.pyx index 47e9dccc8e6..199641fd2ce 100644 --- a/python/cudf/cudf/_lib/lists.pyx +++ b/python/cudf/cudf/_lib/lists.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# Copyright (c) 2021-2023, NVIDIA CORPORATION. 
from cudf.core.buffer import acquire_spill_lock @@ -84,7 +84,7 @@ def distinct(Column col, bool nulls_equal, bool nans_all_equal): null_equality.EQUAL if nulls_equal else null_equality.UNEQUAL ) cdef nan_equality c_nans_equal = ( - nan_equality.ALL_EQUAL if nans_all_equal else nan_equality.UNEQUAL + nan_equality.ALL_EQUAL if nans_all_equal else nan_equality.NANS_UNEQUAL ) cdef unique_ptr[column] c_result diff --git a/python/cudf/cudf/_lib/parquet.pyx b/python/cudf/cudf/_lib/parquet.pyx index 464d9243408..923f5c4089f 100644 --- a/python/cudf/cudf/_lib/parquet.pyx +++ b/python/cudf/cudf/_lib/parquet.pyx @@ -321,7 +321,8 @@ def write_parquet( object row_group_size_rows=None, object max_page_size_bytes=None, object max_page_size_rows=None, - object partitions_info=None + object partitions_info=None, + object force_nullable_schema=False, ): """ Cython function to call into libcudf API, see `write_parquet`. @@ -364,7 +365,9 @@ def write_parquet( tbl_meta.get().column_metadata[i].set_name(name.encode()) _set_col_metadata( - table[name]._column, tbl_meta.get().column_metadata[i] + table[name]._column, + tbl_meta.get().column_metadata[i], + force_nullable_schema ) cdef map[string, string] tmp_user_data @@ -597,7 +600,8 @@ cdef class ParquetWriter: for i, name in enumerate(table._column_names, num_index_cols_meta): self.tbl_meta.get().column_metadata[i].set_name(name.encode()) _set_col_metadata( - table[name]._column, self.tbl_meta.get().column_metadata[i] + table[name]._column, + self.tbl_meta.get().column_metadata[i], ) index = ( @@ -675,15 +679,32 @@ cdef cudf_io_types.compression_type _get_comp_type(object compression): raise ValueError("Unsupported `compression` type") -cdef _set_col_metadata(Column col, column_in_metadata& col_meta): +cdef _set_col_metadata( + Column col, + column_in_metadata& col_meta, + bool force_nullable_schema=False, +): + if force_nullable_schema: + # Only set nullability if `force_nullable_schema` + # is true. 
+ col_meta.set_nullability(True) + if is_struct_dtype(col): for i, (child_col, name) in enumerate( zip(col.children, list(col.dtype.fields)) ): col_meta.child(i).set_name(name.encode()) - _set_col_metadata(child_col, col_meta.child(i)) + _set_col_metadata( + child_col, + col_meta.child(i), + force_nullable_schema + ) elif is_list_dtype(col): - _set_col_metadata(col.children[1], col_meta.child(1)) + _set_col_metadata( + col.children[1], + col_meta.child(1), + force_nullable_schema + ) else: if is_decimal_dtype(col): col_meta.set_decimal_precision(col.dtype.precision) diff --git a/python/cudf/cudf/_lib/sort.pyx b/python/cudf/cudf/_lib/sort.pyx index 3b96cc618dd..3c3f8cabda6 100644 --- a/python/cudf/cudf/_lib/sort.pyx +++ b/python/cudf/cudf/_lib/sort.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. from cudf.core.buffer import acquire_spill_lock @@ -18,11 +18,13 @@ from cudf._lib.cpp.search cimport lower_bound, upper_bound from cudf._lib.cpp.sorting cimport ( is_sorted as cpp_is_sorted, rank, + segmented_sort_by_key as cpp_segmented_sort_by_key, sorted_order, ) +from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view from cudf._lib.cpp.types cimport null_order, null_policy, order -from cudf._lib.utils cimport table_view_from_columns +from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns @acquire_spill_lock() @@ -143,6 +145,67 @@ def order_by(list columns_from_table, object ascending, str na_position): return Column.from_unique_ptr(move(c_result)) +def segmented_sort_by_key( + list values, + list keys, + Column segment_offsets, + list column_order=None, + list null_precedence=None, +): + """ + Sort segments of a table by given keys + + Parameters + ---------- + values : list[Column] + Columns of the table which will be sorted + keys : list[Column] + Columns making up the sort key + offsets : Column + Segment offsets + column_order : 
list[bool], optional + Sequence of boolean values which correspond to each column in + keys providing the sort order (default all True). + With True <=> ascending; False <=> descending. + null_precedence : list[str], optional + Sequence of "first" or "last" values (default "first") + indicating the position of null values when sorting the keys. + + Returns + ------- + list[Column] + list of value columns sorted by keys + """ + cdef table_view values_view = table_view_from_columns(values) + cdef table_view keys_view = table_view_from_columns(keys) + cdef column_view offsets_view = segment_offsets.view() + cdef vector[order] c_column_order + cdef vector[null_order] c_null_precedence + cdef unique_ptr[table] result + ncol = len(values) + column_order = column_order or [True] * ncol + null_precedence = null_precedence or ["first"] * ncol + for asc, null in zip(column_order, null_precedence): + c_column_order.push_back(order.ASCENDING if asc else order.DESCENDING) + if asc ^ (null == "first"): + c_null_precedence.push_back(null_order.AFTER) + elif asc ^ (null == "last"): + c_null_precedence.push_back(null_order.BEFORE) + else: + raise ValueError(f"Invalid null precedence {null}") + with nogil: + result = move( + cpp_segmented_sort_by_key( + values_view, + keys_view, + offsets_view, + c_column_order, + c_null_precedence, + ) + ) + return columns_from_unique_ptr(move(result)) + + @acquire_spill_lock() def digitize(list source_columns, list bins, bool right=False): """ diff --git a/python/cudf/cudf/core/cut.py b/python/cudf/cudf/core/cut.py index 6590cf2940d..ccf730c91fb 100644 --- a/python/cudf/cudf/core/cut.py +++ b/python/cudf/cudf/core/cut.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# Copyright (c) 2021-2023, NVIDIA CORPORATION. 
from collections import abc @@ -279,12 +279,8 @@ def cut( if labels is not None: if labels is not ordered and len(set(labels)) != len(labels): # when we have duplicate labels and ordered is False, we - # should allow duplicate categories. The categories are - # returned in order - new_data = [interval_labels[i][0] for i in index_labels.values] - return cudf.CategoricalIndex( - new_data, categories=sorted(set(labels)), ordered=False - ) + # should allow duplicate categories. + return interval_labels[index_labels] col = build_categorical_column( categories=interval_labels, diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index e50c324a8f4..9d14d4bde7f 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -259,9 +259,12 @@ def _getitem_tuple_arg(self, arg): else: if isinstance(arg, tuple): - return columns_df.index._get_row_major(columns_df, arg[0]) + row_arg = arg[0] + elif is_scalar(arg): + row_arg = (arg,) else: - return columns_df.index._get_row_major(columns_df, arg) + row_arg = arg + return columns_df.index._get_row_major(columns_df, row_arg) else: if isinstance(arg[0], slice): out = _get_label_range_or_mask( @@ -904,14 +907,24 @@ def _init_from_dict_like( if index is None: num_rows = 0 if data: - col_name = next(iter(data)) - if is_scalar(data[col_name]): - num_rows = num_rows or 1 - else: - data[col_name] = column.as_column( - data[col_name], nan_as_null=nan_as_null + keys, values, lengths = zip( + *( + (k, v, 1) + if is_scalar(v) + else ( + k, + vc := as_column(v, nan_as_null=nan_as_null), + len(vc), + ) + for k, v in data.items() ) - num_rows = len(data[col_name]) + ) + data = dict(zip(keys, values)) + try: + (num_rows,) = (set(lengths) - {1}) or {1} + except ValueError: + raise ValueError("All arrays must be the same length") + self._index = RangeIndex(0, num_rows) else: self._index = as_index(index) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 
ea6a6de0b2b..d8b9ee4d006 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -485,9 +485,20 @@ def get_column_values_na(col): ) if dtype is None: - dtype = find_common_type( - [col.dtype for col in self._data.values()] - ) + dtypes = [col.dtype for col in self._data.values()] + for dtype in dtypes: + if isinstance( + dtype, + ( + cudf.ListDtype, + cudf.core.dtypes.DecimalDtype, + cudf.StructDtype, + ), + ): + raise NotImplementedError( + f"{dtype} cannot be exposed as a cupy array" + ) + dtype = find_common_type(dtypes) matrix = make_empty_matrix( shape=(len(self), ncol), dtype=dtype, order="F" diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 8ff3e17d6ff..cb4c0f6b48b 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -6,7 +6,7 @@ import warnings from collections import abc from functools import cached_property -from typing import Any, Iterable, List, Tuple, Union +from typing import Any, Iterable, List, Optional, Tuple, Union import cupy as cp import numpy as np @@ -16,6 +16,8 @@ from cudf._lib import groupby as libgroupby from cudf._lib.null_mask import bitmask_or from cudf._lib.reshape import interleave_columns +from cudf._lib.sort import segmented_sort_by_key +from cudf._lib.types import size_type_dtype from cudf._typing import AggType, DataFrameOrSeries, MultiColumnAggType from cudf.api.types import is_list_like from cudf.core.abc import Serializable @@ -608,6 +610,177 @@ def _scan(self, op: str, *args, **kwargs): aggregate = agg + def _head_tail(self, n, *, take_head: bool, preserve_order: bool): + """Return the head or tail of each group + + Parameters + ---------- + n + Number of entries to include (if negative, number of + entries to exclude) + take_head + Do we want the head or the tail of the group + preserve_order + If True, return the n rows from each group in original + dataframe order (this mimics pandas behavior 
though is + more expensive). + + Returns + ------- + New DataFrame or Series + + Notes + ----- + Unlike pandas, this returns an object in group order, not + original order, unless ``preserve_order`` is ``True``. + """ + # A more memory-efficient implementation would merge the take + # into the grouping, but that probably requires a new + # aggregation scheme in libcudf. This is probably "fast + # enough" for most reasonable input sizes. + _, offsets, _, group_values = self._grouped() + group_offsets = np.asarray(offsets, dtype=size_type_dtype) + size_per_group = np.diff(group_offsets) + # "Out of bounds" n for the group size either means no entries + # (negative) or all the entries (positive) + if n < 0: + size_per_group = np.maximum( + size_per_group + n, 0, out=size_per_group + ) + else: + size_per_group = np.minimum(size_per_group, n, out=size_per_group) + if take_head: + group_offsets = group_offsets[:-1] + else: + group_offsets = group_offsets[1:] - size_per_group + to_take = np.arange(size_per_group.sum(), dtype=size_type_dtype) + fixup = np.empty_like(size_per_group) + fixup[0] = 0 + np.cumsum(size_per_group[:-1], out=fixup[1:]) + to_take += np.repeat(group_offsets - fixup, size_per_group) + to_take = as_column(to_take) + result = group_values.iloc[to_take] + if preserve_order: + # Can't use _mimic_pandas_order because we need to + # subsample the gather map from the full input ordering, + # rather than permuting the gather map of the output. + _, (ordering,), _ = self._groupby.groups( + [arange(0, self.obj._data.nrows)] + ) + # Invert permutation from original order to groups on the + # subset of entries we want. 
+ gather_map = ordering.take(to_take).argsort() + return result.take(gather_map) + else: + return result + + @_cudf_nvtx_annotate + def head(self, n: int = 5, *, preserve_order: bool = True): + """Return first n rows of each group + + Parameters + ---------- + n + If positive: number of entries to include from start of group + If negative: number of entries to exclude from end of group + + preserve_order + If True (default), return the n rows from each group in + original dataframe order (this mimics pandas behavior + though is more expensive). If you don't need rows in + original dataframe order you will see a performance + improvement by setting ``preserve_order=False``. In both + cases, the original index is preserved, so ``.loc``-based + indexing will work identically. + + Returns + ------- + Series or DataFrame + Subset of the original grouped object as determined by n + + See Also + -------- + .tail + + Examples + -------- + >>> df = cudf.DataFrame( + ... { + ... "a": [1, 0, 1, 2, 2, 1, 3, 2, 3, 3, 3], + ... "b": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + ... } + ... ) + >>> df.groupby("a").head(1) + a b + 0 1 0 + 1 0 1 + 3 2 3 + 6 3 6 + >>> df.groupby("a").head(-2) + a b + 0 1 0 + 3 2 3 + 6 3 6 + 8 3 8 + """ + return self._head_tail( + n, take_head=True, preserve_order=preserve_order + ) + + @_cudf_nvtx_annotate + def tail(self, n: int = 5, *, preserve_order: bool = True): + """Return last n rows of each group + + Parameters + ---------- + n + If positive: number of entries to include from end of group + If negative: number of entries to exclude from start of group + + preserve_order + If True (default), return the n rows from each group in + original dataframe order (this mimics pandas behavior + though is more expensive). If you don't need rows in + original dataframe order you will see a performance + improvement by setting ``preserve_order=False``. In both + cases, the original index is preserved, so ``.loc``-based + indexing will work identically. 
+ + Returns + ------- + Series or DataFrame + Subset of the original grouped object as determined by n + + + See Also + -------- + .head + + Examples + -------- + >>> df = cudf.DataFrame( + ... { + ... "a": [1, 0, 1, 2, 2, 1, 3, 2, 3, 3, 3], + ... "b": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + ... } + ... ) + >>> df.groupby("a").tail(1) + a b + 1 0 1 + 5 1 5 + 7 2 7 + 10 3 10 + >>> df.groupby("a").tail(-2) + a b + 5 1 5 + 7 2 7 + 9 3 9 + 10 3 10 + """ + return self._head_tail( + n, take_head=False, preserve_order=preserve_order + ) + def nth(self, n): """ Return the nth row from each group. @@ -699,6 +872,134 @@ def ngroup(self, ascending=True): group_ids._index = index return self._broadcast(group_ids) + def sample( + self, + n: Optional[int] = None, + frac: Optional[float] = None, + replace: bool = False, + weights: Union[abc.Sequence, "cudf.Series", None] = None, + random_state: Union[np.random.RandomState, int, None] = None, + ): + """Return a random sample of items in each group. + + Parameters + ---------- + n + Number of items to return for each group, if sampling + without replacement must be at most the size of the + smallest group. Cannot be used with frac. Default is + ``n=1`` if frac is None. + frac + Fraction of items to return. Cannot be used with n. + replace + Should sampling occur with or without replacement? + weights + Sampling probability for each element. Must be the same + length as the grouped frame. Not currently supported. + random_state + Seed for random number generation. + + Returns + ------- + New dataframe or series with samples of appropriate size drawn + from each group. + + """ + if weights is not None: + # To implement this case again needs different algorithms + # in both cases. + # + # Without replacement, use the weighted reservoir sampling + # approach of Efraimidas and Spirakis (2006) + # https://doi.org/10.1016/j.ipl.2005.11.003, essentially, + # do a segmented argsort sorting on weight-scaled + # logarithmic deviates. 
See + # https://timvieira.github.io/blog/post/ + # 2019/09/16/algorithms-for-sampling-without-replacement/ + # + # With replacement is trickier, one might be able to use + # the alias method, otherwise we're back to bucketed + # rejection sampling. + raise NotImplementedError("Sampling with weights is not supported") + if frac is not None and n is not None: + raise ValueError("Cannot supply both of frac and n") + elif n is None and frac is None: + n = 1 + elif frac is not None and not (0 <= frac <= 1): + raise ValueError( + "Sampling with fraction must provide fraction in " + f"[0, 1], got {frac=}" + ) + # TODO: handle random states properly. + if random_state is not None and not isinstance(random_state, int): + raise NotImplementedError( + "Only integer seeds are supported for random_state " + "in this case" + ) + # Get the groups + # TODO: convince Cython to convert the std::vector offsets + # into a numpy array directly, rather than a list. + # TODO: this uses the sort-based groupby, could one use hash-based? + _, offsets, _, group_values = self._grouped() + group_offsets = np.asarray(offsets, dtype=size_type_dtype) + size_per_group = np.diff(group_offsets) + if n is not None: + samples_per_group = np.broadcast_to( + size_type_dtype.type(n), size_per_group.shape + ) + if not replace and (minsize := size_per_group.min()) < n: + raise ValueError( + f"Cannot sample {n=} without replacement, " + f"smallest group is {minsize}" + ) + else: + # Pandas uses round-to-nearest, ties to even to + # pick sample sizes for the fractional case (unlike IEEE + # which is round-to-nearest, ties to sgn(x) * inf). + samples_per_group = np.round( + size_per_group * frac, decimals=0 + ).astype(size_type_dtype) + if replace: + # We would prefer to use cupy here, but their rng.integers + # interface doesn't take array-based low and high + # arguments. 
+ low = 0 + high = np.repeat(size_per_group, samples_per_group) + rng = np.random.default_rng(seed=random_state) + indices = rng.integers(low, high, dtype=size_type_dtype) + indices += np.repeat(group_offsets[:-1], samples_per_group) + else: + # Approach: do a segmented argsort of the index array and take + # the first samples_per_group entries from sorted array. + # We will shuffle the group indices and then pick them out + # from the grouped dataframe index. + nrows = len(group_values) + indices = cp.arange(nrows, dtype=size_type_dtype) + if len(size_per_group) < 500: + # Empirically shuffling with cupy is faster at this scale + rs = cp.random.get_random_state() + rs.seed(seed=random_state) + for off, size in zip(group_offsets, size_per_group): + rs.shuffle(indices[off : off + size]) + else: + rng = cp.random.default_rng(seed=random_state) + (indices,) = segmented_sort_by_key( + [as_column(indices)], + [as_column(rng.random(size=nrows))], + as_column(group_offsets), + [], + [], + ) + indices = cp.asarray(indices.data_array_view(mode="read")) + # Which indices are we going to want? + want = np.arange(samples_per_group.sum(), dtype=size_type_dtype) + scan = np.empty_like(samples_per_group) + scan[0] = 0 + np.cumsum(samples_per_group[:-1], out=scan[1:]) + want += np.repeat(group_offsets[:-1] - scan, samples_per_group) + indices = indices[want] + return group_values.iloc[indices] + def serialize(self): header = {} frames = [] @@ -977,13 +1278,14 @@ def mult(df): ``engine='jit'`` may be used to accelerate certain functions, initially those that contain reductions and arithmetic operations between results of those reductions: + >>> import cudf >>> df = cudf.DataFrame({'a':[1,1,2,2,3,3], 'b':[1,2,3,4,5,6]}) >>> df.groupby('a').apply( - ... lambda group: group['b'].max() - group['b'].min(), - ... engine='jit' + ... lambda group: group['b'].max() - group['b'].min(), + ... engine='jit' ... 
) - a None + a None 0 1 1 1 2 1 2 3 1 @@ -2017,7 +2319,14 @@ def _handle_mapping(self, by): self._handle_series(by) def _handle_label(self, by): - self._key_columns.append(self._obj._data[by]) + try: + self._key_columns.append(self._obj._data[by]) + except KeyError as e: + # `by` can be index name(label) too. + if by in self._obj._index.names: + self._key_columns.append(self._obj._index._data[by]) + else: + raise e self.names.append(by) self._named_columns.append(by) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 413e005b798..d1408fec160 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -1403,7 +1403,7 @@ def __repr__(self): @_cudf_nvtx_annotate def __getitem__(self, index): res = self._get_elements_from_column(index) - if not isinstance(index, int): + if isinstance(res, ColumnBase): res = as_index(res) res.name = self.name return res diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 79927c60a85..8ec08b7c92a 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -250,7 +250,11 @@ def __getitem__(self, arg: Any) -> Union[ScalarLike, DataFrameOrSeries]: if isinstance(self._frame.index, cudf.MultiIndex) and not isinstance( arg, cudf.MultiIndex ): - result = self._frame.index._get_row_major(self._frame, arg) + if is_scalar(arg): + row_arg = (arg,) + else: + row_arg = arg + result = self._frame.index._get_row_major(self._frame, row_arg) if ( isinstance(arg, tuple) and len(arg) == self._frame._index.nlevels diff --git a/python/cudf/cudf/core/udf/groupby_utils.py b/python/cudf/cudf/core/udf/groupby_utils.py index dc31cf43292..ebf8c677e55 100644 --- a/python/cudf/cudf/core/udf/groupby_utils.py +++ b/python/cudf/cudf/core/udf/groupby_utils.py @@ -19,11 +19,13 @@ groupby_apply_kernel_template, ) from cudf.core.udf.utils import ( + _generate_cache_key, _get_extensionty_size, _get_kernel, _get_udf_return_type, _supported_cols_from_frame, 
_supported_dtypes_from_frame, + precompiled, ) from cudf.utils.utils import _cudf_nvtx_annotate @@ -147,12 +149,19 @@ def jit_groupby_apply(offsets, grouped_values, function, *args): offsets = cp.asarray(offsets) ngroups = len(offsets) - 1 - kernel, return_type = _get_groupby_apply_kernel( - grouped_values, function, args + cache_key = _generate_cache_key( + grouped_values, function, suffix="__GROUPBY_APPLY_UDF" ) - return_type = numpy_support.as_dtype(return_type) + if cache_key not in precompiled: + precompiled[cache_key] = _get_groupby_apply_kernel( + grouped_values, function, args + ) + kernel, return_type = precompiled[cache_key] + + return_type = numpy_support.as_dtype(return_type) output = cudf.core.column.column_empty(ngroups, dtype=return_type) + launch_args = [ offsets, output, diff --git a/python/cudf/cudf/core/udf/utils.py b/python/cudf/cudf/core/udf/utils.py index edc1a16353f..ed0c3332499 100644 --- a/python/cudf/cudf/core/udf/utils.py +++ b/python/cudf/cudf/core/udf/utils.py @@ -245,7 +245,7 @@ def _mask_get(mask, pos): return (mask[pos // MASK_BITSIZE] >> (pos % MASK_BITSIZE)) & 1 -def _generate_cache_key(frame, func: Callable): +def _generate_cache_key(frame, func: Callable, suffix="__APPLY_UDF"): """Create a cache key that uniquely identifies a compilation. 
A new compilation is needed any time any of the following things change: @@ -259,6 +259,7 @@ def _generate_cache_key(frame, func: Callable): ), *(col.mask is None for col in frame._data.values()), *frame._data.keys(), + suffix, ) diff --git a/python/cudf/cudf/core/window/rolling.py b/python/cudf/cudf/core/window/rolling.py index cac4774400a..8a92ea86d57 100644 --- a/python/cudf/cudf/core/window/rolling.py +++ b/python/cudf/cudf/core/window/rolling.py @@ -557,5 +557,6 @@ def _apply_agg(self, agg_name): ) ) - result = super()._apply_agg(agg_name).set_index(index) + result = super()._apply_agg(agg_name) + result.index = index return result diff --git a/python/cudf/cudf/io/parquet.py b/python/cudf/cudf/io/parquet.py index ca4fb103ee8..3e1a4b1f024 100644 --- a/python/cudf/cudf/io/parquet.py +++ b/python/cudf/cudf/io/parquet.py @@ -15,7 +15,7 @@ import cudf from cudf._lib import parquet as libparquet from cudf.api.types import is_list_like -from cudf.core.column import as_column, build_categorical_column +from cudf.core.column import build_categorical_column, column_empty, full from cudf.utils import ioutils from cudf.utils.utils import _cudf_nvtx_annotate @@ -60,6 +60,7 @@ def _write_parquet( max_page_size_rows=None, partitions_info=None, storage_options=None, + force_nullable_schema=False, ): if is_list_like(paths) and len(paths) > 1: if partitions_info is None: @@ -89,6 +90,7 @@ def _write_parquet( "max_page_size_bytes": max_page_size_bytes, "max_page_size_rows": max_page_size_rows, "partitions_info": partitions_info, + "force_nullable_schema": force_nullable_schema, } if all(ioutils.is_fsspec_open_file(buf) for buf in paths_or_bufs): with ExitStack() as stack: @@ -126,6 +128,7 @@ def write_to_dataset( max_page_size_bytes=None, max_page_size_rows=None, storage_options=None, + force_nullable_schema=False, ): """Wraps `to_parquet` to write partitioned Parquet datasets. 
For each combination of partition group and value, @@ -179,7 +182,6 @@ def write_to_dataset( max_page_size_rows: integer or None, default None Maximum number of rows of each page of the output. If None, 20000 will be used. - storage_options : dict, optional, default None Extra options that make sense for a particular storage connection, e.g. host, port, username, password, etc. For HTTP(S) URLs the @@ -187,6 +189,10 @@ def write_to_dataset( header options. For other URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more details. + force_nullable_schema : bool, default False. + If True, writes all columns as `null` in schema. + If False, columns are written as `null` if they contain null values, + otherwise as `not null`. """ fs = ioutils._ensure_filesystem(fs, root_path, storage_options) @@ -224,6 +230,7 @@ def write_to_dataset( row_group_size_rows=row_group_size_rows, max_page_size_bytes=max_page_size_bytes, max_page_size_rows=max_page_size_rows, + force_nullable_schema=force_nullable_schema, ) else: @@ -244,6 +251,7 @@ def write_to_dataset( row_group_size_rows=row_group_size_rows, max_page_size_bytes=max_page_size_bytes, max_page_size_rows=max_page_size_rows, + force_nullable_schema=force_nullable_schema, ) return metadata @@ -609,11 +617,12 @@ def _parquet_to_frame( ) # Add partition columns to the last DataFrame for (name, value) in part_key: + _len = len(dfs[-1]) if partition_categories and name in partition_categories: # Build the categorical column from `codes` - codes = as_column( - partition_categories[name].index(value), - length=len(dfs[-1]), + codes = full( + size=_len, + fill_value=partition_categories[name].index(value), ) dfs[-1][name] = build_categorical_column( categories=partition_categories[name], @@ -625,14 +634,23 @@ def _parquet_to_frame( else: # Not building categorical columns, so # `value` is already what we want - if partition_meta is not None: - 
dfs[-1][name] = as_column( - value, - length=len(dfs[-1]), - dtype=partition_meta[name].dtype, + _dtype = ( + partition_meta[name].dtype + if partition_meta is not None + else None + ) + if pd.isna(value): + dfs[-1][name] = column_empty( + row_count=_len, + dtype=_dtype, + masked=True, ) else: - dfs[-1][name] = as_column(value, length=len(dfs[-1])) + dfs[-1][name] = full( + size=_len, + fill_value=value, + dtype=_dtype, + ) # Concatenate dfs and return. # Assume we can ignore the index if it has no name. @@ -702,6 +720,7 @@ def to_parquet( max_page_size_rows=None, storage_options=None, return_metadata=False, + force_nullable_schema=False, *args, **kwargs, ): @@ -750,6 +769,7 @@ def to_parquet( max_page_size_rows=max_page_size_rows, return_metadata=return_metadata, storage_options=storage_options, + force_nullable_schema=force_nullable_schema, ) partition_info = ( @@ -774,6 +794,7 @@ def to_parquet( max_page_size_rows=max_page_size_rows, partitions_info=partition_info, storage_options=storage_options, + force_nullable_schema=force_nullable_schema, ) else: @@ -886,8 +907,11 @@ def _get_groups_and_offsets( grouped_df.reset_index(drop=True, inplace=True) grouped_df.drop(columns=partition_cols, inplace=True) # Copy the entire keys df in one operation rather than using iloc - part_names = part_keys.to_pandas().unique().to_frame(index=False) - + part_names = ( + part_keys.take(part_offsets[:-1]) + .to_pandas(nullable=True) + .to_frame(index=False) + ) return part_names, grouped_df, part_offsets diff --git a/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py b/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py index 9eb01ae31b4..ea23587ea70 100644 --- a/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py +++ b/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# Copyright (c) 2021-2023, NVIDIA CORPORATION. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import datetime import io +import pathlib +from typing import Optional import fastavro import pytest @@ -22,8 +25,7 @@ from cudf.testing.dataset_generator import rand_dataframe -def cudf_from_avro_util(schema, records): - +def cudf_from_avro_util(schema: dict, records: list) -> cudf.DataFrame: schema = [] if schema is None else fastavro.parse_schema(schema) buffer = io.BytesIO() fastavro.writer(buffer, schema, records) @@ -244,3 +246,201 @@ def test_avro_compression(rows, codec): got_df = cudf.read_avro(buffer) assert_eq(expected_df, got_df) + + +avro_logical_type_params = [ + # (avro logical type, avro primitive type, cudf expected dtype) + ("date", "int", "datetime64[s]"), +] + + +@pytest.mark.parametrize( + "logical_type, primitive_type, expected_dtype", avro_logical_type_params +) +@pytest.mark.parametrize("namespace", [None, "root_ns"]) +@pytest.mark.parametrize("nullable", [True, False]) +@pytest.mark.parametrize("prepend_null", [True, False]) +def test_can_detect_dtypes_from_avro_logical_type( + logical_type, + primitive_type, + expected_dtype, + namespace, + nullable, + prepend_null, +): + avro_type = [{"logicalType": logical_type, "type": primitive_type}] + if nullable: + if prepend_null: + avro_type.insert(0, "null") + else: + avro_type.append("null") + + schema = fastavro.parse_schema( + { + "type": "record", + "name": "test", + "namespace": namespace, + "fields": [{"name": "prop", "type": avro_type}], + } + ) + + actual = cudf_from_avro_util(schema, []) + + expected = cudf.DataFrame( + {"prop": cudf.Series(None, None, expected_dtype)} + ) + + assert_eq(expected, actual) + + +def get_days_from_epoch(date: Optional[datetime.date]) -> Optional[int]: + if date is None: + return None + return (date - 
datetime.date(1970, 1, 1)).days + + +@pytest.mark.parametrize("namespace", [None, "root_ns"]) +@pytest.mark.parametrize("nullable", [True, False]) +@pytest.mark.parametrize("prepend_null", [True, False]) +def test_can_parse_avro_date_logical_type(namespace, nullable, prepend_null): + + avro_type = {"logicalType": "date", "type": "int"} + if nullable: + if prepend_null: + avro_type = ["null", avro_type] + else: + avro_type = [avro_type, "null"] + + schema_dict = { + "type": "record", + "name": "test", + "fields": [ + {"name": "o_date", "type": avro_type}, + ], + } + + if namespace: + schema_dict["namespace"] = namespace + + schema = fastavro.parse_schema(schema_dict) + + # Insert some None values in no particular order. These will get converted + # into avro "nulls" by the fastavro writer (or filtered out if we're not + # nullable). The first and last dates are epoch min/max values, the rest + # are arbitrarily chosen. + dates = [ + None, + datetime.date(1970, 1, 1), + datetime.date(1970, 1, 2), + datetime.date(1981, 10, 25), + None, + None, + datetime.date(2012, 5, 18), + None, + datetime.date(2019, 9, 3), + None, + datetime.date(9999, 12, 31), + ] + + if not nullable: + dates = [date for date in dates if date is not None] + + days_from_epoch = [get_days_from_epoch(date) for date in dates] + + records = [{"o_date": day} for day in days_from_epoch] + + actual = cudf_from_avro_util(schema, records) + + expected = cudf.DataFrame( + {"o_date": cudf.Series(dates, dtype="datetime64[s]")} + ) + + assert_eq(expected, actual) + + +def test_alltypes_plain_avro(): + # During development of the logical type support, the Java avro tests were + # triggering CUDA kernel crashes (null pointer dereferences). We were able + # to replicate the behavior in a C++ test case, and then subsequently came + # up with this Python unit test to also trigger the problematic code path. 
+ # + # So, unlike the other tests, this test is inherently reactive in nature, + # added simply to verify we fixed the problematic code path that was + # causing CUDA kernel crashes. + # + # See https://github.com/rapidsai/cudf/pull/12788#issuecomment-1468822875 + # for more information. + relpath = "../../../../java/src/test/resources/alltypes_plain.avro" + path = pathlib.Path(__file__).parent.joinpath(relpath).resolve() + assert path.is_file(), path + path = str(path) + + with open(path, "rb") as f: + reader = fastavro.reader(f) + records = [record for record in reader] + + # For reference: + # + # >>> from pprint import pprint + # >>> pprint(reader.writer_schema) + # {'fields': [{'name': 'id', 'type': ['int', 'null']}, + # {'name': 'bool_col', 'type': ['boolean', 'null']}, + # {'name': 'tinyint_col', 'type': ['int', 'null']}, + # {'name': 'smallint_col', 'type': ['int', 'null']}, + # {'name': 'int_col', 'type': ['int', 'null']}, + # {'name': 'bigint_col', 'type': ['long', 'null']}, + # {'name': 'float_col', 'type': ['float', 'null']}, + # {'name': 'double_col', 'type': ['double', 'null']}, + # {'name': 'date_string_col', 'type': ['bytes', 'null']}, + # {'name': 'string_col', 'type': ['bytes', 'null']}, + # {'name': 'timestamp_col', + # 'type': [{'logicalType': 'timestamp-micros', + # 'type': 'long'}, + # 'null']}], + # 'name': 'topLevelRecord', + # 'type': 'record'} + # + # >>> pprint(records[0]) + # {'bigint_col': 0, + # 'bool_col': True, + # 'date_string_col': b'03/01/09', + # 'double_col': 0.0, + # 'float_col': 0.0, + # 'id': 4, + # 'int_col': 0, + # 'smallint_col': 0, + # 'string_col': b'0', + # 'timestamp_col': datetime.datetime(2009, 3, 1, 0, 0, + # tzinfo=datetime.timezone.utc), + # 'tinyint_col': 0} + + # Nothing particularly special about these columns, other than them being + # the ones that @davidwendt used to coerce the crash. + columns = ["bool_col", "int_col", "timestamp_col"] + + # This next line would trigger the fatal CUDA kernel crash. 
+ actual = cudf.read_avro(path, columns=columns) + + # If we get here, we haven't crashed, obviously. Verify the returned data + # frame meets our expectations. We need to fiddle with the dtypes of the + # expected data frame in order to correctly match the schema definition and + # our corresponding read_avro()-returned data frame. + + data = [{column: row[column] for column in columns} for row in records] + expected = cudf.DataFrame(data) + + # The fastavro.reader supports the `'logicalType': 'timestamp-micros'` used + # by the 'timestamp_col' column, which is converted into Python + # datetime.datetime() objects (see output of pprint(records[0]) above). + # As we don't support that logical type yet in cudf, we need to convert to + # int64, then divide by 1000 to convert from nanoseconds to microseconds. + timestamps = expected["timestamp_col"].astype("int64") + timestamps //= 1000 + expected["timestamp_col"] = timestamps + + # Furthermore, we need to force the 'int_col' into an int32, per the schema + # definition. (It ends up as an int64 due to cudf.DataFrame() defaulting + # all Python int values to int64 sans a dtype= override.) 
+ expected["int_col"] = expected["int_col"].astype("int32") + + assert_eq(actual, expected) diff --git a/python/cudf/cudf/tests/test_concat.py b/python/cudf/cudf/tests/test_concat.py index 2ff0bddf1c8..910f0b9cf86 100644 --- a/python/cudf/cudf/tests/test_concat.py +++ b/python/cudf/cudf/tests/test_concat.py @@ -1869,3 +1869,16 @@ def test_concat_invalid_axis(axis): s = gd.Series([1, 2, 3]) with pytest.raises(ValueError): gd.concat([s], axis=axis) + + +@pytest.mark.parametrize( + "s1,s2", + [ + ([1, 2], [[1, 2], [3, 4]]), + ], +) +def test_concat_mixed_list_types_error(s1, s2): + s1, s2 = gd.Series(s1), gd.Series(s2) + + with pytest.raises(NotImplementedError): + gd.concat([s1, s2], ignore_index=True) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 6a79555d43e..609f5eb488b 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -10031,6 +10031,20 @@ def test_dataframe_transpose_complex_types(data): assert_eq(expected, actual) +@pytest.mark.parametrize( + "data", + [ + {"col": [{"a": 1.1}, {"a": 2.1}, {"a": 10.0}, {"a": 11.2323}, None]}, + {"a": [[{"b": 567}], None] * 10}, + {"a": [decimal.Decimal(10), decimal.Decimal(20), None]}, + ], +) +def test_dataframe_values_complex_types(data): + gdf = cudf.DataFrame(data) + with pytest.raises(NotImplementedError): + gdf.values + + def test_dataframe_from_arrow_slice(): table = pa.Table.from_pandas( pd.DataFrame.from_dict( @@ -10043,3 +10057,33 @@ def test_dataframe_from_arrow_slice(): actual = cudf.DataFrame.from_arrow(table_slice) assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "data", + [ + {"a": [1, 2, 3], "b": ["x", "y", "z"], "c": 4}, + {"c": 4, "a": [1, 2, 3], "b": ["x", "y", "z"]}, + {"a": [1, 2, 3], "c": 4}, + ], +) +def test_dataframe_init_from_scalar_and_lists(data): + actual = cudf.DataFrame(data) + expected = pd.DataFrame(data) + + assert_eq(expected, actual) + + +def 
test_dataframe_init_length_error(): + assert_exceptions_equal( + lfunc=pd.DataFrame, + rfunc=cudf.DataFrame, + lfunc_args_and_kwargs=( + [], + {"data": {"a": [1, 2, 3], "b": ["x", "y", "z", "z"], "c": 4}}, + ), + rfunc_args_and_kwargs=( + [], + {"data": {"a": [1, 2, 3], "b": ["x", "y", "z", "z"], "c": 4}}, + ), + ) diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index 0751ef7ca67..e58d70f49c7 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -1,7 +1,10 @@ # Copyright (c) 2018-2023, NVIDIA CORPORATION. +import collections import datetime import itertools +import operator +import string import textwrap from decimal import Decimal @@ -17,6 +20,7 @@ from cudf import DataFrame, Series from cudf.core._compat import PANDAS_GE_150, PANDAS_LT_140 from cudf.core.udf.groupby_typing import SUPPORTED_GROUPBY_NUMPY_TYPES +from cudf.core.udf.utils import precompiled from cudf.testing._utils import ( DATETIME_TYPES, SIGNED_TYPES, @@ -531,6 +535,42 @@ def diverging_block(grp_df): run_groupby_apply_jit_test(df, diverging_block, ["a"]) +def test_groupby_apply_caching(): + # Make sure similar functions that differ + # by simple things like constants actually + # recompile + + # begin with a clear cache + precompiled.clear() + assert precompiled.currsize == 0 + + data = cudf.DataFrame({"a": [1, 1, 1, 2, 2, 2], "b": [1, 2, 3, 4, 5, 6]}) + + def f(group): + return group["b"].mean() * 2 + + # a single run should result in a cache size of 1 + run_groupby_apply_jit_test(data, f, ["a"]) + assert precompiled.currsize == 1 + + # a second run with f should not increase the count + run_groupby_apply_jit_test(data, f, ["a"]) + assert precompiled.currsize == 1 + + # changing a constant value inside the UDF should miss + def f(group): + return group["b"].mean() * 3 + + run_groupby_apply_jit_test(data, f, ["a"]) + assert precompiled.currsize == 2 + + # changing the dtypes of the columns should miss + 
data["b"] = data["b"].astype("float64") + run_groupby_apply_jit_test(data, f, ["a"]) + + assert precompiled.currsize == 3 + + @pytest.mark.parametrize("nelem", [2, 3, 100, 500, 1000]) @pytest.mark.parametrize( "func", @@ -1474,7 +1514,6 @@ def test_grouping(grouper): @pytest.mark.parametrize("agg", [lambda x: x.count(), "count"]) @pytest.mark.parametrize("by", ["a", ["a", "b"], ["a", "c"]]) def test_groupby_count(agg, by): - pdf = pd.DataFrame( {"a": [1, 1, 1, 2, 3], "b": [1, 2, 2, 2, 1], "c": [1, 2, None, 4, 5]} ) @@ -1540,7 +1579,6 @@ def test_groupby_nth(n, by): reason="https://github.com/pandas-dev/pandas/issues/43209", ) def test_raise_data_error(): - pdf = pd.DataFrame({"a": [1, 2, 3, 4], "b": ["a", "b", "c", "d"]}) gdf = cudf.from_pandas(pdf) @@ -1551,7 +1589,6 @@ def test_raise_data_error(): def test_drop_unsupported_multi_agg(): - gdf = cudf.DataFrame( {"a": [1, 1, 2, 2], "b": [1, 2, 3, 4], "c": ["a", "b", "c", "d"]} ) @@ -2567,7 +2604,6 @@ def foo(x): ], ) def test_groupby_apply_series_args(func, args): - got = make_frame(DataFrame, 100).groupby("x").y.apply(func, *args) expect = ( make_frame(pd.DataFrame, 100) @@ -2963,3 +2999,175 @@ def test_groupby_dtypes(groups): pdf = df.to_pandas() assert_eq(pdf.groupby(groups).dtypes, df.groupby(groups).dtypes) + + +@pytest.mark.parametrize("index_names", ["a", "b", "c", ["b", "c"]]) +def test_groupby_by_index_names(index_names): + gdf = cudf.DataFrame( + {"a": [1, 2, 3, 4], "b": ["a", "b", "a", "a"], "c": [1, 1, 2, 1]} + ).set_index(index_names) + pdf = gdf.to_pandas() + + assert_groupby_results_equal( + pdf.groupby(index_names).min(), gdf.groupby(index_names).min() + ) + + +class TestSample: + @pytest.fixture(params=["default", "rangeindex", "intindex", "strindex"]) + def index(self, request): + n = 12 + if request.param == "rangeindex": + return cudf.RangeIndex(2, n + 2) + elif request.param == "intindex": + return cudf.Index( + [2, 3, 4, 1, 0, 5, 6, 8, 7, 9, 10, 13], dtype="int32" + ) + elif 
request.param == "strindex": + return cudf.StringIndex(list(string.ascii_lowercase[:n])) + elif request.param == "default": + return None + + @pytest.fixture( + params=[ + ["a", "a", "b", "b", "c", "c", "c", "d", "d", "d", "d", "d"], + [1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4], + ], + ids=["str-group", "int-group"], + ) + def df(self, index, request): + return cudf.DataFrame( + {"a": request.param, "b": request.param, "v": request.param}, + index=index, + ) + + @pytest.fixture(params=["a", ["a", "b"]], ids=["single-col", "two-col"]) + def by(self, request): + return request.param + + def expected(self, df, *, n=None, frac=None): + value_counts = collections.Counter(df.a.values_host) + if n is not None: + values = list( + itertools.chain.from_iterable( + itertools.repeat(v, n) for v in value_counts.keys() + ) + ) + elif frac is not None: + values = list( + itertools.chain.from_iterable( + itertools.repeat(v, round(count * frac)) + for v, count in value_counts.items() + ) + ) + else: + raise ValueError("Must provide either n or frac") + values = cudf.Series(sorted(values), dtype=df.a.dtype) + return cudf.DataFrame({"a": values, "b": values, "v": values}) + + @pytest.mark.parametrize("n", [None, 0, 1, 2]) + def test_constant_n_no_replace(self, df, by, n): + result = df.groupby(by).sample(n=n).sort_values("a") + n = 1 if n is None else n + assert_eq(self.expected(df, n=n), result.reset_index(drop=True)) + + def test_constant_n_no_replace_too_large_raises(self, df): + with pytest.raises(ValueError): + df.groupby("a").sample(n=3) + + @pytest.mark.parametrize("n", [1, 2, 3]) + def test_constant_n_replace(self, df, by, n): + result = df.groupby(by).sample(n=n, replace=True).sort_values("a") + assert_eq(self.expected(df, n=n), result.reset_index(drop=True)) + + def test_invalid_arguments(self, df): + with pytest.raises(ValueError): + df.groupby("a").sample(n=1, frac=0.1) + + def test_not_implemented_arguments(self, df): + with pytest.raises(NotImplementedError): + # These are 
valid weights, but we don't implement this yet. + df.groupby("a").sample(n=1, weights=[1 / len(df)] * len(df)) + + @pytest.mark.parametrize("frac", [0, 1 / 3, 1 / 2, 2 / 3, 1]) + @pytest.mark.parametrize("replace", [False, True]) + def test_fraction_rounding(self, df, by, frac, replace): + result = ( + df.groupby(by).sample(frac=frac, replace=replace).sort_values("a") + ) + assert_eq(self.expected(df, frac=frac), result.reset_index(drop=True)) + + +class TestHeadTail: + @pytest.fixture(params=[-3, -2, -1, 0, 1, 2, 3], ids=lambda n: f"{n=}") + def n(self, request): + return request.param + + @pytest.fixture( + params=[False, True], ids=["no-preserve-order", "preserve-order"] + ) + def preserve_order(self, request): + return request.param + + @pytest.fixture + def df(self): + return cudf.DataFrame( + { + "a": [1, 0, 1, 2, 2, 1, 3, 2, 3, 3, 3], + "b": [0, 1, 2, 4, 3, 5, 6, 7, 9, 8, 10], + } + ) + + @pytest.fixture(params=[True, False], ids=["head", "tail"]) + def take_head(self, request): + return request.param + + @pytest.fixture + def expected(self, df, n, take_head, preserve_order): + if n == 0: + # We'll get an empty dataframe in this case + return df._empty_like(keep_index=True) + else: + if preserve_order: + # Should match pandas here + g = df.to_pandas().groupby("a") + if take_head: + return g.head(n=n) + else: + return g.tail(n=n) + else: + # We groupby "a" which is the first column. This + # possibly relies on an implementation detail that for + # integer group keys, cudf produces groups in sorted + # (ascending) order. 
+ keyfunc = operator.itemgetter(0) + if take_head or n == 0: + # Head does group[:n] as does tail for n == 0 + slicefunc = operator.itemgetter(slice(None, n)) + else: + # Tail does group[-n:] except when n == 0 + slicefunc = operator.itemgetter( + slice(-n, None) if n else slice(0) + ) + values_to_sort = np.hstack( + [df.values_host, np.arange(len(df)).reshape(-1, 1)] + ) + expect_a, expect_b, index = zip( + *itertools.chain.from_iterable( + slicefunc(list(group)) + for _, group in itertools.groupby( + sorted(values_to_sort.tolist(), key=keyfunc), + key=keyfunc, + ) + ) + ) + return cudf.DataFrame( + {"a": expect_a, "b": expect_b}, index=index + ) + + def test_head_tail(self, df, n, take_head, expected, preserve_order): + if take_head: + actual = df.groupby("a").head(n=n, preserve_order=preserve_order) + else: + actual = df.groupby("a").tail(n=n, preserve_order=preserve_order) + assert_eq(actual, expected) diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index d043b917251..0b0c5fba7fa 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -2886,3 +2886,22 @@ def test_index_to_pandas_nullable(data, expected_dtype): expected = pd.Index(data, dtype=expected_dtype) assert_eq(pi, expected) + + +class TestIndexScalarGetItem: + @pytest.fixture( + params=[range(1, 10, 2), [1, 2, 3], ["a", "b", "c"], [1.5, 2.5, 3.5]] + ) + def index_values(self, request): + return request.param + + @pytest.fixture(params=[int, np.int8, np.int32, np.int64]) + def i(self, request): + return request.param(1) + + def test_scalar_getitem(self, index_values, i): + index = cudf.Index(index_values) + + assert not isinstance(index[i], cudf.Index) + assert index[i] == index_values[i] + assert_eq(index, index.to_pandas()) diff --git a/python/cudf/cudf/tests/test_indexing.py b/python/cudf/cudf/tests/test_indexing.py index 5012ae0979f..95936c48b7c 100644 --- a/python/cudf/cudf/tests/test_indexing.py +++ 
b/python/cudf/cudf/tests/test_indexing.py @@ -1446,6 +1446,8 @@ def test_loc_zero_dim_array(): reason="https://github.com/pandas-dev/pandas/issues/46704" ), ), + 1, + 2, ], ) def test_loc_series_multiindex(arg): diff --git a/python/cudf/cudf/tests/test_multiindex.py b/python/cudf/cudf/tests/test_multiindex.py index 0f04e8c0f2d..a0e027d4c86 100644 --- a/python/cudf/cudf/tests/test_multiindex.py +++ b/python/cudf/cudf/tests/test_multiindex.py @@ -319,6 +319,9 @@ def test_multiindex_getitem(pdf, gdf, pdfIndex): (("a", "store"), slice(None)), # return 2 rows, n-1 remaining keys = dataframe with n-k index columns ("a",), + "a", + "b", + "c", (("a",), slice(None)), # return 1 row, 0 remaining keys = dataframe with entire index ("a", "store", "storm", "smoke"), diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py index 7cc67347467..c24ff080033 100644 --- a/python/cudf/cudf/tests/test_parquet.py +++ b/python/cudf/cudf/tests/test_parquet.py @@ -2775,3 +2775,16 @@ def test_parquet_reader_unsupported_page_encoding(datadir): # expect a failure when reading the whole file with pytest.raises(RuntimeError): cudf.read_parquet(fname) + + +@pytest.mark.parametrize("data", [{"a": [1, 2, 3, 4]}, {"b": [1, None, 2, 3]}]) +@pytest.mark.parametrize("force_nullable_schema", [True, False]) +def test_parquet_writer_schema_nullability(data, force_nullable_schema): + df = cudf.DataFrame(data) + file_obj = BytesIO() + + df.to_parquet(file_obj, force_nullable_schema=force_nullable_schema) + + assert pa.parquet.read_schema(file_obj).field(0).nullable == ( + force_nullable_schema or df.isnull().any().any() + ) diff --git a/python/cudf/cudf/tests/test_rolling.py b/python/cudf/cudf/tests/test_rolling.py index 62120619d94..b4e0983a9e3 100644 --- a/python/cudf/cudf/tests/test_rolling.py +++ b/python/cudf/cudf/tests/test_rolling.py @@ -547,3 +547,13 @@ def test_rolling_indexer_support(indexer): actual = gdf.rolling(window=indexer, min_periods=2).sum() 
assert_eq(expected, actual) + + +def test_rolling_series(): + df = cudf.DataFrame({"a": range(0, 100), "b": [10, 20, 30, 40, 50] * 20}) + pdf = df.to_pandas() + + expected = pdf.groupby("b")["a"].rolling(5).mean() + actual = df.groupby("b")["a"].rolling(5).mean() + + assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/test_setitem.py b/python/cudf/cudf/tests/test_setitem.py index 4d9ffc7cd81..dd82a9244b6 100644 --- a/python/cudf/cudf/tests/test_setitem.py +++ b/python/cudf/cudf/tests/test_setitem.py @@ -353,3 +353,18 @@ def test_scatter_by_slice_with_start_and_step(): target[1::2] = source ctarget[1::2] = csource assert_eq(target, ctarget) + + +@pytest.mark.parametrize("n", [1, 3]) +def test_setitem_str_trailing_null(n): + trailing_nulls = "\x00" * n + s = cudf.Series(["a", "b", "c" + trailing_nulls]) + assert s[2] == "c" + trailing_nulls + s[0] = "a" + trailing_nulls + assert s[0] == "a" + trailing_nulls + s[1] = trailing_nulls + assert s[1] == trailing_nulls + s[0] = "" + assert s[0] == "" + s[0] = "\x00" + assert s[0] == "\x00" diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py index acf00b3a3d5..c7a8c8b4096 100644 --- a/python/cudf/cudf/utils/dtypes.py +++ b/python/cudf/cudf/utils/dtypes.py @@ -260,6 +260,15 @@ def to_cudf_compatible_scalar(val, dtype=None): ) or cudf.api.types.is_string_dtype(dtype): dtype = "str" + if isinstance(val, str) and val.endswith("\x00"): + # Numpy string dtypes are fixed width and use NULL to + # indicate the end of the string, so they cannot + # distinguish between "abc\x00" and "abc". 
+ # https://github.com/numpy/numpy/issues/20118 + # In this case, don't try going through numpy and just use + # the string value directly (cudf.DeviceScalar will DTRT) + return val + if isinstance(val, datetime.datetime): val = np.datetime64(val) elif isinstance(val, datetime.timedelta): @@ -571,6 +580,27 @@ def find_common_type(dtypes): ) else: return cudf.dtype("O") + if any(cudf.api.types.is_list_dtype(dtype) for dtype in dtypes): + if len(dtypes) == 1: + return dtypes.get(0) + else: + # TODO: As list dtypes allow casting + # to identical types, improve this logic of returning a + # common dtype, for example: + # ListDtype(int64) & ListDtype(int32) common + # dtype could be ListDtype(int64). + raise NotImplementedError( + "Finding a common type for `ListDtype` is currently " + "not supported" + ) + if any(cudf.api.types.is_struct_dtype(dtype) for dtype in dtypes): + if len(dtypes) == 1: + return dtypes.get(0) + else: + raise NotImplementedError( + "Finding a common type for `StructDtype` is currently " + "not supported" + ) # Corner case 1: # Resort to np.result_type to handle "M" and "m" types separately diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py index 5f39c8722d9..bf51b360fec 100644 --- a/python/cudf/cudf/utils/ioutils.py +++ b/python/cudf/cudf/utils/ioutils.py @@ -290,6 +290,10 @@ include the file path metadata (relative to `root_path`). To request metadata binary blob when using with ``partition_cols``, Pass ``return_metadata=True`` instead of specifying ``metadata_file_path`` +force_nullable_schema : bool, default False. + If True, writes all columns as `null` in schema. + If False, columns are written as `null` if they contain null values, + otherwise as `not null`. **kwargs Additional parameters will be passed to execution engines other than ``cudf``. 
diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index 5b259b1dc66..3b49c821eff 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -3,18 +3,17 @@ [build-system] build-backend = "setuptools.build_meta" requires = [ - "wheel", - "setuptools", - "cython>=0.29,<0.30", - "scikit-build>=0.13.1", "cmake>=3.23.1,!=3.25.0", + "cython>=0.29,<0.30", "ninja", - "numpy", - # Hard pin the patch version used during the build. - "pyarrow==10.0.1", + "numpy>=1.21", "protoc-wheel", + "pyarrow==10.0.1.*", "rmm==23.4.*", -] + "scikit-build>=0.13.1", + "setuptools", + "wheel", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [project] name = "cudf" @@ -28,22 +27,21 @@ license = { text = "Apache 2.0" } requires-python = ">=3.8" dependencies = [ "cachetools", + "cubinlinker", "cuda-python>=11.7.1,<12.0", + "cupy-cuda11x>=9.5.0,<12.0.0a0", "fsspec>=0.6.0", - "numba>=0.56.2", + "numba>=0.56.4,<0.57", "numpy>=1.21", "nvtx>=0.2.1", "packaging", "pandas>=1.3,<1.6.0dev0", "protobuf>=4.21.6,<4.22", - "typing_extensions", - # Allow floating minor versions for Arrow. - "pyarrow==10", - "rmm==23.4.*", "ptxcompiler", - "cubinlinker", - "cupy-cuda11x", -] + "pyarrow==10.*", + "rmm==23.4.*", + "typing_extensions", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
classifiers = [ "Intended Audience :: Developers", "Topic :: Database", @@ -56,18 +54,22 @@ classifiers = [ [project.optional-dependencies] test = [ + "fastavro>=0.22.9", + "hypothesis", + "mimesis>=4.1.0", + "msgpack", + "pyorc", "pytest", "pytest-benchmark", + "pytest-cases", + "pytest-cov", "pytest-xdist", - "hypothesis", - "mimesis>=4.1.0", - "fastavro>=0.22.9", "python-snappy>=0.6.0", - "pyorc", - "msgpack", + "scipy", + "tokenizers==0.13.1", "transformers==4.24.0", "tzdata", -] +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [project.urls] Homepage = "https://github.com/rapidsai/cudf" diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 8a7ebf574fe..96b91b4ccc0 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -3,8 +3,9 @@ from setuptools import find_packages from skbuild import setup +packages = find_packages(include=["cudf*", "udf_cpp*"]) setup( - include_package_data=True, - packages=find_packages(include=["cudf", "cudf.*"]), + packages=packages, + package_data={key: ["*.pxd", "*.hpp", "*.cuh"] for key in packages}, zip_safe=False, ) diff --git a/python/cudf_kafka/pyproject.toml b/python/cudf_kafka/pyproject.toml index 308a7869bc0..ccaa08eeef5 100644 --- a/python/cudf_kafka/pyproject.toml +++ b/python/cudf_kafka/pyproject.toml @@ -3,10 +3,41 @@ [build-system] requires = [ - "wheel", - "setuptools", "cython>=0.29,<0.30", + "numpy>=1.21", + "pyarrow==10.0.1.*", + "setuptools", + "wheel", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
+ +[project] +name = "cudf_kafka" +version = "23.04.00" +description = "cuDF Kafka Datasource" +readme = { file = "README.md", content-type = "text/markdown" } +authors = [ + { name = "NVIDIA Corporation" }, ] +license = { text = "Apache 2.0" } +requires-python = ">=3.8" +dependencies = [ + "confluent-kafka==1.7.0", + "cudf==23.4.*", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. + +[project.optional-dependencies] +test = [ + "pytest", + "pytest-cov", + "pytest-xdist", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. + +[project.urls] +Homepage = "https://github.com/rapidsai/cudf" +Documentation = "https://docs.rapids.ai/api/cudf/stable/" + +[tool.setuptools] +license-files = ["LICENSE"] [tool.isort] line_length = 79 diff --git a/python/cudf_kafka/setup.py b/python/cudf_kafka/setup.py index c39b65cdb55..d955d95858a 100644 --- a/python/cudf_kafka/setup.py +++ b/python/cudf_kafka/setup.py @@ -10,10 +10,6 @@ from setuptools import find_packages, setup from setuptools.extension import Extension -install_requires = ["cudf", "cython"] - -extras_require = {"test": ["pytest", "pytest-xdist"]} - cython_files = ["cudf_kafka/_lib/*.pyx"] CUDA_HOME = os.environ.get("CUDA_HOME", False) @@ -84,24 +80,8 @@ ) ] +packages = find_packages(include=["cudf_kafka*"]) setup( - name="cudf_kafka", - version="23.04.00", - description="cuDF Kafka Datasource", - url="https://github.com/rapidsai/cudf", - author="NVIDIA Corporation", - license="Apache 2.0", - classifiers=[ - "Intended Audience :: Developers", - "Topic :: Streaming", - "Topic :: Scientific/Engineering", - "Topic :: Apache Kafka", - "License :: OSI Approved :: Apache Software License", - "Programming Language :: Python", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python 
:: 3.9", - "Programming Language :: Python :: 3.10", - ], # Include the separately-compiled shared library ext_modules=cythonize( extensions, @@ -110,12 +90,7 @@ profile=False, language_level=3, embedsignature=True ), ), - packages=find_packages(include=["cudf_kafka", "cudf_kafka.*"]), - package_data=dict.fromkeys( - find_packages(include=["cudf_kafka._lib*"]), - ["*.pxd"], - ), - install_requires=install_requires, - extras_require=extras_require, + packages=packages, + package_data={key: ["*.pxd"] for key in packages}, zip_safe=False, ) diff --git a/python/custreamz/pyproject.toml b/python/custreamz/pyproject.toml index 315621fa3c1..657b3865495 100644 --- a/python/custreamz/pyproject.toml +++ b/python/custreamz/pyproject.toml @@ -3,9 +3,9 @@ [build-system] build-backend = "setuptools.build_meta" requires = [ - "wheel", "setuptools", -] + "wheel", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [project] name = "custreamz" @@ -18,9 +18,10 @@ authors = [ license = { text = "Apache 2.0" } requires-python = ">=3.8" dependencies = [ - "cudf", - "cudf_kafka", -] + "cudf==23.4.*", + "cudf_kafka==23.4.*", + "streamz", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ "Intended Audience :: Developers", "Topic :: Streaming", @@ -36,8 +37,9 @@ classifiers = [ [project.optional-dependencies] test = [ "pytest", + "pytest-cov", "pytest-xdist", -] +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
[project.urls] Homepage = "https://github.com/rapidsai/cudf" diff --git a/python/dask_cudf/README.md b/python/dask_cudf/README.md new file mode 120000 index 00000000000..fe840054137 --- /dev/null +++ b/python/dask_cudf/README.md @@ -0,0 +1 @@ +../../README.md \ No newline at end of file diff --git a/python/dask_cudf/dask_cudf/core.py b/python/dask_cudf/dask_cudf/core.py index 60bbe5d9571..d2858876fcd 100644 --- a/python/dask_cudf/dask_cudf/core.py +++ b/python/dask_cudf/dask_cudf/core.py @@ -1,6 +1,7 @@ -# Copyright (c) 2018-2022, NVIDIA CORPORATION. +# Copyright (c) 2018-2023, NVIDIA CORPORATION. import math +import textwrap import warnings import numpy as np @@ -68,6 +69,18 @@ def to_dask_dataframe(self, **kwargs): class DataFrame(_Frame, dd.core.DataFrame): + """ + A distributed Dask DataFrame where the backing dataframe is a + :class:`cuDF DataFrame `. + + Typically you would not construct this object directly, but rather + use one of Dask-cuDF's IO routines. + + Most operations on :doc:`Dask DataFrames ` are + supported, with many of the same caveats. + + """ + _partition_type = cudf.DataFrame @_dask_cudf_nvtx_annotate @@ -671,12 +684,35 @@ def from_cudf(data, npartitions=None, chunksize=None, sort=True, name=None): from_cudf.__doc__ = ( - "Wraps main-line Dask from_pandas...\n" + dd.from_pandas.__doc__ + textwrap.dedent( + """ + Create a :class:`.DataFrame` from a :class:`cudf.DataFrame`. + + This function is a thin wrapper around + :func:`dask.dataframe.from_pandas`, accepting the same + arguments (described below) excepting that it operates on cuDF + rather than pandas objects.\n + """ + ) + + textwrap.dedent(dd.from_pandas.__doc__) ) @_dask_cudf_nvtx_annotate def from_dask_dataframe(df): + """ + Convert a Dask :class:`dask.dataframe.DataFrame` to a Dask-cuDF + one. 
+ + Parameters + ---------- + df : dask.dataframe.DataFrame + The Dask dataframe to convert + + Returns + ------- + dask_cudf.DataFrame : A new Dask collection backed by cuDF objects + """ return df.map_partitions(cudf.from_pandas) diff --git a/python/dask_cudf/dask_cudf/groupby.py b/python/dask_cudf/dask_cudf/groupby.py index f91738bdab0..f4bbcaf4dd1 100644 --- a/python/dask_cudf/dask_cudf/groupby.py +++ b/python/dask_cudf/dask_cudf/groupby.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. from functools import wraps from typing import Set @@ -433,22 +433,55 @@ def groupby_agg( ): """Optimized groupby aggregation for Dask-CuDF. - This aggregation algorithm only supports the following options: - - - "count" - - "mean" - - "std" - - "var" - - "sum" - - "min" - - "max" - - "collect" - - "first" - - "last" - - This "optimized" approach is more performant than the algorithm - in `dask.dataframe`, because it allows the cudf backend to - perform multiple aggregations at once. + Parameters + ---------- + ddf : DataFrame + DataFrame object to perform grouping on. + gb_cols : str or list[str] + Column names to group by. + aggs_in : str, list, or dict + Aggregations to perform. + split_every : int (optional) + How to group intermediate aggregates. + dropna : bool + Drop grouping key values corresponding to NA values. + as_index : bool + Currently ignored. + sort : bool + Sort the group keys, better performance is obtained when + not sorting. + shuffle : str (optional) + Control how shuffling of the DataFrame is performed. + sep : str + Internal usage. + + + Notes + ----- + This "optimized" approach is more performant than the algorithm in + implemented in :meth:`DataFrame.apply` because it allows the cuDF + backend to perform multiple aggregations at once. 
+ + This aggregation algorithm only supports the following options + + * "collect" + * "count" + * "first" + * "last" + * "max" + * "mean" + * "min" + * "std" + * "sum" + * "var" + + + See Also + -------- + DataFrame.groupby : generic groupby of a DataFrame + dask.dataframe.apply_concat_apply : for more description of the + split_every argument. + """ # Assert that aggregations are supported aggs = _redirect_aggs(aggs_in) diff --git a/python/dask_cudf/dask_cudf/io/csv.py b/python/dask_cudf/dask_cudf/io/csv.py index b4d080fd182..fd27083bbf4 100644 --- a/python/dask_cudf/dask_cudf/io/csv.py +++ b/python/dask_cudf/dask_cudf/io/csv.py @@ -16,9 +16,10 @@ def read_csv(path, blocksize="default", **kwargs): """ - Read CSV files into a dask_cudf.DataFrame + Read CSV files into a :class:`.DataFrame`. - This API parallelizes the ``cudf.read_csv`` function in the following ways: + This API parallelizes the :func:`cudf:cudf.read_csv` function in + the following ways: It supports loading many files at once using globstrings: @@ -34,23 +35,26 @@ def read_csv(path, blocksize="default", **kwargs): >>> df = dask_cudf.read_csv("s3://bucket/myfiles.*.csv") >>> df = dask_cudf.read_csv("https://www.mycloud.com/sample.csv") - Internally ``dask_cudf.read_csv`` uses ``cudf.read_csv`` and supports - many of the same keyword arguments with the same performance guarantees. - See the docstring for ``cudf.read_csv()`` for more information on available + Internally ``read_csv`` uses :func:`cudf:cudf.read_csv` and + supports many of the same keyword arguments with the same + performance guarantees. See the docstring for + :func:`cudf:cudf.read_csv` for more information on available keyword arguments. Parameters ---------- path : str, path object, or file-like object - Either a path to a file (a str, pathlib.Path, or - py._path.local.LocalPath), URL (including http, ftp, and S3 locations), - or any object with a read() method (such as builtin open() file - handler function or StringIO). 
+ Either a path to a file (a str, :py:class:`pathlib.Path`, or + py._path.local.LocalPath), URL (including http, ftp, and S3 + locations), or any object with a read() method (such as + builtin :py:func:`open` file handler function or + :py:class:`~io.StringIO`). blocksize : int or str, default "256 MiB" - The target task partition size. If `None`, a single block + The target task partition size. If ``None``, a single block is used for each file. **kwargs : dict - Passthrough key-word arguments that are sent to ``cudf.read_csv``. + Passthrough key-word arguments that are sent to + :func:`cudf:cudf.read_csv`. Examples -------- @@ -61,6 +65,7 @@ def read_csv(path, blocksize="default", **kwargs): 0 1 hi 1 2 hello 2 3 ai + """ # Handle `chunksize` deprecation diff --git a/python/dask_cudf/dask_cudf/io/json.py b/python/dask_cudf/dask_cudf/io/json.py index bb3d0f3c601..2a6ad603414 100644 --- a/python/dask_cudf/dask_cudf/io/json.py +++ b/python/dask_cudf/dask_cudf/io/json.py @@ -10,30 +10,33 @@ def read_json(url_path, engine="auto", **kwargs): - """Create a dask_cudf DataFrame collection from JSON data + """Read JSON data into a :class:`.DataFrame`. - This function wraps ``dask.dataframe.read_json``, and passes + This function wraps :func:`dask.dataframe.read_json`, and passes ``engine=partial(cudf.read_json, engine="auto")`` by default. Parameters ---------- - url_path: str, list of str + url_path : str, list of str Location to read from. If a string, can include a glob character to find a set of file names. Supports protocol specifications such as ``"s3://"``. engine : str or Callable, default "auto" - If str, this value will be used as the ``engine`` argument when - ``cudf.read_json`` is used to create each partition. If Callable, - this value will be used as the underlying function used to create - each partition from JSON data. 
The default value is "auto", so - that ``engine=partial(cudf.read_json, engine="auto")`` will be - passed to ``dask.dataframe.read_json`` by default. + + If str, this value will be used as the ``engine`` argument + when :func:`cudf.read_json` is used to create each partition. + If a :obj:`~typing.Callable`, this value will be used as the + underlying function used to create each partition from JSON + data. The default value is "auto", so that + ``engine=partial(cudf.read_json, engine="auto")`` will be + passed to :func:`dask.dataframe.read_json` by default. + **kwargs : - Key-word arguments to pass through to ``dask.dataframe.read_json``. + Key-word arguments to pass through to :func:`dask.dataframe.read_json`. Returns ------- - dask_cudf.DataFrame + :class:`.DataFrame` Examples -------- @@ -53,7 +56,8 @@ def read_json(url_path, engine="auto", **kwargs): See Also -------- - dask.dataframe.io.json.read_json + dask.dataframe.read_json + """ # TODO: Add optimized code path to leverage the diff --git a/python/dask_cudf/dask_cudf/io/orc.py b/python/dask_cudf/dask_cudf/io/orc.py index e731057ed90..49fea0d7602 100644 --- a/python/dask_cudf/dask_cudf/io/orc.py +++ b/python/dask_cudf/dask_cudf/io/orc.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. from io import BufferedWriter, IOBase @@ -25,37 +25,45 @@ def _read_orc_stripe(fs, path, stripe, columns, kwargs=None): def read_orc(path, columns=None, filters=None, storage_options=None, **kwargs): - """Read cudf dataframe from ORC file(s). + """Read ORC files into a :class:`.DataFrame`. Note that this function is mostly borrowed from upstream Dask. Parameters ---------- - path: str or list(str) + path : str or list[str] Location of file(s), which can be a full URL with protocol specifier, and may include glob character if a single string. - columns: None or list(str) + columns : None or list[str] Columns to load. If None, loads all. 
filters : None or list of tuple or list of lists of tuples - If not None, specifies a filter predicate used to filter out row groups - using statistics stored for each row group as Parquet metadata. Row - groups that do not match the given filter predicate are not read. The - predicate is expressed in disjunctive normal form (DNF) like - `[[('x', '=', 0), ...], ...]`. DNF allows arbitrary boolean logical - combinations of single column predicates. The innermost tuples each - describe a single column predicate. The list of inner predicates is - interpreted as a conjunction (AND), forming a more selective and - multiple column predicate. Finally, the outermost list combines - these filters as a disjunction (OR). Predicates may also be passed - as a list of tuples. This form is interpreted as a single conjunction. - To express OR in predicates, one must use the (preferred) notation of - list of lists of tuples. - storage_options: None or dict + If not None, specifies a filter predicate used to filter out + row groups using statistics stored for each row group as + Parquet metadata. Row groups that do not match the given + filter predicate are not read. The predicate is expressed in + `disjunctive normal form (DNF) + `__ + like ``[[('x', '=', 0), ...], ...]``. DNF allows arbitrary + boolean logical combinations of single column predicates. The + innermost tuples each describe a single column predicate. The + list of inner predicates is interpreted as a conjunction + (AND), forming a more selective and multiple column predicate. + Finally, the outermost list combines these filters as a + disjunction (OR). Predicates may also be passed as a list of + tuples. This form is interpreted as a single conjunction. To + express OR in predicates, one must use the (preferred) + notation of list of lists of tuples. + storage_options : None or dict Further parameters to pass to the bytes backend. 
+ See Also + -------- + dask.dataframe.read_orc + Returns ------- - cudf.DataFrame + dask_cudf.DataFrame + """ storage_options = storage_options or {} @@ -133,22 +141,25 @@ def to_orc( compute=True, **kwargs, ): - """Write a dask_cudf dataframe to ORC file(s) (one file per partition). + """ + Write a :class:`.DataFrame` to ORC file(s) (one file per partition). Parameters ---------- - df : dask_cudf.DataFrame - path: string or pathlib.Path + df : DataFrame + path : str or pathlib.Path Destination directory for data. Prepend with protocol like ``s3://`` or ``hdfs://`` for remote data. write_index : boolean, optional Whether or not to write the index. Defaults to True. - storage_options: None or dict + storage_options : None or dict Further parameters to pass to the bytes backend. compression : string or dict, optional compute : bool, optional - If True (default) then the result is computed immediately. If False - then a ``dask.delayed`` object is returned for future computation. + If True (default) then the result is computed immediately. If + False then a :class:`~dask.delayed.Delayed` object is returned + for future computation. 
+ """ from dask import compute as dask_compute, delayed diff --git a/python/dask_cudf/dask_cudf/io/parquet.py b/python/dask_cudf/dask_cudf/io/parquet.py index 452f2f8914a..f19c373150d 100644 --- a/python/dask_cudf/dask_cudf/io/parquet.py +++ b/python/dask_cudf/dask_cudf/io/parquet.py @@ -121,6 +121,8 @@ def _read_paths( if row_groups else None, strings_to_categorical=strings_to_categorical, + dataset_kwargs=dataset_kwargs, + categorical_partitions=False, **kwargs, ) for i, pof in enumerate(paths_or_fobs) @@ -191,6 +193,8 @@ def read_partition( dataset_kwargs = kwargs.get("dataset", {}) partitioning = partitioning or dataset_kwargs.get("partitioning", None) + if isinstance(partitioning, dict): + partitioning = pa_ds.partitioning(**partitioning) # Check if we are actually selecting any columns read_columns = columns @@ -438,13 +442,14 @@ def set_object_dtypes_from_pa_schema(df, schema): def read_parquet(path, columns=None, **kwargs): - """Read parquet files into a Dask DataFrame + """ + Read parquet files into a :class:`.DataFrame`. - Calls ``dask.dataframe.read_parquet`` with ``engine=CudfEngine`` - to coordinate the execution of ``cudf.read_parquet``, and to - ultimately create a ``dask_cudf.DataFrame`` collection. + Calls :func:`dask.dataframe.read_parquet` with ``engine=CudfEngine`` + to coordinate the execution of :func:`cudf.read_parquet`, and to + ultimately create a :class:`.DataFrame` collection. - See the ``dask.dataframe.read_parquet`` documentation for + See the :func:`dask.dataframe.read_parquet` documentation for all available options. 
Examples @@ -469,6 +474,7 @@ def read_parquet(path, columns=None, **kwargs): See Also -------- cudf.read_parquet + dask.dataframe.read_parquet """ if isinstance(columns, str): columns = [columns] diff --git a/python/dask_cudf/dask_cudf/io/tests/test_parquet.py b/python/dask_cudf/dask_cudf/io/tests/test_parquet.py index 8fb6e591660..f5ae9706fde 100644 --- a/python/dask_cudf/dask_cudf/io/tests/test_parquet.py +++ b/python/dask_cudf/dask_cudf/io/tests/test_parquet.py @@ -508,13 +508,14 @@ def test_null_partition(tmpdir): import pyarrow as pa from pyarrow.dataset import HivePartitioning - df = pd.DataFrame({"id": [0, 1, None], "x": [1, 2, 3]}) + ids = pd.Series([0, 1, None], dtype="Int64") + df = pd.DataFrame({"id": ids, "x": [1, 2, 3]}) ddf = dd.from_pandas(df, npartitions=1).to_backend("cudf") ddf.to_parquet(str(tmpdir), partition_on="id") fns = glob.glob(os.path.join(tmpdir, "id" + "=*/*.parquet")) assert len(fns) == 3 - partitioning = HivePartitioning(pa.schema([("id", pa.float64())])) + partitioning = HivePartitioning(pa.schema([("id", pa.int64())])) ddf_read = dask_cudf.read_parquet( str(tmpdir), dataset={"partitioning": partitioning}, diff --git a/python/dask_cudf/dask_cudf/sorting.py b/python/dask_cudf/dask_cudf/sorting.py index 0f2dc0d4efc..e841f2d8830 100644 --- a/python/dask_cudf/dask_cudf/sorting.py +++ b/python/dask_cudf/dask_cudf/sorting.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. 
from collections.abc import Iterator @@ -218,9 +218,11 @@ def quantile_divisions(df, by, npartitions): divisions[col].iloc[-1] += 1 divisions[col] = divisions[col].astype(dtype) else: - divisions[col].iloc[-1] = chr( - ord(divisions[col].iloc[-1][0]) + 1 - ) + if last := divisions[col].iloc[-1]: + val = chr(ord(last[0]) + 1) + else: + val = "this string intentionally left empty" # any but "" + divisions[col].iloc[-1] = val divisions = divisions.drop_duplicates().sort_index() return divisions diff --git a/python/dask_cudf/dask_cudf/tests/test_sort.py b/python/dask_cudf/dask_cudf/tests/test_sort.py index 770a52316b6..94609b180d6 100644 --- a/python/dask_cudf/dask_cudf/tests/test_sort.py +++ b/python/dask_cudf/dask_cudf/tests/test_sort.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# Copyright (c) 2019-2023, NVIDIA CORPORATION. import cupy as cp import numpy as np @@ -104,3 +104,13 @@ def f(partition, by_columns, ascending, na_position, **kwargs): ) expect = df.sort_values(by=by) dd.assert_eq(got, expect, check_index=False) + + +@pytest.mark.parametrize("by", ["a", "b", ["a", "b"], ["b", "a"]]) +def test_sort_values_empty_string(by): + df = cudf.DataFrame({"a": [3, 2, 1, 4], "b": [""] * 4}) + ddf = dd.from_pandas(df, npartitions=2) + got = ddf.sort_values(by) + if "a" in by: + expect = df.sort_values(by) + assert dd.assert_eq(got, expect, check_index=False) diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml index 79a9aca9e96..c91a9bb3b85 100644 --- a/python/dask_cudf/pyproject.toml +++ b/python/dask_cudf/pyproject.toml @@ -3,9 +3,9 @@ [build-system] build-backend = "setuptools.build_meta" requires = [ - "wheel", "setuptools", -] + "wheel", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
[project] name = "dask_cudf" @@ -18,14 +18,14 @@ authors = [ license = { text = "Apache 2.0" } requires-python = ">=3.8" dependencies = [ + "cudf==23.4.*", + "cupy-cuda11x>=9.5.0,<12.0.0a0", "dask>=2023.1.1", "distributed>=2023.1.1", "fsspec>=0.6.0", "numpy>=1.21", "pandas>=1.3,<1.6.0dev0", - "cudf==23.4.*", - "cupy-cuda11x", -] +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ "Intended Audience :: Developers", "Topic :: Database", @@ -40,12 +40,12 @@ dynamic = ["entry-points"] [project.optional-dependencies] test = [ - "numpy>=1.21", - "pandas>=1.3,<1.6.0dev0", + "dask-cuda==23.4.*", + "numba>=0.56.4,<0.57", "pytest", + "pytest-cov", "pytest-xdist", - "numba>=0.56.2", -] +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [project.urls] Homepage = "https://github.com/rapidsai/cudf"