diff --git a/.flake8 b/.flake8 new file mode 100644 index 00000000000..e80e3afc443 --- /dev/null +++ b/.flake8 @@ -0,0 +1,24 @@ +# Copyright (c) 2017-2023, NVIDIA CORPORATION. + +[flake8] +filename = *.py, *.pyx, *.pxd, *.pxi +exclude = __init__.py, *.egg, build, docs, .git +force-check = True +ignore = + # line break before binary operator + W503, + # whitespace before : + E203 +per-file-ignores = + # Rules ignored only in Cython: + # E211: whitespace before '(' (used in multi-line imports) + # E225: Missing whitespace around operators (breaks cython casting syntax like ) + # E226: Missing whitespace around arithmetic operators (breaks cython pointer syntax like int*) + # E227: Missing whitespace around bitwise or shift operator (Can also break casting syntax) + # E275: Missing whitespace after keyword (Doesn't work with Cython except?) + # E402: invalid syntax (works for Python, not Cython) + # E999: invalid syntax (works for Python, not Cython) + # W504: line break after binary operator (breaks lines that end with a pointer) + *.pyx: E211, E225, E226, E227, E275, E402, E999, W504 + *.pxd: E211, E225, E226, E227, E275, E402, E999, W504 + *.pxi: E211, E225, E226, E227, E275, E402, E999, W504 diff --git a/.gitattributes b/.gitattributes deleted file mode 100644 index fbfe7434d50..00000000000 --- a/.gitattributes +++ /dev/null @@ -1,4 +0,0 @@ -python/cudf/cudf/_version.py export-subst -python/cudf_kafka/cudf_kafka/_version.py export-subst -python/custreamz/custreamz/_version.py export-subst -python/dask_cudf/dask_cudf/_version.py export-subst diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 26d07515f70..024eb828e3c 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -53,6 +53,17 @@ jobs: date: ${{ inputs.date }} sha: ${{ inputs.sha }} skip_upload_pkgs: libcudf-example + docs-build: + if: github.ref_type == 'branch' && github.event_name == 'push' + needs: python-build + secrets: inherit + uses: 
rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.04 + with: + build_type: branch + node_type: "gpu-latest-1" + arch: "amd64" + container_image: "rapidsai/ci:latest" + run_script: "ci/build_docs.sh" wheel-build-cudf: secrets: inherit uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.04 @@ -64,6 +75,7 @@ jobs: package-name: cudf package-dir: python/cudf skbuild-configure-options: "-DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF" + uses-setup-env-vars: false wheel-publish-cudf: needs: wheel-build-cudf secrets: inherit @@ -85,6 +97,7 @@ jobs: date: ${{ inputs.date }} package-name: dask_cudf package-dir: python/dask_cudf + uses-setup-env-vars: false wheel-publish-dask-cudf: needs: wheel-build-dask-cudf secrets: inherit diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index f33fc15c52f..d02825b73d1 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -20,6 +20,7 @@ jobs: - conda-python-other-tests - conda-java-tests - conda-notebook-tests + - docs-build - wheel-build-cudf - wheel-tests-cudf - wheel-build-dask-cudf @@ -29,6 +30,8 @@ jobs: checks: secrets: inherit uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.04 + with: + enable_check_generated_files: false conda-cpp-build: needs: checks secrets: inherit @@ -82,6 +85,16 @@ jobs: arch: "amd64" container_image: "rapidsai/ci:latest" run_script: "ci/test_notebooks.sh" + docs-build: + needs: conda-python-build + secrets: inherit + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.04 + with: + build_type: pull-request + node_type: "gpu-latest-1" + arch: "amd64" + container_image: "rapidsai/ci:latest" + run_script: "ci/build_docs.sh" wheel-build-cudf: needs: checks secrets: inherit @@ -91,6 +104,7 @@ jobs: package-name: cudf package-dir: python/cudf skbuild-configure-options: "-DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF" + uses-setup-env-vars: false 
wheel-tests-cudf: needs: wheel-build-cudf secrets: inherit @@ -99,9 +113,8 @@ jobs: build_type: pull-request package-name: cudf # Install cupy-cuda11x for arm from a special index url - # Install tokenizers last binary wheel to avoid a Rust compile from the latest sdist - test-before-arm64: "pip install tokenizers==0.10.2 cupy-cuda11x -f https://pip.cupy.dev/aarch64" - test-unittest: "pytest -v -n 8 ./python/cudf/cudf/tests" + test-before-arm64: "python -m pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" + test-unittest: "python -m pytest -v -n 8 ./python/cudf/cudf/tests" test-smoketest: "python ./ci/wheel_smoke_test_cudf.py" wheel-build-dask-cudf: needs: wheel-tests-cudf @@ -111,7 +124,8 @@ jobs: build_type: pull-request package-name: dask_cudf package-dir: python/dask_cudf - before-wheel: "RAPIDS_PY_WHEEL_NAME=cudf_cu11 rapids-download-wheels-from-s3 ./local-cudf && pip install --no-deps ./local-cudf/cudf*.whl" + before-wheel: "RAPIDS_PY_WHEEL_NAME=cudf_cu11 rapids-download-wheels-from-s3 ./local-cudf && python -m pip install --no-deps ./local-cudf/cudf*.whl" + uses-setup-env-vars: false wheel-tests-dask-cudf: needs: wheel-build-dask-cudf secrets: inherit @@ -119,5 +133,5 @@ jobs: with: build_type: pull-request package-name: dask_cudf - test-before: "RAPIDS_PY_WHEEL_NAME=cudf_cu11 rapids-download-wheels-from-s3 ./local-cudf-dep && pip install --no-deps ./local-cudf-dep/cudf*.whl" - test-unittest: "pytest -v -n 8 ./python/dask_cudf/dask_cudf/tests" + test-before: "RAPIDS_PY_WHEEL_NAME=cudf_cu11 rapids-download-wheels-from-s3 ./local-cudf-dep && python -m pip install --no-deps ./local-cudf-dep/cudf*.whl" + test-unittest: "python -m pytest -v -n 8 ./python/dask_cudf/dask_cudf/tests" diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index ff19d51f8ef..c808e1475e6 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -22,6 +22,18 @@ jobs: branch: ${{ inputs.branch }} date: ${{ inputs.date }} sha: ${{ inputs.sha }} 
+ conda-cpp-memcheck-tests: + secrets: inherit + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.04 + with: + build_type: nightly + branch: ${{ inputs.branch }} + date: ${{ inputs.date }} + sha: ${{ inputs.sha }} + node_type: "gpu-latest-1" + arch: "amd64" + container_image: "rapidsai/ci:latest" + run_script: "ci/test_cpp_memcheck.sh" conda-python-cudf-tests: secrets: inherit uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.04 @@ -74,8 +86,8 @@ jobs: date: ${{ inputs.date }} sha: ${{ inputs.sha }} package-name: cudf - test-before-arm64: "pip install tokenizers==0.10.2 cupy-cuda11x -f https://pip.cupy.dev/aarch64" - test-unittest: "pytest -v -n 8 ./python/cudf/cudf/tests" + test-before-arm64: "python -m pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" + test-unittest: "python -m pytest -v -n 8 ./python/cudf/cudf/tests" wheel-tests-dask-cudf: secrets: inherit uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-test.yml@branch-23.04 @@ -85,4 +97,4 @@ jobs: date: ${{ inputs.date }} sha: ${{ inputs.sha }} package-name: dask_cudf - test-unittest: "pytest -v -n 8 ./python/dask_cudf/dask_cudf/tests" + test-unittest: "python -m pytest -v -n 8 ./python/dask_cudf/dask_cudf/tests" diff --git a/.gitignore b/.gitignore index 2d83aad7712..fb5c301fe3f 100644 --- a/.gitignore +++ b/.gitignore @@ -166,6 +166,9 @@ docs/cudf/source/api_docs/generated/* docs/cudf/source/api_docs/api/* docs/cudf/source/user_guide/example_output/* docs/cudf/source/user_guide/cudf.*Dtype.*.rst +_html +_text +jupyter_execute # cibuildwheel /wheelhouse diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a13b4ca10f1..1eb2c508db9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -34,7 +34,7 @@ repos: rev: 5.0.4 hooks: - id: flake8 - args: ["--config=setup.cfg"] + args: ["--config=.flake8"] files: python/.*$ types: [file] types_or: [python, cython] @@ -48,7 +48,7 @@ repos: 
hooks: - id: mypy additional_dependencies: [types-cachetools] - args: ["--config-file=setup.cfg", + args: ["--config-file=pyproject.toml", "python/cudf/cudf", "python/custreamz/custreamz", "python/cudf_kafka/cudf_kafka", @@ -58,7 +58,19 @@ repos: rev: 6.1.1 hooks: - id: pydocstyle - args: ["--config=setup.cfg"] + # https://github.com/PyCQA/pydocstyle/issues/603 + additional_dependencies: [toml] + args: ["--config=pyproject.toml"] + - repo: https://github.com/nbQA-dev/nbQA + rev: 1.6.3 + hooks: + - id: nbqa-isort + # Use the cudf_kafka isort orderings in notebooks so that dask + # and RAPIDS packages have their own sections. + args: ["--settings-file=python/cudf_kafka/pyproject.toml"] + - id: nbqa-black + # Explicitly specify the pyproject.toml at the repo root, not per-project. + args: ["--config=pyproject.toml"] - repo: https://github.com/pre-commit/mirrors-clang-format rev: v11.1.0 hooks: @@ -138,15 +150,21 @@ repos: pass_filenames: false verbose: false - repo: https://github.com/codespell-project/codespell - rev: v2.1.0 + rev: v2.2.2 hooks: - id: codespell + additional_dependencies: [tomli] + args: ["--toml", "pyproject.toml"] exclude: | (?x)^( .*test.*| - ^CHANGELOG.md$| - ^.*versioneer.py$ + ^CHANGELOG.md$ ) + - repo: https://github.com/rapidsai/dependency-file-generator + rev: v1.4.0 + hooks: + - id: rapids-dependency-file-generator + args: ["--clean"] default_language_version: python: python3 diff --git a/ci/build_docs.sh b/ci/build_docs.sh new file mode 100755 index 00000000000..6daedb59733 --- /dev/null +++ b/ci/build_docs.sh @@ -0,0 +1,48 @@ +#!/bin/bash +# Copyright (c) 2023, NVIDIA CORPORATION. + +set -euo pipefail + +rapids-logger "Create test conda environment" +. 
/opt/conda/etc/profile.d/conda.sh + +rapids-dependency-file-generator \ + --output conda \ + --file_key docs \ + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml + +rapids-mamba-retry env create --force -f env.yaml -n docs +conda activate docs + +rapids-print-env + +rapids-logger "Downloading artifacts from previous jobs" +CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) +PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python) +VERSION_NUMBER=$(rapids-get-rapids-version-from-git) + +rapids-mamba-retry install \ + --channel "${CPP_CHANNEL}" \ + --channel "${PYTHON_CHANNEL}" \ + libcudf cudf dask-cudf + + +rapids-logger "Build Doxygen docs" +pushd cpp/doxygen +aws s3 cp s3://rapidsai-docs/librmm/${VERSION_NUMBER}/html/rmm.tag . || echo "Failed to download rmm Doxygen tag" +doxygen Doxyfile +popd + +rapids-logger "Build Sphinx docs" +pushd docs/cudf +sphinx-build -b dirhtml source _html +sphinx-build -b text source _text +popd + + +if [[ ${RAPIDS_BUILD_TYPE} == "branch" ]]; then + rapids-logger "Upload Docs to S3" + aws s3 sync --no-progress --delete cpp/doxygen/html "s3://rapidsai-docs/libcudf/${VERSION_NUMBER}/html" + aws s3 sync --no-progress --delete docs/cudf/_html "s3://rapidsai-docs/cudf/${VERSION_NUMBER}/html" + aws s3 sync --no-progress --delete docs/cudf/_text "s3://rapidsai-docs/cudf/${VERSION_NUMBER}/txt" +fi diff --git a/ci/check_style.sh b/ci/check_style.sh index 020143095ce..f9bfea7b47c 100755 --- a/ci/check_style.sh +++ b/ci/check_style.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. 
set -euo pipefail @@ -20,4 +20,4 @@ mkdir -p $(dirname ${RAPIDS_CMAKE_FORMAT_FILE}) wget -O ${RAPIDS_CMAKE_FORMAT_FILE} ${FORMAT_FILE_URL} # Run pre-commit checks -pre-commit run --hook-stage manual --all-files --show-diff-on-failure +pre-commit run --all-files --show-diff-on-failure diff --git a/ci/checks/copyright.py b/ci/checks/copyright.py index 0f2540c440c..e76d9524c76 100644 --- a/ci/checks/copyright.py +++ b/ci/checks/copyright.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# Copyright (c) 2019-2023, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -31,7 +31,6 @@ ] ExemptFiles = [ re.compile(r"cpp/include/cudf_test/cxxopts.hpp"), - re.compile(r"versioneer[.]py"), ] # this will break starting at year 10000, which is probably OK :) diff --git a/ci/release/apply_wheel_modifications.sh b/ci/release/apply_wheel_modifications.sh new file mode 100755 index 00000000000..9d9758f1f15 --- /dev/null +++ b/ci/release/apply_wheel_modifications.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# Copyright (c) 2023, NVIDIA CORPORATION. 
+# +# Usage: bash apply_wheel_modifications.sh + +VERSION=${1} +CUDA_SUFFIX=${2} + +# __init__.py versions +sed -i "s/__version__ = .*/__version__ = \"${VERSION}\"/g" python/cudf/cudf/__init__.py +sed -i "s/__version__ = .*/__version__ = \"${VERSION}\"/g" python/dask_cudf/dask_cudf/__init__.py +sed -i "s/__version__ = .*/__version__ = \"${VERSION}\"/g" python/cudf_kafka/cudf_kafka/__init__.py +sed -i "s/__version__ = .*/__version__ = \"${VERSION}\"/g" python/custreamz/custreamz/__init__.py + +# pyproject.toml versions +sed -i "s/^version = .*/version = \"${VERSION}\"/g" python/cudf/pyproject.toml +sed -i "s/^version = .*/version = \"${VERSION}\"/g" python/dask_cudf/pyproject.toml +sed -i "s/^version = .*/version = \"${VERSION}\"/g" python/cudf_kafka/pyproject.toml +sed -i "s/^version = .*/version = \"${VERSION}\"/g" python/custreamz/pyproject.toml + +# cudf pyproject.toml cuda suffixes +sed -i "s/^name = \"cudf\"/name = \"cudf${CUDA_SUFFIX}\"/g" python/cudf/pyproject.toml +sed -i "s/rmm/rmm${CUDA_SUFFIX}/g" python/cudf/pyproject.toml +sed -i "s/ptxcompiler/ptxcompiler${CUDA_SUFFIX}/g" python/cudf/pyproject.toml +sed -i "s/cubinlinker/cubinlinker${CUDA_SUFFIX}/g" python/cudf/pyproject.toml + +# dask_cudf pyproject.toml cuda suffixes +sed -i "s/^name = \"dask_cudf\"/name = \"dask_cudf${CUDA_SUFFIX}\"/g" python/dask_cudf/pyproject.toml +# Need to provide the == to avoid modifying the URL +sed -i "s/\"cudf==/\"cudf${CUDA_SUFFIX}==/g" python/dask_cudf/pyproject.toml diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 15d81127450..e5c9ba0569f 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -34,19 +34,27 @@ function sed_runner() { # cpp update sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' cpp/CMakeLists.txt -# cpp stream testing update -sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' cpp/tests/utilities/identify_stream_usage/CMakeLists.txt - -# 
Python update +# Python CMakeLists updates sed_runner 's/'"cudf_version .*)"'/'"cudf_version ${NEXT_FULL_TAG})"'/g' python/cudf/CMakeLists.txt - # cpp libcudf_kafka update sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' cpp/libcudf_kafka/CMakeLists.txt # cpp cudf_jni update sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' java/src/main/native/CMakeLists.txt +# Python __init__.py updates +sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/cudf/cudf/__init__.py +sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/dask_cudf/dask_cudf/__init__.py +sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/cudf_kafka/cudf_kafka/__init__.py +sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/custreamz/custreamz/__init__.py + +# Python pyproject.toml updates +sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/cudf/pyproject.toml +sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/dask_cudf/pyproject.toml +sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/cudf_kafka/pyproject.toml +sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/custreamz/pyproject.toml + # rapids-cmake version sed_runner 's/'"branch-.*\/RAPIDS.cmake"'/'"branch-${NEXT_SHORT_TAG}\/RAPIDS.cmake"'/g' fetch_rapids.cmake @@ -81,9 +89,9 @@ sed_runner "s/CUDF_TAG branch-${CURRENT_SHORT_TAG}/CUDF_TAG branch-${NEXT_SHORT_ # Need to distutils-normalize the original version NEXT_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_SHORT_TAG}'))") -# Wheel builds install intra-RAPIDS dependencies from same release -sed_runner "s/rmm{cuda_suffix}.*\",/rmm{cuda_suffix}==${NEXT_SHORT_TAG_PEP440}.*\",/g" python/cudf/setup.py -sed_runner "s/cudf{cuda_suffix}==.*\",/cudf{cuda_suffix}==${NEXT_SHORT_TAG_PEP440}.*\",/g" 
python/dask_cudf/setup.py +# Dependency versions in pyproject.toml +sed_runner "s/rmm==.*\",/rmm==${NEXT_SHORT_TAG_PEP440}.*\",/g" python/cudf/pyproject.toml +sed_runner "s/cudf==.*\",/cudf==${NEXT_SHORT_TAG_PEP440}.*\",/g" python/dask_cudf/pyproject.toml for FILE in .github/workflows/*.yaml; do sed_runner "/shared-action-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}" diff --git a/ci/test_cpp.sh b/ci/test_cpp.sh index 983a63d4ce9..bd7a82afbea 100755 --- a/ci/test_cpp.sh +++ b/ci/test_cpp.sh @@ -1,35 +1,7 @@ #!/bin/bash # Copyright (c) 2022-2023, NVIDIA CORPORATION. -set -euo pipefail - -. /opt/conda/etc/profile.d/conda.sh - -rapids-logger "Generate C++ testing dependencies" -rapids-dependency-file-generator \ - --output conda \ - --file_key test_cpp \ - --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch)" | tee env.yaml - -rapids-mamba-retry env create --force -f env.yaml -n test - -# Temporarily allow unbound variables for conda activation. -set +u -conda activate test -set -u - -CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) -RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}/ -mkdir -p "${RAPIDS_TESTS_DIR}" - -rapids-print-env - -rapids-mamba-retry install \ - --channel "${CPP_CHANNEL}" \ - libcudf libcudf_kafka libcudf-tests - -rapids-logger "Check GPU usage" -nvidia-smi +source "$(dirname "$0")/test_cpp_common.sh" EXITCODE=0 trap "EXITCODE=1" ERR diff --git a/ci/test_cpp_common.sh b/ci/test_cpp_common.sh new file mode 100644 index 00000000000..c7c095dc4df --- /dev/null +++ b/ci/test_cpp_common.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# Copyright (c) 2022-2023, NVIDIA CORPORATION. + +set -euo pipefail + +. 
/opt/conda/etc/profile.d/conda.sh + +rapids-logger "Generate C++ testing dependencies" +rapids-dependency-file-generator \ + --output conda \ + --file_key test_cpp \ + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch)" | tee env.yaml + +rapids-mamba-retry env create --force -f env.yaml -n test + +# Temporarily allow unbound variables for conda activation. +set +u +conda activate test +set -u + +CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) +RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}/ +mkdir -p "${RAPIDS_TESTS_DIR}" + +rapids-print-env + +rapids-mamba-retry install \ + --channel "${CPP_CHANNEL}" \ + libcudf libcudf_kafka libcudf-tests + +rapids-logger "Check GPU usage" +nvidia-smi diff --git a/ci/test_cpp_memcheck.sh b/ci/test_cpp_memcheck.sh new file mode 100755 index 00000000000..db9ce143d51 --- /dev/null +++ b/ci/test_cpp_memcheck.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# Copyright (c) 2023, NVIDIA CORPORATION. + +source "$(dirname "$0")/test_cpp_common.sh" + +EXITCODE=0 +trap "EXITCODE=1" ERR +set +e + +# Run gtests with compute-sanitizer +rapids-logger "Memcheck gtests with rmm_mode=cuda" +export GTEST_CUDF_RMM_MODE=cuda +COMPUTE_SANITIZER_CMD="compute-sanitizer --tool memcheck" +for gt in "$CONDA_PREFIX"/bin/gtests/libcudf/* ; do + test_name=$(basename ${gt}) + if [[ "$test_name" == "ERROR_TEST" ]] || [[ "$test_name" == "STREAM_IDENTIFICATION_TEST" ]]; then + continue + fi + echo "Running compute-sanitizer on $test_name" + ${COMPUTE_SANITIZER_CMD} ${gt} --gtest_output=xml:"${RAPIDS_TESTS_DIR}${test_name}.xml" +done +unset GTEST_CUDF_RMM_MODE + +rapids-logger "Test script exiting with value: $EXITCODE" +exit ${EXITCODE} diff --git a/ci/test_notebooks.sh b/ci/test_notebooks.sh index 7f5f35219b0..c9dc99733a9 100755 --- a/ci/test_notebooks.sh +++ b/ci/test_notebooks.sh @@ -34,7 +34,7 @@ pushd notebooks # Add notebooks that should be skipped here # (space-separated list of filenames without paths) -SKIPNBS="" 
+SKIPNBS="performance_comparisons.ipynb" EXITCODE=0 trap "EXITCODE=1" ERR diff --git a/ci/test_python_other.sh b/ci/test_python_other.sh index 25ed615df84..ab36fbbb5ff 100755 --- a/ci/test_python_other.sh +++ b/ci/test_python_other.sh @@ -17,31 +17,31 @@ trap "EXITCODE=1" ERR set +e rapids-logger "pytest dask_cudf" -pushd python/dask_cudf +pushd python/dask_cudf/dask_cudf pytest \ --cache-clear \ --junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cudf.xml" \ --numprocesses=8 \ --dist=loadscope \ - --cov-config=.coveragerc \ + --cov-config=../.coveragerc \ --cov=dask_cudf \ --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/dask-cudf-coverage.xml" \ --cov-report=term \ - dask_cudf + tests popd rapids-logger "pytest custreamz" -pushd python/custreamz +pushd python/custreamz/custreamz pytest \ --cache-clear \ --junitxml="${RAPIDS_TESTS_DIR}/junit-custreamz.xml" \ --numprocesses=8 \ --dist=loadscope \ - --cov-config=.coveragerc \ + --cov-config=../.coveragerc \ --cov=custreamz \ --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/custreamz-coverage.xml" \ --cov-report=term \ - custreamz + tests popd rapids-logger "Test script exiting with value: $EXITCODE" diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 675df3891c3..67e2dc4720e 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -16,6 +16,7 @@ dependencies: - cmake>=3.23.1,!=3.25.0 - cubinlinker - cuda-python>=11.7.1,<12.0 +- cuda-sanitizer-api=11.8.86 - cudatoolkit=11.8 - cupy>=9.5.0,<12.0.0a0 - cxx-compiler @@ -27,11 +28,12 @@ dependencies: - doxygen=1.8.20 - fastavro>=0.22.9 - fsspec>=0.6.0 -- gcc_linux-64=9.* +- gcc_linux-64=11.* - hypothesis - ipython - libarrow=10 - librdkafka=1.7.0 +- librmm=23.04.* - mimesis>=4.1.0 - moto>=4.0.8 - myst-nb @@ -48,7 +50,7 @@ dependencies: - pandoc<=2.0.0 - pip - pre-commit -- protobuf=4.21 +- protobuf>=4.21.6,<4.22 - ptxcompiler - pyarrow=10 - pydata-sphinx-theme diff 
--git a/conda/recipes/cudf/conda_build_config.yaml b/conda/recipes/cudf/conda_build_config.yaml index 4feac647e8c..7494fec79a0 100644 --- a/conda/recipes/cudf/conda_build_config.yaml +++ b/conda/recipes/cudf/conda_build_config.yaml @@ -1,8 +1,8 @@ c_compiler_version: - - 9 + - 11 cxx_compiler_version: - - 9 + - 11 sysroot_version: - "2.17" diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index 27073eb323b..e0f33ad40c7 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -18,18 +18,20 @@ build: number: {{ GIT_DESCRIBE_NUMBER }} string: cuda_{{ cuda_major }}_py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} script_env: - - PARALLEL_LEVEL - - CMAKE_GENERATOR + - AWS_ACCESS_KEY_ID + - AWS_SECRET_ACCESS_KEY + - AWS_SESSION_TOKEN - CMAKE_C_COMPILER_LAUNCHER - - CMAKE_CXX_COMPILER_LAUNCHER - CMAKE_CUDA_COMPILER_LAUNCHER - - SCCACHE_S3_KEY_PREFIX=cudf-aarch64 # [aarch64] - - SCCACHE_S3_KEY_PREFIX=cudf-linux64 # [linux64] + - CMAKE_CXX_COMPILER_LAUNCHER + - CMAKE_GENERATOR + - PARALLEL_LEVEL - SCCACHE_BUCKET - - SCCACHE_REGION - SCCACHE_IDLE_TIMEOUT - - AWS_ACCESS_KEY_ID - - AWS_SECRET_ACCESS_KEY + - SCCACHE_REGION + - SCCACHE_S3_KEY_PREFIX=cudf-aarch64 # [aarch64] + - SCCACHE_S3_KEY_PREFIX=cudf-linux64 # [linux64] + - SCCACHE_S3_USE_SSL ignore_run_exports: # libcudf's run_exports pinning is looser than we would like - libcudf @@ -45,7 +47,7 @@ requirements: - ninja - sysroot_{{ target_platform }} {{ sysroot_version }} host: - - protobuf =4.21 + - protobuf >=4.21.6,<4.22 - python - cython >=0.29,<0.30 - scikit-build >=0.13.1 @@ -57,7 +59,7 @@ requirements: - rmm ={{ minor_version }} - cudatoolkit ={{ cuda_version }} run: - - protobuf =4.21 + - protobuf >=4.21.6,<4.22 - python - typing_extensions - pandas >=1.0,<1.6.0dev0 diff --git a/conda/recipes/cudf_kafka/conda_build_config.yaml b/conda/recipes/cudf_kafka/conda_build_config.yaml index c049d21fd91..ccc49851a8e 100644 --- 
a/conda/recipes/cudf_kafka/conda_build_config.yaml +++ b/conda/recipes/cudf_kafka/conda_build_config.yaml @@ -1,8 +1,8 @@ c_compiler_version: - - 9 + - 11 cxx_compiler_version: - - 9 + - 11 sysroot_version: - "2.17" diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml index 5cbea78e82b..4ae47ef1c10 100644 --- a/conda/recipes/cudf_kafka/meta.yaml +++ b/conda/recipes/cudf_kafka/meta.yaml @@ -17,18 +17,20 @@ build: number: {{ GIT_DESCRIBE_NUMBER }} string: py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} script_env: - - PARALLEL_LEVEL - - CMAKE_GENERATOR + - AWS_ACCESS_KEY_ID + - AWS_SECRET_ACCESS_KEY + - AWS_SESSION_TOKEN - CMAKE_C_COMPILER_LAUNCHER - - CMAKE_CXX_COMPILER_LAUNCHER - CMAKE_CUDA_COMPILER_LAUNCHER - - SCCACHE_S3_KEY_PREFIX=cudf-kafka-aarch64 # [aarch64] - - SCCACHE_S3_KEY_PREFIX=cudf-kafka-linux64 # [linux64] + - CMAKE_CXX_COMPILER_LAUNCHER + - CMAKE_GENERATOR + - PARALLEL_LEVEL - SCCACHE_BUCKET - - SCCACHE_REGION - SCCACHE_IDLE_TIMEOUT - - AWS_ACCESS_KEY_ID - - AWS_SECRET_ACCESS_KEY + - SCCACHE_REGION + - SCCACHE_S3_KEY_PREFIX=cudf-kafka-aarch64 # [aarch64] + - SCCACHE_S3_KEY_PREFIX=cudf-kafka-linux64 # [linux64] + - SCCACHE_S3_USE_SSL requirements: build: diff --git a/conda/recipes/custreamz/meta.yaml b/conda/recipes/custreamz/meta.yaml index af5705341e6..5fafa7464db 100644 --- a/conda/recipes/custreamz/meta.yaml +++ b/conda/recipes/custreamz/meta.yaml @@ -17,18 +17,20 @@ build: number: {{ GIT_DESCRIBE_NUMBER }} string: py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} script_env: - - PARALLEL_LEVEL - - CMAKE_GENERATOR + - AWS_ACCESS_KEY_ID + - AWS_SECRET_ACCESS_KEY + - AWS_SESSION_TOKEN - CMAKE_C_COMPILER_LAUNCHER - - CMAKE_CXX_COMPILER_LAUNCHER - CMAKE_CUDA_COMPILER_LAUNCHER - - SCCACHE_S3_KEY_PREFIX=custreamz-aarch64 # [aarch64] - - SCCACHE_S3_KEY_PREFIX=custreamz-linux64 # [linux64] + - CMAKE_CXX_COMPILER_LAUNCHER + - CMAKE_GENERATOR + - 
PARALLEL_LEVEL - SCCACHE_BUCKET - - SCCACHE_REGION - SCCACHE_IDLE_TIMEOUT - - AWS_ACCESS_KEY_ID - - AWS_SECRET_ACCESS_KEY + - SCCACHE_REGION + - SCCACHE_S3_KEY_PREFIX=custreamz-aarch64 # [aarch64] + - SCCACHE_S3_KEY_PREFIX=custreamz-linux64 # [linux64] + - SCCACHE_S3_USE_SSL requirements: host: diff --git a/conda/recipes/dask-cudf/meta.yaml b/conda/recipes/dask-cudf/meta.yaml index 3ee3d4d3952..79f1f09858a 100644 --- a/conda/recipes/dask-cudf/meta.yaml +++ b/conda/recipes/dask-cudf/meta.yaml @@ -18,18 +18,20 @@ build: number: {{ GIT_DESCRIBE_NUMBER }} string: cuda_{{ cuda_major }}_py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} script_env: - - PARALLEL_LEVEL - - CMAKE_GENERATOR + - AWS_ACCESS_KEY_ID + - AWS_SECRET_ACCESS_KEY + - AWS_SESSION_TOKEN - CMAKE_C_COMPILER_LAUNCHER - - CMAKE_CXX_COMPILER_LAUNCHER - CMAKE_CUDA_COMPILER_LAUNCHER - - SCCACHE_S3_KEY_PREFIX=dask-cudf-aarch64 # [aarch64] - - SCCACHE_S3_KEY_PREFIX=dask-cudf-linux64 # [linux64] + - CMAKE_CXX_COMPILER_LAUNCHER + - CMAKE_GENERATOR + - PARALLEL_LEVEL - SCCACHE_BUCKET - - SCCACHE_REGION - SCCACHE_IDLE_TIMEOUT - - AWS_ACCESS_KEY_ID - - AWS_SECRET_ACCESS_KEY + - SCCACHE_REGION + - SCCACHE_S3_KEY_PREFIX=dask-cudf-aarch64 # [aarch64] + - SCCACHE_S3_KEY_PREFIX=dask-cudf-linux64 # [linux64] + - SCCACHE_S3_USE_SSL requirements: host: diff --git a/conda/recipes/libcudf/conda_build_config.yaml b/conda/recipes/libcudf/conda_build_config.yaml index ff8d9026aef..1111fc0a24e 100644 --- a/conda/recipes/libcudf/conda_build_config.yaml +++ b/conda/recipes/libcudf/conda_build_config.yaml @@ -1,8 +1,8 @@ c_compiler_version: - - 9 + - 11 cxx_compiler_version: - - 9 + - 11 cuda_compiler: - nvcc diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index fbfcf6e71a2..309868b8144 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -15,19 +15,21 @@ source: build: script_env: - - PARALLEL_LEVEL - - CMAKE_GENERATOR + - 
AWS_ACCESS_KEY_ID + - AWS_SECRET_ACCESS_KEY + - AWS_SESSION_TOKEN - CMAKE_C_COMPILER_LAUNCHER - - CMAKE_CXX_COMPILER_LAUNCHER - CMAKE_CUDA_COMPILER_LAUNCHER - - SCCACHE_S3_KEY_PREFIX=libcudf-aarch64 # [aarch64] - - SCCACHE_S3_KEY_PREFIX=libcudf-linux64 # [linux64] + - CMAKE_CXX_COMPILER_LAUNCHER + - CMAKE_GENERATOR + - PARALLEL_LEVEL + - RAPIDS_ARTIFACTS_DIR - SCCACHE_BUCKET - - SCCACHE_REGION - SCCACHE_IDLE_TIMEOUT - - AWS_ACCESS_KEY_ID - - AWS_SECRET_ACCESS_KEY - - RAPIDS_ARTIFACTS_DIR + - SCCACHE_REGION + - SCCACHE_S3_KEY_PREFIX=libcudf-aarch64 # [aarch64] + - SCCACHE_S3_KEY_PREFIX=libcudf-linux64 # [linux64] + - SCCACHE_S3_USE_SSL requirements: build: diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d402a47628c..96524b7c55f 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -455,6 +455,7 @@ add_library( src/reductions/scan/scan_inclusive.cu src/reductions/segmented/all.cu src/reductions/segmented/any.cu + src/reductions/segmented/counts.cu src/reductions/segmented/max.cu src/reductions/segmented/mean.cu src/reductions/segmented/min.cu diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index c5ae3345da5..cc0b642a337 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -252,7 +252,7 @@ ConfigureNVBench(ORC_WRITER_NVBENCH io/orc/orc_writer.cpp io/orc/orc_writer_chun # ################################################################################################## # * csv writer benchmark -------------------------------------------------------------------------- -ConfigureBench(CSV_WRITER_BENCH io/csv/csv_writer.cpp) +ConfigureNVBench(CSV_WRITER_NVBENCH io/csv/csv_writer.cpp) # ################################################################################################## # * ast benchmark --------------------------------------------------------------------------------- @@ -295,13 +295,14 @@ ConfigureBench( string/url_decode.cu ) -ConfigureNVBench(STRINGS_NVBENCH string/like.cpp 
string/reverse.cpp) +ConfigureNVBench(STRINGS_NVBENCH string/like.cpp string/reverse.cpp string/lengths.cpp) # ################################################################################################## # * json benchmark ------------------------------------------------------------------- ConfigureBench(JSON_BENCH string/json.cu) ConfigureNVBench(FST_NVBENCH io/fst.cu) -ConfigureNVBench(NESTED_JSON_NVBENCH io/json/nested_json.cpp) +ConfigureNVBench(JSON_READER_NVBENCH io/json/nested_json.cpp io/json/json_reader_input.cpp) +ConfigureNVBench(JSON_WRITER_NVBENCH io/json/json_writer.cpp) # ################################################################################################## # * io benchmark --------------------------------------------------------------------- diff --git a/cpp/benchmarks/binaryop/compiled_binaryop.cpp b/cpp/benchmarks/binaryop/compiled_binaryop.cpp index 4c3bf360256..d32664cde4a 100644 --- a/cpp/benchmarks/binaryop/compiled_binaryop.cpp +++ b/cpp/benchmarks/binaryop/compiled_binaryop.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -66,8 +66,14 @@ void BM_compiled_binaryop(benchmark::State& state, cudf::binary_operator binop) #define BINARYOP_BENCHMARK_DEFINE(lhs, rhs, bop, tout) \ BM_BINARYOP_BENCHMARK_DEFINE(build_name(bop, lhs, rhs, tout), lhs, rhs, bop, tout) -using namespace cudf; -using namespace numeric; +using cudf::duration_D; +using cudf::duration_ms; +using cudf::duration_ns; +using cudf::duration_s; +using cudf::timestamp_D; +using cudf::timestamp_ms; +using cudf::timestamp_s; +using numeric::decimal32; // clang-format off BINARYOP_BENCHMARK_DEFINE(float, int64_t, ADD, int32_t); diff --git a/cpp/benchmarks/common/generate_input.cu b/cpp/benchmarks/common/generate_input.cu index dee7e2b8586..2829d14070c 100644 --- a/cpp/benchmarks/common/generate_input.cu +++ b/cpp/benchmarks/common/generate_input.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -501,7 +501,7 @@ std::unique_ptr create_random_utf8_string_column(data_profile cons rmm::device_uvector offsets(num_rows + 1, cudf::get_default_stream()); thrust::exclusive_scan( thrust::device, valid_lengths, valid_lengths + lengths.size(), offsets.begin()); - // offfsets are ready. + // offsets are ready. auto chars_length = *thrust::device_pointer_cast(offsets.end() - 1); rmm::device_uvector chars(chars_length, cudf::get_default_stream()); thrust::for_each_n(thrust::device, diff --git a/cpp/benchmarks/common/generate_input.hpp b/cpp/benchmarks/common/generate_input.hpp index f8ea194f0c4..e65aa69763b 100644 --- a/cpp/benchmarks/common/generate_input.hpp +++ b/cpp/benchmarks/common/generate_input.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -373,13 +373,13 @@ class data_profile { void set_bool_probability_true(double p) { - CUDF_EXPECTS(p >= 0. and p <= 1., "probablity must be in range [0...1]"); + CUDF_EXPECTS(p >= 0. and p <= 1., "probability must be in range [0...1]"); bool_probability_true = p; } void set_null_probability(std::optional p) { CUDF_EXPECTS(p.value_or(0.) >= 0. and p.value_or(0.) <= 1., - "probablity must be in range [0...1]"); + "probability must be in range [0...1]"); null_probability = p; } void set_cardinality(cudf::size_type c) { cardinality = c; } diff --git a/cpp/benchmarks/groupby/group_rank.cpp b/cpp/benchmarks/groupby/group_rank.cpp index 2a70b95890b..6aac3826e55 100644 --- a/cpp/benchmarks/groupby/group_rank.cpp +++ b/cpp/benchmarks/groupby/group_rank.cpp @@ -28,8 +28,7 @@ template static void nvbench_groupby_rank(nvbench::state& state, nvbench::type_list>) { - using namespace cudf; - constexpr auto dtype = type_to_id(); + constexpr auto dtype = cudf::type_to_id(); bool const is_sorted = state.get_int64("is_sorted"); cudf::size_type const column_size = state.get_int64("data_size"); @@ -43,16 +42,17 @@ static void nvbench_groupby_rank(nvbench::state& state, // values to be pre-sorted too for groupby rank if (is_sorted) source_table = cudf::sort(*source_table); - table_view keys{{source_table->view().column(0)}}; - column_view order_by{source_table->view().column(1)}; + cudf::table_view keys{{source_table->view().column(0)}}; + cudf::column_view order_by{source_table->view().column(1)}; - auto agg = cudf::make_rank_aggregation(method); - std::vector requests; - requests.emplace_back(groupby::scan_request()); + auto agg = cudf::make_rank_aggregation(method); + std::vector requests; + requests.emplace_back(cudf::groupby::scan_request()); requests[0].values = order_by; requests[0].aggregations.push_back(std::move(agg)); - groupby::groupby 
gb_obj(keys, null_policy::EXCLUDE, is_sorted ? sorted::YES : sorted::NO); + cudf::groupby::groupby gb_obj( + keys, cudf::null_policy::EXCLUDE, is_sorted ? cudf::sorted::YES : cudf::sorted::NO); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { rmm::cuda_stream_view stream_view{launch.get_stream()}; diff --git a/cpp/benchmarks/io/csv/csv_writer.cpp b/cpp/benchmarks/io/csv/csv_writer.cpp index 54a86094eb7..1ca6b5b2a9b 100644 --- a/cpp/benchmarks/io/csv/csv_writer.cpp +++ b/cpp/benchmarks/io/csv/csv_writer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,91 +17,112 @@ #include #include #include -#include +#include #include +#include + // Size of the data in the benchmark dataframe; chosen to be low enough to allow benchmarks to // run on most GPUs, but large enough to allow highest throughput constexpr size_t data_size = 256 << 20; constexpr cudf::size_type num_cols = 64; -class CsvWrite : public cudf::benchmark { -}; - -void BM_csv_write_varying_inout(benchmark::State& state) +template +void BM_csv_write_dtype_io(nvbench::state& state, + nvbench::type_list, nvbench::enum_type>) { - auto const data_types = get_type_or_group(state.range(0)); - auto const sink_type = static_cast(state.range(1)); + auto const data_types = get_type_or_group(static_cast(DataType)); + auto const sink_type = IO; auto const tbl = create_random_table(cycle_dtypes(data_types, num_cols), table_size_bytes{data_size}); auto const view = tbl->view(); - cuio_source_sink_pair source_sink(sink_type); - auto mem_stats_logger = cudf::memory_stats_logger(); - for (auto _ : state) { - cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 - cudf::io::csv_writer_options options = - cudf::io::csv_writer_options::builder(source_sink.make_sink_info(), view); - 
cudf::io::write_csv(options); - } - - state.SetBytesProcessed(data_size * state.iterations()); - state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage(); - state.counters["encoded_file_size"] = source_sink.size(); + std::size_t encoded_file_size = 0; + + auto const mem_stats_logger = cudf::memory_stats_logger(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec(nvbench::exec_tag::timer | nvbench::exec_tag::sync, + [&](nvbench::launch& launch, auto& timer) { + cuio_source_sink_pair source_sink(sink_type); + + timer.start(); + cudf::io::csv_writer_options options = + cudf::io::csv_writer_options::builder(source_sink.make_sink_info(), view); + cudf::io::write_csv(options); + timer.stop(); + + encoded_file_size = source_sink.size(); + }); + + auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); + state.add_element_count(static_cast(data_size) / time, "bytes_per_second"); + state.add_buffer_size( + mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); + state.add_buffer_size(encoded_file_size, "encoded_file_size", "encoded_file_size"); } -void BM_csv_write_varying_options(benchmark::State& state) +void BM_csv_write_varying_options(nvbench::state& state) { - auto const na_per_len = state.range(0); - auto const rows_per_chunk = 1 << state.range(1); + auto const na_per_len = state.get_int64("na_per_len"); + auto const rows_per_chunk = state.get_int64("rows_per_chunk"); - auto const data_types = get_type_or_group({int32_t(type_group_id::INTEGRAL), - int32_t(type_group_id::FLOATING_POINT), - int32_t(type_group_id::FIXED_POINT), - int32_t(type_group_id::TIMESTAMP), - int32_t(type_group_id::DURATION), - int32_t(cudf::type_id::STRING)}); + auto const data_types = get_type_or_group({static_cast(data_type::INTEGRAL), + static_cast(data_type::FLOAT), + static_cast(data_type::DECIMAL), + static_cast(data_type::TIMESTAMP), + 
static_cast(data_type::DURATION), + static_cast(data_type::STRING)}); auto const tbl = create_random_table(data_types, table_size_bytes{data_size}); auto const view = tbl->view(); std::string const na_per(na_per_len, '#'); - cuio_source_sink_pair source_sink(io_type::HOST_BUFFER); - auto mem_stats_logger = cudf::memory_stats_logger(); - for (auto _ : state) { - cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 - cudf::io::csv_writer_options options = - cudf::io::csv_writer_options::builder(source_sink.make_sink_info(), view) - .na_rep(na_per) - .rows_per_chunk(rows_per_chunk); - cudf::io::write_csv(options); - } - - state.SetBytesProcessed(data_size * state.iterations()); - state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage(); - state.counters["encoded_file_size"] = source_sink.size(); + std::size_t encoded_file_size = 0; + + auto const mem_stats_logger = cudf::memory_stats_logger(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec(nvbench::exec_tag::timer | nvbench::exec_tag::sync, + [&](nvbench::launch& launch, auto& timer) { + cuio_source_sink_pair source_sink(io_type::HOST_BUFFER); + + timer.start(); + cudf::io::csv_writer_options options = + cudf::io::csv_writer_options::builder(source_sink.make_sink_info(), view) + .na_rep(na_per) + .rows_per_chunk(rows_per_chunk); + cudf::io::write_csv(options); + timer.stop(); + + encoded_file_size = source_sink.size(); + }); + + auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); + state.add_element_count(static_cast(data_size) / time, "bytes_per_second"); + state.add_buffer_size( + mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); + state.add_buffer_size(encoded_file_size, "encoded_file_size", "encoded_file_size"); } -#define CSV_WR_BM_INOUTS_DEFINE(name, type_or_group, sink_type) \ - BENCHMARK_DEFINE_F(CsvWrite, name) \ - (::benchmark::State & state) { 
BM_csv_write_varying_inout(state); } \ - BENCHMARK_REGISTER_F(CsvWrite, name) \ - ->Args({int32_t(type_or_group), sink_type}) \ - ->Unit(benchmark::kMillisecond) \ - ->UseManualTime(); - -WR_BENCHMARK_DEFINE_ALL_SINKS(CSV_WR_BM_INOUTS_DEFINE, integral, type_group_id::INTEGRAL); -WR_BENCHMARK_DEFINE_ALL_SINKS(CSV_WR_BM_INOUTS_DEFINE, floats, type_group_id::FLOATING_POINT); -WR_BENCHMARK_DEFINE_ALL_SINKS(CSV_WR_BM_INOUTS_DEFINE, decimal, type_group_id::FIXED_POINT); -WR_BENCHMARK_DEFINE_ALL_SINKS(CSV_WR_BM_INOUTS_DEFINE, timestamps, type_group_id::TIMESTAMP); -WR_BENCHMARK_DEFINE_ALL_SINKS(CSV_WR_BM_INOUTS_DEFINE, durations, type_group_id::DURATION); -WR_BENCHMARK_DEFINE_ALL_SINKS(CSV_WR_BM_INOUTS_DEFINE, string, cudf::type_id::STRING); - -BENCHMARK_DEFINE_F(CsvWrite, writer_options) -(::benchmark::State& state) { BM_csv_write_varying_options(state); } -BENCHMARK_REGISTER_F(CsvWrite, writer_options) - ->ArgsProduct({{0, 16}, {8, 10, 12, 14, 16, 18, 20}}) - ->Unit(benchmark::kMillisecond) - ->UseManualTime(); +using d_type_list = nvbench::enum_type_list; + +using io_list = nvbench::enum_type_list; + +NVBENCH_BENCH_TYPES(BM_csv_write_dtype_io, NVBENCH_TYPE_AXES(d_type_list, io_list)) + .set_name("csv_write_dtype_io") + .set_type_axes_names({"data_type", "io"}) + .set_min_samples(4); + +NVBENCH_BENCH(BM_csv_write_varying_options) + .set_name("csv_write_options") + .set_min_samples(4) + .add_int64_axis("na_per_len", {0, 16}) + .add_int64_power_of_two_axis("rows_per_chunk", nvbench::range(8, 20, 2)); diff --git a/cpp/benchmarks/io/cuio_common.hpp b/cpp/benchmarks/io/cuio_common.hpp index eec165098ae..34adae30505 100644 --- a/cpp/benchmarks/io/cuio_common.hpp +++ b/cpp/benchmarks/io/cuio_common.hpp @@ -26,11 +26,6 @@ using cudf::io::io_type; -#define WR_BENCHMARK_DEFINE_ALL_SINKS(benchmark, name, type_or_group) \ - benchmark(name##_file_output, type_or_group, static_cast(io_type::FILEPATH)); \ - benchmark(name##_buffer_output, type_or_group, 
static_cast(io_type::HOST_BUFFER)); \ - benchmark(name##_void_output, type_or_group, static_cast(io_type::VOID)); - std::string random_file_in_dir(std::string const& dir_path); /** diff --git a/cpp/benchmarks/io/fst.cu b/cpp/benchmarks/io/fst.cu index 7acf69e9d8e..7fb505f1d34 100644 --- a/cpp/benchmarks/io/fst.cu +++ b/cpp/benchmarks/io/fst.cu @@ -37,11 +37,10 @@ #include -namespace cudf { namespace { auto make_test_json_data(nvbench::state& state) { - auto const string_size{size_type(state.get_int64("string_size"))}; + auto const string_size{cudf::size_type(state.get_int64("string_size"))}; // Test input std::string input = R"( {)" @@ -59,13 +58,12 @@ auto make_test_json_data(nvbench::state& state) R"("price": 8.95)" R"(} {} [] [ ])"; - auto d_input_scalar = cudf::make_string_scalar(input); - auto& d_string_scalar = static_cast(*d_input_scalar); - const size_type repeat_times = string_size / input.size(); + auto d_input_scalar = cudf::make_string_scalar(input); + auto& d_string_scalar = static_cast(*d_input_scalar); + const cudf::size_type repeat_times = string_size / input.size(); return cudf::strings::repeat_string(d_string_scalar, repeat_times); } -using namespace cudf::test::io::json; // Type used to represent the atomic symbol type used within the finite-state machine using SymbolT = char; // Type sufficiently large to index symbols within the input and output (may be unsigned) @@ -78,9 +76,9 @@ constexpr std::size_t single_item = 1; void BM_FST_JSON(nvbench::state& state) { - CUDF_EXPECTS(state.get_int64("string_size") <= std::numeric_limits::max(), + CUDF_EXPECTS(state.get_int64("string_size") <= std::numeric_limits::max(), "Benchmarks only support up to size_type's maximum number of items"); - auto const string_size{size_type(state.get_int64("string_size"))}; + auto const string_size{cudf::size_type(state.get_int64("string_size"))}; // Prepare cuda stream for data transfers & kernels rmm::cuda_stream stream{}; rmm::cuda_stream_view stream_view(stream); @@ 
-113,9 +111,9 @@ void BM_FST_JSON(nvbench::state& state) void BM_FST_JSON_no_outidx(nvbench::state& state) { - CUDF_EXPECTS(state.get_int64("string_size") <= std::numeric_limits::max(), + CUDF_EXPECTS(state.get_int64("string_size") <= std::numeric_limits::max(), "Benchmarks only support up to size_type's maximum number of items"); - auto const string_size{size_type(state.get_int64("string_size"))}; + auto const string_size{cudf::size_type(state.get_int64("string_size"))}; // Prepare cuda stream for data transfers & kernels rmm::cuda_stream stream{}; rmm::cuda_stream_view stream_view(stream); @@ -148,9 +146,9 @@ void BM_FST_JSON_no_outidx(nvbench::state& state) void BM_FST_JSON_no_out(nvbench::state& state) { - CUDF_EXPECTS(state.get_int64("string_size") <= std::numeric_limits::max(), + CUDF_EXPECTS(state.get_int64("string_size") <= std::numeric_limits::max(), "Benchmarks only support up to size_type's maximum number of items"); - auto const string_size{size_type(state.get_int64("string_size"))}; + auto const string_size{cudf::size_type(state.get_int64("string_size"))}; // Prepare cuda stream for data transfers & kernels rmm::cuda_stream stream{}; rmm::cuda_stream_view stream_view(stream); @@ -181,9 +179,9 @@ void BM_FST_JSON_no_out(nvbench::state& state) void BM_FST_JSON_no_str(nvbench::state& state) { - CUDF_EXPECTS(state.get_int64("string_size") <= std::numeric_limits::max(), + CUDF_EXPECTS(state.get_int64("string_size") <= std::numeric_limits::max(), "Benchmarks only support up to size_type's maximum number of items"); - auto const string_size{size_type(state.get_int64("string_size"))}; + auto const string_size{cudf::size_type(state.get_int64("string_size"))}; // Prepare cuda stream for data transfers & kernels rmm::cuda_stream stream{}; rmm::cuda_stream_view stream_view(stream); @@ -228,5 +226,3 @@ NVBENCH_BENCH(BM_FST_JSON_no_out) NVBENCH_BENCH(BM_FST_JSON_no_str) .set_name("FST_JSON_no_str") .add_int64_power_of_two_axis("string_size", nvbench::range(20, 30, 
1)); - -} // namespace cudf diff --git a/cpp/benchmarks/io/json/json_reader_input.cpp b/cpp/benchmarks/io/json/json_reader_input.cpp new file mode 100644 index 00000000000..55614d040d5 --- /dev/null +++ b/cpp/benchmarks/io/json/json_reader_input.cpp @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include + +#include + +// Size of the data in the benchmark dataframe; chosen to be low enough to allow benchmarks to +// run on most GPUs, but large enough to allow highest throughput +constexpr size_t data_size = 512 << 20; +constexpr cudf::size_type num_cols = 64; + +void json_read_common(cudf::io::json_writer_options const& write_opts, + cuio_source_sink_pair& source_sink, + nvbench::state& state) +{ + cudf::io::write_json(write_opts); + + cudf::io::json_reader_options read_opts = + cudf::io::json_reader_options::builder(source_sink.make_source_info()); + + auto mem_stats_logger = cudf::memory_stats_logger(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer, + [&](nvbench::launch& launch, auto& timer) { + try_drop_l3_cache(); + + timer.start(); + cudf::io::read_json(read_opts); + timer.stop(); + }); + + auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); + 
state.add_element_count(static_cast(data_size) / time, "bytes_per_second"); + state.add_buffer_size( + mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); + state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size"); +} + +template +void BM_json_read_io(nvbench::state& state, nvbench::type_list>) +{ + auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL), + static_cast(data_type::FLOAT), + static_cast(data_type::DECIMAL), + static_cast(data_type::TIMESTAMP), + static_cast(data_type::DURATION), + static_cast(data_type::STRING), + static_cast(data_type::LIST), + static_cast(data_type::STRUCT)}); + + auto const source_type = IO; + + auto const tbl = create_random_table( + cycle_dtypes(d_type, num_cols), table_size_bytes{data_size}, data_profile_builder()); + auto const view = tbl->view(); + + cuio_source_sink_pair source_sink(source_type); + cudf::io::json_writer_options const write_opts = + cudf::io::json_writer_options::builder(source_sink.make_sink_info(), view).na_rep("null"); + + json_read_common(write_opts, source_sink, state); +} + +template +void BM_json_read_data_type( + nvbench::state& state, nvbench::type_list, nvbench::enum_type>) +{ + auto const d_type = get_type_or_group(static_cast(DataType)); + auto const source_type = IO; + + auto const tbl = create_random_table( + cycle_dtypes(d_type, num_cols), table_size_bytes{data_size}, data_profile_builder()); + auto const view = tbl->view(); + + cuio_source_sink_pair source_sink(source_type); + cudf::io::json_writer_options const write_opts = + cudf::io::json_writer_options::builder(source_sink.make_sink_info(), view).na_rep("null"); + + json_read_common(write_opts, source_sink, state); +} + +using d_type_list = nvbench::enum_type_list; + +using io_list = nvbench::enum_type_list; + +using compression_list = + nvbench::enum_type_list; + +NVBENCH_BENCH_TYPES(BM_json_read_data_type, + NVBENCH_TYPE_AXES(d_type_list, + nvbench::enum_type_list)) 
+ .set_name("json_read_data_type") + .set_type_axes_names({"data_type", "io"}) + .set_min_samples(4); + +NVBENCH_BENCH_TYPES(BM_json_read_io, NVBENCH_TYPE_AXES(io_list)) + .set_name("json_read_io") + .set_type_axes_names({"io"}) + .set_min_samples(4); diff --git a/cpp/benchmarks/io/json/json_writer.cpp b/cpp/benchmarks/io/json/json_writer.cpp new file mode 100644 index 00000000000..ee183b327fe --- /dev/null +++ b/cpp/benchmarks/io/json/json_writer.cpp @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +#include +#include + +#include + +// Size of the data in the benchmark dataframe; chosen to be low enough to allow benchmarks to +// run on most GPUs, but large enough to allow highest throughput +constexpr size_t data_size = 512 << 20; +constexpr cudf::size_type num_cols = 64; + +void json_write_common(cudf::io::json_writer_options const& write_opts, + cuio_source_sink_pair& source_sink, + size_t const data_size, + nvbench::state& state) +{ + auto mem_stats_logger = cudf::memory_stats_logger(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer, + [&](nvbench::launch& launch, auto& timer) { + try_drop_l3_cache(); + + timer.start(); + cudf::io::write_json(write_opts); + timer.stop(); + }); + + auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); + state.add_element_count(static_cast(data_size) / time, "bytes_per_second"); + state.add_buffer_size( + mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); + state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size"); +} + +template +void BM_json_write_io(nvbench::state& state, nvbench::type_list>) +{ + auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL), + static_cast(data_type::FLOAT), + static_cast(data_type::DECIMAL), + static_cast(data_type::TIMESTAMP), + static_cast(data_type::DURATION), + static_cast(data_type::STRING), + static_cast(data_type::LIST), + static_cast(data_type::STRUCT)}); + + auto const source_type = IO; + + auto const tbl = create_random_table( + cycle_dtypes(d_type, num_cols), table_size_bytes{data_size}, data_profile_builder()); + auto const view = tbl->view(); + + cuio_source_sink_pair source_sink(source_type); + cudf::io::json_writer_options write_opts = + cudf::io::json_writer_options::builder(source_sink.make_sink_info(), 
view).na_rep("null"); + + json_write_common(write_opts, source_sink, data_size, state); +} + +void BM_json_writer_options(nvbench::state& state) +{ + auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL), + static_cast(data_type::FLOAT), + static_cast(data_type::DECIMAL), + static_cast(data_type::TIMESTAMP), + static_cast(data_type::DURATION), + static_cast(data_type::STRING), + static_cast(data_type::LIST), + static_cast(data_type::STRUCT)}); + + auto const source_type = io_type::HOST_BUFFER; + bool const json_lines = state.get_int64("json_lines"); + bool const include_nulls = state.get_int64("include_nulls"); + auto const rows_per_chunk = state.get_int64("rows_per_chunk"); + + auto const tbl = create_random_table( + cycle_dtypes(d_type, num_cols), table_size_bytes{data_size}, data_profile_builder()); + auto const view = tbl->view(); + + cuio_source_sink_pair source_sink(source_type); + cudf::io::json_writer_options write_opts = + cudf::io::json_writer_options::builder(source_sink.make_sink_info(), view) + .na_rep("null") + .lines(json_lines) + .include_nulls(include_nulls) + .rows_per_chunk(rows_per_chunk); + + json_write_common(write_opts, source_sink, data_size, state); +} + +using io_list = nvbench::enum_type_list; + +NVBENCH_BENCH_TYPES(BM_json_write_io, NVBENCH_TYPE_AXES(io_list)) + .set_name("json_write_io") + .set_type_axes_names({"io"}) + .set_min_samples(4); + +NVBENCH_BENCH(BM_json_writer_options) + .set_name("json_write_options") + .set_min_samples(4) + .add_int64_axis("json_lines", {false, true}) + .add_int64_axis("include_nulls", {false, true}) + .add_int64_power_of_two_axis("rows_per_chunk", nvbench::range(10, 20, 2)); diff --git a/cpp/benchmarks/lists/copying/scatter_lists.cu b/cpp/benchmarks/lists/copying/scatter_lists.cu index 02ad97fee11..c913a093edd 100644 --- a/cpp/benchmarks/lists/copying/scatter_lists.cu +++ b/cpp/benchmarks/lists/copying/scatter_lists.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. 
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -32,8 +32,6 @@ #include -namespace cudf { - class ScatterLists : public cudf::benchmark { }; @@ -43,14 +41,20 @@ void BM_lists_scatter(::benchmark::State& state) auto stream = cudf::get_default_stream(); auto mr = rmm::mr::get_current_device_resource(); - const size_type base_size{(size_type)state.range(0)}; - const size_type num_elements_per_row{(size_type)state.range(1)}; - const auto num_rows = (size_type)ceil(double(base_size) / num_elements_per_row); - - auto source_base_col = make_fixed_width_column( - data_type{type_to_id()}, base_size, mask_state::UNALLOCATED, stream, mr); - auto target_base_col = make_fixed_width_column( - data_type{type_to_id()}, base_size, mask_state::UNALLOCATED, stream, mr); + const cudf::size_type base_size{(cudf::size_type)state.range(0)}; + const cudf::size_type num_elements_per_row{(cudf::size_type)state.range(1)}; + const auto num_rows = (cudf::size_type)ceil(double(base_size) / num_elements_per_row); + + auto source_base_col = make_fixed_width_column(cudf::data_type{cudf::type_to_id()}, + base_size, + cudf::mask_state::UNALLOCATED, + stream, + mr); + auto target_base_col = make_fixed_width_column(cudf::data_type{cudf::type_to_id()}, + base_size, + cudf::mask_state::UNALLOCATED, + stream, + mr); thrust::sequence(rmm::exec_policy(stream), source_base_col->mutable_view().begin(), source_base_col->mutable_view().end()); @@ -58,19 +62,27 @@ void BM_lists_scatter(::benchmark::State& state) target_base_col->mutable_view().begin(), target_base_col->mutable_view().end()); - auto source_offsets = make_fixed_width_column( - data_type{type_to_id()}, num_rows + 1, mask_state::UNALLOCATED, stream, mr); - auto target_offsets = make_fixed_width_column( - data_type{type_to_id()}, num_rows + 1, mask_state::UNALLOCATED, stream, mr); + auto source_offsets = + 
make_fixed_width_column(cudf::data_type{cudf::type_to_id()}, + num_rows + 1, + cudf::mask_state::UNALLOCATED, + stream, + mr); + auto target_offsets = + make_fixed_width_column(cudf::data_type{cudf::type_to_id()}, + num_rows + 1, + cudf::mask_state::UNALLOCATED, + stream, + mr); thrust::sequence(rmm::exec_policy(stream), - source_offsets->mutable_view().begin(), - source_offsets->mutable_view().end(), + source_offsets->mutable_view().begin(), + source_offsets->mutable_view().end(), 0, num_elements_per_row); thrust::sequence(rmm::exec_policy(stream), - target_offsets->mutable_view().begin(), - target_offsets->mutable_view().end(), + target_offsets->mutable_view().begin(), + target_offsets->mutable_view().end(), 0, num_elements_per_row); @@ -78,37 +90,40 @@ void BM_lists_scatter(::benchmark::State& state) std::move(source_offsets), std::move(source_base_col), 0, - cudf::create_null_mask(num_rows, mask_state::UNALLOCATED), + cudf::create_null_mask(num_rows, cudf::mask_state::UNALLOCATED), stream, mr); auto target = make_lists_column(num_rows, std::move(target_offsets), std::move(target_base_col), 0, - cudf::create_null_mask(num_rows, mask_state::UNALLOCATED), + cudf::create_null_mask(num_rows, cudf::mask_state::UNALLOCATED), stream, mr); - auto scatter_map = make_fixed_width_column( - data_type{type_to_id()}, num_rows, mask_state::UNALLOCATED, stream, mr); + auto scatter_map = make_fixed_width_column(cudf::data_type{cudf::type_to_id()}, + num_rows, + cudf::mask_state::UNALLOCATED, + stream, + mr); auto m_scatter_map = scatter_map->mutable_view(); thrust::sequence(rmm::exec_policy(stream), - m_scatter_map.begin(), - m_scatter_map.end(), + m_scatter_map.begin(), + m_scatter_map.end(), num_rows - 1, -1); if (not coalesce) { thrust::default_random_engine g; thrust::shuffle(rmm::exec_policy(stream), - m_scatter_map.begin(), - m_scatter_map.begin(), + m_scatter_map.begin(), + m_scatter_map.begin(), g); } for (auto _ : state) { cuda_event_timer raii(state, true); // 
flush_l2_cache = true, stream = 0 - scatter(table_view{{*source}}, *scatter_map, table_view{{*target}}, mr); + scatter(cudf::table_view{{*source}}, *scatter_map, cudf::table_view{{*target}}, mr); } state.SetBytesProcessed(static_cast(state.iterations()) * state.range(0) * 2 * @@ -127,5 +142,3 @@ void BM_lists_scatter(::benchmark::State& state) SBM_BENCHMARK_DEFINE(double_type_colesce_o, double, true); SBM_BENCHMARK_DEFINE(double_type_colesce_x, double, false); - -} // namespace cudf diff --git a/cpp/benchmarks/sort/nested_types_common.hpp b/cpp/benchmarks/sort/nested_types_common.hpp index c4851823534..fabef3a7a51 100644 --- a/cpp/benchmarks/sort/nested_types_common.hpp +++ b/cpp/benchmarks/sort/nested_types_common.hpp @@ -21,7 +21,10 @@ #include +// This error appears in GCC 11.3 and may be a compiler bug or nvbench bug. +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" #include +#pragma GCC diagnostic pop #include diff --git a/cpp/benchmarks/string/contains.cpp b/cpp/benchmarks/string/contains.cpp index f7f394ea048..a04915d1df8 100644 --- a/cpp/benchmarks/string/contains.cpp +++ b/cpp/benchmarks/string/contains.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -83,18 +84,19 @@ static void BM_contains(benchmark::State& state, contains_type ct) auto input = cudf::strings_column_view(col->view()); auto pattern = patterns[pattern_index]; + auto program = cudf::strings::regex_program::create(pattern); for (auto _ : state) { cuda_event_timer raii(state, true, cudf::get_default_stream()); switch (ct) { case contains_type::contains: // contains_re and matches_re use the same main logic - cudf::strings::contains_re(input, pattern); + cudf::strings::contains_re(input, *program); break; case contains_type::count: // counts occurrences of matches - cudf::strings::count_re(input, pattern); + cudf::strings::count_re(input, *program); break; case contains_type::findall: // returns occurrences of all matches - cudf::strings::findall(input, pattern); + cudf::strings::findall(input, *program); break; } } diff --git a/cpp/benchmarks/string/extract.cpp b/cpp/benchmarks/string/extract.cpp index 4e9ac2f5395..4760956e049 100644 --- a/cpp/benchmarks/string/extract.cpp +++ b/cpp/benchmarks/string/extract.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -23,6 +23,7 @@ #include #include +#include #include #include @@ -59,10 +60,11 @@ static void BM_extract(benchmark::State& state, int groups) auto input = cudf::gather( cudf::table_view{{samples_column}}, map->view(), cudf::out_of_bounds_policy::DONT_CHECK); cudf::strings_column_view strings_view(input->get_column(0).view()); + auto prog = cudf::strings::regex_program::create(pattern); for (auto _ : state) { cuda_event_timer raii(state, true); - auto results = cudf::strings::extract(strings_view, pattern); + auto results = cudf::strings::extract(strings_view, *prog); } state.SetBytesProcessed(state.iterations() * strings_view.chars_size()); diff --git a/cpp/benchmarks/string/lengths.cpp b/cpp/benchmarks/string/lengths.cpp new file mode 100644 index 00000000000..4540e4a8f42 --- /dev/null +++ b/cpp/benchmarks/string/lengths.cpp @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include +#include +#include + +#include + +static void bench_lengths(nvbench::state& state) +{ + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const row_width = static_cast(state.get_int64("row_width")); + + if (static_cast(num_rows) * static_cast(row_width) >= + static_cast(std::numeric_limits::max())) { + state.skip("Skip benchmarks greater than size_type limit"); + } + + data_profile const table_profile = data_profile_builder().distribution( + cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + auto const table = + create_random_table({cudf::type_id::STRING}, row_count{num_rows}, table_profile); + cudf::strings_column_view input(table->view().column(0)); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + // gather some throughput statistics as well + auto chars_size = input.chars_size(); + state.add_global_memory_reads(chars_size); // all bytes are read; + state.add_global_memory_writes(num_rows); // output is an integer per row + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto result = cudf::strings::count_characters(input); + }); +} + +NVBENCH_BENCH(bench_lengths) + .set_name("strings_lengths") + .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}) + .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048, 4096}); diff --git a/cpp/benchmarks/string/replace_re.cpp b/cpp/benchmarks/string/replace_re.cpp index 7e9d6036750..b5dcf316a0e 100644 --- a/cpp/benchmarks/string/replace_re.cpp +++ b/cpp/benchmarks/string/replace_re.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -22,6 +22,7 @@ #include +#include #include #include #include @@ -40,18 +41,20 @@ static void BM_replace(benchmark::State& state, replace_type rt) auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile); cudf::strings_column_view input(column->view()); cudf::test::strings_column_wrapper repls({"#", ""}); + auto prog = cudf::strings::regex_program::create("\\d+"); + auto prog_backref = cudf::strings::regex_program::create("(\\d+)"); for (auto _ : state) { cuda_event_timer raii(state, true, cudf::get_default_stream()); switch (rt) { case replace_type::replace_re: // contains_re and matches_re use the same main logic - cudf::strings::replace_re(input, "\\d+"); + cudf::strings::replace_re(input, *prog); break; case replace_type::replace_re_multi: // counts occurrences of pattern cudf::strings::replace_re(input, {"\\d+", "\\s+"}, cudf::strings_column_view(repls)); break; case replace_type::replace_backref: // returns occurrences of matches - cudf::strings::replace_with_backrefs(input, "(\\d+)", "#\\1X"); + cudf::strings::replace_with_backrefs(input, *prog_backref, "#\\1X"); break; } } diff --git a/cpp/include/cudf/binaryop.hpp b/cpp/include/cudf/binaryop.hpp index 6371cb6c82b..77d6a4d1e89 100644 --- a/cpp/include/cudf/binaryop.hpp +++ b/cpp/include/cudf/binaryop.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -108,6 +108,7 @@ enum class binary_operator : int32_t { * @throw cudf::logic_error if @p output_type dtype isn't fixed-width * @throw cudf::logic_error if @p output_type dtype isn't boolean for comparison and logical * operations. 
+ * @throw cudf::data_type_error if the operation is not supported for the types of @p lhs and @p rhs */ std::unique_ptr binary_operation( scalar const& lhs, @@ -136,6 +137,7 @@ std::unique_ptr binary_operation( * @throw cudf::logic_error if @p output_type dtype isn't fixed-width * @throw cudf::logic_error if @p output_type dtype isn't boolean for comparison and logical * operations. + * @throw cudf::data_type_error if the operation is not supported for the types of @p lhs and @p rhs */ std::unique_ptr binary_operation( column_view const& lhs, @@ -163,6 +165,7 @@ std::unique_ptr binary_operation( * @throw cudf::logic_error if @p output_type dtype isn't boolean for comparison and logical * operations. * @throw cudf::logic_error if @p output_type dtype isn't fixed-width + * @throw cudf::data_type_error if the operation is not supported for the types of @p lhs and @p rhs */ std::unique_ptr binary_operation( column_view const& lhs, diff --git a/cpp/include/cudf/column/column.hpp b/cpp/include/cudf/column/column.hpp index c02991051d9..178fc92b399 100644 --- a/cpp/include/cudf/column/column.hpp +++ b/cpp/include/cudf/column/column.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -259,7 +259,7 @@ class column { * @brief Returns a reference to the specified child * * @param child_index Index of the desired child - * @return column& Reference to the desired child + * @return Reference to the desired child */ column& child(size_type child_index) noexcept { return *_children[child_index]; }; @@ -267,7 +267,7 @@ class column { * @brief Returns a const reference to the specified child * * @param child_index Index of the desired child - * @return column const& Const reference to the desired child + * @return Const reference to the desired child */ [[nodiscard]] column const& child(size_type child_index) const noexcept { @@ -306,7 +306,7 @@ class column { * @brief Creates an immutable, non-owning view of the column's data and * children. * - * @return column_view The immutable, non-owning view + * @return The immutable, non-owning view */ [[nodiscard]] column_view view() const; @@ -316,7 +316,7 @@ class column { * This allows passing a `column` object directly into a function that * requires a `column_view`. The conversion is automatic. * - * @return column_view Immutable, non-owning `column_view` + * @return Immutable, non-owning `column_view` */ operator column_view() const { return this->view(); }; @@ -330,7 +330,7 @@ class column { * if not, the null count will be recomputed on the next invocation of *`null_count()`. * - * @return mutable_column_view The mutable, non-owning view + * @return The mutable, non-owning view */ mutable_column_view mutable_view(); @@ -346,7 +346,7 @@ class column { * Otherwise, the null count will be recomputed on the next invocation of * `null_count()`. 
* - * @return mutable_column_view Mutable, non-owning `mutable_column_view` + * @return Mutable, non-owning `mutable_column_view` */ operator mutable_column_view() { return this->mutable_view(); }; diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index 1361866d0aa..b3e6ad0b99f 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -129,7 +129,7 @@ class alignas(16) column_device_view_base { * or `std::is_same_v` are true. * * @tparam The type to cast to - * @return T const* Typed pointer to underlying data + * @return Typed pointer to underlying data */ template or is_rep_layout_compatible())> @@ -151,7 +151,7 @@ class alignas(16) column_device_view_base { * false. * * @tparam T The type to cast to - * @return T const* Typed pointer to underlying data, including the offset + * @return Typed pointer to underlying data, including the offset */ template ())> [[nodiscard]] CUDF_HOST_DEVICE T const* data() const noexcept @@ -990,7 +990,7 @@ class alignas(16) mutable_column_device_view : public detail::column_device_view * `data()`. 
* * @tparam The type to cast to - * @return T* Typed pointer to underlying data + * @return Typed pointer to underlying data */ template or is_rep_layout_compatible())> @@ -1009,7 +1009,7 @@ class alignas(16) mutable_column_device_view : public detail::column_device_view * @note If `offset() == 0`, then `head() == data()` * * @tparam T The type to cast to - * @return T* Typed pointer to underlying data, including the offset + * @return Typed pointer to underlying data, including the offset */ template ())> CUDF_HOST_DEVICE T* data() const noexcept @@ -1078,7 +1078,7 @@ class alignas(16) mutable_column_device_view : public detail::column_device_view * `mutable_column_device_view::has_element_accessor()` is false. * * @tparam T The desired type - * @return T* Pointer to the first element after casting + * @return Pointer to the first element after casting */ template ())> iterator begin() @@ -1094,7 +1094,7 @@ class alignas(16) mutable_column_device_view : public detail::column_device_view * `mutable_column_device_view::has_element_accessor()` is false. * * @tparam T The desired type - * @return T const* Pointer to one past the last element after casting + * @return Pointer to one past the last element after casting */ template ())> iterator end() @@ -1106,7 +1106,7 @@ class alignas(16) mutable_column_device_view : public detail::column_device_view * @brief Returns the specified child * * @param child_index The index of the desired child - * @return column_view The requested child `column_view` + * @return The requested child `column_view` */ [[nodiscard]] __device__ mutable_column_device_view child(size_type child_index) const noexcept { @@ -1173,7 +1173,7 @@ class alignas(16) mutable_column_device_view : public detail::column_device_view * device view of the specified column and it's children. * * @param source_view The `column_view` to use for this calculation. 
- * @return size_t The size in bytes of the amount of memory needed to hold a + * @return The size in bytes of the amount of memory needed to hold a * device view of the specified column and it's children */ static std::size_t extent(mutable_column_view source_view); diff --git a/cpp/include/cudf/column/column_view.hpp b/cpp/include/cudf/column/column_view.hpp index 217f88e67f9..4889a62bbe4 100644 --- a/cpp/include/cudf/column/column_view.hpp +++ b/cpp/include/cudf/column/column_view.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -66,7 +66,7 @@ class column_view_base { * or `std::is_same_v` are true. * * @tparam The type to cast to - * @return T const* Typed pointer to underlying data + * @return Typed pointer to underlying data */ template or is_rep_layout_compatible())> @@ -85,7 +85,7 @@ class column_view_base { * false. * * @tparam T The type to cast to - * @return T const* Typed pointer to underlying data, including the offset + * @return Typed pointer to underlying data, including the offset */ template ())> T const* data() const noexcept @@ -101,7 +101,7 @@ class column_view_base { * false. * * @tparam T The desired type - * @return T const* Pointer to the first element after casting + * @return Pointer to the first element after casting */ template ())> T const* begin() const noexcept @@ -117,7 +117,7 @@ class column_view_base { * false. 
* * @tparam T The desired type - * @return T const* Pointer to one past the last element after casting + * @return Pointer to one past the last element after casting */ template ())> T const* end() const noexcept @@ -389,7 +389,7 @@ class column_view : public detail::column_view_base { * @brief Returns the specified child * * @param child_index The index of the desired child - * @return column_view The requested child `column_view` + * @return The requested child `column_view` */ [[nodiscard]] column_view child(size_type child_index) const noexcept { @@ -553,7 +553,7 @@ class mutable_column_view : public detail::column_view_base { * column, and instead, accessing the elements should be done via `data()`. * * @tparam The type to cast to - * @return T* Typed pointer to underlying data + * @return Typed pointer to underlying data */ template or is_rep_layout_compatible())> @@ -572,7 +572,7 @@ class mutable_column_view : public detail::column_view_base { * @note If `offset() == 0`, then `head() == data()` * * @tparam T The type to cast to - * @return T* Typed pointer to underlying data, including the offset + * @return Typed pointer to underlying data, including the offset */ template ())> T* data() const noexcept @@ -588,7 +588,7 @@ class mutable_column_view : public detail::column_view_base { * false. * * @tparam T The desired type - * @return T* Pointer to the first element after casting + * @return Pointer to the first element after casting */ template ())> T* begin() const noexcept @@ -604,7 +604,7 @@ class mutable_column_view : public detail::column_view_base { * false. 
* * @tparam T The desired type - * @return T* Pointer to one past the last element after casting + * @return Pointer to one past the last element after casting */ template ())> T* end() const noexcept @@ -639,7 +639,7 @@ class mutable_column_view : public detail::column_view_base { * @brief Returns a reference to the specified child * * @param child_index The index of the desired child - * @return mutable_column_view The requested child `mutable_column_view` + * @return The requested child `mutable_column_view` */ [[nodiscard]] mutable_column_view child(size_type child_index) const noexcept { @@ -670,7 +670,7 @@ class mutable_column_view : public detail::column_view_base { /** * @brief Converts a mutable view into an immutable view * - * @return column_view An immutable view of the mutable view's elements + * @return An immutable view of the mutable view's elements */ operator column_view() const; @@ -684,7 +684,7 @@ class mutable_column_view : public detail::column_view_base { * @brief Counts the number of descendants of the specified parent. * * @param parent The parent whose descendants will be counted - * @return size_type The number of descendants of the parent + * @return The number of descendants of the parent */ size_type count_descendants(column_view parent); diff --git a/cpp/include/cudf/concatenate.hpp b/cpp/include/cudf/concatenate.hpp index 1f8ce65ad93..2b4eee607e2 100644 --- a/cpp/include/cudf/concatenate.hpp +++ b/cpp/include/cudf/concatenate.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -40,8 +40,7 @@ namespace cudf { * * @param views host_span of column views whose bitmasks will be concatenated * @param mr Device memory resource used for allocating the new device_buffer - * @return rmm::device_buffer A `device_buffer` containing the bitmasks of all - * the column views in the views vector + * @return A `device_buffer` containing the bitmasks of all the column views in the views vector */ rmm::device_buffer concatenate_masks( host_span views, @@ -50,8 +49,8 @@ rmm::device_buffer concatenate_masks( /** * @brief Concatenates multiple columns into a single column. * - * @throws cudf::logic_error - * If types of the input columns mismatch + * @throws cudf::logic_error If types of the input columns mismatch + * @throws std::overflow_error If the total number of output rows exceeds cudf::size_type * * @param columns_to_concat host_span of column views to be concatenated into a single column * @param mr Device memory resource used to allocate the returned column's device memory @@ -80,8 +79,8 @@ std::unique_ptr concatenate( * column_view tc1 = (t->view()).column(1); //Contains {0,1,2,3,4,5,6,7} * ``` * - * @throws cudf::logic_error - * If number of columns mismatch + * @throws cudf::logic_error If number of columns mismatch + * @throws std::overflow_error If the total number of output rows exceeds cudf::size_type * * @param tables_to_concat host_span of table views to be concatenated into a single table * @param mr Device memory resource used to allocate the returned table's device memory diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp index 63c66335d2d..d5a3c930853 100644 --- a/cpp/include/cudf/copying.hpp +++ b/cpp/include/cudf/copying.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * Copyright (c) 2018-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License.
@@ -78,7 +78,7 @@ enum class out_of_bounds_policy : bool { * better performance. If `policy` is set to `DONT_CHECK` and there are out-of-bounds indices * in the gather map, the behavior is undefined. Defaults to `DONT_CHECK`. * @param[in] mr Device memory resource used to allocate the returned table's device memory - * @return std::unique_ptr Result of the gather + * @return Result of the gather */ std::unique_ptr
gather( table_view const& source_table, @@ -211,7 +211,7 @@ enum class mask_allocation_policy { * @brief Initializes and returns an empty column of the same type as the `input`. * * @param[in] input Immutable view of input column to emulate - * @return std::unique_ptr An empty column of same type as `input` + * @return An empty column of same type as `input` */ std::unique_ptr empty_like(column_view const& input); @@ -219,7 +219,7 @@ std::unique_ptr empty_like(column_view const& input); * @brief Initializes and returns an empty column of the same type as the `input`. * * @param[in] input Scalar to emulate - * @return std::unique_ptr An empty column of same type as `input` + * @return An empty column of same type as `input` */ std::unique_ptr empty_like(scalar const& input); @@ -264,7 +264,7 @@ std::unique_ptr allocate_like( * memory for the column's data or bitmask. * * @param[in] input_table Immutable view of input table to emulate - * @return std::unique_ptr
A table of empty columns with the same types as the columns in + * @return A table of empty columns with the same types as the columns in * `input_table` */ std::unique_ptr
empty_like(table_view const& input_table); @@ -333,7 +333,7 @@ void copy_range_in_place(column_view const& source, * (exclusive) * @param target_begin The starting index of the target range (inclusive) * @param mr Device memory resource used to allocate the returned column's device memory - * @return std::unique_ptr The result target column + * @return The result target column */ std::unique_ptr copy_range( column_view const& source, @@ -920,7 +920,7 @@ std::unique_ptr
boolean_mask_scatter( * @param input Column view to get the element from * @param index Index into `input` to get the element at * @param mr Device memory resource used to allocate the returned scalar's device memory - * @return std::unique_ptr Scalar containing the single value + * @return Scalar containing the single value */ std::unique_ptr get_element( column_view const& input, @@ -960,7 +960,7 @@ enum class sample_with_replacement : bool { * @param seed Seed value to initiate random number generator * @param mr Device memory resource used to allocate the returned table's device memory * - * @return std::unique_ptr
Table containing samples from `input` + * @return Table containing samples from `input` */ std::unique_ptr
sample( table_view const& input, diff --git a/cpp/include/cudf/detail/groupby/sort_helper.hpp b/cpp/include/cudf/detail/groupby/sort_helper.hpp index 3ef7a7d5acf..e2510d75a83 100644 --- a/cpp/include/cudf/detail/groupby/sort_helper.hpp +++ b/cpp/include/cudf/detail/groupby/sort_helper.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,7 +18,6 @@ #include #include -#include #include #include diff --git a/cpp/include/cudf/detail/join.hpp b/cpp/include/cudf/detail/join.hpp index 2dfe31091ac..e539b1a34c8 100644 --- a/cpp/include/cudf/detail/join.hpp +++ b/cpp/include/cudf/detail/join.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,7 +16,6 @@ #pragma once #include -#include #include #include #include @@ -37,6 +36,10 @@ template class default_allocator; +namespace cudf::structs::detail { +class flattened_table; +} + namespace cudf { namespace detail { @@ -74,7 +77,7 @@ struct hash_join { rmm::device_buffer const _composite_bitmask; ///< Bitmask to denote whether a row is valid cudf::null_equality const _nulls_equal; ///< whether to consider nulls as equal cudf::table_view _build; ///< input table to build the hash map - cudf::structs::detail::flattened_table + std::unique_ptr _flattened_build_table; ///< flattened data structures for `_build` map_type _hash_table; ///< hash table built on `_build` diff --git a/cpp/include/cudf/detail/structs/utilities.hpp b/cpp/include/cudf/detail/structs/utilities.hpp index c8b758ca337..4a708d2fb51 100644 --- a/cpp/include/cudf/detail/structs/utilities.hpp +++ b/cpp/include/cudf/detail/structs/utilities.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -162,16 +162,20 @@ class flattened_table { * @param input input table to be flattened * @param column_order column order for input table * @param null_precedence null order for input table - * @param nullability force output to have nullability columns even if input columns - * are all valid - * @return `flatten_result` with flattened table, flattened column order, flattened null precedence, - * alongside the supporting columns and device_buffers for the flattened table. 
+ * @param nullability force output to have nullability columns even if input columns are all valid + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate new device memory + * @return A pointer of type `flattened_table` containing flattened columns, flattened column + * orders, flattened null precedence, alongside the supporting columns and device_buffers + * for the flattened table. */ -[[nodiscard]] flattened_table flatten_nested_columns( +[[nodiscard]] std::unique_ptr flatten_nested_columns( table_view const& input, std::vector const& column_order, std::vector const& null_precedence, - column_nullability nullability = column_nullability::MATCH_INCOMING); + column_nullability nullability, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @brief Superimpose nulls from a given null mask into the input column, using bitwise AND. diff --git a/cpp/include/cudf/detail/unary.hpp b/cpp/include/cudf/detail/unary.hpp index 0e1c047d9b0..b7ecedc1489 100644 --- a/cpp/include/cudf/detail/unary.hpp +++ b/cpp/include/cudf/detail/unary.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * Copyright (c) 2018-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -74,6 +74,16 @@ std::unique_ptr unary_operation( rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @copydoc cudf::is_valid + * + * @param stream CUDA stream used for device memory operations and kernel launches. 
+ */ +std::unique_ptr is_valid( + cudf::column_view const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @copydoc cudf::cast * diff --git a/cpp/include/cudf/io/data_sink.hpp b/cpp/include/cudf/io/data_sink.hpp index cf3e94029be..0be2935b84c 100644 --- a/cpp/include/cudf/io/data_sink.hpp +++ b/cpp/include/cudf/io/data_sink.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -123,7 +123,7 @@ class data_sink { * instead of write() when possible. However, it is still possible to receive * write() calls as well. * - * @return bool If this writer supports device_write() calls + * @return If this writer supports device_write() calls */ [[nodiscard]] virtual bool supports_device_write() const { return false; } @@ -194,7 +194,7 @@ class data_sink { /** * @pure @brief Returns the total number of bytes written into this sink * - * @return size_t Total number of bytes written into this sink + * @return Total number of bytes written into this sink */ virtual size_t bytes_written() = 0; }; diff --git a/cpp/include/cudf/io/datasource.hpp b/cpp/include/cudf/io/datasource.hpp index a0ef2155f7d..12b8377bff2 100644 --- a/cpp/include/cudf/io/datasource.hpp +++ b/cpp/include/cudf/io/datasource.hpp @@ -296,14 +296,14 @@ class datasource { /** * @brief Returns the size of the data in the source. * - * @return size_t The size of the source data in bytes + * @return The size of the source data in bytes */ [[nodiscard]] virtual size_t size() const = 0; /** * @brief Returns whether the source contains any data. 
* - * @return bool True if there is data, False otherwise + * @return True if there is data, False otherwise */ [[nodiscard]] virtual bool is_empty() const { return size() == 0; } diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index f4fb4d91f58..92b69deb671 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -494,7 +494,7 @@ class parquet_writer_options { // Maximum size of column chunk dictionary (in bytes) size_t _max_dictionary_size = default_max_dictionary_size; // Maximum number of rows in a page fragment - size_type _max_page_fragment_size = default_max_page_fragment_size; + std::optional _max_page_fragment_size; /** * @brief Constructor from sink and table. @@ -1076,7 +1076,7 @@ class chunked_parquet_writer_options { // Maximum size of column chunk dictionary (in bytes) size_t _max_dictionary_size = default_max_dictionary_size; // Maximum number of rows in a page fragment - size_type _max_page_fragment_size = default_max_page_fragment_size; + std::optional _max_page_fragment_size; /** * @brief Constructor from sink. diff --git a/cpp/include/cudf/lists/combine.hpp b/cpp/include/cudf/lists/combine.hpp index 4f211e87cc7..531396e940e 100644 --- a/cpp/include/cudf/lists/combine.hpp +++ b/cpp/include/cudf/lists/combine.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -80,10 +80,10 @@ std::unique_ptr concatenate_rows( * r is [ {1, 2, 3, 4, 5}, {6, 7, 8, 9} ] * @endcode * - * @throws cudf::logic_error if the input column is not at least two-level depth lists column (i.e., - * each row must be a list of list). + * @throws std::invalid_argument if the input column is not at least two-level depth lists column + * (i.e., each row must be a list of list). 
* @throws cudf::logic_error if the input lists column contains nested typed entries that are not - * lists. + * lists. * * @param input The lists column containing lists of list elements to concatenate. * @param null_policy The parameter to specify whether a null list element will be ignored from diff --git a/cpp/include/cudf/lists/contains.hpp b/cpp/include/cudf/lists/contains.hpp index d2b4d59dfba..fbe931f945d 100644 --- a/cpp/include/cudf/lists/contains.hpp +++ b/cpp/include/cudf/lists/contains.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -43,7 +43,7 @@ namespace lists { * @param lists Lists column whose `n` rows are to be searched * @param search_key The scalar key to be looked up in each list row * @param mr Device memory resource used to allocate the returned column's device memory. - * @return std::unique_ptr BOOL8 column of `n` rows with the result of the lookup + * @return BOOL8 column of `n` rows with the result of the lookup */ std::unique_ptr contains( cudf::lists_column_view const& lists, @@ -65,7 +65,7 @@ std::unique_ptr contains( * @param lists Lists column whose `n` rows are to be searched * @param search_keys Column of elements to be looked up in each list row * @param mr Device memory resource used to allocate the returned column's device memory. - * @return std::unique_ptr BOOL8 column of `n` rows with the result of the lookup + * @return BOOL8 column of `n` rows with the result of the lookup */ std::unique_ptr contains( cudf::lists_column_view const& lists, @@ -86,7 +86,7 @@ std::unique_ptr contains( * * @param lists Lists column whose `n` rows are to be searched * @param mr Device memory resource used to allocate the returned column's device memory. 
- * @return std::unique_ptr BOOL8 column of `n` rows with the result of the lookup + * @return BOOL8 column of `n` rows with the result of the lookup */ std::unique_ptr contains_nulls( cudf::lists_column_view const& lists, @@ -124,11 +124,9 @@ enum class duplicate_find_option : int32_t { * @param find_option Whether to return the position of the first match (`FIND_FIRST`) or * last (`FIND_LAST`) * @param mr Device memory resource used to allocate the returned column's device memory. - * @return std::unique_ptr INT32 column of `n` rows with the location of the `search_key` + * @return INT32 column of `n` rows with the location of the `search_key` * - * @throw cudf::logic_error If `search_key` type does not match the element type in `lists` - * @throw cudf::logic_error If `search_key` is of a nested type, or `lists` contains nested - * elements (LIST, STRUCT) + * @throw cudf::data_type_error If `search_keys` type does not match the element type in `lists` */ std::unique_ptr index_of( cudf::lists_column_view const& lists, @@ -160,11 +158,10 @@ std::unique_ptr index_of( * @param find_option Whether to return the position of the first match (`FIND_FIRST`) or * last (`FIND_LAST`) * @param mr Device memory resource used to allocate the returned column's device memory. 
- * @return std::unique_ptr INT32 column of `n` rows with the location of the `search_key` + * @return INT32 column of `n` rows with the location of the `search_key` * * @throw cudf::logic_error If `search_keys` does not match `lists` in its number of rows - * @throw cudf::logic_error If `search_keys` type does not match the element type in `lists` - * @throw cudf::logic_error If `lists` or `search_keys` contains nested elements (LIST, STRUCT) + * @throw cudf::data_type_error If `search_keys` type does not match the element type in `lists` */ std::unique_ptr index_of( cudf::lists_column_view const& lists, diff --git a/cpp/include/cudf/lists/detail/dremel.hpp b/cpp/include/cudf/lists/detail/dremel.hpp index 4e3aeec2499..d36a4091947 100644 --- a/cpp/include/cudf/lists/detail/dremel.hpp +++ b/cpp/include/cudf/lists/detail/dremel.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -183,16 +183,34 @@ struct dremel_data { * - | - | -- | --- * ``` * - * @param col Column of LIST type - * @param level_nullability Pre-determined nullability at each list level. Empty means infer from - * `col` + * @param input Column of LIST type + * @param nullability Pre-determined nullability at each list level. Empty means infer from + * `input` + * @param output_as_byte_array if `true`, then any nested list level that has a child of type + * `uint8_t` will be considered as the last level * @param stream CUDA stream used for device memory operations and kernel launches. 
- * * @return A struct containing dremel data */ -dremel_data get_dremel_data(column_view h_col, +dremel_data get_dremel_data(column_view input, std::vector nullability, bool output_as_byte_array, rmm::cuda_stream_view stream); +/** + * @brief Get Dremel offsets, repetition levels, and modified definition levels to be used for + * lexicographical comparators. The modified definition levels are produced by treating + * each nested column in the input as nullable + * + * @param input Column of LIST type + * @param nullability Pre-determined nullability at each list level. Empty means infer from + * `input` + * @param output_as_byte_array if `true`, then any nested list level that has a child of type + * `uint8_t` will be considered as the last level + * @param stream CUDA stream used for device memory operations and kernel launches. + * @return A struct containing dremel data + */ +dremel_data get_comparator_data(column_view input, + std::vector nullability, + bool output_as_byte_array, + rmm::cuda_stream_view stream); } // namespace cudf::detail diff --git a/cpp/include/cudf/lists/gather.hpp b/cpp/include/cudf/lists/gather.hpp index f91ce29a7cb..38bed9ede43 100644 --- a/cpp/include/cudf/lists/gather.hpp +++ b/cpp/include/cudf/lists/gather.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -43,7 +43,7 @@ namespace lists { * @endcode * * @throws cudf::logic_error if `gather_map_list` size is not same as `source_column` size. - * @throws cudf::logic_error if gather_map contains null values. + * @throws std::invalid_argument if gather_map contains null values. * @throws cudf::logic_error if gather_map is not list column of an index type. 
* * If indices in `gather_map_list` are outside the range `[-n, n)`, where `n` is the number of diff --git a/cpp/include/cudf/lists/lists_column_view.hpp b/cpp/include/cudf/lists/lists_column_view.hpp index 6b74a0e600a..336214e3934 100644 --- a/cpp/include/cudf/lists/lists_column_view.hpp +++ b/cpp/include/cudf/lists/lists_column_view.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -115,7 +115,7 @@ class lists_column_view : private column_view { /** * @brief Return first offset (accounting for column offset) * - * @return int32_t const* Pointer to the first offset + * @return Pointer to the first offset */ [[nodiscard]] offset_iterator offsets_begin() const noexcept { @@ -130,7 +130,7 @@ class lists_column_view : private column_view { * be computed using the size of the offsets() child column, which is also the offsets of the * entire original (non-sliced) lists column. * - * @return int32_t const* Pointer to one past the last offset + * @return Pointer to one past the last offset */ [[nodiscard]] offset_iterator offsets_end() const noexcept { diff --git a/cpp/include/cudf/null_mask.hpp b/cpp/include/cudf/null_mask.hpp index bd4ce28a2ef..360006c1eea 100644 --- a/cpp/include/cudf/null_mask.hpp +++ b/cpp/include/cudf/null_mask.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -38,7 +38,7 @@ namespace cudf { * * @param state The state of the null mask * @param size The number of elements represented by the mask - * @return size_type The count of null elements + * @return The count of null elements */ size_type state_null_count(mask_state state, size_type size); @@ -52,7 +52,7 @@ size_type state_null_count(mask_state state, size_type size); * @param number_of_bits The number of bits that need to be represented * @param padding_boundary The value returned will be rounded up to a multiple * of this value - * @return std::size_t The necessary number of bytes + * @return The necessary number of bytes */ std::size_t bitmask_allocation_size_bytes(size_type number_of_bits, std::size_t padding_boundary = 64); @@ -68,7 +68,7 @@ std::size_t bitmask_allocation_size_bytes(size_type number_of_bits, * in a bitmask and ignore the padding/slack bits. * * @param number_of_bits The number of bits that need to be represented - * @return size_type The necessary number of `bitmask_type` elements + * @return The necessary number of `bitmask_type` elements */ size_type num_bitmask_words(size_type number_of_bits); @@ -79,7 +79,7 @@ size_type num_bitmask_words(size_type number_of_bits); * @param size The number of elements to be represented by the mask * @param state The desired state of the mask * @param mr Device memory resource used to allocate the returned device_buffer - * @return rmm::device_buffer A `device_buffer` for use as a null bitmask + * @return A `device_buffer` for use as a null bitmask * satisfying the desired size and state */ rmm::device_buffer create_null_mask( @@ -114,7 +114,7 @@ void set_null_mask(bitmask_type* bitmask, size_type begin_bit, size_type end_bit * @param begin_bit Index of the first bit to be copied (inclusive) * @param end_bit Index of the last bit to be copied (exclusive) * @param mr Device memory resource used to allocate the returned device_buffer - * @return rmm::device_buffer A `device_buffer` containing the bits + * 
@return A `device_buffer` containing the bits * `[begin_bit, end_bit)` from `mask`. */ rmm::device_buffer copy_bitmask( @@ -131,7 +131,7 @@ rmm::device_buffer copy_bitmask( * * @param view Column view whose bitmask needs to be copied * @param mr Device memory resource used to allocate the returned device_buffer - * @return rmm::device_buffer A `device_buffer` containing the bits + * @return A `device_buffer` containing the bits * `[view.offset(), view.offset() + view.size())` from `view`'s bitmask. */ rmm::device_buffer copy_bitmask( diff --git a/cpp/include/cudf/round.hpp b/cpp/include/cudf/round.hpp index 29e5c1ab808..030d3d42773 100644 --- a/cpp/include/cudf/round.hpp +++ b/cpp/include/cudf/round.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -67,7 +67,7 @@ enum class rounding_method : int32_t { HALF_UP, HALF_EVEN }; * @param method Rounding method * @param mr Device memory resource used to allocate the returned column's device memory * - * @return std::unique_ptr Column with each of the values rounded + * @return Column with each of the values rounded */ std::unique_ptr round( column_view const& input, diff --git a/cpp/include/cudf/sorting.hpp b/cpp/include/cudf/sorting.hpp index f43089210fd..922bed3b1ea 100644 --- a/cpp/include/cudf/sorting.hpp +++ b/cpp/include/cudf/sorting.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -190,10 +190,9 @@ std::unique_ptr
stable_sort_by_key( * for column * @param percentage flag to convert ranks to percentage in range (0,1] * @param mr Device memory resource used to allocate the returned column's device memory - * @return std::unique_ptr A column of containing the rank of the each - * element of the column of `input`. The output column type will be `size_type` - * column by default or else `double` when `method=rank_method::AVERAGE` or -*`percentage=True` + * @return A column containing the rank of each element of the column of `input`. The output + * column type will be `size_type` column by default or else `double` when + * `method=rank_method::AVERAGE` or `percentage=True` */ std::unique_ptr rank( column_view const& input, diff --git a/cpp/include/cudf/strings/contains.hpp b/cpp/include/cudf/strings/contains.hpp index aee349415e3..aebc4ae7dab 100644 --- a/cpp/include/cudf/strings/contains.hpp +++ b/cpp/include/cudf/strings/contains.hpp @@ -34,33 +34,6 @@ struct regex_program; * @brief Strings APIs for regex contains, count, matches */ -/** - * @brief Returns a boolean column identifying rows which - * match the given regex pattern. - * - * @code{.pseudo} - * Example: - * s = ["abc","123","def456"] - * r = contains_re(s,"\\d+") - * r is now [false, true, true] - * @endcode - * - * Any null string entries return corresponding null output column entries. - * - * See the @ref md_regex "Regex Features" page for details on patterns supported by this API. - * - * @param strings Strings instance for this operation. - * @param pattern Regex pattern to match to each string. - * @param flags Regex flags for interpreting special characters in the pattern. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New column of boolean results for each string.
- */ -std::unique_ptr contains_re( - strings_column_view const& strings, - std::string_view pattern, - regex_flags const flags = regex_flags::DEFAULT, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - /** * @brief Returns a boolean column identifying rows which - * matching the given regex pattern but only at the beginning the string. + * match the given regex pattern. * * @code{.pseudo} * Example: * s = ["abc","123","def456"] - * r = matches_re(s,"\\d+") - * r is now [false, true, false] + * r = contains_re(s,"\\d+") + * r is now [false, true, true] * @endcode * * Any null string entries return corresponding null output column entries. * * See the @ref md_regex "Regex Features" page for details on patterns supported by this API. * + * @deprecated Use @link contains_re contains_re(strings_column_view const&, + * regex_program const&, rmm::mr::device_memory_resource*) @endlink + * * @param strings Strings instance for this operation. * @param pattern Regex pattern to match to each string. * @param flags Regex flags for interpreting special characters in the pattern. * @param mr Device memory resource used to allocate the returned column's device memory. * @return New column of boolean results for each string. */ -std::unique_ptr matches_re( +[[deprecated]] std::unique_ptr contains_re( strings_column_view const& strings, std::string_view pattern, regex_flags const flags = regex_flags::DEFAULT, @@ -141,27 +117,30 @@ std::unique_ptr matches_re( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Returns the number of times the given regex pattern - * matches in each string. + * @brief Returns a boolean column identifying rows which + * match the given regex pattern but only at the beginning of the string.
* * @code{.pseudo} * Example: - * s = ["abc","123","def45"] - * r = count_re(s,"\\d") - * r is now [0, 3, 2] + * s = ["abc","123","def456"] + * r = matches_re(s,"\\d+") + * r is now [false, true, false] * @endcode * * Any null string entries return corresponding null output column entries. * * See the @ref md_regex "Regex Features" page for details on patterns supported by this API. * + * @deprecated Use @link matches_re matches_re(strings_column_view const&, + * regex_program const&, rmm::mr::device_memory_resource*) @endlink + * * @param strings Strings instance for this operation. - * @param pattern Regex pattern to match within each string. + * @param pattern Regex pattern to match to each string. * @param flags Regex flags for interpreting special characters in the pattern. * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New INT32 column with counts for each string. + * @return New column of boolean results for each string. */ -std::unique_ptr count_re( +[[deprecated]] std::unique_ptr matches_re( strings_column_view const& strings, std::string_view pattern, regex_flags const flags = regex_flags::DEFAULT, @@ -193,6 +172,36 @@ std::unique_ptr count_re( regex_program const& prog, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Returns the number of times the given regex pattern + * matches in each string. + * + * @code{.pseudo} + * Example: + * s = ["abc","123","def45"] + * r = count_re(s,"\\d") + * r is now [0, 3, 2] + * @endcode + * + * Any null string entries return corresponding null output column entries. + * + * See the @ref md_regex "Regex Features" page for details on patterns supported by this API. + * + * @deprecated Use @link count_re count_re(strings_column_view const&, + * regex_program const&, rmm::mr::device_memory_resource*) @endlink + * + * @param strings Strings instance for this operation. 
+ * @param pattern Regex pattern to match within each string. + * @param flags Regex flags for interpreting special characters in the pattern. + * @param mr Device memory resource used to allocate the returned column's device memory. + * @return New INT32 column with counts for each string. + */ +[[deprecated]] std::unique_ptr count_re( + strings_column_view const& strings, + std::string_view pattern, + regex_flags const flags = regex_flags::DEFAULT, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @brief Returns a boolean column identifying rows which * match the given like pattern. diff --git a/cpp/include/cudf/strings/extract.hpp b/cpp/include/cudf/strings/extract.hpp index a80d971438d..e1a940259ac 100644 --- a/cpp/include/cudf/strings/extract.hpp +++ b/cpp/include/cudf/strings/extract.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,14 +27,14 @@ namespace strings { struct regex_program; /** - * @addtogroup strings_substring + * @addtogroup strings_extract * @{ * @file */ /** * @brief Returns a table of strings columns where each column corresponds to the matching - * group specified in the given regular expression pattern. + * group specified in the given regex_program object * * All the strings for the first group will go in the first output column; the second group * go in the second column and so on. Null entries are added to the columns in row `i` if @@ -45,28 +45,27 @@ struct regex_program; * @code{.pseudo} * Example: * s = ["a1", "b2", "c3"] - * r = extract(s, "([ab])(\\d)") + * p = regex_program::create("([ab])(\\d)") + * r = extract(s, p) * r is now [ ["a", "b", null], * ["1", "2", null] ] * @endcode * * See the @ref md_regex "Regex Features" page for details on patterns supported by this API. 
* - * @param strings Strings instance for this operation. - * @param pattern The regular expression pattern with group indicators. - * @param flags Regex flags for interpreting special characters in the pattern. - * @param mr Device memory resource used to allocate the returned table's device memory. - * @return Columns of strings extracted from the input column. + * @param strings Strings instance for this operation + * @param prog Regex program instance + * @param mr Device memory resource used to allocate the returned table's device memory + * @return Columns of strings extracted from the input column */ std::unique_ptr
extract( strings_column_view const& strings, - std::string_view pattern, - regex_flags const flags = regex_flags::DEFAULT, + regex_program const& prog, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a table of strings columns where each column corresponds to the matching - * group specified in the given regex_program object + * group specified in the given regular expression pattern. * * All the strings for the first group will go in the first output column; the second group * go in the second column and so on. Null entries are added to the columns in row `i` if @@ -77,27 +76,31 @@ std::unique_ptr
extract( * @code{.pseudo} * Example: * s = ["a1", "b2", "c3"] - * p = regex_program::create("([ab])(\\d)") - * r = extract(s, p) + * r = extract(s, "([ab])(\\d)") * r is now [ ["a", "b", null], * ["1", "2", null] ] * @endcode * * See the @ref md_regex "Regex Features" page for details on patterns supported by this API. * - * @param strings Strings instance for this operation - * @param prog Regex program instance - * @param mr Device memory resource used to allocate the returned table's device memory - * @return Columns of strings extracted from the input column + * @deprecated Use @link extract extract(strings_column_view const&, + * regex_program const&, rmm::mr::device_memory_resource*) @endlink + * + * @param strings Strings instance for this operation. + * @param pattern The regular expression pattern with group indicators. + * @param flags Regex flags for interpreting special characters in the pattern. + * @param mr Device memory resource used to allocate the returned table's device memory. + * @return Columns of strings extracted from the input column. */ -std::unique_ptr
extract( +[[deprecated]] std::unique_ptr
extract( strings_column_view const& strings, - regex_program const& prog, + std::string_view pattern, + regex_flags const flags = regex_flags::DEFAULT, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a lists column of strings where each string column row corresponds to the - * matching group specified in the given regular expression pattern. + * matching group specified in the given regex_program object * * All the matching groups for the first row will go in the first row output column; the second * row results will go into the second row output column and so on. @@ -108,7 +111,8 @@ std::unique_ptr
extract( * @code{.pseudo} * Example: * s = ["a1 b4", "b2", "c3 a5", "b", null] - * r = extract_all_record(s,"([ab])(\\d)") + * p = regex_program::create("([ab])(\\d)") + * r = extract_all_record(s, p) * r is now [ ["a", "1", "b", "4"], * ["b", "2"], * ["a", "5"], @@ -118,21 +122,19 @@ std::unique_ptr
extract( * * See the @ref md_regex "Regex Features" page for details on patterns supported by this API. * - * @param strings Strings instance for this operation. - * @param pattern The regular expression pattern with group indicators. - * @param flags Regex flags for interpreting special characters in the pattern. - * @param mr Device memory resource used to allocate any returned device memory. - * @return Lists column containing strings extracted from the input column. + * @param strings Strings instance for this operation + * @param prog Regex program instance + * @param mr Device memory resource used to allocate any returned device memory + * @return Lists column containing strings extracted from the input column */ std::unique_ptr extract_all_record( strings_column_view const& strings, - std::string_view pattern, - regex_flags const flags = regex_flags::DEFAULT, + regex_program const& prog, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a lists column of strings where each string column row corresponds to the - * matching group specified in the given regex_program object + * matching group specified in the given regular expression pattern. * * All the matching groups for the first row will go in the first row output column; the second * row results will go into the second row output column and so on. @@ -143,8 +145,7 @@ std::unique_ptr extract_all_record( * @code{.pseudo} * Example: * s = ["a1 b4", "b2", "c3 a5", "b", null] - * p = regex_program::create("([ab])(\\d)") - * r = extract_all_record(s, p) + * r = extract_all_record(s,"([ab])(\\d)") * r is now [ ["a", "1", "b", "4"], * ["b", "2"], * ["a", "5"], @@ -154,14 +155,19 @@ std::unique_ptr extract_all_record( * * See the @ref md_regex "Regex Features" page for details on patterns supported by this API. 
* - * @param strings Strings instance for this operation - * @param prog Regex program instance - * @param mr Device memory resource used to allocate any returned device memory - * @return Lists column containing strings extracted from the input column + * @deprecated Use @link extract_all_record extract_all_record(strings_column_view const&, + * regex_program const&, rmm::mr::device_memory_resource*) @endlink + * + * @param strings Strings instance for this operation. + * @param pattern The regular expression pattern with group indicators. + * @param flags Regex flags for interpreting special characters in the pattern. + * @param mr Device memory resource used to allocate any returned device memory. + * @return Lists column containing strings extracted from the input column. */ -std::unique_ptr extract_all_record( +[[deprecated]] std::unique_ptr extract_all_record( strings_column_view const& strings, - regex_program const& prog, + std::string_view pattern, + regex_flags const flags = regex_flags::DEFAULT, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group diff --git a/cpp/include/cudf/strings/findall.hpp b/cpp/include/cudf/strings/findall.hpp index 366e1eb0482..3ac881777e4 100644 --- a/cpp/include/cudf/strings/findall.hpp +++ b/cpp/include/cudf/strings/findall.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,8 +33,8 @@ struct regex_program; */ /** - * @brief Returns a lists column of strings for each matching occurrence of the - * regex pattern within each string. 
+ * @brief Returns a lists column of strings for each matching occurrence using + * the regex_program pattern within each string * * Each output row includes all the substrings within the corresponding input row * that match the given pattern. If no matches are found, the output row is empty. @@ -42,7 +42,8 @@ struct regex_program; * @code{.pseudo} * Example: * s = ["bunny", "rabbit", "hare", "dog"] - * r = findall(s, "[ab]") + * p = regex_program::create("[ab]") + * r = findall(s, p) * r is now a lists column like: * [ ["b"] * ["a","b","b"] @@ -54,21 +55,19 @@ struct regex_program; * * See the @ref md_regex "Regex Features" page for details on patterns supported by this API. * - * @param input Strings instance for this operation. - * @param pattern Regex pattern to match within each string. - * @param flags Regex flags for interpreting special characters in the pattern. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New lists column of strings. + * @param input Strings instance for this operation + * @param prog Regex program instance + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New lists column of strings */ std::unique_ptr findall( strings_column_view const& input, - std::string_view pattern, - regex_flags const flags = regex_flags::DEFAULT, + regex_program const& prog, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Returns a lists column of strings for each matching occurrence using - * the regex_program pattern within each string + * @brief Returns a lists column of strings for each matching occurrence of the + * regex pattern within each string. * * Each output row includes all the substrings within the corresponding input row * that match the given pattern. If no matches are found, the output row is empty. 
@@ -76,8 +75,7 @@ std::unique_ptr findall( * @code{.pseudo} * Example: * s = ["bunny", "rabbit", "hare", "dog"] - * p = regex_program::create("[ab]") - * r = findall(s, p) + * r = findall(s, "[ab]") * r is now a lists column like: * [ ["b"] * ["a","b","b"] @@ -89,14 +87,19 @@ std::unique_ptr findall( * * See the @ref md_regex "Regex Features" page for details on patterns supported by this API. * - * @param input Strings instance for this operation - * @param prog Regex program instance - * @param mr Device memory resource used to allocate the returned column's device memory - * @return New lists column of strings + * @deprecated Use @link findall findall(strings_column_view const&, + * regex_program const&, rmm::mr::device_memory_resource*) @endlink + * + * @param input Strings instance for this operation. + * @param pattern Regex pattern to match within each string. + * @param flags Regex flags for interpreting special characters in the pattern. + * @param mr Device memory resource used to allocate the returned column's device memory. + * @return New lists column of strings. */ -std::unique_ptr findall( +[[deprecated]] std::unique_ptr findall( strings_column_view const& input, - regex_program const& prog, + std::string_view pattern, + regex_flags const flags = regex_flags::DEFAULT, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group diff --git a/cpp/include/cudf/strings/json.hpp b/cpp/include/cudf/strings/json.hpp index 11e8daa9855..8fabee6b9a5 100644 --- a/cpp/include/cudf/strings/json.hpp +++ b/cpp/include/cudf/strings/json.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -160,6 +160,8 @@ class get_json_object_options { * @param options Options for controlling the behavior of the function * @param mr Resource for allocating device memory. * @return New strings column containing the retrieved json object strings + * + * @throw std::invalid_argument if provided an invalid operator or an empty name */ std::unique_ptr get_json_object( cudf::strings_column_view const& col, diff --git a/cpp/include/cudf/strings/replace_re.hpp b/cpp/include/cudf/strings/replace_re.hpp index 60c66956fb8..70e44a68c9a 100644 --- a/cpp/include/cudf/strings/replace_re.hpp +++ b/cpp/include/cudf/strings/replace_re.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -36,53 +36,56 @@ struct regex_program; */ /** - * @brief For each string, replaces any character sequence matching the given pattern + * @brief For each string, replaces any character sequence matching the given regex * with the provided replacement string. * * Any null string entries return corresponding null output column entries. * * See the @ref md_regex "Regex Features" page for details on patterns supported by this API. * - * @param strings Strings instance for this operation. - * @param pattern The regular expression pattern to search within each string. + * @param strings Strings instance for this operation + * @param prog Regex program instance * @param replacement The string used to replace the matched sequence in each string. * Default is an empty string. * @param max_replace_count The maximum number of times to replace the matched pattern * within each string. Default replaces every substring that is matched. - * @param flags Regex flags for interpreting special characters in the pattern. - * @param mr Device memory resource used to allocate the returned column's device memory. 
- * @return New strings column. + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column */ std::unique_ptr replace_re( strings_column_view const& strings, - std::string_view pattern, + regex_program const& prog, string_scalar const& replacement = string_scalar(""), std::optional max_replace_count = std::nullopt, - regex_flags const flags = regex_flags::DEFAULT, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief For each string, replaces any character sequence matching the given regex + * @brief For each string, replaces any character sequence matching the given pattern * with the provided replacement string. * * Any null string entries return corresponding null output column entries. * * See the @ref md_regex "Regex Features" page for details on patterns supported by this API. * - * @param strings Strings instance for this operation - * @param prog Regex program instance + * @deprecated Use @link replace_re replace_re(strings_column_view const&, regex_program const&, + * string_scalar const&, std::optional, rmm::mr::device_memory_resource*) @endlink + * + * @param strings Strings instance for this operation. + * @param pattern The regular expression pattern to search within each string. * @param replacement The string used to replace the matched sequence in each string. * Default is an empty string. * @param max_replace_count The maximum number of times to replace the matched pattern * within each string. Default replaces every substring that is matched. - * @param mr Device memory resource used to allocate the returned column's device memory - * @return New strings column + * @param flags Regex flags for interpreting special characters in the pattern. + * @param mr Device memory resource used to allocate the returned column's device memory. + * @return New strings column. 
*/ -std::unique_ptr replace_re( +[[deprecated]] std::unique_ptr replace_re( strings_column_view const& strings, - regex_program const& prog, + std::string_view pattern, string_scalar const& replacement = string_scalar(""), std::optional max_replace_count = std::nullopt, + regex_flags const flags = regex_flags::DEFAULT, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -108,7 +111,7 @@ std::unique_ptr replace_re( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief For each string, replaces any character sequence matching the given pattern + * @brief For each string, replaces any character sequence matching the given regex * using the replacement template for back-references. * * Any null string entries return corresponding null output column entries. @@ -118,41 +121,44 @@ std::unique_ptr replace_re( * @throw cudf::logic_error if capture index values in `replacement` are not in range 0-99, and also * if the index exceeds the group count specified in the pattern * - * @param strings Strings instance for this operation. - * @param pattern The regular expression patterns to search within each string. - * @param replacement The replacement template for creating the output string. - * @param flags Regex flags for interpreting special characters in the pattern. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New strings column. 
+ * @param strings Strings instance for this operation + * @param prog Regex program instance + * @param replacement The replacement template for creating the output string + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column */ std::unique_ptr replace_with_backrefs( strings_column_view const& strings, - std::string_view pattern, + regex_program const& prog, std::string_view replacement, - regex_flags const flags = regex_flags::DEFAULT, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief For each string, replaces any character sequence matching the given regex + * @brief For each string, replaces any character sequence matching the given pattern * using the replacement template for back-references. * * Any null string entries return corresponding null output column entries. * * See the @ref md_regex "Regex Features" page for details on patterns supported by this API. * + * @deprecated Use @link replace_with_backrefs replace_with_backrefs(strings_column_view const&, + * regex_program const&, string_view, rmm::mr::device_memory_resource*) @endlink + * * @throw cudf::logic_error if capture index values in `replacement` are not in range 0-99, and also * if the index exceeds the group count specified in the pattern * - * @param strings Strings instance for this operation - * @param prog Regex program instance - * @param replacement The replacement template for creating the output string - * @param mr Device memory resource used to allocate the returned column's device memory - * @return New strings column + * @param strings Strings instance for this operation. + * @param pattern The regular expression patterns to search within each string. + * @param replacement The replacement template for creating the output string. + * @param flags Regex flags for interpreting special characters in the pattern. 
+ * @param mr Device memory resource used to allocate the returned column's device memory. + * @return New strings column. */ -std::unique_ptr replace_with_backrefs( +[[deprecated]] std::unique_ptr replace_with_backrefs( strings_column_view const& strings, - regex_program const& prog, + std::string_view pattern, std::string_view replacement, + regex_flags const flags = regex_flags::DEFAULT, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace strings diff --git a/cpp/include/cudf/strings/split/split_re.hpp b/cpp/include/cudf/strings/split/split_re.hpp index c6bd1345ae6..fac5f130064 100644 --- a/cpp/include/cudf/strings/split/split_re.hpp +++ b/cpp/include/cudf/strings/split/split_re.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,7 +34,7 @@ struct regex_program; /** * @brief Splits strings elements into a table of strings columns - * using a regex pattern to delimit each string. + * using a regex_program's pattern to delimit each string * * Each element generates a vector of strings that are stored in corresponding * rows in the output table -- `table[col,row] = token[col] of strings[row]` @@ -51,15 +51,19 @@ struct regex_program; * corresponding row of the first column. * A null row will produce corresponding null rows in the output table. * + * The regex_program's regex_flags are ignored. 
+ * * @code{.pseudo} * s = ["a_bc def_g", "a__bc", "_ab cd", "ab_cd "] - * s1 = split_re(s, "[_ ]") + * p1 = regex_program::create("[_ ]") + * s1 = split_re(s, p1) * s1 is a table of strings columns: * [ ["a", "a", "", "ab"], * ["bc", "", "ab", "cd"], * ["def", "bc", "cd", ""], * ["g", null, null, null] ] - * s2 = split_re(s, "[ _]", 1) + * p2 = regex_program::create("[ _]") + * s2 = split_re(s, p2, 1) * s2 is a table of strings columns: * [ ["a", "a", "", "ab"], * ["bc def_g", "_bc", "ab cd", "cd "] ] @@ -67,22 +71,22 @@ struct regex_program; * * @throw cudf::logic_error if `pattern` is empty. * - * @param input A column of string elements to be split. - * @param pattern The regex pattern for delimiting characters within each string. + * @param input A column of string elements to be split + * @param prog Regex program instance * @param maxsplit Maximum number of splits to perform. * Default of -1 indicates all possible splits on each string. - * @param mr Device memory resource used to allocate the returned result's device memory. - * @return A table of columns of strings. + * @param mr Device memory resource used to allocate the returned result's device memory + * @return A table of columns of strings */ std::unique_ptr
split_re( strings_column_view const& input, - std::string_view pattern, + regex_program const& prog, size_type maxsplit = -1, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @brief Splits strings elements into a table of strings columns - * using a regex_program's pattern to delimit each string + * using a regex pattern to delimit each string. * * Each element generates a vector of strings that are stored in corresponding * rows in the output table -- `table[col,row] = token[col] of strings[row]` @@ -99,42 +103,41 @@ std::unique_ptr
split_re( * corresponding row of the first column. * A null row will produce corresponding null rows in the output table. * - * The regex_program's regex_flags are ignored. - * * @code{.pseudo} * s = ["a_bc def_g", "a__bc", "_ab cd", "ab_cd "] - * p1 = regex_program::create("[_ ]") - * s1 = split_re(s, p1) + * s1 = split_re(s, "[_ ]") * s1 is a table of strings columns: * [ ["a", "a", "", "ab"], * ["bc", "", "ab", "cd"], * ["def", "bc", "cd", ""], * ["g", null, null, null] ] - * p2 = regex_program::create("[ _]") - * s2 = split_re(s, p2, 1) + * s2 = split_re(s, "[ _]", 1) * s2 is a table of strings columns: * [ ["a", "a", "", "ab"], * ["bc def_g", "_bc", "ab cd", "cd "] ] * @endcode * + * @deprecated Use @link split_re split_re(strings_column_view const&, + * regex_program const&, size_type, rmm::mr::device_memory_resource*) @endlink + * * @throw cudf::logic_error if `pattern` is empty. * - * @param input A column of string elements to be split - * @param prog Regex program instance + * @param input A column of string elements to be split. + * @param pattern The regex pattern for delimiting characters within each string. * @param maxsplit Maximum number of splits to perform. * Default of -1 indicates all possible splits on each string. - * @param mr Device memory resource used to allocate the returned result's device memory - * @return A table of columns of strings + * @param mr Device memory resource used to allocate the returned result's device memory. + * @return A table of columns of strings. */ -std::unique_ptr
split_re( +[[deprecated]] std::unique_ptr
split_re( strings_column_view const& input, - regex_program const& prog, + std::string_view pattern, size_type maxsplit = -1, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Splits strings elements into a table of strings columns - * using a regex pattern to delimit each string starting from the end of the string. + * @brief Splits strings elements into a table of strings columns using a + * regex_program's pattern to delimit each string starting from the end of the string * * Each element generates a vector of strings that are stored in corresponding * rows in the output table -- `table[col,row] = token[col] of string[row]` @@ -153,15 +156,19 @@ std::unique_ptr
split_re( * corresponding row of the first column. * A null row will produce corresponding null rows in the output table. * + * The regex_program's regex_flags are ignored. + * * @code{.pseudo} * s = ["a_bc def_g", "a__bc", "_ab cd", "ab_cd "] - * s1 = rsplit_re(s, "[_ ]") + * p1 = regex_program::create("[_ ]") + * s1 = rsplit_re(s, p1) * s1 is a table of strings columns: * [ ["a", "a", "", "ab"], * ["bc", "", "ab", "cd"], * ["def", "bc", "cd", ""], * ["g", null, null, null] ] - * s2 = rsplit_re(s, "[ _]", 1) + * p2 = regex_program::create("[ _]") + * s2 = rsplit_re(s, p2, 1) * s2 is a table of strings columns: * [ ["a_bc def", "a_", "_ab", "ab"], * ["g", "bc", "cd", "cd "] ] @@ -170,7 +177,7 @@ std::unique_ptr
split_re( * @throw cudf::logic_error if `pattern` is empty. * * @param input A column of string elements to be split. - * @param pattern The regex pattern for delimiting characters within each string. + * @param prog Regex program instance * @param maxsplit Maximum number of splits to perform. * Default of -1 indicates all possible splits on each string. * @param mr Device memory resource used to allocate the returned result's device memory. @@ -178,13 +185,13 @@ std::unique_ptr
split_re( */ std::unique_ptr
rsplit_re( strings_column_view const& input, - std::string_view pattern, + regex_program const& prog, size_type maxsplit = -1, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Splits strings elements into a table of strings columns using a - * regex_program's pattern to delimit each string starting from the end of the string + * @brief Splits strings elements into a table of strings columns + * using a regex pattern to delimit each string starting from the end of the string. * * Each element generates a vector of strings that are stored in corresponding * rows in the output table -- `table[col,row] = token[col] of string[row]` @@ -203,42 +210,41 @@ std::unique_ptr
rsplit_re( * corresponding row of the first column. * A null row will produce corresponding null rows in the output table. * - * The regex_program's regex_flags are ignored. - * * @code{.pseudo} * s = ["a_bc def_g", "a__bc", "_ab cd", "ab_cd "] - * p1 = regex_program::create("[_ ]") - * s1 = rsplit_re(s, p1) + * s1 = rsplit_re(s, "[_ ]") * s1 is a table of strings columns: * [ ["a", "a", "", "ab"], * ["bc", "", "ab", "cd"], * ["def", "bc", "cd", ""], * ["g", null, null, null] ] - * p2 = regex_program::create("[ _]") - * s2 = rsplit_re(s, p2, 1) + * s2 = rsplit_re(s, "[ _]", 1) * s2 is a table of strings columns: * [ ["a_bc def", "a_", "_ab", "ab"], * ["g", "bc", "cd", "cd "] ] * @endcode * + * @deprecated Use @link rsplit_re rsplit_re(strings_column_view const&, + * regex_program const&, size_type, rmm::mr::device_memory_resource*) @endlink + * * @throw cudf::logic_error if `pattern` is empty. * * @param input A column of string elements to be split. - * @param prog Regex program instance + * @param pattern The regex pattern for delimiting characters within each string. * @param maxsplit Maximum number of splits to perform. * Default of -1 indicates all possible splits on each string. * @param mr Device memory resource used to allocate the returned result's device memory. * @return A table of columns of strings. */ -std::unique_ptr
rsplit_re( +[[deprecated]] std::unique_ptr
rsplit_re( strings_column_view const& input, - regex_program const& prog, + std::string_view pattern, size_type maxsplit = -1, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @brief Splits strings elements into a list column of strings - * using the given regex pattern to delimit each string. + * using the given regex_program to delimit each string * * Each element generates an array of strings that are stored in an output * lists column -- `list[row] = [token1, token2, ...] found in input[row]` @@ -255,15 +261,19 @@ std::unique_ptr
rsplit_re( * An empty input string will produce a corresponding empty list item output row. * A null row will produce a corresponding null output row. * + * The regex_program's regex_flags are ignored. + * * @code{.pseudo} * s = ["a_bc def_g", "a__bc", "_ab cd", "ab_cd "] - * s1 = split_record_re(s, "[_ ]") + * p1 = regex_program::create("[_ ]") + * s1 = split_record_re(s, p1) * s1 is a lists column of strings: * [ ["a", "bc", "def", "g"], * ["a", "", "bc"], * ["", "ab", "cd"], * ["ab", "cd", ""] ] - * s2 = split_record_re(s, "[ _]", 1) + * p2 = regex_program::create("[ _]") + * s2 = split_record_re(s, p2, 1) * s2 is a lists column of strings: * [ ["a", "bc def_g"], * ["a", "_bc"], @@ -275,22 +285,22 @@ std::unique_ptr
rsplit_re( * * See the @ref md_regex "Regex Features" page for details on patterns supported by this API. * - * @param input A column of string elements to be split. - * @param pattern The regex pattern for delimiting characters within each string. + * @param input A column of string elements to be split + * @param prog Regex program instance * @param maxsplit Maximum number of splits to perform. * Default of -1 indicates all possible splits on each string. - * @param mr Device memory resource used to allocate the returned result's device memory. + * @param mr Device memory resource used to allocate the returned result's device memory * @return Lists column of strings. */ std::unique_ptr split_record_re( strings_column_view const& input, - std::string_view pattern, + regex_program const& prog, size_type maxsplit = -1, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @brief Splits strings elements into a list column of strings - * using the given regex_program to delimit each string + * using the given regex pattern to delimit each string. * * Each element generates an array of strings that are stored in an output * lists column -- `list[row] = [token1, token2, ...] found in input[row]` @@ -307,19 +317,15 @@ std::unique_ptr split_record_re( * An empty input string will produce a corresponding empty list item output row. * A null row will produce a corresponding null output row. * - * The regex_program's regex_flags are ignored. 
- * * @code{.pseudo} * s = ["a_bc def_g", "a__bc", "_ab cd", "ab_cd "] - * p1 = regex_program::create("[_ ]") - * s1 = split_record_re(s, p1) + * s1 = split_record_re(s, "[_ ]") * s1 is a lists column of strings: * [ ["a", "bc", "def", "g"], * ["a", "", "bc"], * ["", "ab", "cd"], * ["ab", "cd", ""] ] - * p2 = regex_program::create("[ _]") - * s2 = split_record_re(s, p2, 1) + * s2 = split_record_re(s, "[ _]", 1) * s2 is a lists column of strings: * [ ["a", "bc def_g"], * ["a", "_bc"], @@ -327,26 +333,29 @@ std::unique_ptr split_record_re( * ["ab", "cd "] ] * @endcode * - * @throw cudf::logic_error if `pattern` is empty. - * * See the @ref md_regex "Regex Features" page for details on patterns supported by this API. * - * @param input A column of string elements to be split - * @param prog Regex program instance + * @deprecated Use @link split_record_re split_record_re(strings_column_view const&, + * regex_program const&, size_type, rmm::mr::device_memory_resource*) @endlink + * + * @throw cudf::logic_error if `pattern` is empty. + * + * @param input A column of string elements to be split. + * @param pattern The regex pattern for delimiting characters within each string. * @param maxsplit Maximum number of splits to perform. * Default of -1 indicates all possible splits on each string. - * @param mr Device memory resource used to allocate the returned result's device memory + * @param mr Device memory resource used to allocate the returned result's device memory. * @return Lists column of strings. */ -std::unique_ptr split_record_re( +[[deprecated]] std::unique_ptr split_record_re( strings_column_view const& input, - regex_program const& prog, + std::string_view pattern, size_type maxsplit = -1, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Splits strings elements into a list column of strings - * using the given regex pattern to delimit each string starting from the end of the string. 
+ * @brief Splits strings elements into a list column of strings using the given + * regex_program to delimit each string starting from the end of the string * * Each element generates a vector of strings that are stored in an output * lists column -- `list[row] = [token1, token2, ...] found in input[row]` @@ -365,15 +374,19 @@ std::unique_ptr split_record_re( * An empty input string will produce a corresponding empty list item output row. * A null row will produce a corresponding null output row. * + * The regex_program's regex_flags are ignored. + * * @code{.pseudo} * s = ["a_bc def_g", "a__bc", "_ab cd", "ab_cd "] - * s1 = rsplit_record_re(s, "[_ ]") + * p1 = regex_program::create("[_ ]") + * s1 = rsplit_record_re(s, p1) * s1 is a lists column of strings: * [ ["a", "bc", "def", "g"], * ["a", "", "bc"], * ["", "ab", "cd"], * ["ab", "cd", ""] ] - * s2 = rsplit_record_re(s, "[ _]", 1) + * p2 = regex_program::create("[ _]") + * s2 = rsplit_record_re(s, p2, 1) * s2 is a lists column of strings: * [ ["a_bc def", "g"], * ["a_", "bc"], @@ -385,22 +398,22 @@ std::unique_ptr split_record_re( * * @throw cudf::logic_error if `pattern` is empty. * - * @param input A column of string elements to be split. - * @param pattern The regex pattern for delimiting characters within each string. + * @param input A column of string elements to be split + * @param prog Regex program instance * @param maxsplit Maximum number of splits to perform. * Default of -1 indicates all possible splits on each string. - * @param mr Device memory resource used to allocate the returned result's device memory. - * @return Lists column of strings. 
+ * @param mr Device memory resource used to allocate the returned result's device memory + * @return Lists column of strings */ std::unique_ptr rsplit_record_re( strings_column_view const& input, - std::string_view pattern, + regex_program const& prog, size_type maxsplit = -1, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Splits strings elements into a list column of strings using the given - * regex_program to delimit each string starting from the end of the string + * @brief Splits strings elements into a list column of strings + * using the given regex pattern to delimit each string starting from the end of the string. * * Each element generates a vector of strings that are stored in an output * lists column -- `list[row] = [token1, token2, ...] found in input[row]` @@ -419,19 +432,15 @@ std::unique_ptr rsplit_record_re( * An empty input string will produce a corresponding empty list item output row. * A null row will produce a corresponding null output row. * - * The regex_program's regex_flags are ignored. - * * @code{.pseudo} * s = ["a_bc def_g", "a__bc", "_ab cd", "ab_cd "] - * p1 = regex_program::create("[_ ]") - * s1 = rsplit_record_re(s, p1) + * s1 = rsplit_record_re(s, "[_ ]") * s1 is a lists column of strings: * [ ["a", "bc", "def", "g"], * ["a", "", "bc"], * ["", "ab", "cd"], * ["ab", "cd", ""] ] - * p2 = regex_program::create("[ _]") - * s2 = rsplit_record_re(s, p2, 1) + * s2 = rsplit_record_re(s, "[ _]", 1) * s2 is a lists column of strings: * [ ["a_bc def", "g"], * ["a_", "bc"], @@ -441,18 +450,21 @@ std::unique_ptr rsplit_record_re( * * See the @ref md_regex "Regex Features" page for details on patterns supported by this API. * + * @deprecated Use @link rsplit_record_re rsplit_record_re(strings_column_view const&, + * regex_program const&, size_type, rmm::mr::device_memory_resource*) @endlink + * * @throw cudf::logic_error if `pattern` is empty. 
* - * @param input A column of string elements to be split - * @param prog Regex program instance + * @param input A column of string elements to be split. + * @param pattern The regex pattern for delimiting characters within each string. * @param maxsplit Maximum number of splits to perform. * Default of -1 indicates all possible splits on each string. - * @param mr Device memory resource used to allocate the returned result's device memory - * @return Lists column of strings + * @param mr Device memory resource used to allocate the returned result's device memory. + * @return Lists column of strings. */ -std::unique_ptr rsplit_record_re( +[[deprecated]] std::unique_ptr rsplit_record_re( strings_column_view const& input, - regex_program const& prog, + std::string_view pattern, size_type maxsplit = -1, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index f9ffbfcdf7b..58f20adb923 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -487,7 +487,8 @@ class device_row_comparator { // element_index because either both rows have a deeply nested NULL at the // same position, and we'll "continue" in our iteration, or we will early // exit if only one of the rows has a deeply nested NULL - if (lcol.nullable() and l_def_levels[l_dremel_index] == l_max_def_level - 1) { + if ((lcol.nullable() and l_def_levels[l_dremel_index] == l_max_def_level - 1) or + (rcol.nullable() and r_def_levels[r_dremel_index] == r_max_def_level - 1)) { ++element_index; } if (l_def_level == r_def_level) { continue; } @@ -755,9 +756,8 @@ struct preprocessed_table { /** * @brief Get a device array containing the desired order of each column in the preprocessed table * - * @return std::optional> Device array containing respective column - * orders. 
If no explicit column orders were specified during the creation of this object then - * this will be `nullopt`. + * @return Device array containing respective column orders. If no explicit column orders were + * specified during the creation of this object then this will be `nullopt`. */ [[nodiscard]] std::optional> column_order() const { @@ -769,9 +769,8 @@ struct preprocessed_table { * @brief Get a device array containing the desired null precedence of each column in the * preprocessed table * - * @return std::optional> Device array containing respective column - * null precedence. If no explicit column null precedences were specified during the creation of - * this object then this will be `nullopt`. + * @return Device array containing respective column null precedence. If no explicit column null + * precedences were specified during the creation of this object then this will be `nullopt`. */ [[nodiscard]] std::optional> null_precedence() const { diff --git a/cpp/include/cudf/transform.hpp b/cpp/include/cudf/transform.hpp index 969bec84716..412fe17ef26 100644 --- a/cpp/include/cudf/transform.hpp +++ b/cpp/include/cudf/transform.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -82,7 +82,7 @@ std::pair, size_type> nans_to_nulls( * @param table The table used for expression evaluation * @param expr The root of the expression tree * @param mr Device memory resource - * @return std::unique_ptr Output column + * @return Output column */ std::unique_ptr compute_column( table_view const& table, diff --git a/cpp/include/cudf/types.hpp b/cpp/include/cudf/types.hpp index 8a1e4c9aee7..3bc1f9d6da7 100644 --- a/cpp/include/cudf/types.hpp +++ b/cpp/include/cudf/types.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2022, NVIDIA CORPORATION. 
+ * Copyright (c) 2018-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -89,7 +89,7 @@ using thread_index_type = int64_t; ///< Thread index type in kernels * @tparam T Iterator type * @param f "first" iterator * @param l "last" iterator - * @return size_type The distance between first and last + * @return The distance between first and last */ template size_type distance(T f, T l) diff --git a/cpp/include/cudf/utilities/error.hpp b/cpp/include/cudf/utilities/error.hpp index 38ca0f2651e..f70ef4e5f07 100644 --- a/cpp/include/cudf/utilities/error.hpp +++ b/cpp/include/cudf/utilities/error.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -84,6 +84,29 @@ struct cuda_error : public std::runtime_error { struct fatal_cuda_error : public cuda_error { using cuda_error::cuda_error; // Inherit constructors }; + +/** + * @brief Exception thrown when an operation is attempted on an unsupported dtype. + * + * This exception should be thrown when an operation is attempted on an + * unsupported data_type. This exception should not be thrown directly and is + * instead thrown by the CUDF_EXPECTS or CUDF_FAIL macros. + */ +struct data_type_error : public std::invalid_argument { + /** + * @brief Constructs a data_type_error with the error message. 
+ * + * @param message Message to be associated with the exception + */ + data_type_error(char const* const message) : std::invalid_argument(message) {} + + /** + * @brief Construct a new data_type_error object with error message + * + * @param message Message to be associated with the exception + */ + data_type_error(std::string const& message) : std::invalid_argument(message) {} +}; /** @} */ } // namespace cudf diff --git a/cpp/include/doxygen_groups.h b/cpp/include/doxygen_groups.h index b1d56f43057..628d48f64cd 100644 --- a/cpp/include/doxygen_groups.h +++ b/cpp/include/doxygen_groups.h @@ -128,6 +128,7 @@ * @defgroup strings_modify Modifying * @defgroup strings_replace Replacing * @defgroup strings_split Splitting + * @defgroup strings_extract Extracting * @defgroup strings_json JSON * @defgroup strings_regex Regex * @} diff --git a/cpp/src/binaryop/binaryop.cpp b/cpp/src/binaryop/binaryop.cpp index b23c1fc9fe1..f81f0dcc311 100644 --- a/cpp/src/binaryop/binaryop.cpp +++ b/cpp/src/binaryop/binaryop.cpp @@ -203,7 +203,7 @@ std::unique_ptr binary_operation(LhsType const& lhs, return cudf::binops::compiled::string_null_min_max(lhs, rhs, op, output_type, stream, mr); if (not cudf::binops::compiled::is_supported_operation(output_type, lhs.type(), rhs.type(), op)) - CUDF_FAIL("Unsupported operator for these types"); + CUDF_FAIL("Unsupported operator for these types", cudf::data_type_error); if (cudf::is_fixed_point(lhs.type()) or cudf::is_fixed_point(rhs.type())) { cudf::binops::compiled::fixed_point_binary_operation_validation( diff --git a/cpp/src/binaryop/compiled/equality_ops.cu b/cpp/src/binaryop/compiled/equality_ops.cu index 61f02252a26..041fca76494 100644 --- a/cpp/src/binaryop/compiled/equality_ops.cu +++ b/cpp/src/binaryop/compiled/equality_ops.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,7 +26,8 @@ void dispatch_equality_op(mutable_column_view& out, rmm::cuda_stream_view stream) { CUDF_EXPECTS(op == binary_operator::EQUAL || op == binary_operator::NOT_EQUAL, - "Unsupported operator for these types"); + "Unsupported operator for these types", + cudf::data_type_error); auto common_dtype = get_common_type(out.type(), lhs.type(), rhs.type()); auto outd = mutable_column_device_view::create(out, stream); auto lhsd = column_device_view::create(lhs, stream); diff --git a/cpp/src/binaryop/compiled/struct_binary_ops.cuh b/cpp/src/binaryop/compiled/struct_binary_ops.cuh index d167f0fe3c5..8418493318f 100644 --- a/cpp/src/binaryop/compiled/struct_binary_ops.cuh +++ b/cpp/src/binaryop/compiled/struct_binary_ops.cuh @@ -149,7 +149,8 @@ void apply_struct_equality_op(mutable_column_view& out, { CUDF_EXPECTS(op == binary_operator::EQUAL || op == binary_operator::NOT_EQUAL || op == binary_operator::NULL_EQUALS, - "Unsupported operator for these types"); + "Unsupported operator for these types", + cudf::data_type_error); auto tlhs = table_view{{lhs}}; auto trhs = table_view{{rhs}}; diff --git a/cpp/src/copying/concatenate.cu b/cpp/src/copying/concatenate.cu index 577d6427b19..5d36d70696c 100644 --- a/cpp/src/copying/concatenate.cu +++ b/cpp/src/copying/concatenate.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -228,7 +228,8 @@ std::unique_ptr fused_concatenate(host_span views, auto const output_size = std::get<3>(device_views); CUDF_EXPECTS(output_size <= static_cast(std::numeric_limits::max()), - "Total number of concatenated rows exceeds size_type range"); + "Total number of concatenated rows exceeds size_type range", + std::overflow_error); // Allocate output auto const policy = has_nulls ? mask_policy::ALWAYS : mask_policy::NEVER; @@ -398,7 +399,8 @@ void traverse_children::operator()(host_span(std::numeric_limits::max()), - "Total number of concatenated chars exceeds size_type range"); + "Total number of concatenated chars exceeds size_type range", + std::overflow_error); } template <> @@ -469,7 +471,8 @@ void bounds_and_type_check(host_span cols, rmm::cuda_stream_v }); // note: output text must include "exceeds size_type range" for python error handling CUDF_EXPECTS(total_row_count <= static_cast(std::numeric_limits::max()), - "Total number of concatenated rows exceeds size_type range"); + "Total number of concatenated rows exceeds size_type range", + std::overflow_error); // traverse children cudf::type_dispatcher(cols.front().type(), traverse_children{}, cols, stream); diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu index 0e90848af3a..1979108eaa2 100644 --- a/cpp/src/groupby/groupby.cu +++ b/cpp/src/groupby/groupby.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include diff --git a/cpp/src/groupby/sort/group_rank_scan.cu b/cpp/src/groupby/sort/group_rank_scan.cu index 479ce166724..9ebac957e8f 100644 --- a/cpp/src/groupby/sort/group_rank_scan.cu +++ b/cpp/src/groupby/sort/group_rank_scan.cu @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include diff --git a/cpp/src/groupby/sort/group_std.cu b/cpp/src/groupby/sort/group_std.cu index 87fd9f7e843..a3efc1f172a 100644 --- a/cpp/src/groupby/sort/group_std.cu +++ b/cpp/src/groupby/sort/group_std.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -99,9 +99,6 @@ struct var_functor { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { -// Running this in debug build causes a runtime error: -// `reduce_by_key failed on 2nd step: invalid device function` -#if !defined(__CUDACC_DEBUG__) using ResultType = cudf::detail::target_type_t; std::unique_ptr result = make_numeric_column(data_type(type_to_id()), @@ -141,9 +138,6 @@ struct var_functor { }); return result; -#else - CUDF_FAIL("Groupby std/var supported in debug build"); -#endif } template diff --git a/cpp/src/groupby/sort/sort_helper.cu b/cpp/src/groupby/sort/sort_helper.cu index b53955472b1..ebafcd75e6d 100644 --- a/cpp/src/groupby/sort/sort_helper.cu +++ b/cpp/src/groupby/sort/sort_helper.cu @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -61,8 +60,6 @@ sort_groupby_helper::sort_groupby_helper(table_view const& keys, _include_null_keys(include_null_keys), _null_precedence(null_precedence) { - using namespace cudf::structs::detail; - // Cannot depend on caller's sorting if the column contains nulls, // and null values are to be excluded. 
// Re-sort the data, to filter out nulls more easily. diff --git a/cpp/src/io/json/write_json.cu b/cpp/src/io/json/write_json.cu index a7ae4d3bdd1..b4bcb5548de 100644 --- a/cpp/src/io/json/write_json.cu +++ b/cpp/src/io/json/write_json.cu @@ -240,6 +240,7 @@ std::unique_ptr struct_to_strings(table_view const& strings_columns, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + CUDF_FUNC_RANGE(); CUDF_EXPECTS(column_names.type().id() == type_id::STRING, "Column names must be of type string"); auto const num_columns = strings_columns.num_columns(); CUDF_EXPECTS(num_columns == column_names.size(), @@ -481,6 +482,7 @@ struct column_to_strings_fn { column_iterator column_end, host_span children_names) const { + CUDF_FUNC_RANGE(); auto const num_columns = std::distance(column_begin, column_end); auto column_names = make_column_names_column(children_names, num_columns, stream_); auto column_names_view = column_names->view(); @@ -590,6 +592,7 @@ void write_chunked(data_sink* out_sink, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + CUDF_FUNC_RANGE(); CUDF_EXPECTS(str_column_view.size() > 0, "Unexpected empty strings column."); auto p_str_col_w_nl = cudf::strings::detail::join_strings(str_column_view, @@ -620,6 +623,7 @@ void write_json(data_sink* out_sink, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + CUDF_FUNC_RANGE(); std::vector user_column_names = [&]() { auto const& metadata = options.get_metadata(); if (metadata.has_value() and not metadata->schema_info.empty()) { diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index a6effeefc6c..8d85b001817 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -2246,6 +2246,10 @@ void writer::impl::write(table_view const& table) enc_data.streams, comp_results, stream); + + // deallocate encoded data as it is not needed anymore + enc_data.data = rmm::device_uvector{0, stream}; + strm_descs.device_to_host(stream); 
comp_results.device_to_host(stream, true); } diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index ee115e7432a..25b9f7fd285 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -328,7 +328,9 @@ __device__ void gpuDecodeStream( * @param[in] t Warp1 thread ID (0..31) * * @return A pair containing the new output position, and the total length of strings decoded (this - * will only be valid on thread 0 and if sizes_only is true) + * will only be valid on thread 0 and if sizes_only is true). In the event that this function + * decodes strings beyond target_pos, the total length of strings returned will include these + * additional values. */ template __device__ cuda::std::pair gpuDecodeDictionaryIndices(volatile page_state_s* s, @@ -415,13 +417,9 @@ __device__ cuda::std::pair gpuDecodeDictionaryIndices(volatile page_st // if we're computing sizes, add the length(s) if constexpr (sizes_only) { int const len = [&]() { - if (t >= batch_len) { return 0; } - // we may end up decoding more indices than we asked for. so don't include those in the - // size calculation - if (pos + t >= target_pos) { return 0; } - // TODO: refactor this with gpuGetStringData / gpuGetStringSize + if (t >= batch_len || (pos + t >= target_pos)) { return 0; } uint32_t const dict_pos = (s->dict_bits > 0) ? 
dict_idx * sizeof(string_index_pair) : 0; - if (target_pos && dict_pos < (uint32_t)s->dict_size) { + if (dict_pos < (uint32_t)s->dict_size) { const auto* src = reinterpret_cast(s->dict_base + dict_pos); return src->second; } @@ -512,6 +510,7 @@ __device__ int gpuDecodeRleBooleans(volatile page_state_s* s, int target_pos, in * * @return Total length of strings processed */ +template __device__ size_type gpuInitStringDescriptors(volatile page_state_s* s, int target_pos, int t) { int pos = s->dict_pos; @@ -532,8 +531,10 @@ __device__ size_type gpuInitStringDescriptors(volatile page_state_s* s, int targ } else { len = 0; } - s->dict_idx[rolling_index(pos)] = k; - s->str_len[rolling_index(pos)] = len; + if constexpr (!sizes_only) { + s->dict_idx[rolling_index(pos)] = k; + s->str_len[rolling_index(pos)] = len; + } k += len; total_len += len; pos++; @@ -1167,6 +1168,8 @@ static __device__ bool setupLocalPageInfo(page_state_s* const s, s->dict_bits = 0; s->dict_base = nullptr; s->dict_size = 0; + // NOTE: if additional encodings are supported in the future, modifications must + // be made to is_supported_encoding() in reader_impl_preprocess.cu switch (s->page.encoding) { case Encoding::PLAIN_DICTIONARY: case Encoding::RLE_DICTIONARY: @@ -1603,6 +1606,7 @@ static __device__ void gpuUpdatePageSizes(page_state_s* s, uint32_t const warp_row_count_mask = ballot(is_new_row); int const is_new_leaf = (d >= s->nesting_info[max_depth - 1].max_def_level) ? 1 : 0; uint32_t const warp_leaf_count_mask = ballot(is_new_leaf); + // is this thread within row bounds? on the first pass we don't know the bounds, so we will be // computing the full size of the column. on the second pass, we will know our actual row // bounds, so the computation will cap sizes properly. 
@@ -1654,18 +1658,27 @@ static __device__ void gpuUpdatePageSizes(page_state_s* s, } } -__device__ size_type gpuGetStringSize(page_state_s* s, int target_count, int t) +/** + * @brief Returns the total size in bytes of string char data in the page. + * + * This function expects the dictionary position to be at 0 and will traverse + * the entire thing. + * + * @param s The local page info + * @param t Thread index + */ +__device__ size_type gpuDecodeTotalPageStringSize(page_state_s* s, int t) { - auto dict_target_pos = target_count; + size_type target_pos = s->num_input_values; size_type str_len = 0; if (s->dict_base) { - auto const [new_target_pos, len] = gpuDecodeDictionaryIndices(s, target_count, t); - dict_target_pos = new_target_pos; + auto const [new_target_pos, len] = gpuDecodeDictionaryIndices(s, target_pos, t); + target_pos = new_target_pos; str_len = len; } else if ((s->col.data_type & 7) == BYTE_ARRAY) { - str_len = gpuInitStringDescriptors(s, target_count, t); + str_len = gpuInitStringDescriptors(s, target_pos, t); } - if (!t) { *(volatile int32_t*)&s->dict_pos = dict_target_pos; } + if (!t) { *(volatile int32_t*)&s->dict_pos = target_pos; } return str_len; } @@ -1795,14 +1808,14 @@ __global__ void __launch_bounds__(block_size) // process what we got back gpuUpdatePageSizes(s, actual_input_count, t, !is_base_pass); - if (compute_string_sizes) { - auto const str_len = gpuGetStringSize(s, s->input_leaf_count, t); - if (!t) { s->page.str_bytes += str_len; } - } - target_input_count = actual_input_count + batch_size; __syncwarp(); } + + // retrieve total string size. + // TODO: investigate if it is possible to do this with a separate warp at the same time levels + // are being decoded above. 
+ if (compute_string_sizes) { s->page.str_bytes = gpuDecodeTotalPageStringSize(s, t); } } // update output results: @@ -1913,7 +1926,7 @@ __global__ void __launch_bounds__(block_size) gpuDecodePageData( } else if ((s->col.data_type & 7) == BOOLEAN) { src_target_pos = gpuDecodeRleBooleans(s, src_target_pos, t & 0x1f); } else if ((s->col.data_type & 7) == BYTE_ARRAY) { - gpuInitStringDescriptors(s, src_target_pos, t & 0x1f); + gpuInitStringDescriptors(s, src_target_pos, t & 0x1f); } if (t == 32) { *(volatile int32_t*)&s->dict_pos = src_target_pos; } } else { diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu index 9f8f42702cd..5a12acec2a3 100644 --- a/cpp/src/io/parquet/page_enc.cu +++ b/cpp/src/io/parquet/page_enc.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -126,129 +126,164 @@ constexpr size_t underflow_safe_subtract(size_t a, size_t b) return a - b; } +void __device__ init_frag_state(frag_init_state_s* const s, + uint32_t fragment_size, + int part_end_row) +{ + // frag.num_rows = fragment_size except for the last fragment in partition which can be + // smaller. num_rows is fixed but fragment size could be larger if the data is strings or + // nested. 
+ s->frag.num_rows = min(fragment_size, part_end_row - s->frag.start_row); + s->frag.num_dict_vals = 0; + s->frag.fragment_data_size = 0; + s->frag.dict_data_size = 0; + + s->frag.start_value_idx = row_to_value_idx(s->frag.start_row, s->col); + auto const end_value_idx = row_to_value_idx(s->frag.start_row + s->frag.num_rows, s->col); + s->frag.num_leaf_values = end_value_idx - s->frag.start_value_idx; + + if (s->col.level_offsets != nullptr) { + // For nested schemas, the number of values in a fragment is not directly related to the + // number of encoded data elements or the number of rows. It is simply the number of + // repetition/definition values which together encode validity and nesting information. + auto const first_level_val_idx = s->col.level_offsets[s->frag.start_row]; + auto const last_level_val_idx = s->col.level_offsets[s->frag.start_row + s->frag.num_rows]; + s->frag.num_values = last_level_val_idx - first_level_val_idx; + } else { + s->frag.num_values = s->frag.num_rows; + } +} + +template +void __device__ calculate_frag_size(frag_init_state_s* const s, int t) +{ + using block_reduce = cub::BlockReduce; + __shared__ typename block_reduce::TempStorage reduce_storage; + + auto const physical_type = s->col.physical_type; + auto const leaf_type = s->col.leaf_column->type().id(); + auto const dtype_len = physical_type_len(physical_type, leaf_type); + auto const nvals = s->frag.num_leaf_values; + auto const start_value_idx = s->frag.start_value_idx; + + for (uint32_t i = 0; i < nvals; i += block_size) { + auto const val_idx = start_value_idx + i + t; + auto const is_valid = i + t < nvals && val_idx < s->col.leaf_column->size() && + s->col.leaf_column->is_valid(val_idx); + uint32_t len; + if (is_valid) { + len = dtype_len; + if (physical_type == BYTE_ARRAY) { + switch (leaf_type) { + case type_id::STRING: { + auto str = s->col.leaf_column->element(val_idx); + len += str.size_bytes(); + } break; + case type_id::LIST: { + auto list_element = + 
get_element(*s->col.leaf_column, val_idx); + len += list_element.size_bytes(); + } break; + default: CUDF_UNREACHABLE("Unsupported data type for leaf column"); + } + } + } else { + len = 0; + } + + len = block_reduce(reduce_storage).Sum(len); + if (t == 0) { s->frag.fragment_data_size += len; } + __syncthreads(); + // page fragment size must fit in a 32-bit signed integer + if (s->frag.fragment_data_size > std::numeric_limits::max()) { + CUDF_UNREACHABLE("page fragment size exceeds maximum for i32"); + } + } +} + } // anonymous namespace // blockDim {512,1,1} template __global__ void __launch_bounds__(block_size) - gpuInitPageFragments(device_2dspan frag, - device_span col_desc, - device_span partitions, - device_span part_frag_offset, - uint32_t fragment_size) + gpuInitRowGroupFragments(device_2dspan frag, + device_span col_desc, + device_span partitions, + device_span part_frag_offset, + uint32_t fragment_size) { __shared__ __align__(16) frag_init_state_s state_g; - using block_reduce = cub::BlockReduce; - __shared__ typename block_reduce::TempStorage reduce_storage; - frag_init_state_s* const s = &state_g; uint32_t const t = threadIdx.x; - auto const physical_type = col_desc[blockIdx.x].physical_type; uint32_t const num_fragments_per_column = frag.size().second; if (t == 0) { s->col = col_desc[blockIdx.x]; } __syncthreads(); - auto const leaf_type = s->col.leaf_column->type().id(); - auto const dtype_len = physical_type_len(physical_type, leaf_type); - for (uint32_t frag_y = blockIdx.y; frag_y < num_fragments_per_column; frag_y += gridDim.y) { if (t == 0) { // Find which partition this fragment came from auto it = thrust::upper_bound(thrust::seq, part_frag_offset.begin(), part_frag_offset.end(), frag_y); - int p = it - part_frag_offset.begin() - 1; - int part_end_row = partitions[p].start_row + partitions[p].num_rows; + int const p = it - part_frag_offset.begin() - 1; + int const part_end_row = partitions[p].start_row + partitions[p].num_rows; s->frag.start_row 
= (frag_y - part_frag_offset[p]) * fragment_size + partitions[p].start_row; - - // frag.num_rows = fragment_size except for the last fragment in partition which can be - // smaller. num_rows is fixed but fragment size could be larger if the data is strings or - // nested. - s->frag.num_rows = min(fragment_size, part_end_row - s->frag.start_row); - s->frag.num_dict_vals = 0; - s->frag.fragment_data_size = 0; - s->frag.dict_data_size = 0; - - s->frag.start_value_idx = row_to_value_idx(s->frag.start_row, s->col); - size_type end_value_idx = row_to_value_idx(s->frag.start_row + s->frag.num_rows, s->col); - s->frag.num_leaf_values = end_value_idx - s->frag.start_value_idx; - - if (s->col.level_offsets != nullptr) { - // For nested schemas, the number of values in a fragment is not directly related to the - // number of encoded data elements or the number of rows. It is simply the number of - // repetition/definition values which together encode validity and nesting information. - size_type first_level_val_idx = s->col.level_offsets[s->frag.start_row]; - size_type last_level_val_idx = s->col.level_offsets[s->frag.start_row + s->frag.num_rows]; - s->frag.num_values = last_level_val_idx - first_level_val_idx; - } else { - s->frag.num_values = s->frag.num_rows; - } + s->frag.chunk = frag[blockIdx.x][frag_y].chunk; + init_frag_state(s, fragment_size, part_end_row); } __syncthreads(); - size_type nvals = s->frag.num_leaf_values; - size_type start_value_idx = s->frag.start_value_idx; - - for (uint32_t i = 0; i < nvals; i += block_size) { - uint32_t val_idx = start_value_idx + i + t; - uint32_t is_valid = (i + t < nvals && val_idx < s->col.leaf_column->size()) - ? 
s->col.leaf_column->is_valid(val_idx) - : 0; - uint32_t len; - if (is_valid) { - len = dtype_len; - if (physical_type == BYTE_ARRAY) { - switch (leaf_type) { - case type_id::STRING: { - auto str = s->col.leaf_column->element(val_idx); - len += str.size_bytes(); - } break; - case type_id::LIST: { - auto list_element = - get_element(*s->col.leaf_column, val_idx); - len += list_element.size_bytes(); - } break; - default: CUDF_UNREACHABLE("Unsupported data type for leaf column"); - } - } - } else { - len = 0; - } - - len = block_reduce(reduce_storage).Sum(len); - if (t == 0) { s->frag.fragment_data_size += len; } - __syncthreads(); - // page fragment size must fit in a 32-bit signed integer - if (s->frag.fragment_data_size > std::numeric_limits::max()) { - CUDF_UNREACHABLE("page fragment size exceeds maximum for i32"); - } - } + calculate_frag_size(s, t); __syncthreads(); if (t == 0) { frag[blockIdx.x][frag_y] = s->frag; } } } +// blockDim {512,1,1} +template +__global__ void __launch_bounds__(block_size) + gpuCalculatePageFragments(device_span frag, + device_span column_frag_sizes) +{ + __shared__ __align__(16) frag_init_state_s state_g; + + EncColumnChunk* const ck_g = frag[blockIdx.x].chunk; + frag_init_state_s* const s = &state_g; + uint32_t const t = threadIdx.x; + auto const fragment_size = column_frag_sizes[ck_g->col_desc_id]; + + if (t == 0) { s->col = *ck_g->col_desc; } + __syncthreads(); + + if (t == 0) { + int const part_end_row = ck_g->start_row + ck_g->num_rows; + s->frag.start_row = ck_g->start_row + (blockIdx.x - ck_g->first_fragment) * fragment_size; + s->frag.chunk = ck_g; + init_frag_state(s, fragment_size, part_end_row); + } + __syncthreads(); + + calculate_frag_size(s, t); + if (t == 0) { frag[blockIdx.x] = s->frag; } +} + // blockDim {128,1,1} __global__ void __launch_bounds__(128) - gpuInitFragmentStats(device_2dspan groups, - device_2dspan fragments, - device_span col_desc) + gpuInitFragmentStats(device_span groups, + device_span fragments) { - 
uint32_t const lane_id = threadIdx.x & WARP_MASK; - uint32_t const column_id = blockIdx.x; - uint32_t const num_fragments_per_column = fragments.size().second; - - uint32_t frag_id = blockIdx.y * 4 + (threadIdx.x / cudf::detail::warp_size); - while (frag_id < num_fragments_per_column) { + uint32_t const lane_id = threadIdx.x & WARP_MASK; + uint32_t const frag_id = blockIdx.x * 4 + (threadIdx.x / cudf::detail::warp_size); + if (frag_id < fragments.size()) { if (lane_id == 0) { statistics_group g; - g.col = &col_desc[column_id]; - g.start_row = fragments[column_id][frag_id].start_value_idx; - g.num_rows = fragments[column_id][frag_id].num_leaf_values; - groups[column_id][frag_id] = g; + auto* const ck_g = fragments[frag_id].chunk; + g.col = ck_g->col_desc; + g.start_row = fragments[frag_id].start_value_idx; + g.num_rows = fragments[frag_id].num_leaf_values; + groups[frag_id] = g; } - frag_id += gridDim.y * 4; } } @@ -389,7 +424,7 @@ __global__ void __launch_bounds__(128) if (num_rows >= ck_g.num_rows || (values_in_page > 0 && (page_size + fragment_data_size > this_max_page_size)) || - rows_in_page >= max_page_size_rows) { + rows_in_page + frag_g.num_rows > max_page_size_rows) { if (ck_g.use_dictionary) { // Additional byte to store entry bit width page_size = 1 + max_RLE_page_size(ck_g.dict_rle_bits, values_in_page); @@ -2057,33 +2092,35 @@ __global__ void __launch_bounds__(1) ck_g->column_index_size = static_cast(col_idx_end - ck_g->column_index_blob); } -void InitPageFragments(device_2dspan frag, - device_span col_desc, - device_span partitions, - device_span part_frag_offset, - uint32_t fragment_size, - rmm::cuda_stream_view stream) +void InitRowGroupFragments(device_2dspan frag, + device_span col_desc, + device_span partitions, + device_span part_frag_offset, + uint32_t fragment_size, + rmm::cuda_stream_view stream) { auto const num_columns = frag.size().first; auto const num_fragments_per_column = frag.size().second; auto const grid_y = 
std::min(static_cast(num_fragments_per_column), MAX_GRID_Y_SIZE); dim3 const dim_grid(num_columns, grid_y); // 1 threadblock per fragment - gpuInitPageFragments<512><<>>( + gpuInitRowGroupFragments<512><<>>( frag, col_desc, partitions, part_frag_offset, fragment_size); } -void InitFragmentStatistics(device_2dspan groups, - device_2dspan fragments, - device_span col_desc, +void CalculatePageFragments(device_span frag, + device_span column_frag_sizes, + rmm::cuda_stream_view stream) +{ + gpuCalculatePageFragments<512><<>>(frag, column_frag_sizes); +} + +void InitFragmentStatistics(device_span groups, + device_span fragments, rmm::cuda_stream_view stream) { - int const num_columns = col_desc.size(); - int const num_fragments_per_column = fragments.size().second; - auto const y_dim = - util::div_rounding_up_safe(num_fragments_per_column, 128 / cudf::detail::warp_size); - auto const grid_y = std::min(static_cast(y_dim), MAX_GRID_Y_SIZE); - dim3 const dim_grid(num_columns, grid_y); // 1 warp per fragment - gpuInitFragmentStats<<>>(groups, fragments, col_desc); + int const num_fragments = fragments.size(); + int const dim = util::div_rounding_up_safe(num_fragments, 128 / cudf::detail::warp_size); + gpuInitFragmentStats<<>>(groups, fragments); } void InitEncoderPages(device_2dspan chunks, diff --git a/cpp/src/io/parquet/parquet_common.hpp b/cpp/src/io/parquet/parquet_common.hpp index d56f2fb08ca..ab6290c4ed6 100644 --- a/cpp/src/io/parquet/parquet_common.hpp +++ b/cpp/src/io/parquet/parquet_common.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * Copyright (c) 2018-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -86,11 +86,12 @@ enum class Encoding : uint8_t { GROUP_VAR_INT = 1, // Deprecated, never used PLAIN_DICTIONARY = 2, RLE = 3, - BIT_PACKED = 4, + BIT_PACKED = 4, // Deprecated by parquet-format in 2013, superseded by RLE DELTA_BINARY_PACKED = 5, DELTA_LENGTH_BYTE_ARRAY = 6, DELTA_BYTE_ARRAY = 7, RLE_DICTIONARY = 8, + BYTE_STREAM_SPLIT = 9, }; /** diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index 9b156745e41..c3d3843362a 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -445,8 +445,8 @@ void BuildStringDictionaryIndex(ColumnChunkDesc* chunks, * * @param pages All pages to be decoded * @param chunks All chunks to be decoded - * @param num_rows Maximum number of rows to read * @param min_rows crop all rows below min_row + * @param num_rows Maximum number of rows to read * @param compute_num_rows If set to true, the num_rows field in PageInfo will be * computed * @param compute_string_sizes If set to true, the str_bytes field in PageInfo will @@ -455,8 +455,8 @@ void BuildStringDictionaryIndex(ColumnChunkDesc* chunks, */ void ComputePageSizes(hostdevice_vector& pages, hostdevice_vector const& chunks, - size_t num_rows, size_t min_row, + size_t num_rows, bool compute_num_rows, bool compute_string_sizes, rmm::cuda_stream_view stream); @@ -480,8 +480,9 @@ void DecodePageData(hostdevice_vector& pages, rmm::cuda_stream_view stream); /** - * @brief Launches kernel for initializing encoder page fragments + * @brief Launches kernel for initializing encoder row group fragments * + * These fragments are used to calculate row group boundaries. * Based on the number of rows in each fragment, populates the value count, the size of data in the * fragment, the number of unique values, and the data size of unique values. 
* @@ -492,24 +493,38 @@ void DecodePageData(hostdevice_vector& pages, * @param[in] fragment_size Number of rows per fragment * @param[in] stream CUDA stream to use */ -void InitPageFragments(cudf::detail::device_2dspan frag, - device_span col_desc, - device_span partitions, - device_span first_frag_in_part, - uint32_t fragment_size, - rmm::cuda_stream_view stream); +void InitRowGroupFragments(cudf::detail::device_2dspan frag, + device_span col_desc, + device_span partitions, + device_span first_frag_in_part, + uint32_t fragment_size, + rmm::cuda_stream_view stream); + +/** + * @brief Launches kernel for calculating encoder page fragments with variable fragment sizes + * + * Based on the number of rows in each fragment, populates the value count, the size of data in the + * fragment, the number of unique values, and the data size of unique values. + * + * This assumes an initial call to InitRowGroupFragments has been made. + * + * @param[out] frag Fragment array [fragment_id] + * @param[in] column_frag_sizes Number of rows per fragment per column [column_id] + * @param[in] stream CUDA stream to use + */ +void CalculatePageFragments(device_span frag, + device_span column_frag_sizes, + rmm::cuda_stream_view stream); /** - * @brief Launches kernel for initializing fragment statistics groups + * @brief Launches kernel for initializing fragment statistics groups with variable fragment sizes * - * @param[out] groups Statistics groups [num_columns x num_fragments] - * @param[in] fragments Page fragments [num_columns x num_fragments] - * @param[in] col_desc Column description [num_columns] + * @param[out] groups Statistics groups [total_fragments] + * @param[in] fragments Page fragments [total_fragments] * @param[in] stream CUDA stream to use */ -void InitFragmentStatistics(cudf::detail::device_2dspan groups, - cudf::detail::device_2dspan fragments, - device_span col_desc, +void InitFragmentStatistics(device_span groups, + device_span fragments, rmm::cuda_stream_view 
stream); /** diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index fcfea35f50c..8b86412ae63 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -130,10 +130,21 @@ class reader::impl { bool uses_custom_row_bounds, host_span const> row_group_indices); + /** + * @brief Create chunk information and start file reads + * + * @param row_groups_info vector of information about row groups to read + * @param num_rows Maximum number of rows to read + * @return pair of boolean indicating if compressed chunks were found and a vector of futures for + * read completion + */ + std::pair>> create_and_read_column_chunks( + cudf::host_span const row_groups_info, size_type num_rows); + /** * @brief Load and decompress the input file(s) into memory. */ - void load_and_decompress_data(std::vector const& row_groups_info, + void load_and_decompress_data(cudf::host_span const row_groups_info, size_type num_rows); /** diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index b1d013a96a3..6b5d4ba3640 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -307,6 +307,18 @@ template return total_pages; } +// see setupLocalPageInfo() in page_data.cu for supported page encodings +constexpr bool is_supported_encoding(Encoding enc) +{ + switch (enc) { + case Encoding::PLAIN: + case Encoding::PLAIN_DICTIONARY: + case Encoding::RLE: + case Encoding::RLE_DICTIONARY: return true; + default: return false; + } +} + /** * @brief Decode the page information from the given column chunks. 
* @@ -329,6 +341,12 @@ void decode_page_headers(hostdevice_vector& chunks, chunks.host_to_device(stream); gpu::DecodePageHeaders(chunks.device_ptr(), chunks.size(), stream); pages.device_to_host(stream, true); + + // validate page encodings + CUDF_EXPECTS(std::all_of(pages.begin(), + pages.end(), + [](auto const& page) { return is_supported_encoding(page.encoding); }), + "Unsupported page encoding detected"); } /** @@ -651,16 +669,11 @@ void reader::impl::allocate_nesting_info() page_nesting_decode_info.host_to_device(_stream); } -void reader::impl::load_and_decompress_data(std::vector const& row_groups_info, - size_type num_rows) +std::pair>> reader::impl::create_and_read_column_chunks( + cudf::host_span const row_groups_info, size_type num_rows) { - // This function should never be called if `num_rows == 0`. - CUDF_EXPECTS(num_rows > 0, "Number of reading rows must not be zero."); - - auto& raw_page_data = _file_itm_data.raw_page_data; - auto& decomp_page_data = _file_itm_data.decomp_page_data; - auto& chunks = _file_itm_data.chunks; - auto& pages_info = _file_itm_data.pages_info; + auto& raw_page_data = _file_itm_data.raw_page_data; + auto& chunks = _file_itm_data.chunks; // Descriptors for all the chunks that make up the selected columns const auto num_input_columns = _input_columns.size(); @@ -732,7 +745,7 @@ void reader::impl::load_and_decompress_data(std::vector const& r total_decompressed_size += col_meta.total_uncompressed_size; } } - remaining_rows -= row_group.num_rows; + remaining_rows -= row_group_rows; } // Read compressed chunk data to device memory @@ -745,12 +758,29 @@ void reader::impl::load_and_decompress_data(std::vector const& r chunk_source_map, _stream)); + CUDF_EXPECTS(remaining_rows == 0, "All rows data must be read."); + + return {total_decompressed_size > 0, std::move(read_rowgroup_tasks)}; +} + +void reader::impl::load_and_decompress_data( + cudf::host_span const row_groups_info, size_type num_rows) +{ + // This function should never be 
called if `num_rows == 0`. + CUDF_EXPECTS(num_rows > 0, "Number of reading rows must not be zero."); + + auto& raw_page_data = _file_itm_data.raw_page_data; + auto& decomp_page_data = _file_itm_data.decomp_page_data; + auto& chunks = _file_itm_data.chunks; + auto& pages_info = _file_itm_data.pages_info; + + auto const [has_compressed_data, read_rowgroup_tasks] = + create_and_read_column_chunks(row_groups_info, num_rows); + for (auto& task : read_rowgroup_tasks) { task.wait(); } - CUDF_EXPECTS(remaining_rows <= 0, "All rows data must be read."); - // Process dataset chunk pages into output columns auto const total_pages = count_page_headers(chunks, _stream); pages_info = hostdevice_vector(total_pages, total_pages, _stream); @@ -758,14 +788,11 @@ void reader::impl::load_and_decompress_data(std::vector const& r if (total_pages > 0) { // decoding of column/page information decode_page_headers(chunks, pages_info, _stream); - if (total_decompressed_size > 0) { + if (has_compressed_data) { decomp_page_data = decompress_page_data(chunks, pages_info, _stream); // Free compressed data for (size_t c = 0; c < chunks.size(); c++) { - if (chunks[c].codec != parquet::Compression::UNCOMPRESSED) { - raw_page_data[c].reset(); - // TODO: Check if this is called - } + if (chunks[c].codec != parquet::Compression::UNCOMPRESSED) { raw_page_data[c].reset(); } } } @@ -807,13 +834,15 @@ void print_pages(hostdevice_vector& pages, rmm::cuda_stream_view // skip dictionary pages if (p.flags & gpu::PAGEINFO_FLAGS_DICTIONARY) { continue; } printf( - "P(%lu, s:%d): chunk_row(%d), num_rows(%d), skipped_values(%d), skipped_leaf_values(%d)\n", + "P(%lu, s:%d): chunk_row(%d), num_rows(%d), skipped_values(%d), skipped_leaf_values(%d), " + "str_bytes(%d)\n", idx, p.src_col_schema, p.chunk_row, p.num_rows, p.skipped_values, - p.skipped_leaf_values); + p.skipped_leaf_values, + p.str_bytes); } } diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 
13ec2d652a6..2c9bff33a14 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -31,6 +31,7 @@ #include #include +#include #include #include #include @@ -38,6 +39,7 @@ #include #include #include +#include #include #include @@ -85,6 +87,44 @@ parquet::Compression to_parquet_compression(compression_type compression) } } +size_t column_size(column_view const& column, rmm::cuda_stream_view stream) +{ + if (column.size() == 0) { return 0; } + + if (is_fixed_width(column.type())) { + return size_of(column.type()) * column.size(); + } else if (column.type().id() == type_id::STRING) { + auto const scol = strings_column_view(column); + return cudf::detail::get_value(scol.offsets(), column.size(), stream) - + cudf::detail::get_value(scol.offsets(), 0, stream); + } else if (column.type().id() == type_id::STRUCT) { + auto const scol = structs_column_view(column); + size_t ret = 0; + for (int i = 0; i < scol.num_children(); i++) { + ret += column_size(scol.get_sliced_child(i), stream); + } + return ret; + } else if (column.type().id() == type_id::LIST) { + auto const lcol = lists_column_view(column); + return column_size(lcol.get_sliced_child(stream), stream); + } + + CUDF_FAIL("Unexpected compound type"); +} + +// checks to see if the given column has a fixed size. This doesn't +// check every row, so assumes string and list columns are not fixed, even +// if each row is the same width. +// TODO: update this if FIXED_LEN_BYTE_ARRAY is ever supported for writes. 
+bool is_col_fixed_width(column_view const& column) +{ + if (column.type().id() == type_id::STRUCT) { + return std::all_of(column.child_begin(), column.child_end(), is_col_fixed_width); + } + + return is_fixed_width(column.type()); +} + } // namespace struct aggregate_writer_metadata { @@ -886,34 +926,33 @@ gpu::parquet_column_device_view parquet_column_view::get_device_view( return desc; } -void writer::impl::init_page_fragments(cudf::detail::hostdevice_2dvector& frag, - device_span col_desc, - host_span partitions, - device_span part_frag_offset, - uint32_t fragment_size) +void writer::impl::init_row_group_fragments( + cudf::detail::hostdevice_2dvector& frag, + device_span col_desc, + host_span partitions, + device_span part_frag_offset, + uint32_t fragment_size) { auto d_partitions = cudf::detail::make_device_uvector_async(partitions, stream); - gpu::InitPageFragments(frag, col_desc, d_partitions, part_frag_offset, fragment_size, stream); + gpu::InitRowGroupFragments(frag, col_desc, d_partitions, part_frag_offset, fragment_size, stream); frag.device_to_host(stream, true); } -void writer::impl::gather_fragment_statistics( - device_2dspan frag_stats_chunk, - device_2dspan frag, - device_span col_desc, - uint32_t num_fragments) +void writer::impl::calculate_page_fragments(device_span frag, + host_span frag_sizes) { - auto num_columns = col_desc.size(); - rmm::device_uvector frag_stats_group(num_fragments * num_columns, stream); - auto frag_stats_group_2dview = - device_2dspan(frag_stats_group.data(), num_columns, num_fragments); - - gpu::InitFragmentStatistics(frag_stats_group_2dview, frag, col_desc, stream); - detail::calculate_group_statistics(frag_stats_chunk.data(), - frag_stats_group.data(), - num_fragments * num_columns, - stream, - int96_timestamps); + auto d_frag_sz = cudf::detail::make_device_uvector_async(frag_sizes, stream); + gpu::CalculatePageFragments(frag, d_frag_sz, stream); +} + +void writer::impl::gather_fragment_statistics(device_span frag_stats, 
+ device_span frags) +{ + rmm::device_uvector frag_stats_group(frag_stats.size(), stream); + + gpu::InitFragmentStatistics(frag_stats_group, frags, stream); + detail::calculate_group_statistics( + frag_stats.data(), frag_stats_group.data(), frag_stats.size(), stream, int96_timestamps); stream.synchronize(); } @@ -1407,23 +1446,63 @@ void writer::impl::write(table_view const& table, std::vector co }); // Init page fragments - // 5000 is good enough for up to ~200-character strings. Longer strings will start producing - // fragments larger than the desired page size -> TODO: keep track of the max fragment size, and - // iteratively reduce this value if the largest fragment exceeds the max page size limit (we - // ideally want the page size to be below 1MB so as to have enough pages to get good - // compression/decompression performance). - // If using the default fragment size, scale it up or down depending on the requested page size. - if (max_page_fragment_size_ == cudf::io::default_max_page_fragment_size) { - max_page_fragment_size_ = (cudf::io::default_max_page_fragment_size * max_page_size_bytes) / - cudf::io::default_max_page_size_bytes; + // 5000 is good enough for up to ~200-character strings. Longer strings and deeply nested columns + // will start producing fragments larger than the desired page size, so calculate fragment sizes + // for each leaf column. Skip if the fragment size is not the default. 
+ auto max_page_fragment_size = max_page_fragment_size_.value_or(default_max_page_fragment_size); + + std::vector column_frag_size(num_columns, max_page_fragment_size); + + if (table.num_rows() > 0 && not max_page_fragment_size_.has_value()) { + std::vector column_sizes; + std::transform(single_streams_table.begin(), + single_streams_table.end(), + std::back_inserter(column_sizes), + [this](auto const& column) { return column_size(column, stream); }); + + // adjust global fragment size if a single fragment will overrun a rowgroup + auto const table_size = std::reduce(column_sizes.begin(), column_sizes.end()); + auto const avg_row_len = util::div_rounding_up_safe(table_size, table.num_rows()); + if (avg_row_len > 0) { + auto const rg_frag_size = util::div_rounding_up_safe(max_row_group_size, avg_row_len); + max_page_fragment_size = std::min(rg_frag_size, max_page_fragment_size); + } + + // dividing page size by average row length will tend to overshoot the desired + // page size when there's high variability in the row lengths. instead, shoot + // for multiple fragments per page to smooth things out. using 2 was too + // unbalanced in final page sizes, so using 4 which seems to be a good + // compromise at smoothing things out without getting fragment sizes too small. + auto frag_size_fn = [&](auto const& col, size_type col_size) { + const int target_frags_per_page = is_col_fixed_width(col) ? 1 : 4; + auto const avg_len = + target_frags_per_page * util::div_rounding_up_safe(col_size, table.num_rows()); + if (avg_len > 0) { + auto const frag_size = util::div_rounding_up_safe(max_page_size_bytes, avg_len); + return std::min(max_page_fragment_size, frag_size); + } else { + return max_page_fragment_size; + } + }; + + std::transform(single_streams_table.begin(), + single_streams_table.end(), + column_sizes.begin(), + column_frag_size.begin(), + frag_size_fn); } + // Fragments are calculated in two passes. 
In the first pass, a uniform number of fragments + // per column is used. This is done to satisfy the requirement that each column chunk within + // a row group has the same number of rows. After the row group (and thus column chunk) + // boundaries are known, a second pass is done to calculate fragments to be used in determining + // page boundaries within each column chunk. std::vector num_frag_in_part; std::transform(partitions.begin(), partitions.end(), std::back_inserter(num_frag_in_part), - [this](auto const& part) { - return util::div_rounding_up_unsafe(part.num_rows, max_page_fragment_size_); + [this, max_page_fragment_size](auto const& part) { + return util::div_rounding_up_unsafe(part.num_rows, max_page_fragment_size); }); size_type num_fragments = std::reduce(num_frag_in_part.begin(), num_frag_in_part.end()); @@ -1434,7 +1513,7 @@ void writer::impl::write(table_view const& table, std::vector co part_frag_offset.push_back(part_frag_offset.back() + num_frag_in_part.back()); auto d_part_frag_offset = cudf::detail::make_device_uvector_async(part_frag_offset, stream); - cudf::detail::hostdevice_2dvector fragments( + cudf::detail::hostdevice_2dvector row_group_fragments( num_columns, num_fragments, stream); if (num_fragments != 0) { @@ -1443,8 +1522,8 @@ void writer::impl::write(table_view const& table, std::vector co leaf_column_views = create_leaf_column_device_views( col_desc, *parent_column_table_device_view, stream); - init_page_fragments( - fragments, col_desc, partitions, d_part_frag_offset, max_page_fragment_size_); + init_row_group_fragments( + row_group_fragments, col_desc, partitions, d_part_frag_offset, max_page_fragment_size); } std::vector const global_rowgroup_base = md->num_row_groups_per_file(); @@ -1461,9 +1540,9 @@ void writer::impl::write(table_view const& table, std::vector co for (auto f = first_frag_in_rg; f <= last_frag_in_part; ++f) { size_t fragment_data_size = 0; for (auto c = 0; c < num_columns; c++) { - fragment_data_size += 
fragments[c][f].fragment_data_size; + fragment_data_size += row_group_fragments[c][f].fragment_data_size; } - size_type fragment_num_rows = fragments[0][f].num_rows; + size_type fragment_num_rows = row_group_fragments[0][f].num_rows; // If the fragment size gets larger than rg limit then break off a rg if (f > first_frag_in_rg && // There has to be at least one fragment in row group @@ -1490,17 +1569,6 @@ void writer::impl::write(table_view const& table, std::vector co } } - // Allocate column chunks and gather fragment statistics - rmm::device_uvector frag_stats(0, stream); - if (stats_granularity_ != statistics_freq::STATISTICS_NONE) { - frag_stats.resize(num_fragments * num_columns, stream); - if (not frag_stats.is_empty()) { - auto frag_stats_2dview = - device_2dspan(frag_stats.data(), num_columns, num_fragments); - gather_fragment_statistics(frag_stats_2dview, fragments, col_desc, num_fragments); - } - } - std::vector first_rg_in_part; std::exclusive_scan( num_rg_in_part.begin(), num_rg_in_part.end(), std::back_inserter(first_rg_in_part), 0); @@ -1509,6 +1577,9 @@ void writer::impl::write(table_view const& table, std::vector co auto const num_chunks = num_rowgroups * num_columns; hostdevice_2dvector chunks(num_rowgroups, num_columns, stream); + // total fragments per column (in case they are non-uniform) + std::vector frags_per_column(num_columns, 0); + for (size_t p = 0; p < partitions.size(); ++p) { int f = part_frag_offset[p]; size_type start_row = partitions[p].start_row; @@ -1516,22 +1587,21 @@ void writer::impl::write(table_view const& table, std::vector co size_t global_r = global_rowgroup_base[p] + r; // Number of rowgroups already in file/part auto& row_group = md->file(p).row_groups[global_r]; uint32_t fragments_in_chunk = - util::div_rounding_up_unsafe(row_group.num_rows, max_page_fragment_size_); + util::div_rounding_up_unsafe(row_group.num_rows, max_page_fragment_size); row_group.total_byte_size = 0; row_group.columns.resize(num_columns); for (int 
c = 0; c < num_columns; c++) { gpu::EncColumnChunk& ck = chunks[r + first_rg_in_part[p]][c]; - ck = {}; - ck.col_desc = col_desc.device_ptr() + c; - ck.col_desc_id = c; - ck.fragments = &fragments.device_view()[c][f]; - ck.stats = - (not frag_stats.is_empty()) ? frag_stats.data() + c * num_fragments + f : nullptr; + ck = {}; + ck.col_desc = col_desc.device_ptr() + c; + ck.col_desc_id = c; + ck.fragments = &row_group_fragments.device_view()[c][f]; + ck.stats = nullptr; ck.start_row = start_row; ck.num_rows = (uint32_t)row_group.num_rows; ck.first_fragment = c * num_fragments + f; - auto chunk_fragments = fragments[c].subspan(f, fragments_in_chunk); + auto chunk_fragments = row_group_fragments[c].subspan(f, fragments_in_chunk); // In fragment struct, add a pointer to the chunk it belongs to // In each fragment in chunk_fragments, update the chunk pointer here. for (auto& frag : chunk_fragments) { @@ -1551,15 +1621,23 @@ void writer::impl::write(table_view const& table, std::vector co column_chunk_meta.path_in_schema = parquet_columns[c].get_path_in_schema(); column_chunk_meta.codec = UNCOMPRESSED; column_chunk_meta.num_values = ck.num_values; + + frags_per_column[c] += util::div_rounding_up_unsafe( + row_group.num_rows, std::min(column_frag_size[c], max_page_fragment_size)); } f += fragments_in_chunk; start_row += (uint32_t)row_group.num_rows; } } - fragments.host_to_device(stream); - auto dict_info_owner = build_chunk_dictionaries( - chunks, col_desc, fragments, compression_, dict_policy_, max_dictionary_size_, stream); + row_group_fragments.host_to_device(stream); + auto dict_info_owner = build_chunk_dictionaries(chunks, + col_desc, + row_group_fragments, + compression_, + dict_policy_, + max_dictionary_size_, + stream); for (size_t p = 0; p < partitions.size(); p++) { for (int rg = 0; rg < num_rg_in_part[p]; rg++) { size_t global_rg = global_rowgroup_base[p] + rg; @@ -1572,7 +1650,72 @@ void writer::impl::write(table_view const& table, std::vector co } } - // 
Build chunk dictionaries and count pages + // The code preceding this used a uniform fragment size for all columns. Now recompute + // fragments with a (potentially) varying number of fragments per column. + + // first figure out the total number of fragments and calculate the start offset for each column + std::vector frag_offsets; + size_type const total_frags = [&]() { + if (frags_per_column.size() > 0) { + std::exclusive_scan(frags_per_column.data(), + frags_per_column.data() + num_columns + 1, + std::back_inserter(frag_offsets), + 0); + return frag_offsets[num_columns]; + } else { + return 0; + } + }(); + + rmm::device_uvector frag_stats(0, stream); + hostdevice_vector page_fragments(total_frags, stream); + + // update fragments and/or prepare for fragment statistics calculation if necessary + if (total_frags != 0) { + if (stats_granularity_ != statistics_freq::STATISTICS_NONE) { + frag_stats.resize(total_frags, stream); + } + + for (int c = 0; c < num_columns; c++) { + auto frag_offset = frag_offsets[c]; + auto const frag_size = column_frag_size[c]; + + for (size_t p = 0; p < partitions.size(); ++p) { + for (int r = 0; r < num_rg_in_part[p]; r++) { + auto const global_r = global_rowgroup_base[p] + r; + auto const& row_group = md->file(p).row_groups[global_r]; + uint32_t const fragments_in_chunk = + util::div_rounding_up_unsafe(row_group.num_rows, frag_size); + gpu::EncColumnChunk& ck = chunks[r + first_rg_in_part[p]][c]; + ck.fragments = page_fragments.device_ptr(frag_offset); + ck.first_fragment = frag_offset; + + // update the chunk pointer here for each fragment in chunk.fragments + for (uint32_t i = 0; i < fragments_in_chunk; i++) { + page_fragments[frag_offset + i].chunk = + &chunks.device_view()[r + first_rg_in_part[p]][c]; + } + + if (not frag_stats.is_empty()) { ck.stats = frag_stats.data() + frag_offset; } + frag_offset += fragments_in_chunk; + } + } + } + + chunks.host_to_device(stream); + + // re-initialize page fragments + 
page_fragments.host_to_device(stream); + calculate_page_fragments(page_fragments, column_frag_size); + + // and gather fragment statistics + if (not frag_stats.is_empty()) { + gather_fragment_statistics(frag_stats, + {page_fragments.device_ptr(), static_cast(total_frags)}); + } + } + + // Build chunk dictionaries and count pages. Sends chunks to device. hostdevice_vector comp_page_sizes = init_page_sizes( chunks, col_desc, num_columns, max_page_size_bytes, max_page_size_rows, compression_, stream); diff --git a/cpp/src/io/parquet/writer_impl.hpp b/cpp/src/io/parquet/writer_impl.hpp index 3569281fb47..24c35455ff7 100644 --- a/cpp/src/io/parquet/writer_impl.hpp +++ b/cpp/src/io/parquet/writer_impl.hpp @@ -122,32 +122,42 @@ class writer::impl { private: /** - * @brief Gather page fragments + * @brief Gather row group fragments * - * @param frag Destination page fragments + * This calculates fragments to be used in determining row group boundariesa. + * + * @param frag Destination row group fragments * @param col_desc column description array * @param[in] partitions Information about partitioning of table * @param[in] part_frag_offset A Partition's offset into fragment array * @param fragment_size Number of rows per fragment */ - void init_page_fragments(hostdevice_2dvector& frag, - device_span col_desc, - host_span partitions, - device_span part_frag_offset, - uint32_t fragment_size); + void init_row_group_fragments(hostdevice_2dvector& frag, + device_span col_desc, + host_span partitions, + device_span part_frag_offset, + uint32_t fragment_size); + + /** + * @brief Recalculate page fragments + * + * This calculates fragments to be used to determine page boundaries within + * column chunks. 
+ * + * @param frag Destination page fragments + * @param frag_sizes Array of fragment sizes for each column + */ + void calculate_page_fragments(device_span frag, + host_span frag_sizes); /** * @brief Gather per-fragment statistics * - * @param dst_stats output statistics - * @param frag Input page fragments - * @param col_desc column description array - * @param num_fragments Total number of fragments per column + * @param frag_stats output statistics + * @param frags Input page fragments */ - void gather_fragment_statistics(device_2dspan dst_stats, - device_2dspan frag, - device_span col_desc, - uint32_t num_fragments); + void gather_fragment_statistics(device_span frag_stats, + device_span frags); /** * @brief Initialize encoder pages @@ -220,9 +230,9 @@ class writer::impl { statistics_freq stats_granularity_ = statistics_freq::STATISTICS_NONE; dictionary_policy dict_policy_ = dictionary_policy::ALWAYS; size_t max_dictionary_size_ = default_max_dictionary_size; - size_type max_page_fragment_size_ = default_max_page_fragment_size; bool int96_timestamps = false; int32_t column_index_truncate_length = default_column_index_truncate_length; + std::optional max_page_fragment_size_; // Overall file metadata. Filled in during the process and written during write_chunked_end() std::unique_ptr md; // File footer key-value metadata. 
Written during write_chunked_end() diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu index cce917a24de..7fb35e179e9 100644 --- a/cpp/src/join/hash_join.cu +++ b/cpp/src/join/hash_join.cu @@ -299,8 +299,8 @@ hash_join::hash_join(cudf::table_view const& build, // need to store off the owning structures for some of the views in _build _flattened_build_table = structs::detail::flatten_nested_columns( - build, {}, {}, structs::detail::column_nullability::FORCE); - _build = _flattened_build_table; + build, {}, {}, structs::detail::column_nullability::FORCE, stream); + _build = _flattened_build_table->flattened_columns(); if (_is_empty) { return; } @@ -357,8 +357,8 @@ std::size_t hash_join::inner_join_size(cudf::table_view const& probe, if (_is_empty) { return 0; } auto flattened_probe = structs::detail::flatten_nested_columns( - probe, {}, {}, structs::detail::column_nullability::FORCE); - auto const flattened_probe_table = flattened_probe.flattened_columns(); + probe, {}, {}, structs::detail::column_nullability::FORCE, stream); + auto const flattened_probe_table = flattened_probe->flattened_columns(); auto build_table_ptr = cudf::table_device_view::create(_build, stream); auto flattened_probe_table_ptr = cudf::table_device_view::create(flattened_probe_table, stream); @@ -382,8 +382,8 @@ std::size_t hash_join::left_join_size(cudf::table_view const& probe, if (_is_empty) { return probe.num_rows(); } auto flattened_probe = structs::detail::flatten_nested_columns( - probe, {}, {}, structs::detail::column_nullability::FORCE); - auto const flattened_probe_table = flattened_probe.flattened_columns(); + probe, {}, {}, structs::detail::column_nullability::FORCE, stream); + auto const flattened_probe_table = flattened_probe->flattened_columns(); auto build_table_ptr = cudf::table_device_view::create(_build, stream); auto flattened_probe_table_ptr = cudf::table_device_view::create(flattened_probe_table, stream); @@ -408,8 +408,8 @@ std::size_t 
hash_join::full_join_size(cudf::table_view const& probe, if (_is_empty) { return probe.num_rows(); } auto flattened_probe = structs::detail::flatten_nested_columns( - probe, {}, {}, structs::detail::column_nullability::FORCE); - auto const flattened_probe_table = flattened_probe.flattened_columns(); + probe, {}, {}, structs::detail::column_nullability::FORCE, stream); + auto const flattened_probe_table = flattened_probe->flattened_columns(); auto build_table_ptr = cudf::table_device_view::create(_build, stream); auto flattened_probe_table_ptr = cudf::table_device_view::create(flattened_probe_table, stream); @@ -475,8 +475,8 @@ hash_join::compute_hash_join(cudf::table_view const& probe, "Probe column size is too big for hash join"); auto flattened_probe = structs::detail::flatten_nested_columns( - probe, {}, {}, structs::detail::column_nullability::FORCE); - auto const flattened_probe_table = flattened_probe.flattened_columns(); + probe, {}, {}, structs::detail::column_nullability::FORCE, stream); + auto const flattened_probe_table = flattened_probe->flattened_columns(); CUDF_EXPECTS(_build.num_columns() == flattened_probe_table.num_columns(), "Mismatch in number of columns to be joined on"); diff --git a/cpp/src/lists/combine/concatenate_list_elements.cu b/cpp/src/lists/combine/concatenate_list_elements.cu index 496d9ee670a..257b0aed82f 100644 --- a/cpp/src/lists/combine/concatenate_list_elements.cu +++ b/cpp/src/lists/combine/concatenate_list_elements.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -257,11 +257,13 @@ std::unique_ptr concatenate_list_elements(column_view const& input, rmm::mr::device_memory_resource* mr) { auto type = input.type(); // Column that is lists of lists. 
- CUDF_EXPECTS(type.id() == type_id::LIST, "Input column must be a lists column."); + CUDF_EXPECTS( + type.id() == type_id::LIST, "Input column must be a lists column.", std::invalid_argument); auto col = lists_column_view(input).child(); // Rows, which are lists. type = col.type(); - CUDF_EXPECTS(type.id() == type_id::LIST, "Rows of the input column must be lists."); + CUDF_EXPECTS( + type.id() == type_id::LIST, "Rows of the input column must be lists.", std::invalid_argument); col = lists_column_view(col).child(); // The last level entries what we need to check. type = col.type(); diff --git a/cpp/src/lists/contains.cu b/cpp/src/lists/contains.cu index 05fe82d1713..a3293e36825 100644 --- a/cpp/src/lists/contains.cu +++ b/cpp/src/lists/contains.cu @@ -309,7 +309,8 @@ struct dispatch_index_of { auto const child = lists.child(); CUDF_EXPECTS(child.type() == search_keys.type(), - "Type/Scale of search key does not match list column element type."); + "Type/Scale of search key does not match list column element type.", + cudf::data_type_error); CUDF_EXPECTS(search_keys.type().id() != type_id::EMPTY, "Type cannot be empty."); auto constexpr search_key_is_scalar = std::is_same_v; diff --git a/cpp/src/lists/copying/segmented_gather.cu b/cpp/src/lists/copying/segmented_gather.cu index 2c12e09bcd9..79d33e7c17d 100644 --- a/cpp/src/lists/copying/segmented_gather.cu +++ b/cpp/src/lists/copying/segmented_gather.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -38,7 +38,7 @@ std::unique_ptr segmented_gather(lists_column_view const& value_column, { CUDF_EXPECTS(is_index_type(gather_map.child().type()), "Gather map should be list column of index type"); - CUDF_EXPECTS(!gather_map.has_nulls(), "Gather map contains nulls"); + CUDF_EXPECTS(!gather_map.has_nulls(), "Gather map contains nulls", std::invalid_argument); CUDF_EXPECTS(value_column.size() == gather_map.size(), "Gather map and list column should be same size"); diff --git a/cpp/src/lists/dremel.cu b/cpp/src/lists/dremel.cu index 26988622aee..c96a21df905 100644 --- a/cpp/src/lists/dremel.cu +++ b/cpp/src/lists/dremel.cu @@ -35,7 +35,7 @@ #include namespace cudf::detail { - +namespace { /** * @brief Functor to get definition level value for a nested struct column until the leaf level or * the first list level. @@ -46,6 +46,7 @@ struct def_level_fn { uint8_t const* d_nullability; uint8_t sub_level_start; uint8_t curr_def_level; + bool always_nullable; __device__ uint32_t operator()(size_type i) { @@ -55,7 +56,7 @@ struct def_level_fn { auto col = *parent_col; do { // If col not nullable then it does not contribute to def levels - if (d_nullability[l]) { + if (always_nullable or d_nullability[l]) { if (not col.nullable() or bit_is_set(col.null_mask(), i)) { ++def; } else { // We have found the shallowest level at which this row is null @@ -72,10 +73,11 @@ struct def_level_fn { } }; -dremel_data get_dremel_data(column_view h_col, - std::vector nullability, - bool output_as_byte_array, - rmm::cuda_stream_view stream) +dremel_data get_encoding(column_view h_col, + std::vector nullability, + bool output_as_byte_array, + bool always_nullable, + rmm::cuda_stream_view stream) { auto get_list_level = [](column_view col) { while (col.type().id() == type_id::STRUCT) { @@ -173,14 +175,14 @@ dremel_data get_dremel_data(column_view h_col, uint32_t def = 0; start_at_sub_level.push_back(curr_nesting_level_idx); while (col.type().id() == type_id::STRUCT) { - def += 
(nullability[curr_nesting_level_idx]) ? 1 : 0; + def += (always_nullable or nullability[curr_nesting_level_idx]) ? 1 : 0; col = col.child(0); ++curr_nesting_level_idx; } // At the end of all those structs is either a list column or the leaf. List column contributes // at least one def level. Leaf contributes 1 level only if it is nullable. - def += - (col.type().id() == type_id::LIST ? 1 : 0) + (nullability[curr_nesting_level_idx] ? 1 : 0); + def += (col.type().id() == type_id::LIST ? 1 : 0) + + (always_nullable or nullability[curr_nesting_level_idx] ? 1 : 0); def_at_level.push_back(def); ++curr_nesting_level_idx; }; @@ -209,7 +211,7 @@ dremel_data get_dremel_data(column_view h_col, } } - auto [device_view_owners, d_nesting_levels] = + [[maybe_unused]] auto [device_view_owners, d_nesting_levels] = contiguous_copy_column_device_views(nesting_levels, stream); auto max_def_level = def_at_level.back(); @@ -297,7 +299,8 @@ dremel_data get_dremel_data(column_view h_col, def_level_fn{d_nesting_levels + level, d_nullability.data(), start_at_sub_level[level], - def_at_level[level]}); + def_at_level[level], + always_nullable}); // `nesting_levels.size()` == no of list levels + leaf. 
Max repetition level = no of list levels auto input_child_rep_it = thrust::make_constant_iterator(nesting_levels.size() - 1); @@ -306,7 +309,8 @@ dremel_data get_dremel_data(column_view h_col, def_level_fn{d_nesting_levels + level + 1, d_nullability.data(), start_at_sub_level[level + 1], - def_at_level[level + 1]}); + def_at_level[level + 1], + always_nullable}); // Zip the input and output value iterators so that merge operation is done only once auto input_parent_zip_it = @@ -389,7 +393,8 @@ dremel_data get_dremel_data(column_view h_col, def_level_fn{d_nesting_levels + level, d_nullability.data(), start_at_sub_level[level], - def_at_level[level]}); + def_at_level[level], + always_nullable}); // Zip the input and output value iterators so that merge operation is done only once auto input_parent_zip_it = @@ -459,5 +464,22 @@ dremel_data get_dremel_data(column_view h_col, leaf_data_size, max_def_level}; } +} // namespace + +dremel_data get_dremel_data(column_view h_col, + std::vector nullability, + bool output_as_byte_array, + rmm::cuda_stream_view stream) +{ + return get_encoding(h_col, nullability, output_as_byte_array, false, stream); +} + +dremel_data get_comparator_data(column_view h_col, + std::vector nullability, + bool output_as_byte_array, + rmm::cuda_stream_view stream) +{ + return get_encoding(h_col, nullability, output_as_byte_array, true, stream); +} } // namespace cudf::detail diff --git a/cpp/src/partitioning/partitioning.cu b/cpp/src/partitioning/partitioning.cu index edf5d6d6612..54dffc85aca 100644 --- a/cpp/src/partitioning/partitioning.cu +++ b/cpp/src/partitioning/partitioning.cu @@ -14,7 +14,6 @@ * limitations under the License. 
*/ -#include #include #include #include @@ -36,6 +35,9 @@ #include #include +#include +#include + namespace cudf { namespace { // Launch configuration for optimized hash partition @@ -389,7 +391,15 @@ rmm::device_uvector compute_gather_map(size_type num_rows, } struct copy_block_partitions_dispatcher { - template ()>* = nullptr> + template + constexpr static bool is_copy_block_supported() + { + // The shared-memory used for fixed-width types in the copy_block_partitions_impl function + // will be too large for any DataType greater than int64_t. + return is_fixed_width() && (sizeof(DataType) <= sizeof(int64_t)); + } + + template ())> std::unique_ptr operator()(column_view const& input, const size_type num_partitions, size_type const* row_partition_numbers, @@ -416,7 +426,7 @@ struct copy_block_partitions_dispatcher { return std::make_unique(input.type(), input.size(), std::move(output)); } - template ()>* = nullptr> + template ())> std::unique_ptr operator()(column_view const& input, const size_type num_partitions, size_type const* row_partition_numbers, @@ -713,7 +723,7 @@ struct dispatch_map_type { } // namespace namespace detail { -namespace local { +namespace { template