diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index c19bb68986f..8190b5d0297 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -33,3 +33,4 @@ ENV SCCACHE_REGION="us-east-2" ENV SCCACHE_BUCKET="rapids-sccache-devs" ENV VAULT_HOST="https://vault.ops.k8s.rapids.ai" ENV HISTFILE="/home/coder/.cache/._bash_history" +ENV LIBCUDF_KERNEL_CACHE_PATH="/home/coder/cudf/cpp/build/${PYTHON_PACKAGE_MANAGER}/cuda-${CUDA_VERSION}/latest/jitify_cache" diff --git a/.devcontainer/cuda11.8-conda/devcontainer.json b/.devcontainer/cuda11.8-conda/devcontainer.json index 6e71505fc7e..944a73ecc98 100644 --- a/.devcontainer/cuda11.8-conda/devcontainer.json +++ b/.devcontainer/cuda11.8-conda/devcontainer.json @@ -5,12 +5,17 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:24.04-cpp-cuda11.8-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.06-cpp-cuda11.8-mambaforge-ubuntu22.04" } }, + "runArgs": [ + "--rm", + "--name", + "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda11.8-conda" + ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.4": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.6": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json index 15b51da8dea..8b802333bda 100644 --- a/.devcontainer/cuda11.8-pip/devcontainer.json +++ b/.devcontainer/cuda11.8-pip/devcontainer.json @@ -5,12 +5,17 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.04-cpp-cuda11.8-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.06-cpp-cuda11.8-ubuntu22.04" } }, + "runArgs": [ + "--rm", + "--name", + "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda11.8-pip" + ], "hostRequirements": {"gpu": 
"optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.4": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.6": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda12.2-conda/devcontainer.json b/.devcontainer/cuda12.2-conda/devcontainer.json index 31ae8426763..886b07025cc 100644 --- a/.devcontainer/cuda12.2-conda/devcontainer.json +++ b/.devcontainer/cuda12.2-conda/devcontainer.json @@ -5,12 +5,17 @@ "args": { "CUDA": "12.2", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:24.04-cpp-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.06-cpp-mambaforge-ubuntu22.04" } }, + "runArgs": [ + "--rm", + "--name", + "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda12.2-conda" + ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.4": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.6": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda12.2-pip/devcontainer.json b/.devcontainer/cuda12.2-pip/devcontainer.json index 93367527a86..86df56ada19 100644 --- a/.devcontainer/cuda12.2-pip/devcontainer.json +++ b/.devcontainer/cuda12.2-pip/devcontainer.json @@ -5,12 +5,17 @@ "args": { "CUDA": "12.2", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.04-cpp-cuda12.2-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.06-cpp-cuda12.2-ubuntu22.04" } }, + "runArgs": [ + "--rm", + "--name", + "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda12.2-pip" + ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.4": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.6": {} }, "overrideFeatureInstallOrder": [ 
"ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 31cfeaf4ca3..9efac3f1904 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -16,8 +16,14 @@ cpp/libcudf_kafka/CMakeLists.txt @rapidsai/cudf-cmake-codeowners #java code owners java/ @rapidsai/cudf-java-codeowners -#build/ops code owners -.github/ @rapidsai/ops-codeowners -/ci/ @rapidsai/ops-codeowners -conda/ @rapidsai/ops-codeowners -dependencies.yaml @rapidsai/ops-codeowners +#CI code owners +/.github/ @rapidsai/ci-codeowners +/ci/ @rapidsai/ci-codeowners +/.pre-commit-config.yaml @rapidsai/ci-codeowners + +#packaging code owners +/.devcontainers/ @rapidsai/packaging-codeowners +/conda/ @rapidsai/packaging-codeowners +/dependencies.yaml @rapidsai/packaging-codeowners +/build.sh @rapidsai/packaging-codeowners +pyproject.toml @rapidsai/packaging-codeowners diff --git a/.github/ISSUE_TEMPLATE/pandas_function_request.md b/.github/ISSUE_TEMPLATE/pandas_function_request.md index 1cecca72953..19f1377dfe7 100644 --- a/.github/ISSUE_TEMPLATE/pandas_function_request.md +++ b/.github/ISSUE_TEMPLATE/pandas_function_request.md @@ -2,7 +2,7 @@ name: Request a Missing Pandas Function about: Request GPU support for a function executed on the CPU in pandas accelerator mode. title: "[FEA]" -labels: "? 
- Needs Triage, feature request" +labels: "Needs Triage, feature request, cudf.pandas" assignees: '' --- diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index ef2141ed934..6942ef0009d 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.06 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -37,7 +37,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.06 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -46,7 +46,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.06 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -57,7 +57,7 @@ jobs: if: github.ref_type == 'branch' needs: python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.06 with: arch: "amd64" branch: ${{ inputs.branch }} @@ -69,10 +69,9 @@ jobs: sha: ${{ inputs.sha }} wheel-build-cudf: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.06 with: build_type: ${{ inputs.build_type || 'branch' }} - build-2_28-wheels: "true" branch: ${{ inputs.branch }} sha: ${{ inputs.sha }} date: ${{ 
inputs.date }} @@ -80,7 +79,7 @@ jobs: wheel-publish-cudf: needs: wheel-build-cudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.06 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -90,7 +89,7 @@ jobs: wheel-build-dask-cudf: needs: wheel-publish-cudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.06 with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) @@ -102,10 +101,28 @@ jobs: wheel-publish-dask-cudf: needs: wheel-build-dask-cudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.06 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} sha: ${{ inputs.sha }} date: ${{ inputs.date }} package-name: dask_cudf + trigger-pandas-tests: + if: inputs.build_type == 'nightly' + needs: wheel-build-cudf + runs-on: ubuntu-latest + steps: + - name: Checkout code repo + uses: actions/checkout@v4 + with: + ref: ${{ inputs.sha }} + persist-credentials: false + - name: Trigger pandas-tests + env: + GH_TOKEN: ${{ github.token }} + run: | + gh workflow run pandas-tests.yaml \ + -f branch=${{ inputs.branch }} \ + -f sha=${{ inputs.sha }} \ + -f date=${{ inputs.date }} diff --git a/.github/workflows/jni-docker-build.yml b/.github/workflows/jni-docker-build.yml deleted file mode 100644 index 0bdc409d0ab..00000000000 --- a/.github/workflows/jni-docker-build.yml +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2022, NVIDIA 
CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -name: JNI Docker Build - -on: - workflow_dispatch: # manual trigger only - -concurrency: - group: jni-docker-build-${{ github.ref }} - cancel-in-progress: true - -jobs: - docker-build: - if: github.repository == 'rapidsai/cudf' - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - - name: Set up QEMU - uses: docker/setup-qemu-action@v2 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 - - - name: Login to DockerHub - uses: docker/login-action@v2 - with: - username: ${{ secrets.GPUCIBOT_DOCKERHUB_USER }} - password: ${{ secrets.GPUCIBOT_DOCKERHUB_TOKEN }} - - - name: Set ENVs - run: | - echo "IMAGE_NAME=rapidsai/cudf-jni-build" >> $GITHUB_ENV - echo "IMAGE_REF=${GITHUB_REF_NAME}" >> $GITHUB_ENV - - - name: Build and Push - uses: docker/build-push-action@v3 - with: - push: true - file: java/ci/Dockerfile.centos7 - tags: "${{ env.IMAGE_NAME }}:${{ env.IMAGE_REF }}" diff --git a/.github/workflows/pandas-tests.yaml b/.github/workflows/pandas-tests.yaml new file mode 100644 index 00000000000..60544294809 --- /dev/null +++ b/.github/workflows/pandas-tests.yaml @@ -0,0 +1,27 @@ +name: Pandas Test Job + +on: + workflow_dispatch: + inputs: + branch: + required: true + type: string + date: + required: true + type: string + sha: + required: true + type: string + +jobs: + pandas-tests: + # run the Pandas unit tests + secrets: inherit + uses: 
rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.06 + with: + matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.9" and .CUDA_VER == "12.2.2" )) + build_type: nightly + branch: ${{ inputs.branch }} + date: ${{ inputs.date }} + sha: ${{ inputs.sha }} + script: ci/cudf_pandas_scripts/pandas-tests/run.sh main diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 41bf22cf47f..f9d5976f1fe 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -20,6 +20,7 @@ jobs: - conda-python-cudf-tests - conda-python-other-tests - conda-java-tests + - static-configure - conda-notebook-tests - docs-build - wheel-build-cudf @@ -29,44 +30,43 @@ jobs: - devcontainer - unit-tests-cudf-pandas - pandas-tests - #- pandas-tests-diff - #- pandas-tests-diff-comment + - pandas-tests-diff secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.06 checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.06 with: enable_check_generated_files: false conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.06 with: build_type: pull-request conda-cpp-checks: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.06 with: build_type: pull-request enable_check_symbols: true conda-cpp-tests: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.04 + uses: 
rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.06 with: build_type: pull-request conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.06 with: build_type: pull-request conda-python-cudf-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.06 with: build_type: pull-request script: "ci/test_python_cudf.sh" @@ -74,24 +74,34 @@ jobs: # Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.06 with: build_type: pull-request script: "ci/test_python_other.sh" conda-java-tests: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.06 with: build_type: pull-request node_type: "gpu-v100-latest-1" arch: "amd64" container_image: "rapidsai/ci-conda:latest" run_script: "ci/test_java.sh" + static-configure: + needs: checks + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.06 + with: + build_type: pull-request + # Use the wheel container so we can skip conda solves and since our + # primary static consumers (Spark) are not in conda anyway. 
+ container_image: "rapidsai/ci-wheel:latest" + run_script: "ci/configure_cpp_static.sh" conda-notebook-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.06 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -101,7 +111,7 @@ jobs: docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.06 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -111,22 +121,21 @@ jobs: wheel-build-cudf: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.06 with: build_type: pull-request - build-2_28-wheels: "true" script: "ci/build_wheel_cudf.sh" wheel-tests-cudf: needs: wheel-build-cudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.06 with: build_type: pull-request script: ci/test_wheel_cudf.sh wheel-build-dask-cudf: needs: wheel-build-cudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.06 with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". 
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) @@ -135,7 +144,7 @@ jobs: wheel-tests-dask-cudf: needs: wheel-build-dask-cudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.06 with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) @@ -143,7 +152,7 @@ jobs: script: ci/test_wheel_dask_cudf.sh devcontainer: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.06 with: arch: '["amd64"]' cuda: '["12.2"]' @@ -154,7 +163,7 @@ jobs: unit-tests-cudf-pandas: needs: wheel-build-cudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.06 with: matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) build_type: pull-request @@ -163,42 +172,18 @@ jobs: # run the Pandas unit tests using PR branch needs: wheel-build-cudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.06 with: - matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) + matrix_filter: 
map(select(.ARCH == "amd64" and .PY_VER == "3.9" and .CUDA_VER == "12.2.2" )) build_type: pull-request script: ci/cudf_pandas_scripts/pandas-tests/run.sh pr # Hide test failures because they exceed the GITHUB_STEP_SUMMARY output limit. test_summary_show: "none" - #pandas-tests-diff: - # # diff the results of running the Pandas unit tests and publish a job summary - # needs: [pandas-tests-main, pandas-tests-pr] - # secrets: inherit - # # This branch exports a `job_output` output that the downstream job reads. - # uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04 - # with: - # node_type: cpu4 - # build_type: pull-request - # run_script: ci/cudf_pandas_scripts/pandas-tests/diff.sh - #pandas-tests-diff-comment: - # # Post comment of pass/fail rate on PR - # runs-on: ubuntu-latest - # needs: pandas-tests-diff - # steps: - # - uses: actions/github-script@v6 - # with: - # script: | - # const branch = process.env.GITHUB_REF_NAME; - # const prBranchPattern = new RegExp("^pull-request/[0-9]+$"); - # if (!branch.match(prBranchPattern)) { - # throw new Error(`${branch} does not match PR branch pattern.`); - # } - # const summary_url = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`; - # const prNumber = branch.split("/")[1]; - # const summary_comment = `${{ needs.pandas-tests-diff.outputs.job_output }}`; - # github.rest.issues.createComment({ - # issue_number: prNumber, - # owner: context.repo.owner, - # repo: context.repo.repo, - # body: `${summary_comment}\n\nHere is [a link to the full test summary](${summary_url}).\n` - # }) + pandas-tests-diff: + # diff the results of running the Pandas unit tests and publish a job summary + needs: pandas-tests + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.06 + with: + node_type: cpu4 + build_type: pull-request + run_script: "ci/cudf_pandas_scripts/pandas-tests/diff.sh" diff --git a/.github/workflows/status.yaml 
b/.github/workflows/status.yaml new file mode 100644 index 00000000000..781264bc55e --- /dev/null +++ b/.github/workflows/status.yaml @@ -0,0 +1,120 @@ +name: Custom GH Status from Workflow Artifacts + +on: + workflow_run: + workflows: ["pr"] + types: + - completed + +jobs: + process_artifacts: + if: ${{ github.event.workflow_run.conclusion == 'success' }} + runs-on: ubuntu-latest + outputs: + artifact_downloaded: ${{ steps.download_artifact.outputs.artifact_downloaded }} + permissions: + actions: read + checks: read + contents: read + deployments: read + id-token: write + issues: read + discussions: read + packages: read + pages: read + pull-requests: read + repository-projects: read + security-events: read + statuses: write + steps: + - name: Download artifact + id: download_artifact + uses: actions/github-script@v7 + with: + retries: 3 + script: | + const fs = require('fs'); + const path = require('path'); + const artifactName = 'gh-status'; + + const allArtifacts = await github.rest.actions.listWorkflowRunArtifacts({ + owner: context.repo.owner, + repo: context.repo.repo, + run_id: context.payload.workflow_run.id, + }); + // Find the specific artifact + const artifact = allArtifacts.data.artifacts.find(artifact => artifact.name === artifactName); + if (!artifact) { + core.info(`Artifact "${artifactName}" not found. 
Exiting safely.`); + core.setOutput('artifact_downloaded', 'false'); + return; + } + core.setOutput('artifact_downloaded', 'true'); + // Download the artifact + const download = await github.rest.actions.downloadArtifact({ + owner: context.repo.owner, + repo: context.repo.repo, + artifact_id: artifact.id, + archive_format: 'zip', + }); + + // Write the artifact to a file + fs.writeFileSync(`${process.env.GITHUB_WORKSPACE}/${artifactName}.zip`, Buffer.from(download.data)); + + - name: 'Unzip artifact' + if: ${{ steps.download_artifact.outputs.artifact_downloaded == 'true' }} + run: unzip 'gh-status.zip' + + - name: Create status + if: ${{ steps.download_artifact.outputs.artifact_downloaded == 'true' }} + uses: actions/github-script@v7 + env: + WORKFLOW_RUN_ID: ${{ github.event.workflow_run.id }} + COMMIT_SHA: ${{ github.event.workflow_run.head_sha }} + ATTEMPTS: ${{ github.event.workflow_run.run_attempt }} + with: + retries: 3 + script: | + // Load the JSON content + const contentJSON = require('./gh-status.json'); + const { + job_name: JOB_NAME, + context: CUSTOM_CONTEXT = 'Custom CI Status Check', + description: CUSTOM_DESCRIPTION = 'Custom CI Status description', + target_url: CUSTOM_TARGET_URL, + state: CUSTOM_STATE = 'success' + } = contentJSON; + + // Fetch all jobs using pagination + const jobs = await github.paginate( + github.rest.actions.listJobsForWorkflowRun, + { + owner: context.repo.owner, + repo: context.repo.repo, + run_id: process.env.WORKFLOW_RUN_ID, + } + ); + + // Fetch the first job ID from the workflow run + const job = jobs.find(job => job.name === JOB_NAME); + const JOB_ID = job ? 
job.id : null; + + // Set default target URL if not defined + const targetUrl = CUSTOM_TARGET_URL || `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${process.env.WORKFLOW_RUN_ID}/attempts/${process.env.ATTEMPTS}#summary-${JOB_ID}`; + + console.log("job id: ", JOB_ID); + console.log("state: ", CUSTOM_STATE); + console.log("target url: ", targetUrl); + console.log("description: ", CUSTOM_DESCRIPTION); + console.log("context: ", CUSTOM_CONTEXT); + + // Create status + await github.rest.repos.createCommitStatus({ + owner: context.repo.owner, + repo: context.repo.repo, + sha: process.env.COMMIT_SHA, + state: CUSTOM_STATE, + target_url: targetUrl, + description: CUSTOM_DESCRIPTION, + context: CUSTOM_CONTEXT, + }); diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index aeb092111a7..170f45e23fd 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-cpp-checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.06 with: build_type: nightly branch: ${{ inputs.branch }} @@ -25,7 +25,7 @@ jobs: enable_check_symbols: true conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.06 with: build_type: nightly branch: ${{ inputs.branch }} @@ -33,7 +33,7 @@ jobs: sha: ${{ inputs.sha }} conda-cpp-memcheck-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.06 with: build_type: nightly branch: ${{ inputs.branch }} @@ -43,9 +43,18 @@ jobs: arch: "amd64" container_image: "rapidsai/ci-conda:latest" run_script: "ci/test_cpp_memcheck.sh" + 
static-configure: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.06 + with: + build_type: pull-request + # Use the wheel container so we can skip conda solves and since our + # primary static consumers (Spark) are not in conda anyway. + container_image: "rapidsai/ci-wheel:latest" + run_script: "ci/configure_cpp_static.sh" conda-python-cudf-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.06 with: build_type: nightly branch: ${{ inputs.branch }} @@ -55,7 +64,7 @@ jobs: conda-python-other-tests: # Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.06 with: build_type: nightly branch: ${{ inputs.branch }} @@ -64,7 +73,7 @@ jobs: script: "ci/test_python_other.sh" conda-java-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.06 with: build_type: nightly branch: ${{ inputs.branch }} @@ -76,7 +85,7 @@ jobs: run_script: "ci/test_java.sh" conda-notebook-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.06 with: build_type: nightly branch: ${{ inputs.branch }} @@ -88,7 +97,7 @@ jobs: run_script: "ci/test_notebooks.sh" wheel-tests-cudf: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.06 with: build_type: nightly branch: ${{ inputs.branch }} @@ -97,7 +106,7 @@ jobs: script: 
ci/test_wheel_cudf.sh wheel-tests-dask-cudf: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.06 with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) @@ -108,21 +117,10 @@ jobs: script: ci/test_wheel_dask_cudf.sh unit-tests-cudf-pandas: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.06 with: build_type: nightly branch: ${{ inputs.branch }} date: ${{ inputs.date }} sha: ${{ inputs.sha }} script: ci/cudf_pandas_scripts/run_tests.sh - pandas-tests: - # run the Pandas unit tests - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 - with: - matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) - build_type: nightly - branch: ${{ inputs.branch }} - date: ${{ inputs.date }} - sha: ${{ inputs.sha }} - script: ci/cudf_pandas_scripts/pandas-tests/run.sh main diff --git a/.gitignore b/.gitignore index 471d4100458..c89fb49697a 100644 --- a/.gitignore +++ b/.gitignore @@ -78,6 +78,7 @@ CMakeFiles/ Debug build/ cpp/build/ +cpp/examples/*/install/ cpp/include/cudf/ipc_generated/*.h cpp/thirdparty/googletest/ @@ -160,9 +161,6 @@ ENV/ # Dask dask-worker-space/ -# protobuf -**/*_pb2.py - # Sphinx docs & build artifacts docs/cudf/source/api_docs/generated/* docs/cudf/source/user_guide/api_docs/api/* diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ce5d4f93444..2d3ffc287e9 100644 --- a/.pre-commit-config.yaml +++ 
b/.pre-commit-config.yaml @@ -2,16 +2,18 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 + rev: v4.6.0 hooks: - id: trailing-whitespace exclude: | (?x)^( + ^cpp/cmake/thirdparty/patches/.*| ^python/cudf/cudf/tests/data/subword_tokenizer_data/.* ) - id: end-of-file-fixer exclude: | (?x)^( + ^cpp/cmake/thirdparty/patches/.*| ^python/cudf/cudf/tests/data/subword_tokenizer_data/.* ) - repo: https://github.com/PyCQA/isort @@ -22,13 +24,15 @@ repos: # project can specify its own first/third-party packages. args: ["--config-root=python/", "--resolve-all-configs"] files: python/.* + exclude: | + (?x)^(^python/cudf_polars/.*) types_or: [python, cython, pyi] - repo: https://github.com/MarcoGorelli/cython-lint - rev: v0.16.0 + rev: v0.16.2 hooks: - id: cython-lint - repo: https://github.com/pre-commit/mirrors-mypy - rev: 'v1.3.0' + rev: 'v1.10.0' hooks: - id: mypy additional_dependencies: [types-cachetools] @@ -36,22 +40,11 @@ repos: "python/cudf/cudf", "python/custreamz/custreamz", "python/cudf_kafka/cudf_kafka", + "python/cudf_polars/cudf_polars", "python/dask_cudf/dask_cudf"] pass_filenames: false - - repo: https://github.com/PyCQA/pydocstyle - rev: 6.3.0 - hooks: - - id: pydocstyle - # https://github.com/PyCQA/pydocstyle/issues/603 - additional_dependencies: [tomli] - args: ["--config=pyproject.toml"] - exclude: | - (?x)^( - ^python/cudf/cudf/pandas/scripts/.*| - ^python/cudf/cudf_pandas_tests/.* - ) - repo: https://github.com/nbQA-dev/nbQA - rev: 1.7.1 + rev: 1.8.5 hooks: - id: nbqa-isort # Use the cudf_kafka isort orderings in notebooks so that dask @@ -64,7 +57,7 @@ repos: types_or: [c, c++, cuda] args: ["-fallback-style=none", "-style=file", "-i"] - repo: https://github.com/sirosen/texthooks - rev: 0.6.3 + rev: 0.6.6 hooks: - id: fix-smartquotes exclude: | @@ -125,7 +118,7 @@ repos: pass_filenames: false verbose: true - repo: 
https://github.com/codespell-project/codespell - rev: v2.2.2 + rev: v2.2.6 hooks: - id: codespell additional_dependencies: [tomli] @@ -136,12 +129,12 @@ repos: ^CHANGELOG.md$ ) - repo: https://github.com/rapidsai/dependency-file-generator - rev: v1.8.0 + rev: v1.13.4 hooks: - id: rapids-dependency-file-generator args: ["--clean"] - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.13 + rev: v0.4.3 hooks: - id: ruff files: python/.*$ @@ -152,9 +145,11 @@ repos: hooks: - id: verify-copyright exclude: | - (?x) - cpp/include/cudf_test/cxxopts[.]hpp$ - + (?x)^( + cpp/include/cudf_test/cxxopts[.]hpp$| + cpp/src/io/parquet/ipc/Message_generated[.]h$| + cpp/src/io/parquet/ipc/Schema_generated[.]h$ + ) default_language_version: python: python3 diff --git a/CHANGELOG.md b/CHANGELOG.md index 5eb4ac9845b..871ef8ba1df 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,309 @@ +# cudf 24.06.00 (5 Jun 2024) + +## 🚨 Breaking Changes + +- Deprecate `Groupby.collect` ([#15808](https://github.com/rapidsai/cudf/pull/15808)) [@galipremsagar](https://github.com/galipremsagar) +- Raise FileNotFoundError when a literal JSON string that looks like a json filename is passed ([#15806](https://github.com/rapidsai/cudf/pull/15806)) [@lithomas1](https://github.com/lithomas1) +- Support filtered I/O in `chunked_parquet_reader` and simplify the use of `parquet_reader_options` ([#15764](https://github.com/rapidsai/cudf/pull/15764)) [@mhaseeb123](https://github.com/mhaseeb123) +- Raise errors for unsupported operations on certain types ([#15712](https://github.com/rapidsai/cudf/pull/15712)) [@galipremsagar](https://github.com/galipremsagar) +- Support `DurationType` in cudf parquet reader via `arrow:schema` ([#15617](https://github.com/rapidsai/cudf/pull/15617)) [@mhaseeb123](https://github.com/mhaseeb123) +- Remove protobuf and use parsed ORC statistics from libcudf 
([#15564](https://github.com/rapidsai/cudf/pull/15564)) [@bdice](https://github.com/bdice) +- Remove legacy JSON reader from Python ([#15538](https://github.com/rapidsai/cudf/pull/15538)) [@bdice](https://github.com/bdice) +- Removing all batching code from parquet writer ([#15528](https://github.com/rapidsai/cudf/pull/15528)) [@mhaseeb123](https://github.com/mhaseeb123) +- Convert libcudf resource parameters to rmm::device_async_resource_ref ([#15507](https://github.com/rapidsai/cudf/pull/15507)) [@harrism](https://github.com/harrism) +- Remove deprecated strings offsets_begin ([#15454](https://github.com/rapidsai/cudf/pull/15454)) [@davidwendt](https://github.com/davidwendt) +- Floating <--> fixed-point conversion must now be called explicitly ([#15438](https://github.com/rapidsai/cudf/pull/15438)) [@pmattione-nvidia](https://github.com/pmattione-nvidia) +- Bind `read_parquet_metadata` API to libcudf instead of pyarrow and extract `RowGroup` information ([#15398](https://github.com/rapidsai/cudf/pull/15398)) [@mhaseeb123](https://github.com/mhaseeb123) +- Remove deprecated hash() and spark_murmurhash3_x86_32() ([#15375](https://github.com/rapidsai/cudf/pull/15375)) [@davidwendt](https://github.com/davidwendt) +- Remove empty elements from exploded character-ngrams output ([#15371](https://github.com/rapidsai/cudf/pull/15371)) [@davidwendt](https://github.com/davidwendt) +- [FEA] Performance improvement for mixed left semi/anti join ([#15288](https://github.com/rapidsai/cudf/pull/15288)) [@tgujar](https://github.com/tgujar) +- Align date_range defaults with pandas, support tz ([#15139](https://github.com/rapidsai/cudf/pull/15139)) [@mroeschke](https://github.com/mroeschke) + +## 🐛 Bug Fixes + +- Revert "Fix docs for IO readers and strings_convert" 
([#15872](https://github.com/rapidsai/cudf/pull/15872)) [@vyasr](https://github.com/vyasr) +- Remove problematic call of index setter to unblock dask-cuda CI ([#15844](https://github.com/rapidsai/cudf/pull/15844)) [@charlesbluca](https://github.com/charlesbluca) +- Use rapids_cpm_nvtx3 to get same nvtx3 target state as rmm ([#15840](https://github.com/rapidsai/cudf/pull/15840)) [@robertmaynard](https://github.com/robertmaynard) +- Return boolean from config_host_memory_resource instead of throwing ([#15815](https://github.com/rapidsai/cudf/pull/15815)) [@abellina](https://github.com/abellina) +- Add temporary dask-cudf workaround for categorical sorting ([#15801](https://github.com/rapidsai/cudf/pull/15801)) [@rjzamora](https://github.com/rjzamora) +- Fix row group alignment in ORC writer ([#15789](https://github.com/rapidsai/cudf/pull/15789)) [@vuule](https://github.com/vuule) +- Raise error when sorting by categorical column in dask-cudf ([#15788](https://github.com/rapidsai/cudf/pull/15788)) [@rjzamora](https://github.com/rjzamora) +- Upgrade `arrow` to 16.1 ([#15787](https://github.com/rapidsai/cudf/pull/15787)) [@galipremsagar](https://github.com/galipremsagar) +- Add support for `PandasArray` for `pandas<2.1.0` ([#15786](https://github.com/rapidsai/cudf/pull/15786)) [@galipremsagar](https://github.com/galipremsagar) +- Limit runtime dependency to `libarrow>=16.0.0,<16.1.0a0` ([#15782](https://github.com/rapidsai/cudf/pull/15782)) [@pentschev](https://github.com/pentschev) +- Fix cat.as_ordered not propagating correct size ([#15780](https://github.com/rapidsai/cudf/pull/15780)) [@mroeschke](https://github.com/mroeschke) +- Handle mixed-like homogeneous types in `isin` ([#15771](https://github.com/rapidsai/cudf/pull/15771))
[@galipremsagar](https://github.com/galipremsagar) +- Fix id_vars and value_vars not accepting string scalars in melt ([#15765](https://github.com/rapidsai/cudf/pull/15765)) [@mroeschke](https://github.com/mroeschke) +- Fix `DatetimeIndex.loc` for all types of ordering cases ([#15761](https://github.com/rapidsai/cudf/pull/15761)) [@galipremsagar](https://github.com/galipremsagar) +- Fix arrow versioning logic ([#15755](https://github.com/rapidsai/cudf/pull/15755)) [@vyasr](https://github.com/vyasr) +- Avoid running sanitizer on Java test designed to cause an error ([#15753](https://github.com/rapidsai/cudf/pull/15753)) [@jlowe](https://github.com/jlowe) +- Handle empty dataframe object with index present in setitem of `loc` ([#15752](https://github.com/rapidsai/cudf/pull/15752)) [@galipremsagar](https://github.com/galipremsagar) +- Eliminate circular reference in DataFrame/Series.iloc/loc ([#15749](https://github.com/rapidsai/cudf/pull/15749)) [@mroeschke](https://github.com/mroeschke) +- Cap the absolute row index per pass in parquet chunked reader. 
([#15735](https://github.com/rapidsai/cudf/pull/15735)) [@nvdbaranec](https://github.com/nvdbaranec) +- Fix `Index.repeat` for `datetime64` types ([#15722](https://github.com/rapidsai/cudf/pull/15722)) [@galipremsagar](https://github.com/galipremsagar) +- Fix multibyte check for case convert for large strings ([#15721](https://github.com/rapidsai/cudf/pull/15721)) [@davidwendt](https://github.com/davidwendt) +- Fix `get_loc` to properly fetch results from an index that is in decreasing order ([#15719](https://github.com/rapidsai/cudf/pull/15719)) [@galipremsagar](https://github.com/galipremsagar) +- Return same type as the original index for `.loc` operations ([#15717](https://github.com/rapidsai/cudf/pull/15717)) [@galipremsagar](https://github.com/galipremsagar) +- Correct static builds + static arrow ([#15715](https://github.com/rapidsai/cudf/pull/15715)) [@robertmaynard](https://github.com/robertmaynard) +- Raise errors for unsupported operations on certain types ([#15712](https://github.com/rapidsai/cudf/pull/15712)) [@galipremsagar](https://github.com/galipremsagar) +- Fix ColumnAccessor caching of nrows if empty previously ([#15710](https://github.com/rapidsai/cudf/pull/15710)) [@mroeschke](https://github.com/mroeschke) +- Allow `None` when `nan_as_null=False` in column constructor ([#15709](https://github.com/rapidsai/cudf/pull/15709)) [@galipremsagar](https://github.com/galipremsagar) +- Refine `CudaTest.testCudaException` in case throwing wrong type of CudaError under aarch64 ([#15706](https://github.com/rapidsai/cudf/pull/15706)) [@sperlingxx](https://github.com/sperlingxx) +- Fix maxima of categorical column ([#15701](https://github.com/rapidsai/cudf/pull/15701)) [@rjzamora](https://github.com/rjzamora) +- Add proxy for inplace operations in `cudf.pandas` 
([#15695](https://github.com/rapidsai/cudf/pull/15695)) [@galipremsagar](https://github.com/galipremsagar) +- Make `nan_as_null` behavior consistent across all APIs ([#15692](https://github.com/rapidsai/cudf/pull/15692)) [@galipremsagar](https://github.com/galipremsagar) +- Fix CI s3 api command to fetch latest results ([#15687](https://github.com/rapidsai/cudf/pull/15687)) [@galipremsagar](https://github.com/galipremsagar) +- Add `NumpyExtensionArray` proxy type in `cudf.pandas` ([#15686](https://github.com/rapidsai/cudf/pull/15686)) [@galipremsagar](https://github.com/galipremsagar) +- Properly implement binaryops for proxy types ([#15684](https://github.com/rapidsai/cudf/pull/15684)) [@galipremsagar](https://github.com/galipremsagar) +- Fix copy assignment and the comparison operator of `rmm_host_allocator` ([#15677](https://github.com/rapidsai/cudf/pull/15677)) [@vuule](https://github.com/vuule) +- Fix multi-source reading in JSON byte range reader ([#15671](https://github.com/rapidsai/cudf/pull/15671)) [@shrshi](https://github.com/shrshi) +- Return `int64` when pandas compatible mode is turned on for `get_indexer` ([#15659](https://github.com/rapidsai/cudf/pull/15659)) [@galipremsagar](https://github.com/galipremsagar) +- Fix Index contains for error validations and float vs int comparisons ([#15657](https://github.com/rapidsai/cudf/pull/15657)) [@galipremsagar](https://github.com/galipremsagar) +- Preserve sub-second data for time scalars in column construction ([#15655](https://github.com/rapidsai/cudf/pull/15655)) [@galipremsagar](https://github.com/galipremsagar) +- Check row limit size in cudf::strings::join_strings ([#15643](https://github.com/rapidsai/cudf/pull/15643)) [@davidwendt](https://github.com/davidwendt) +- Enable sorting on column with nulls using query-planning 
([#15639](https://github.com/rapidsai/cudf/pull/15639)) [@rjzamora](https://github.com/rjzamora) +- Fix operator precedence problem in Parquet reader ([#15638](https://github.com/rapidsai/cudf/pull/15638)) [@etseidl](https://github.com/etseidl) +- Fix decoding of dictionary encoded FIXED_LEN_BYTE_ARRAY data in Parquet reader ([#15601](https://github.com/rapidsai/cudf/pull/15601)) [@etseidl](https://github.com/etseidl) +- Fix debug warnings/errors in from_arrow_device_test.cpp ([#15596](https://github.com/rapidsai/cudf/pull/15596)) [@davidwendt](https://github.com/davidwendt) +- Add "collect" aggregation support to dask-cudf ([#15593](https://github.com/rapidsai/cudf/pull/15593)) [@rjzamora](https://github.com/rjzamora) +- Fix categorical-accessor support and testing in dask-cudf ([#15591](https://github.com/rapidsai/cudf/pull/15591)) [@rjzamora](https://github.com/rjzamora) +- Disable compute-sanitizer usage in CI tests with CUDA<11.6 ([#15584](https://github.com/rapidsai/cudf/pull/15584)) [@davidwendt](https://github.com/davidwendt) +- Preserve RangeIndex.step in to_arrow/from_arrow ([#15581](https://github.com/rapidsai/cudf/pull/15581)) [@mroeschke](https://github.com/mroeschke) +- Ignore new cupy warning ([#15574](https://github.com/rapidsai/cudf/pull/15574)) [@vyasr](https://github.com/vyasr) +- Add cuda-sanitizer-api dependency for test-cpp matrix 11.4 ([#15573](https://github.com/rapidsai/cudf/pull/15573)) [@davidwendt](https://github.com/davidwendt) +- Allow apply udf to reference global modules in cudf.pandas ([#15569](https://github.com/rapidsai/cudf/pull/15569)) [@mroeschke](https://github.com/mroeschke) +- Fix deprecation warnings for json legacy reader ([#15563](https://github.com/rapidsai/cudf/pull/15563)) [@davidwendt](https://github.com/davidwendt) +- Fix 
millisecond resampling in cudf Python ([#15560](https://github.com/rapidsai/cudf/pull/15560)) [@mroeschke](https://github.com/mroeschke) +- Rename JSON_READER_OPTION to JSON_READER_OPTION_NVBENCH. ([#15553](https://github.com/rapidsai/cudf/pull/15553)) [@bdice](https://github.com/bdice) +- Fix a JNI bug in JSON parsing fixup ([#15550](https://github.com/rapidsai/cudf/pull/15550)) [@revans2](https://github.com/revans2) +- Remove conda channel setup from wheel CI image script. ([#15539](https://github.com/rapidsai/cudf/pull/15539)) [@bdice](https://github.com/bdice) +- cudf.pandas: Series dt accessor is CombinedDatetimelikeProperties ([#15523](https://github.com/rapidsai/cudf/pull/15523)) [@wence-](https://github.com/wence-) +- Fix for some compiler warnings in parquet/page_decode.cuh ([#15518](https://github.com/rapidsai/cudf/pull/15518)) [@etseidl](https://github.com/etseidl) +- Fix exponent overflow in strings-to-double conversion ([#15517](https://github.com/rapidsai/cudf/pull/15517)) [@davidwendt](https://github.com/davidwendt) +- nanoarrow uses package override for proper pinned versions generation ([#15515](https://github.com/rapidsai/cudf/pull/15515)) [@robertmaynard](https://github.com/robertmaynard) +- Remove index name overrides in dask-cudf pyarrow table dispatch ([#15514](https://github.com/rapidsai/cudf/pull/15514)) [@charlesbluca](https://github.com/charlesbluca) +- Fix async synchronization issues in json_column.cu ([#15497](https://github.com/rapidsai/cudf/pull/15497)) [@karthikeyann](https://github.com/karthikeyann) +- Add new patch to hide more CCCL APIs ([#15493](https://github.com/rapidsai/cudf/pull/15493)) [@vyasr](https://github.com/vyasr) +- Make improvements in pandas-test reporting ([#15485](https://github.com/rapidsai/cudf/pull/15485)) 
[@galipremsagar](https://github.com/galipremsagar) +- Fixed page data truncation in parquet writer under certain conditions. ([#15474](https://github.com/rapidsai/cudf/pull/15474)) [@nvdbaranec](https://github.com/nvdbaranec) +- Only use data_type constructor with scale for decimal types ([#15472](https://github.com/rapidsai/cudf/pull/15472)) [@wence-](https://github.com/wence-) +- Avoid "p2p" shuffle as a default when `dask_cudf` is imported ([#15469](https://github.com/rapidsai/cudf/pull/15469)) [@rjzamora](https://github.com/rjzamora) +- Fix debug build errors from to_arrow_device_test.cpp ([#15463](https://github.com/rapidsai/cudf/pull/15463)) [@davidwendt](https://github.com/davidwendt) +- Fix base_normalator::integer_sizeof_fn integer dispatch ([#15457](https://github.com/rapidsai/cudf/pull/15457)) [@davidwendt](https://github.com/davidwendt) +- Allow consumers of static builds to find nanoarrow ([#15456](https://github.com/rapidsai/cudf/pull/15456)) [@robertmaynard](https://github.com/robertmaynard) +- Allow jit compilation when using a splayed CUDA toolkit ([#15451](https://github.com/rapidsai/cudf/pull/15451)) [@robertmaynard](https://github.com/robertmaynard) +- Handle case of scan aggregation in groupby-transform ([#15450](https://github.com/rapidsai/cudf/pull/15450)) [@wence-](https://github.com/wence-) +- Test static builds in CI and fix nanoarrow configure ([#15437](https://github.com/rapidsai/cudf/pull/15437)) [@vyasr](https://github.com/vyasr) +- Fixes potential race in JSON parser when parsing JSON lines format and when recovering from invalid lines ([#15419](https://github.com/rapidsai/cudf/pull/15419)) [@elstehle](https://github.com/elstehle) +- Fix errors in chunked ORC writer when no tables were (successfully) written 
([#15393](https://github.com/rapidsai/cudf/pull/15393)) [@vuule](https://github.com/vuule) +- Support implicit array conversion with query-planning enabled ([#15378](https://github.com/rapidsai/cudf/pull/15378)) [@rjzamora](https://github.com/rjzamora) +- Fix arrow-based round trip of empty dataframes ([#15373](https://github.com/rapidsai/cudf/pull/15373)) [@wence-](https://github.com/wence-) +- Remove empty elements from exploded character-ngrams output ([#15371](https://github.com/rapidsai/cudf/pull/15371)) [@davidwendt](https://github.com/davidwendt) +- Remove boundscheck=False setting in cython files ([#15362](https://github.com/rapidsai/cudf/pull/15362)) [@wence-](https://github.com/wence-) +- Patch dask-expr `var` logic in dask-cudf ([#15347](https://github.com/rapidsai/cudf/pull/15347)) [@rjzamora](https://github.com/rjzamora) +- Fix for logical and syntactical errors in libcudf c++ examples ([#15346](https://github.com/rapidsai/cudf/pull/15346)) [@mhaseeb123](https://github.com/mhaseeb123) +- Disable dask-expr in docs builds. ([#15343](https://github.com/rapidsai/cudf/pull/15343)) [@bdice](https://github.com/bdice) +- Apply the cuFile error work around to data_sink as well ([#15335](https://github.com/rapidsai/cudf/pull/15335)) [@vuule](https://github.com/vuule) +- Fix parquet predicate filtering with column projection ([#15113](https://github.com/rapidsai/cudf/pull/15113)) [@karthikeyann](https://github.com/karthikeyann) +- Check column type equality, handling nested types correctly. 
([#14531](https://github.com/rapidsai/cudf/pull/14531)) [@bdice](https://github.com/bdice) + +## 📖 Documentation + +- Fix docs for IO readers and strings_convert ([#15842](https://github.com/rapidsai/cudf/pull/15842)) [@bdice](https://github.com/bdice) +- Update cudf.pandas docs for GA ([#15744](https://github.com/rapidsai/cudf/pull/15744)) [@beckernick](https://github.com/beckernick) +- Add contributing warning about circular imports ([#15691](https://github.com/rapidsai/cudf/pull/15691)) [@er-eis](https://github.com/er-eis) +- Update libcudf developer guide for strings offsets column ([#15661](https://github.com/rapidsai/cudf/pull/15661)) [@davidwendt](https://github.com/davidwendt) +- Update developer guide with device_async_resource_ref guidelines ([#15562](https://github.com/rapidsai/cudf/pull/15562)) [@harrism](https://github.com/harrism) +- DOC: add pandas intersphinx mapping ([#15531](https://github.com/rapidsai/cudf/pull/15531)) [@raybellwaves](https://github.com/raybellwaves) +- rm-dup-doc in frame.py ([#15530](https://github.com/rapidsai/cudf/pull/15530)) [@raybellwaves](https://github.com/raybellwaves) +- Update CONTRIBUTING.md to use latest cuda env ([#15467](https://github.com/rapidsai/cudf/pull/15467)) [@raybellwaves](https://github.com/raybellwaves) +- Doc: interleave columns pandas compat ([#15383](https://github.com/rapidsai/cudf/pull/15383)) [@raybellwaves](https://github.com/raybellwaves) +- Simplified README Examples ([#15338](https://github.com/rapidsai/cudf/pull/15338)) [@wkaisertexas](https://github.com/wkaisertexas) +- Add debug tips section to libcudf developer guide ([#15329](https://github.com/rapidsai/cudf/pull/15329)) [@davidwendt](https://github.com/davidwendt) +- Fix and clarify notes on result ordering 
([#13255](https://github.com/rapidsai/cudf/pull/13255)) [@shwina](https://github.com/shwina) + +## 🚀 New Features + +- Add JNI bindings for zstd compression of NVCOMP. ([#15729](https://github.com/rapidsai/cudf/pull/15729)) [@firestarman](https://github.com/firestarman) +- Fix spaces around CSV quoted strings ([#15727](https://github.com/rapidsai/cudf/pull/15727)) [@thabetx](https://github.com/thabetx) +- Add default pinned pool that falls back to new pinned allocations ([#15665](https://github.com/rapidsai/cudf/pull/15665)) [@vuule](https://github.com/vuule) +- Overhaul ops-codeowners coverage ([#15660](https://github.com/rapidsai/cudf/pull/15660)) [@raydouglass](https://github.com/raydouglass) +- Concatenate dictionary of objects along axis=1 ([#15623](https://github.com/rapidsai/cudf/pull/15623)) [@er-eis](https://github.com/er-eis) +- Construct `pylibcudf` columns from objects supporting `__cuda_array_interface__` ([#15615](https://github.com/rapidsai/cudf/pull/15615)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Expose some Parquet per-column configuration options via the python API ([#15613](https://github.com/rapidsai/cudf/pull/15613)) [@etseidl](https://github.com/etseidl) +- Migrate string `find` operations to `pylibcudf` ([#15604](https://github.com/rapidsai/cudf/pull/15604)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Round trip FIXED_LEN_BYTE_ARRAY data properly in Parquet writer ([#15600](https://github.com/rapidsai/cudf/pull/15600)) [@etseidl](https://github.com/etseidl) +- Reading multi-line JSON in string columns using runtime configurable delimiter ([#15556](https://github.com/rapidsai/cudf/pull/15556)) [@shrshi](https://github.com/shrshi) +- Remove public gtest dependency from libcudf conda package 
([#15534](https://github.com/rapidsai/cudf/pull/15534)) [@robertmaynard](https://github.com/robertmaynard) +- Fea/move to latest nanoarrow ([#15526](https://github.com/rapidsai/cudf/pull/15526)) [@robertmaynard](https://github.com/robertmaynard) +- Migrate string `case` operations to `pylibcudf` ([#15489](https://github.com/rapidsai/cudf/pull/15489)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Add Parquet encoding statistics to column chunk metadata ([#15452](https://github.com/rapidsai/cudf/pull/15452)) [@etseidl](https://github.com/etseidl) +- Implement JNI for chunked ORC reader ([#15446](https://github.com/rapidsai/cudf/pull/15446)) [@ttnghia](https://github.com/ttnghia) +- Add some missing optional fields to the Parquet RowGroup metadata ([#15421](https://github.com/rapidsai/cudf/pull/15421)) [@etseidl](https://github.com/etseidl) +- Adding parquet transcoding example ([#15420](https://github.com/rapidsai/cudf/pull/15420)) [@mhaseeb123](https://github.com/mhaseeb123) +- Add fields to Parquet Statistics structure that were added in parquet-format 2.10 ([#15412](https://github.com/rapidsai/cudf/pull/15412)) [@etseidl](https://github.com/etseidl) +- Add option to Parquet writer to skip compressing individual columns ([#15411](https://github.com/rapidsai/cudf/pull/15411)) [@etseidl](https://github.com/etseidl) +- Add BYTE_STREAM_SPLIT support to Parquet ([#15311](https://github.com/rapidsai/cudf/pull/15311)) [@etseidl](https://github.com/etseidl) +- Introduce benchmark suite for JSON reader options ([#15124](https://github.com/rapidsai/cudf/pull/15124)) [@shrshi](https://github.com/shrshi) +- Implement ORC chunked reader ([#15094](https://github.com/rapidsai/cudf/pull/15094)) [@ttnghia](https://github.com/ttnghia) +- Extend cudf devcontainers to specify 
jitify2 kernel cache ([#15068](https://github.com/rapidsai/cudf/pull/15068)) [@robertmaynard](https://github.com/robertmaynard) +- Add `to_arrow_device` function to cudf interop using nanoarrow ([#15047](https://github.com/rapidsai/cudf/pull/15047)) [@zeroshade](https://github.com/zeroshade) +- Add JSON option to prune columns ([#14996](https://github.com/rapidsai/cudf/pull/14996)) [@karthikeyann](https://github.com/karthikeyann) + +## 🛠️ Improvements + +- Deprecate `Groupby.collect` ([#15808](https://github.com/rapidsai/cudf/pull/15808)) [@galipremsagar](https://github.com/galipremsagar) +- Raise FileNotFoundError when a literal JSON string that looks like a json filename is passed ([#15806](https://github.com/rapidsai/cudf/pull/15806)) [@lithomas1](https://github.com/lithomas1) +- Deprecate `divisions='quantile'` support in `set_index` ([#15804](https://github.com/rapidsai/cudf/pull/15804)) [@rjzamora](https://github.com/rjzamora) +- Improve performance of Series.to_numpy/to_cupy ([#15792](https://github.com/rapidsai/cudf/pull/15792)) [@mroeschke](https://github.com/mroeschke) +- Access `self.index` instead of `self._index` where possible ([#15781](https://github.com/rapidsai/cudf/pull/15781)) [@mroeschke](https://github.com/mroeschke) +- Support filtered I/O in `chunked_parquet_reader` and simplify the use of `parquet_reader_options` ([#15764](https://github.com/rapidsai/cudf/pull/15764)) [@mhaseeb123](https://github.com/mhaseeb123) +- Avoid index-to-column conversion in some DataFrame ops ([#15763](https://github.com/rapidsai/cudf/pull/15763)) [@mroeschke](https://github.com/mroeschke) +- Fix `chunked_parquet_reader` behavior when input has no more rows to read ([#15757](https://github.com/rapidsai/cudf/pull/15757)) [@mhaseeb123](https://github.com/mhaseeb123) +- [JNI] Expose java API for 
cudf::io::config_host_memory_resource ([#15745](https://github.com/rapidsai/cudf/pull/15745)) [@abellina](https://github.com/abellina) +- Migrate all cpp pxd files into pylibcudf ([#15740](https://github.com/rapidsai/cudf/pull/15740)) [@vyasr](https://github.com/vyasr) +- Validate and materialize iterators earlier in as_column ([#15739](https://github.com/rapidsai/cudf/pull/15739)) [@mroeschke](https://github.com/mroeschke) +- Push some as_column arrow logic to ColumnBase.from_arrow ([#15738](https://github.com/rapidsai/cudf/pull/15738)) [@mroeschke](https://github.com/mroeschke) +- Expose stream parameter in public reduction APIs ([#15737](https://github.com/rapidsai/cudf/pull/15737)) [@srinivasyadav18](https://github.com/srinivasyadav18) +- remove unnecessary 'setuptools' host dependency, simplify dependencies.yaml ([#15736](https://github.com/rapidsai/cudf/pull/15736)) [@jameslamb](https://github.com/jameslamb) +- Defer to C++ equality and hashing for pylibcudf DataType and Aggregation objects ([#15732](https://github.com/rapidsai/cudf/pull/15732)) [@wence-](https://github.com/wence-) +- Implement null-aware NOT_EQUALS binop ([#15731](https://github.com/rapidsai/cudf/pull/15731)) [@wence-](https://github.com/wence-) +- Fix split-record result list column offset type ([#15707](https://github.com/rapidsai/cudf/pull/15707)) [@davidwendt](https://github.com/davidwendt) +- Upgrade `arrow` to `16` ([#15703](https://github.com/rapidsai/cudf/pull/15703)) [@galipremsagar](https://github.com/galipremsagar) +- Remove experimental namespace from make_strings_children ([#15702](https://github.com/rapidsai/cudf/pull/15702)) [@davidwendt](https://github.com/davidwendt) +- Rework get_json_object benchmark to use nvbench ([#15698](https://github.com/rapidsai/cudf/pull/15698)) 
[@davidwendt](https://github.com/davidwendt) +- Rework some python tests of Parquet delta encodings ([#15693](https://github.com/rapidsai/cudf/pull/15693)) [@etseidl](https://github.com/etseidl) +- Skeleton cudf polars package ([#15688](https://github.com/rapidsai/cudf/pull/15688)) [@wence-](https://github.com/wence-) +- Upgrade pre commit hooks ([#15685](https://github.com/rapidsai/cudf/pull/15685)) [@wence-](https://github.com/wence-) +- Allow `fillna` to validate for `CategoricalColumn.fillna` ([#15683](https://github.com/rapidsai/cudf/pull/15683)) [@galipremsagar](https://github.com/galipremsagar) +- Misc Column cleanups ([#15682](https://github.com/rapidsai/cudf/pull/15682)) [@mroeschke](https://github.com/mroeschke) +- Reducing runtime of JSON reader options benchmark ([#15681](https://github.com/rapidsai/cudf/pull/15681)) [@shrshi](https://github.com/shrshi) +- Add `Timestamp` and `Timedelta` proxy types ([#15680](https://github.com/rapidsai/cudf/pull/15680)) [@galipremsagar](https://github.com/galipremsagar) +- Remove host_parse_nested_json. 
([#15674](https://github.com/rapidsai/cudf/pull/15674)) [@bdice](https://github.com/bdice) +- Reduce runtime for ParquetChunkedReaderInputLimitTest gtests ([#15672](https://github.com/rapidsai/cudf/pull/15672)) [@davidwendt](https://github.com/davidwendt) +- Add large-strings gtest for cudf::interleave_columns ([#15669](https://github.com/rapidsai/cudf/pull/15669)) [@davidwendt](https://github.com/davidwendt) +- Use experimental make_strings_children for multi-replace_re ([#15667](https://github.com/rapidsai/cudf/pull/15667)) [@davidwendt](https://github.com/davidwendt) +- Enabled `Holiday` types in `cudf.pandas` ([#15664](https://github.com/rapidsai/cudf/pull/15664)) [@galipremsagar](https://github.com/galipremsagar) +- Remove obsolete `XFAIL` markers for query-planning ([#15662](https://github.com/rapidsai/cudf/pull/15662)) [@rjzamora](https://github.com/rjzamora) +- Clean up join benchmarks ([#15644](https://github.com/rapidsai/cudf/pull/15644)) [@PointKernel](https://github.com/PointKernel) +- Enable warnings as errors in custreamz ([#15642](https://github.com/rapidsai/cudf/pull/15642)) [@mroeschke](https://github.com/mroeschke) +- Improve distinct join with set `retrieve` ([#15636](https://github.com/rapidsai/cudf/pull/15636)) [@PointKernel](https://github.com/PointKernel) +- Fix -Werror=type-limits. ([#15635](https://github.com/rapidsai/cudf/pull/15635)) [@bdice](https://github.com/bdice) +- Enable FutureWarnings/DeprecationWarnings as errors for dask_cudf ([#15634](https://github.com/rapidsai/cudf/pull/15634)) [@mroeschke](https://github.com/mroeschke) +- Remove NVBench SHA override. 
([#15633](https://github.com/rapidsai/cudf/pull/15633)) [@alliepiper](https://github.com/alliepiper) +- Add support for large string columns to Parquet reader and writer ([#15632](https://github.com/rapidsai/cudf/pull/15632)) [@etseidl](https://github.com/etseidl) +- Large strings support in MD5 and SHA hashers ([#15631](https://github.com/rapidsai/cudf/pull/15631)) [@davidwendt](https://github.com/davidwendt) +- Fix make_offsets_child_column usage in cudf::strings::detail::shift ([#15630](https://github.com/rapidsai/cudf/pull/15630)) [@davidwendt](https://github.com/davidwendt) +- Use experimental make_strings_children for strings convert ([#15629](https://github.com/rapidsai/cudf/pull/15629)) [@davidwendt](https://github.com/davidwendt) +- Forward-merge branch-24.04 to branch-24.06 ([#15627](https://github.com/rapidsai/cudf/pull/15627)) [@bdice](https://github.com/bdice) +- Avoid accessing attributes via `_column` if not needed ([#15624](https://github.com/rapidsai/cudf/pull/15624)) [@mroeschke](https://github.com/mroeschke) +- Make ColumnBase.__cuda_array_interface__ opt out instead of opt in ([#15622](https://github.com/rapidsai/cudf/pull/15622)) [@mroeschke](https://github.com/mroeschke) +- Large strings support for cudf::gather ([#15621](https://github.com/rapidsai/cudf/pull/15621)) [@davidwendt](https://github.com/davidwendt) +- Remove jni-docker-build workflow ([#15619](https://github.com/rapidsai/cudf/pull/15619)) [@bdice](https://github.com/bdice) +- Support `DurationType` in cudf parquet reader via `arrow:schema` ([#15617](https://github.com/rapidsai/cudf/pull/15617)) [@mhaseeb123](https://github.com/mhaseeb123) +- Drop Centos7 support ([#15608](https://github.com/rapidsai/cudf/pull/15608)) [@NvTimLiu](https://github.com/NvTimLiu) +- Use experimental 
make_strings_children for json/csv writers ([#15599](https://github.com/rapidsai/cudf/pull/15599)) [@davidwendt](https://github.com/davidwendt) +- Use experimental make_strings_children for strings join/url_encode/slice ([#15598](https://github.com/rapidsai/cudf/pull/15598)) [@davidwendt](https://github.com/davidwendt) +- Use experimental make_strings_children in nvtext APIs ([#15595](https://github.com/rapidsai/cudf/pull/15595)) [@davidwendt](https://github.com/davidwendt) +- Migrate to `{{ stdlib("c") }}` ([#15594](https://github.com/rapidsai/cudf/pull/15594)) [@hcho3](https://github.com/hcho3) +- Deprecate `to/from_dask_dataframe` APIs in dask-cudf ([#15592](https://github.com/rapidsai/cudf/pull/15592)) [@rjzamora](https://github.com/rjzamora) +- Minor fixups for future NumPy 2 compatibility ([#15590](https://github.com/rapidsai/cudf/pull/15590)) [@seberg](https://github.com/seberg) +- Delay materializing RangeIndex in .reset_index ([#15588](https://github.com/rapidsai/cudf/pull/15588)) [@mroeschke](https://github.com/mroeschke) +- Use experimental make_strings_children for capitalize/case/pad functions ([#15587](https://github.com/rapidsai/cudf/pull/15587)) [@davidwendt](https://github.com/davidwendt) +- Use experimental make_strings_children for strings replace/filter/translate ([#15586](https://github.com/rapidsai/cudf/pull/15586)) [@davidwendt](https://github.com/davidwendt) +- Add multithreaded parquet reader benchmarks. 
([#15585](https://github.com/rapidsai/cudf/pull/15585)) [@nvdbaranec](https://github.com/nvdbaranec) +- Don't materialize column during RangeIndex methods ([#15582](https://github.com/rapidsai/cudf/pull/15582)) [@mroeschke](https://github.com/mroeschke) +- Improve performance for cudf::strings::count_re ([#15578](https://github.com/rapidsai/cudf/pull/15578)) [@davidwendt](https://github.com/davidwendt) +- Replace RangeIndex._start/_stop/_step with _range ([#15576](https://github.com/rapidsai/cudf/pull/15576)) [@mroeschke](https://github.com/mroeschke) +- add --rm and --name to devcontainer run args ([#15572](https://github.com/rapidsai/cudf/pull/15572)) [@trxcllnt](https://github.com/trxcllnt) +- Change the default dictionary policy in Parquet writer from `ALWAYS` to `ADAPTIVE` ([#15570](https://github.com/rapidsai/cudf/pull/15570)) [@mhaseeb123](https://github.com/mhaseeb123) +- Rename experimental JSON tests. ([#15568](https://github.com/rapidsai/cudf/pull/15568)) [@bdice](https://github.com/bdice) +- Refactor JNI native dependency loading to allow returning of library path ([#15566](https://github.com/rapidsai/cudf/pull/15566)) [@jlowe](https://github.com/jlowe) +- Remove protobuf and use parsed ORC statistics from libcudf ([#15564](https://github.com/rapidsai/cudf/pull/15564)) [@bdice](https://github.com/bdice) +- Deprecate legacy JSON reader options. 
([#15558](https://github.com/rapidsai/cudf/pull/15558)) [@bdice](https://github.com/bdice) +- Use same .clang-format in cuDF JNI ([#15557](https://github.com/rapidsai/cudf/pull/15557)) [@bdice](https://github.com/bdice) +- Large strings support for cudf::fill ([#15555](https://github.com/rapidsai/cudf/pull/15555)) [@davidwendt](https://github.com/davidwendt) +- Upgrade upper bound pinning to `pandas-2.2.2` ([#15554](https://github.com/rapidsai/cudf/pull/15554)) [@galipremsagar](https://github.com/galipremsagar) +- Work around issues with cccl main ([#15552](https://github.com/rapidsai/cudf/pull/15552)) [@miscco](https://github.com/miscco) +- Enable pandas plotting unit tests for cudf.pandas ([#15547](https://github.com/rapidsai/cudf/pull/15547)) [@mroeschke](https://github.com/mroeschke) +- Move timezone conversion logic to `DatetimeColumn` ([#15545](https://github.com/rapidsai/cudf/pull/15545)) [@mroeschke](https://github.com/mroeschke) +- Large strings support for cudf::interleave_columns ([#15544](https://github.com/rapidsai/cudf/pull/15544)) [@davidwendt](https://github.com/davidwendt) +- [skip ci] Switch back to 24.06 branch for pandas tests ([#15543](https://github.com/rapidsai/cudf/pull/15543)) [@galipremsagar](https://github.com/galipremsagar) +- Remove checks dependency from static-configure test job. 
([#15542](https://github.com/rapidsai/cudf/pull/15542)) [@bdice](https://github.com/bdice) +- Remove legacy JSON reader from Python ([#15538](https://github.com/rapidsai/cudf/pull/15538)) [@bdice](https://github.com/bdice) +- Enable more ignored pandas unit tests for cudf.pandas ([#15535](https://github.com/rapidsai/cudf/pull/15535)) [@mroeschke](https://github.com/mroeschke) +- Large strings support for cudf::clamp ([#15533](https://github.com/rapidsai/cudf/pull/15533)) [@davidwendt](https://github.com/davidwendt) +- Remove version hard-coding ([#15529](https://github.com/rapidsai/cudf/pull/15529)) [@galipremsagar](https://github.com/galipremsagar) +- Removing all batching code from parquet writer ([#15528](https://github.com/rapidsai/cudf/pull/15528)) [@mhaseeb123](https://github.com/mhaseeb123) +- Make some private class properties not settable ([#15527](https://github.com/rapidsai/cudf/pull/15527)) [@mroeschke](https://github.com/mroeschke) +- Large strings support in regex replace APIs ([#15524](https://github.com/rapidsai/cudf/pull/15524)) [@davidwendt](https://github.com/davidwendt) +- Skip pandas unit tests that crash pytest workers in `cudf.pandas` ([#15521](https://github.com/rapidsai/cudf/pull/15521)) [@mroeschke](https://github.com/mroeschke) +- Preserve column metadata during more DataFrame operations ([#15519](https://github.com/rapidsai/cudf/pull/15519)) [@mroeschke](https://github.com/mroeschke) +- Move to pandas-tests to a dedicated workflow file and trigger it from branch.yaml ([#15516](https://github.com/rapidsai/cudf/pull/15516)) [@galipremsagar](https://github.com/galipremsagar) +- Large strings gtest fixture and utilities ([#15513](https://github.com/rapidsai/cudf/pull/15513)) [@davidwendt](https://github.com/davidwendt) +- Convert libcudf resource 
parameters to rmm::device_async_resource_ref ([#15507](https://github.com/rapidsai/cudf/pull/15507)) [@harrism](https://github.com/harrism) +- Relax protobuf lower bound to 3.20. ([#15506](https://github.com/rapidsai/cudf/pull/15506)) [@bdice](https://github.com/bdice) +- Clean up index methods ([#15496](https://github.com/rapidsai/cudf/pull/15496)) [@mroeschke](https://github.com/mroeschke) +- Update strings contains benchmarks to nvbench ([#15495](https://github.com/rapidsai/cudf/pull/15495)) [@davidwendt](https://github.com/davidwendt) +- Update NVBench fixture to use new hooks, fix pinned memory segfault. ([#15492](https://github.com/rapidsai/cudf/pull/15492)) [@alliepiper](https://github.com/alliepiper) +- Enable tests/scalar and test/series in cudf.pandas tests ([#15486](https://github.com/rapidsai/cudf/pull/15486)) [@mroeschke](https://github.com/mroeschke) +- Clean up __cuda_array_interface__ handling in as_column ([#15477](https://github.com/rapidsai/cudf/pull/15477)) [@mroeschke](https://github.com/mroeschke) +- Avoid .ordered and .categories from being settable in CategoricalColumn and CategoricalDtype ([#15475](https://github.com/rapidsai/cudf/pull/15475)) [@mroeschke](https://github.com/mroeschke) +- Ignore pandas tests for cudf.pandas that need motoserver ([#15468](https://github.com/rapidsai/cudf/pull/15468)) [@mroeschke](https://github.com/mroeschke) +- Use cached_property for NumericColumn.nan_count instead of ._nan_count variable ([#15466](https://github.com/rapidsai/cudf/pull/15466)) [@mroeschke](https://github.com/mroeschke) +- Add to_arrow_device() functions that accept views ([#15465](https://github.com/rapidsai/cudf/pull/15465)) [@davidwendt](https://github.com/davidwendt) +- Add custom status check workflow 
([#15464](https://github.com/rapidsai/cudf/pull/15464)) [@galipremsagar](https://github.com/galipremsagar) +- Disable pandas 2.x clipboard tests in cudf.pandas tests ([#15462](https://github.com/rapidsai/cudf/pull/15462)) [@mroeschke](https://github.com/mroeschke) +- Enable tests/strings/test_api.py and tests/io/pytables in cudf.pandas tests ([#15461](https://github.com/rapidsai/cudf/pull/15461)) [@mroeschke](https://github.com/mroeschke) +- Enable test_parsing in cudf.pandas tests ([#15460](https://github.com/rapidsai/cudf/pull/15460)) [@mroeschke](https://github.com/mroeschke) +- Add `from_arrow_device` function to cudf interop using nanoarrow ([#15458](https://github.com/rapidsai/cudf/pull/15458)) [@zeroshade](https://github.com/zeroshade) +- Remove deprecated strings offsets_begin ([#15454](https://github.com/rapidsai/cudf/pull/15454)) [@davidwendt](https://github.com/davidwendt) +- Enable tests/windows/ in cudf.pandas tests ([#15444](https://github.com/rapidsai/cudf/pull/15444)) [@mroeschke](https://github.com/mroeschke) +- Enable tests/interchange/test_impl.py in cudf.pandas tests ([#15443](https://github.com/rapidsai/cudf/pull/15443)) [@mroeschke](https://github.com/mroeschke) +- Enable tests/io/test_user_agent.py in cudf pandas tests ([#15442](https://github.com/rapidsai/cudf/pull/15442)) [@mroeschke](https://github.com/mroeschke) +- Performance improvement in libcudf case conversion for long strings ([#15441](https://github.com/rapidsai/cudf/pull/15441)) [@davidwendt](https://github.com/davidwendt) +- Remove prior test skipping in run-pandas-tests with testing 2.2.1 ([#15440](https://github.com/rapidsai/cudf/pull/15440)) [@mroeschke](https://github.com/mroeschke) +- Support orc and text IO with dask-expr using legacy conversion 
([#15439](https://github.com/rapidsai/cudf/pull/15439)) [@rjzamora](https://github.com/rjzamora) +- Floating <--> fixed-point conversion must now be called explicitly ([#15438](https://github.com/rapidsai/cudf/pull/15438)) [@pmattione-nvidia](https://github.com/pmattione-nvidia) +- Unify Copy-On-Write and Spilling ([#15436](https://github.com/rapidsai/cudf/pull/15436)) [@madsbk](https://github.com/madsbk) +- Enable ``dask_cudf`` json and s3 tests with query-planning on ([#15408](https://github.com/rapidsai/cudf/pull/15408)) [@rjzamora](https://github.com/rjzamora) +- Bump ruff and codespell pre-commit checks ([#15407](https://github.com/rapidsai/cudf/pull/15407)) [@mroeschke](https://github.com/mroeschke) +- Enable all tests for `arm` arch ([#15402](https://github.com/rapidsai/cudf/pull/15402)) [@galipremsagar](https://github.com/galipremsagar) +- Bind `read_parquet_metadata` API to libcudf instead of pyarrow and extract `RowGroup` information ([#15398](https://github.com/rapidsai/cudf/pull/15398)) [@mhaseeb123](https://github.com/mhaseeb123) +- Optimizing multi-source byte range reading in JSON reader ([#15396](https://github.com/rapidsai/cudf/pull/15396)) [@shrshi](https://github.com/shrshi) +- add correct labels to pandas_function_request.md ([#15381](https://github.com/rapidsai/cudf/pull/15381)) [@raybellwaves](https://github.com/raybellwaves) +- Remove deprecated hash() and spark_murmurhash3_x86_32() ([#15375](https://github.com/rapidsai/cudf/pull/15375)) [@davidwendt](https://github.com/davidwendt) +- Large strings support in cudf::merge ([#15374](https://github.com/rapidsai/cudf/pull/15374)) [@davidwendt](https://github.com/davidwendt) +- Enable test-reporting for pandas pytests in CI ([#15369](https://github.com/rapidsai/cudf/pull/15369)) 
[@galipremsagar](https://github.com/galipremsagar) +- Use logical types in Parquet reader ([#15365](https://github.com/rapidsai/cudf/pull/15365)) [@etseidl](https://github.com/etseidl) +- Add experimental make_strings_children utility ([#15363](https://github.com/rapidsai/cudf/pull/15363)) [@davidwendt](https://github.com/davidwendt) +- Forward-merge branch-24.04 to branch-24.06 ([#15349](https://github.com/rapidsai/cudf/pull/15349)) [@bdice](https://github.com/bdice) +- Fix CMake files in libcudf C++ examples to use existing libcudf build if present ([#15348](https://github.com/rapidsai/cudf/pull/15348)) [@mhaseeb123](https://github.com/mhaseeb123) +- Use ruff pydocstyle over pydocstyle pre-commit hook ([#15345](https://github.com/rapidsai/cudf/pull/15345)) [@mroeschke](https://github.com/mroeschke) +- Refactor stream mode setup for gtests ([#15337](https://github.com/rapidsai/cudf/pull/15337)) [@davidwendt](https://github.com/davidwendt) +- Benchmark decimal <--> floating conversions. ([#15334](https://github.com/rapidsai/cudf/pull/15334)) [@pmattione-nvidia](https://github.com/pmattione-nvidia) +- Avoid duplicate dask-cudf testing ([#15333](https://github.com/rapidsai/cudf/pull/15333)) [@rjzamora](https://github.com/rjzamora) +- Skip decode steps in Parquet reader when nullable columns have no nulls ([#15332](https://github.com/rapidsai/cudf/pull/15332)) [@etseidl](https://github.com/etseidl) +- Update udf_cpp to use rapids_cpm_cccl. 
([#15331](https://github.com/rapidsai/cudf/pull/15331)) [@bdice](https://github.com/bdice) +- Forward-merge branch-24.04 into branch-24.06 [skip ci] ([#15330](https://github.com/rapidsai/cudf/pull/15330)) [@rapids-bot[bot]](https://github.com/rapids-bot[bot]) +- Allow ``numeric_only=True`` for simple groupby reductions ([#15326](https://github.com/rapidsai/cudf/pull/15326)) [@rjzamora](https://github.com/rjzamora) +- Drop CentOS 7 support. ([#15323](https://github.com/rapidsai/cudf/pull/15323)) [@bdice](https://github.com/bdice) +- Rework cudf::find_and_replace_all to use gather-based make_strings_column ([#15305](https://github.com/rapidsai/cudf/pull/15305)) [@davidwendt](https://github.com/davidwendt) +- First pass at adding testing for pylibcudf ([#15300](https://github.com/rapidsai/cudf/pull/15300)) [@vyasr](https://github.com/vyasr) +- [FEA] Performance improvement for mixed left semi/anti join ([#15288](https://github.com/rapidsai/cudf/pull/15288)) [@tgujar](https://github.com/tgujar) +- Rework cudf::replace_nulls to use strings::detail::copy_if_else ([#15286](https://github.com/rapidsai/cudf/pull/15286)) [@davidwendt](https://github.com/davidwendt) +- Clean up special casing in `as_column` for non-typed input ([#15276](https://github.com/rapidsai/cudf/pull/15276)) [@mroeschke](https://github.com/mroeschke) +- Large strings support in cudf::concatenate ([#15195](https://github.com/rapidsai/cudf/pull/15195)) [@davidwendt](https://github.com/davidwendt) +- Use less _is_categorical_dtype ([#15148](https://github.com/rapidsai/cudf/pull/15148)) [@mroeschke](https://github.com/mroeschke) +- Align date_range defaults with pandas, support tz ([#15139](https://github.com/rapidsai/cudf/pull/15139)) [@mroeschke](https://github.com/mroeschke) +- `ModuleAccelerator` performance: 
cache the result of checking if a caller is in the denylist ([#15056](https://github.com/rapidsai/cudf/pull/15056)) [@shwina](https://github.com/shwina) +- Use offsetalator in cudf::strings::replace functions ([#14824](https://github.com/rapidsai/cudf/pull/14824)) [@davidwendt](https://github.com/davidwendt) +- Cleanup some timedelta/datetime column logic ([#14715](https://github.com/rapidsai/cudf/pull/14715)) [@mroeschke](https://github.com/mroeschke) +- Refactor numpy array input in as_column ([#14651](https://github.com/rapidsai/cudf/pull/14651)) [@mroeschke](https://github.com/mroeschke) +- Refactor joins for conditional semis and antis ([#14646](https://github.com/rapidsai/cudf/pull/14646)) [@DanialJavady96](https://github.com/DanialJavady96) +- Eagerly populate the class dict for cudf.pandas proxy types ([#14534](https://github.com/rapidsai/cudf/pull/14534)) [@shwina](https://github.com/shwina) +- Some additional kernel thread index refactoring. ([#14107](https://github.com/rapidsai/cudf/pull/14107)) [@bdice](https://github.com/bdice) + # cuDF 24.04.00 (10 Apr 2024) ## 🚨 Breaking Changes diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e7f7a20e307..98c2ec0a22e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -105,7 +105,7 @@ Instructions for a minimal build environment without conda are included below. 
# create the conda environment (assuming in base `cudf` directory) # note: RAPIDS currently doesn't support `channel_priority: strict`; # use `channel_priority: flexible` instead -conda env create --name cudf_dev --file conda/environments/all_cuda-118_arch-x86_64.yaml +conda env create --name cudf_dev --file conda/environments/all_cuda-122_arch-x86_64.yaml # activate the environment conda activate cudf_dev ``` @@ -161,6 +161,8 @@ To build all libraries and tests, with Python packages in development mode, simp ./build.sh --pydevelop libcudf libcudf_kafka cudf dask_cudf cudf_kafka custreamz ``` +- **Note**: if Cython files (`*.pyx` or `*.pxd`) have changed, the Python build must be rerun. + To run the C++ tests, run ```bash @@ -217,7 +219,7 @@ cuda-gdb -ex r --args python .py ``` ```bash -cuda-memcheck python .py +compute-sanitizer --tool memcheck python .py ``` ### Device debug symbols diff --git a/README.md b/README.md index 8f9e57ff3ad..75ee405bc1f 100644 --- a/README.md +++ b/README.md @@ -14,13 +14,8 @@ You can import `cudf` directly and use it like `pandas`: ```python import cudf -import requests -from io import StringIO -url = "https://github.com/plotly/datasets/raw/master/tips.csv" -content = requests.get(url).content.decode("utf-8") - -tips_df = cudf.read_csv(StringIO(content)) +tips_df = cudf.read_csv("https://github.com/plotly/datasets/raw/master/tips.csv") tips_df["tip_percentage"] = tips_df["tip"] / tips_df["total_bill"] * 100 # display average tip by dining party size @@ -36,13 +31,8 @@ supported operations and falling back to pandas when needed: %load_ext cudf.pandas # pandas operations now use the GPU! 
import pandas as pd -import requests -from io import StringIO - -url = "https://github.com/plotly/datasets/raw/master/tips.csv" -content = requests.get(url).content.decode("utf-8") -tips_df = pd.read_csv(StringIO(content)) +tips_df = pd.read_csv("https://github.com/plotly/datasets/raw/master/tips.csv") tips_df["tip_percentage"] = tips_df["tip"] / tips_df["total_bill"] * 100 # display average tip by dining party size @@ -93,7 +83,7 @@ cuDF can be installed with conda (via [miniconda](https://docs.conda.io/projects ```bash conda install -c rapidsai -c conda-forge -c nvidia \ - cudf=24.04 python=3.11 cuda-version=12.2 + cudf=24.06 python=3.11 cuda-version=12.2 ``` We also provide [nightly Conda packages](https://anaconda.org/rapidsai-nightly) built from the HEAD diff --git a/VERSION b/VERSION index 1f534289510..0bff6981a3d 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -24.04.01 +24.06.00 diff --git a/build.sh b/build.sh index e5daf2f3451..43bb04f7a18 100755 --- a/build.sh +++ b/build.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # cuDF build script @@ -109,8 +109,8 @@ function buildAll { } function buildLibCudfJniInDocker { - local cudaVersion="11.5.0" - local imageName="cudf-build:${cudaVersion}-devel-centos7" + local cudaVersion="11.8.0" + local imageName="cudf-build:${cudaVersion}-devel-rocky8" local CMAKE_GENERATOR="${CMAKE_GENERATOR:-Ninja}" local workspaceDir="/rapids" local localMavenRepo=${LOCAL_MAVEN_REPO:-"$HOME/.m2/repository"} @@ -120,7 +120,7 @@ function buildLibCudfJniInDocker { mkdir -p "$CUDF_JAR_JAVA_BUILD_DIR/libcudf-cmake-build" mkdir -p "$HOME/.ccache" "$HOME/.m2" nvidia-docker build \ - -f java/ci/Dockerfile.centos7 \ + -f java/ci/Dockerfile.rocky \ --build-arg CUDA_VERSION=${cudaVersion} \ -t $imageName . 
nvidia-docker run -it -u $(id -u):$(id -g) --rm \ diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 668d52e530b..db306046667 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -46,11 +46,9 @@ pushd docs/cudf make dirhtml mkdir -p "${RAPIDS_DOCS_DIR}/cudf/html" mv build/dirhtml/* "${RAPIDS_DOCS_DIR}/cudf/html" -if [[ "${RAPIDS_BUILD_TYPE}" != "pull-request" ]]; then - make text - mkdir -p "${RAPIDS_DOCS_DIR}/cudf/txt" - mv build/text/* "${RAPIDS_DOCS_DIR}/cudf/txt" -fi +make text +mkdir -p "${RAPIDS_DOCS_DIR}/cudf/txt" +mv build/text/* "${RAPIDS_DOCS_DIR}/cudf/txt" popd rapids-logger "Build dask-cuDF Sphinx docs" @@ -58,11 +56,9 @@ pushd docs/dask_cudf make dirhtml mkdir -p "${RAPIDS_DOCS_DIR}/dask-cudf/html" mv build/dirhtml/* "${RAPIDS_DOCS_DIR}/dask-cudf/html" -if [[ "${RAPIDS_BUILD_TYPE}" != "pull-request" ]]; then - make text - mkdir -p "${RAPIDS_DOCS_DIR}/dask-cudf/txt" - mv build/text/* "${RAPIDS_DOCS_DIR}/dask-cudf/txt" -fi +make text +mkdir -p "${RAPIDS_DOCS_DIR}/dask-cudf/txt" +mv build/text/* "${RAPIDS_DOCS_DIR}/dask-cudf/txt" popd rapids-upload-docs diff --git a/ci/build_wheel_cudf.sh b/ci/build_wheel_cudf.sh index cde22bb70d1..f0886a28fd9 100755 --- a/ci/build_wheel_cudf.sh +++ b/ci/build_wheel_cudf.sh @@ -13,4 +13,4 @@ python -m auditwheel repair -w ${package_dir}/final_dist ${package_dir}/dist/* RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -RAPIDS_PY_WHEEL_NAME="cudf_${AUDITWHEEL_POLICY}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/final_dist +RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/final_dist diff --git a/ci/configure_cpp_static.sh b/ci/configure_cpp_static.sh new file mode 100755 index 00000000000..11d5585d98f --- /dev/null +++ b/ci/configure_cpp_static.sh @@ -0,0 +1,21 @@ +#!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +set -euo pipefail + +source rapids-date-string + +rapids-logger "Configure static cpp build" + +ENV_YAML_DIR="$(mktemp -d)" +REQUIREMENTS_FILE="${ENV_YAML_DIR}/requirements.txt" + +rapids-dependency-file-generator \ + --output requirements \ + --file_key test_static_build \ + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch)" | tee "${REQUIREMENTS_FILE}" + +python -m pip install -r "${REQUIREMENTS_FILE}" +pyenv rehash + +cmake -S cpp -B build_static -GNinja -DBUILD_SHARED_LIBS=OFF -DCUDF_USE_ARROW_STATIC=ON -DBUILD_TESTS=OFF diff --git a/ci/cudf_pandas_scripts/pandas-tests/diff.sh b/ci/cudf_pandas_scripts/pandas-tests/diff.sh index 37adabdb9c6..6cf70a2347f 100755 --- a/ci/cudf_pandas_scripts/pandas-tests/diff.sh +++ b/ci/cudf_pandas_scripts/pandas-tests/diff.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. # All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 @@ -7,18 +7,32 @@ # branch and the PR branch: # Hard-coded needs to match the version deduced by rapids-upload-artifacts-dir -MAIN_ARTIFACT=$(rapids-s3-path)cuda12_$(arch)_py310.main-results.json -PR_ARTIFACT=$(rapids-s3-path)cuda12_$(arch)_py310.pr-results.json -aws s3 cp $MAIN_ARTIFACT main-results.json +GH_JOB_NAME="pandas-tests-diff / build" +RAPIDS_FULL_VERSION=$(<./VERSION) +rapids-logger "Github job name: ${GH_JOB_NAME}" +rapids-logger "Rapids version: ${RAPIDS_FULL_VERSION}" + +PY_VER="39" +MAIN_ARTIFACT=$(rapids-s3-path)cuda12_$(arch)_py${PY_VER}.main-${RAPIDS_FULL_VERSION}-results.json +PR_ARTIFACT=$(rapids-s3-path)cuda12_$(arch)_py${PY_VER}.pr-${RAPIDS_FULL_VERSION}-results.json + +rapids-logger "Fetching latest available results from nightly" +aws s3api list-objects-v2 --bucket rapids-downloads --prefix "nightly/" --query "sort_by(Contents[?ends_with(Key, '_py${PY_VER}.main-${RAPIDS_FULL_VERSION}-results.json')], &LastModified)[::].[Key]" --output text | tee s3_output.txt +COMPARE_ENV=$(tail -n 1 s3_output.txt) +rapids-logger "Latest available results from nightly: ${COMPARE_ENV}" + +aws s3 cp "s3://rapids-downloads/${COMPARE_ENV}" main-results.json aws s3 cp $PR_ARTIFACT pr-results.json # Compute the diff and prepare job summary: python -m pip install pandas tabulate python ci/cudf_pandas_scripts/pandas-tests/job-summary.py main-results.json pr-results.json | tee summary.txt >> "$GITHUB_STEP_SUMMARY" -COMMENT=$(head -1 summary.txt) - +COMMENT=$(head -1 summary.txt | grep -oP '\d+/\d+ \(\d+\.\d+%\).*?(a decrease by|an increase by) \d+\.\d+%') echo "$COMMENT" - -# Magic name that the custom-job.yaml workflow reads and re-exports -echo "job_output=${COMMENT}" >> "${GITHUB_OUTPUT}" +jq --arg COMMENT "$COMMENT" --arg GH_JOB_NAME "$GH_JOB_NAME" -n \ + '{"context": "Pandas tests", + "description": $COMMENT, + "state":"success", + "job_name": $GH_JOB_NAME}' \ + > gh-status.json diff --git 
a/ci/cudf_pandas_scripts/pandas-tests/job-summary.py b/ci/cudf_pandas_scripts/pandas-tests/job-summary.py index 1e83e51ab04..93a815838b7 100644 --- a/ci/cudf_pandas_scripts/pandas-tests/job-summary.py +++ b/ci/cudf_pandas_scripts/pandas-tests/job-summary.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. # All rights reserved. # SPDX-License-Identifier: Apache-2.0 @@ -40,7 +40,7 @@ def get_total_and_passed(results): "Merging this PR would result in " f"{pr_passed}/{pr_total} ({passing_percentage:.2f}%) " "Pandas tests passing, " - f"{rate_change_type} in the test pass rate by " + f"{rate_change_type} by " f"{pass_rate_change:.2f}%. " f"Trunk stats: {main_passed}/{main_total}." ) diff --git a/ci/cudf_pandas_scripts/pandas-tests/run.sh b/ci/cudf_pandas_scripts/pandas-tests/run.sh index f3c37ecde26..abde5e5d160 100755 --- a/ci/cudf_pandas_scripts/pandas-tests/run.sh +++ b/ci/cudf_pandas_scripts/pandas-tests/run.sh @@ -6,25 +6,12 @@ set -euo pipefail PANDAS_TESTS_BRANCH=${1} - -rapids-logger "Running Pandas tests using $PANDAS_TESTS_BRANCH branch" +RAPIDS_FULL_VERSION=$(<./VERSION) +rapids-logger "Running Pandas tests using $PANDAS_TESTS_BRANCH branch and rapids-version $RAPIDS_FULL_VERSION" rapids-logger "PR number: ${RAPIDS_REF_NAME:-"unknown"}" -# Set the manylinux version used for downloading the wheels so that we test the -# newer ABI wheels on the newer images that support their installation. -# Need to disable pipefail for the head not to fail, see -# https://stackoverflow.com/questions/19120263/why-exit-code-141-with-grep-q -set +o pipefail -glibc_minor_version=$(ldd --version | head -1 | grep -o "[0-9]\.[0-9]\+" | tail -1 | cut -d '.' 
-f2) -set -o pipefail -manylinux_version="2_17" -if [[ ${glibc_minor_version} -ge 28 ]]; then - manylinux_version="2_28" -fi -manylinux="manylinux_${manylinux_version}" - RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -RAPIDS_PY_WHEEL_NAME="cudf_${manylinux}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep +RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep python -m pip install $(ls ./local-cudf-dep/cudf*.whl)[test,pandas-tests] RESULTS_DIR=${RAPIDS_TESTS_DIR:-"$(mktemp -d)"} @@ -40,9 +27,10 @@ bash python/cudf/cudf/pandas/scripts/run-pandas-tests.sh \ --dist worksteal \ --report-log=${PANDAS_TESTS_BRANCH}.json 2>&1 +SUMMARY_FILE_NAME=${PANDAS_TESTS_BRANCH}-${RAPIDS_FULL_VERSION}-results.json # summarize the results and save them to artifacts: -python python/cudf/cudf/pandas/scripts/summarize-test-results.py --output json pandas-testing/${PANDAS_TESTS_BRANCH}.json > pandas-testing/${PANDAS_TESTS_BRANCH}-results.json +python python/cudf/cudf/pandas/scripts/summarize-test-results.py --output json pandas-testing/${PANDAS_TESTS_BRANCH}.json > pandas-testing/${SUMMARY_FILE_NAME} RAPIDS_ARTIFACTS_DIR=${RAPIDS_ARTIFACTS_DIR:-"${PWD}/artifacts"} mkdir -p "${RAPIDS_ARTIFACTS_DIR}" -mv pandas-testing/${PANDAS_TESTS_BRANCH}-results.json ${RAPIDS_ARTIFACTS_DIR}/ -rapids-upload-to-s3 ${RAPIDS_ARTIFACTS_DIR}/${PANDAS_TESTS_BRANCH}-results.json "${RAPIDS_ARTIFACTS_DIR}" +mv pandas-testing/${SUMMARY_FILE_NAME} ${RAPIDS_ARTIFACTS_DIR}/ +rapids-upload-to-s3 ${RAPIDS_ARTIFACTS_DIR}/${SUMMARY_FILE_NAME} "${RAPIDS_ARTIFACTS_DIR}" diff --git a/ci/cudf_pandas_scripts/run_tests.sh b/ci/cudf_pandas_scripts/run_tests.sh index 4f1e4bbf993..78945d37f22 100755 --- a/ci/cudf_pandas_scripts/run_tests.sh +++ b/ci/cudf_pandas_scripts/run_tests.sh @@ -1,5 +1,5 @@ #!/bin/bash -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. 
+# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. # All rights reserved. # SPDX-License-Identifier: Apache-2.0 @@ -31,21 +31,8 @@ done if [ "$no_cudf" = true ]; then echo "Skipping cudf install" else - # Set the manylinux version used for downloading the wheels so that we test the - # newer ABI wheels on the newer images that support their installation. - # Need to disable pipefail for the head not to fail, see - # https://stackoverflow.com/questions/19120263/why-exit-code-141-with-grep-q - set +o pipefail - glibc_minor_version=$(ldd --version | head -1 | grep -o "[0-9]\.[0-9]\+" | tail -1 | cut -d '.' -f2) - set -o pipefail - manylinux_version="2_17" - if [[ ${glibc_minor_version} -ge 28 ]]; then - manylinux_version="2_28" - fi - manylinux="manylinux_${manylinux_version}" - RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" - RAPIDS_PY_WHEEL_NAME="cudf_${manylinux}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep + RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep python -m pip install $(ls ./local-cudf-dep/cudf*.whl)[test,cudf-pandas-tests] fi diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 7cacdfd39c3..beeb130f0f1 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -70,7 +70,7 @@ sed_runner "s/version == ${CURRENT_SHORT_TAG}/version == ${NEXT_SHORT_TAG}/g" RE sed_runner "s/cudf=${CURRENT_SHORT_TAG}/cudf=${NEXT_SHORT_TAG}/g" README.md # Libcudf examples update -sed_runner "s/CUDF_TAG branch-${CURRENT_SHORT_TAG}/CUDF_TAG branch-${NEXT_SHORT_TAG}/" cpp/examples/fetch_dependencies.cmake +sed_runner "s/CUDF_TAG branch-${CURRENT_SHORT_TAG}/CUDF_TAG branch-${NEXT_SHORT_TAG}/" cpp/examples/versions.cmake # CI files for FILE in .github/workflows/*.yaml; do @@ -88,4 +88,5 @@ sed_runner "s/cudf-.*-SNAPSHOT/cudf-${NEXT_FULL_JAVA_TAG}/g" java/ci/README.md find .devcontainer/ -type f 
-name devcontainer.json -print0 | while IFS= read -r -d '' filename; do sed_runner "s@rapidsai/devcontainers:[0-9.]*@rapidsai/devcontainers:${NEXT_SHORT_TAG}@g" "$(unknown)" sed_runner "s@rapidsai/devcontainers/features/rapids-build-utils:[0-9.]*@rapidsai/devcontainers/features/rapids-build-utils:${NEXT_SHORT_TAG_PEP440}@" "$(unknown)" + sed_runner "s@rapids-\${localWorkspaceFolderBasename}-[0-9.]*@rapids-\${localWorkspaceFolderBasename}-${NEXT_SHORT_TAG}@g" "$(unknown)" done diff --git a/ci/run_cudf_examples.sh b/ci/run_cudf_examples.sh new file mode 100755 index 00000000000..0819eacf636 --- /dev/null +++ b/ci/run_cudf_examples.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. + +set -uo pipefail + +EXITCODE=0 +trap "EXITCODE=1" ERR + +# Support customizing the examples' install location +cd "${INSTALL_PREFIX:-${CONDA_PREFIX:-/usr}}/bin/examples/libcudf/"; + +# compute-sanitizer not available before CUDA 11.6 +if [[ "${RAPIDS_CUDA_VERSION%.*}" < "11.6" ]]; then + echo "compute-sanitizer unavailable pre 11.6" + exit 0 +fi + +compute-sanitizer --tool memcheck basic_example + +compute-sanitizer --tool memcheck deduplication + +compute-sanitizer --tool memcheck custom_optimized names.csv +compute-sanitizer --tool memcheck custom_prealloc names.csv +compute-sanitizer --tool memcheck custom_with_malloc names.csv + +compute-sanitizer --tool memcheck parquet_io +compute-sanitizer --tool memcheck parquet_io example.parquet output.parquet DELTA_BINARY_PACKED ZSTD TRUE + +exit ${EXITCODE} diff --git a/ci/test_cpp.sh b/ci/test_cpp.sh index 995c8d7d71f..7865849bb74 100755 --- a/ci/test_cpp.sh +++ b/ci/test_cpp.sh @@ -17,6 +17,12 @@ rapids-logger "Run libcudf gtests" ./ci/run_cudf_ctests.sh -j20 SUITEERROR=$? +if (( ${SUITEERROR} == 0 )); then + rapids-logger "Run libcudf examples" + ./ci/run_cudf_examples.sh + SUITEERROR=$?
+fi + if (( ${SUITEERROR} == 0 )); then rapids-logger "Run libcudf_kafka gtests" ./ci/run_cudf_kafka_ctests.sh -j20 diff --git a/ci/test_cpp_common.sh b/ci/test_cpp_common.sh old mode 100644 new mode 100755 index e1b2a367187..da847137a2b --- a/ci/test_cpp_common.sh +++ b/ci/test_cpp_common.sh @@ -31,7 +31,7 @@ rapids-print-env rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ - libcudf libcudf_kafka libcudf-tests + libcudf libcudf_kafka libcudf-tests libcudf-example rapids-logger "Check GPU usage" nvidia-smi diff --git a/ci/test_python_cudf.sh b/ci/test_python_cudf.sh index bacb54b3896..217dd2fd9a8 100755 --- a/ci/test_python_cudf.sh +++ b/ci/test_python_cudf.sh @@ -14,6 +14,14 @@ EXITCODE=0 trap "EXITCODE=1" ERR set +e +rapids-logger "pytest pylibcudf" +pushd python/cudf/cudf/pylibcudf_tests +python -m pytest \ + --cache-clear \ + --dist=worksteal \ + . +popd + rapids-logger "pytest cudf" ./ci/run_cudf_pytests.sh \ --junitxml="${RAPIDS_TESTS_DIR}/junit-cudf.xml" \ diff --git a/ci/test_python_other.sh b/ci/test_python_other.sh index 8ecd02f70a1..cbc1dc1cb87 100755 --- a/ci/test_python_other.sh +++ b/ci/test_python_other.sh @@ -19,8 +19,8 @@ EXITCODE=0 trap "EXITCODE=1" ERR set +e -rapids-logger "pytest dask_cudf" -./ci/run_dask_cudf_pytests.sh \ +rapids-logger "pytest dask_cudf (dask-expr)" +DASK_DATAFRAME__QUERY_PLANNING=True ./ci/run_dask_cudf_pytests.sh \ --junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cudf.xml" \ --numprocesses=8 \ --dist=worksteal \ @@ -29,10 +29,9 @@ rapids-logger "pytest dask_cudf" --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/dask-cudf-coverage.xml" \ --cov-report=term -# Run tests in dask_cudf/tests and dask_cudf/io/tests with dask-expr -rapids-logger "pytest dask_cudf + dask_expr" -DASK_DATAFRAME__QUERY_PLANNING=True ./ci/run_dask_cudf_pytests.sh \ - --junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cudf-expr.xml" \ +rapids-logger "pytest dask_cudf (legacy)" +DASK_DATAFRAME__QUERY_PLANNING=False ./ci/run_dask_cudf_pytests.sh \ + 
--junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cudf-legacy.xml" \ --numprocesses=8 \ --dist=loadscope \ . diff --git a/ci/test_wheel_cudf.sh b/ci/test_wheel_cudf.sh index af5779f478a..fdb61278d36 100755 --- a/ci/test_wheel_cudf.sh +++ b/ci/test_wheel_cudf.sh @@ -3,21 +3,8 @@ set -eou pipefail -# Set the manylinux version used for downloading the wheels so that we test the -# newer ABI wheels on the newer images that support their installation. -# Need to disable pipefail for the head not to fail, see -# https://stackoverflow.com/questions/19120263/why-exit-code-141-with-grep-q -set +o pipefail -glibc_minor_version=$(ldd --version | head -1 | grep -o "[0-9]\.[0-9]\+" | tail -1 | cut -d '.' -f2) -set -o pipefail -manylinux_version="2_17" -if [[ ${glibc_minor_version} -ge 28 ]]; then - manylinux_version="2_28" -fi -manylinux="manylinux_${manylinux_version}" - RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -RAPIDS_PY_WHEEL_NAME="cudf_${manylinux}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist +RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist # echo to expand wildcard before adding `[extra]` requires for pip python -m pip install $(echo ./dist/cudf*.whl)[test] @@ -26,18 +13,21 @@ RESULTS_DIR=${RAPIDS_TESTS_DIR:-"$(mktemp -d)"} RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${RESULTS_DIR}/test-results"}/ mkdir -p "${RAPIDS_TESTS_DIR}" -# Run smoke tests for aarch64 pull requests -if [[ "$(arch)" == "aarch64" && ${RAPIDS_BUILD_TYPE} == "pull-request" ]]; then - rapids-logger "Run smoke tests for cudf" - python ./ci/wheel_smoke_test_cudf.py -else - rapids-logger "pytest cudf" - pushd python/cudf/cudf/tests - python -m pytest \ - --cache-clear \ - --junitxml="${RAPIDS_TESTS_DIR}/junit-cudf.xml" \ - --numprocesses=8 \ - --dist=worksteal \ - . - popd -fi + +rapids-logger "pytest pylibcudf" +pushd python/cudf/cudf/pylibcudf_tests +python -m pytest \ + --cache-clear \ + --dist=worksteal \ + . 
+popd + +rapids-logger "pytest cudf" +pushd python/cudf/cudf/tests +python -m pytest \ + --cache-clear \ + --junitxml="${RAPIDS_TESTS_DIR}/junit-cudf.xml" \ + --numprocesses=8 \ + --dist=worksteal \ + . +popd diff --git a/ci/test_wheel_dask_cudf.sh b/ci/test_wheel_dask_cudf.sh index 398eed43ea4..2b20b9d9ce4 100755 --- a/ci/test_wheel_dask_cudf.sh +++ b/ci/test_wheel_dask_cudf.sh @@ -7,20 +7,7 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" RAPIDS_PY_WHEEL_NAME="dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 ./dist # Download the cudf built in the previous step -# Set the manylinux version used for downloading the wheels so that we test the -# newer ABI wheels on the newer images that support their installation. -# Need to disable pipefail for the head not to fail, see -# https://stackoverflow.com/questions/19120263/why-exit-code-141-with-grep-q -set +o pipefail -glibc_minor_version=$(ldd --version | head -1 | grep -o "[0-9]\.[0-9]\+" | tail -1 | cut -d '.' -f2) -set -o pipefail -manylinux_version="2_17" -if [[ ${glibc_minor_version} -ge 28 ]]; then - manylinux_version="2_28" -fi -manylinux="manylinux_${manylinux_version}" - -RAPIDS_PY_WHEEL_NAME="cudf_${manylinux}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep +RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep python -m pip install --no-deps ./local-cudf-dep/cudf*.whl # echo to expand wildcard before adding `[extra]` requires for pip @@ -31,19 +18,19 @@ RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${RESULTS_DIR}/test-results"}/ mkdir -p "${RAPIDS_TESTS_DIR}" # Run tests in dask_cudf/tests and dask_cudf/io/tests -rapids-logger "pytest dask_cudf" +rapids-logger "pytest dask_cudf (dask-expr)" pushd python/dask_cudf/dask_cudf -python -m pytest \ +DASK_DATAFRAME__QUERY_PLANNING=True python -m pytest \ --junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cudf.xml" \ --numprocesses=8 \ . 
popd -# Run tests in dask_cudf/tests and dask_cudf/io/tests with dask-expr -rapids-logger "pytest dask_cudf + dask_expr" +# Run tests in dask_cudf/tests and dask_cudf/io/tests (legacy) +rapids-logger "pytest dask_cudf (legacy)" pushd python/dask_cudf/dask_cudf -DASK_DATAFRAME__QUERY_PLANNING=True python -m pytest \ - --junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cudf-expr.xml" \ +DASK_DATAFRAME__QUERY_PLANNING=False python -m pytest \ + --junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cudf-legacy.xml" \ --numprocesses=8 \ . popd diff --git a/ci/wheel_smoke_test_cudf.py b/ci/wheel_smoke_test_cudf.py deleted file mode 100644 index a11a97039af..00000000000 --- a/ci/wheel_smoke_test_cudf.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. - -import cudf -import pyarrow as pa - -if __name__ == '__main__': - n_legs = pa.array([2, 4, 5, 100]) - animals = pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]) - names = ["n_legs", "animals"] - foo = pa.table([n_legs, animals], names=names) - df = cudf.DataFrame.from_arrow(foo) - assert df.loc[df["animals"] == "Centipede"]["n_legs"].iloc[0] == 100 - assert df.loc[df["animals"] == "Flamingo"]["n_legs"].iloc[0] == 2 diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index eb4eca1cb12..804b09bab59 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -8,7 +8,6 @@ channels: - nvidia dependencies: - aiobotocore>=2.2.0 -- benchmark==1.8.0 - boto3>=1.21.21 - botocore>=1.24.21 - breathe>=4.35.0 @@ -27,29 +26,27 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.3 -- dask-cuda==24.4.* +- dask-cuda==24.6.* - dlpack>=0.8,<1.0 - doxygen=1.9.1 - fastavro>=0.22.9 - fmt>=10.1.1,<11 - fsspec>=0.6.0 - gcc_linux-64=11.* -- gmock>=1.13.0 -- gtest>=1.13.0 - hypothesis - identify>=2.5.20 - ipython -- libarrow-acero==14.0.2.* -- libarrow-dataset==14.0.2.* -- libarrow==14.0.2.* +- 
libarrow-acero==16.1.0.* +- libarrow-dataset==16.1.0.* +- libarrow==16.1.0.* - libcufile-dev=1.4.0.31 - libcufile=1.4.0.31 - libcurand-dev=10.3.0.86 - libcurand=10.3.0.86 -- libkvikio==24.4.* -- libparquet==14.0.2.* +- libkvikio==24.6.* +- libparquet==16.1.0.* - librdkafka>=1.9.0,<1.10.0a0 -- librmm==24.4.* +- librmm==24.6.* - make - moto>=4.0.8 - msgpack-python @@ -64,13 +61,12 @@ dependencies: - nvcomp==3.0.6 - nvtx>=0.2.1 - packaging -- pandas>=2.0,<2.2.2dev0 +- pandas>=2.0,<2.2.3dev0 - pandoc - pip - pre-commit -- protobuf>=3.20,<5 - ptxcompiler -- pyarrow==14.0.2.* +- pyarrow==16.1.0.* - pydata-sphinx-theme!=0.14.2 - pytest-benchmark - pytest-cases>=3.8.2 @@ -80,9 +76,9 @@ dependencies: - python-confluent-kafka>=1.9.0,<1.10.0a0 - python>=3.9,<3.12 - pytorch>=2.1.0 -- rapids-dask-dependency==24.4.* +- rapids-dask-dependency==24.6.* - rich -- rmm==24.4.* +- rmm==24.6.* - s3fs>=2022.3.0 - scikit-build-core>=0.7.0 - scipy @@ -96,7 +92,7 @@ dependencies: - streamz - sysroot_linux-64==2.17 - tokenizers==0.15.2 -- transformers==4.38.1 +- transformers==4.39.3 - typing_extensions>=4.0.0 - zlib>=1.2.13 - pip: diff --git a/conda/environments/all_cuda-122_arch-x86_64.yaml b/conda/environments/all_cuda-122_arch-x86_64.yaml index b1b41f41803..89eac98f652 100644 --- a/conda/environments/all_cuda-122_arch-x86_64.yaml +++ b/conda/environments/all_cuda-122_arch-x86_64.yaml @@ -8,7 +8,6 @@ channels: - nvidia dependencies: - aiobotocore>=2.2.0 -- benchmark==1.8.0 - boto3>=1.21.21 - botocore>=1.24.21 - breathe>=4.35.0 @@ -28,27 +27,25 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.3 -- dask-cuda==24.4.* +- dask-cuda==24.6.* - dlpack>=0.8,<1.0 - doxygen=1.9.1 - fastavro>=0.22.9 - fmt>=10.1.1,<11 - fsspec>=0.6.0 - gcc_linux-64=11.* -- gmock>=1.13.0 -- gtest>=1.13.0 - hypothesis - identify>=2.5.20 - ipython -- libarrow-acero==14.0.2.* -- libarrow-dataset==14.0.2.* -- libarrow==14.0.2.* +- libarrow-acero==16.1.0.* +- libarrow-dataset==16.1.0.* +- libarrow==16.1.0.* - 
libcufile-dev - libcurand-dev -- libkvikio==24.4.* -- libparquet==14.0.2.* +- libkvikio==24.6.* +- libparquet==16.1.0.* - librdkafka>=1.9.0,<1.10.0a0 -- librmm==24.4.* +- librmm==24.6.* - make - moto>=4.0.8 - msgpack-python @@ -62,12 +59,11 @@ dependencies: - nvcomp==3.0.6 - nvtx>=0.2.1 - packaging -- pandas>=2.0,<2.2.2dev0 +- pandas>=2.0,<2.2.3dev0 - pandoc - pip - pre-commit -- protobuf>=3.20,<5 -- pyarrow==14.0.2.* +- pyarrow==16.1.0.* - pydata-sphinx-theme!=0.14.2 - pynvjitlink - pytest-benchmark @@ -78,9 +74,9 @@ dependencies: - python-confluent-kafka>=1.9.0,<1.10.0a0 - python>=3.9,<3.12 - pytorch>=2.1.0 -- rapids-dask-dependency==24.4.* +- rapids-dask-dependency==24.6.* - rich -- rmm==24.4.* +- rmm==24.6.* - s3fs>=2022.3.0 - scikit-build-core>=0.7.0 - scipy @@ -94,7 +90,7 @@ dependencies: - streamz - sysroot_linux-64==2.17 - tokenizers==0.15.2 -- transformers==4.38.1 +- transformers==4.39.3 - typing_extensions>=4.0.0 - zlib>=1.2.13 - pip: diff --git a/conda/recipes/cudf/conda_build_config.yaml b/conda/recipes/cudf/conda_build_config.yaml index c98c2701653..d399e440edd 100644 --- a/conda/recipes/cudf/conda_build_config.yaml +++ b/conda/recipes/cudf/conda_build_config.yaml @@ -4,7 +4,10 @@ c_compiler_version: cxx_compiler_version: - 11 -sysroot_version: +c_stdlib: + - sysroot + +c_stdlib_version: - "2.17" cmake_version: diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index cd9237bd7cb..e7245e67659 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -57,16 +57,14 @@ requirements: - {{ compiler('cuda') }} {% endif %} - cuda-version ={{ cuda_version }} - - sysroot_{{ target_platform }} {{ sysroot_version }} + - {{ stdlib("c") }} host: - - protobuf ==4.24.* - python - cython >=3.0.3 - scikit-build-core >=0.7.0 - - setuptools - dlpack >=0.8,<1.0 - numpy 1.23 - - pyarrow ==14.0.2.* + - pyarrow ==16.1.0.* - libcudf ={{ version }} - rmm ={{ minor_version }} {% if cuda_major == "11" %} @@ -78,14 +76,13 @@ 
requirements: {% endif %} - cuda-version ={{ cuda_version }} run: - - protobuf >=3.20,<5.0a0 - python - typing_extensions >=4.0.0 - - pandas >=2.0,<2.2.2dev0 + - pandas >=2.0,<2.2.3dev0 - cupy >=12.0.0 - numba >=0.57 - {{ pin_compatible('numpy', max_pin='x') }} - - {{ pin_compatible('pyarrow', max_pin='x') }} + - {{ pin_compatible('pyarrow', max_pin='x.x') }} - libcudf ={{ version }} - {{ pin_compatible('rmm', max_pin='x.x') }} - fsspec >=0.6.0 diff --git a/conda/recipes/cudf_kafka/conda_build_config.yaml b/conda/recipes/cudf_kafka/conda_build_config.yaml index c98c2701653..d399e440edd 100644 --- a/conda/recipes/cudf_kafka/conda_build_config.yaml +++ b/conda/recipes/cudf_kafka/conda_build_config.yaml @@ -4,7 +4,10 @@ c_compiler_version: cxx_compiler_version: - 11 -sysroot_version: +c_stdlib: + - sysroot + +c_stdlib_version: - "2.17" cmake_version: diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml index 45e41bf8de7..4d91cf6320c 100644 --- a/conda/recipes/cudf_kafka/meta.yaml +++ b/conda/recipes/cudf_kafka/meta.yaml @@ -53,7 +53,7 @@ requirements: - {{ compiler('cuda') }} {% endif %} - cuda-version ={{ cuda_version }} - - sysroot_{{ target_platform }} {{ sysroot_version }} + - {{ stdlib("c") }} host: - python - cython >=3.0.3 @@ -61,7 +61,6 @@ requirements: - cudf ={{ version }} - libcudf_kafka ={{ version }} - scikit-build-core >=0.7.0 - - setuptools {% if cuda_major != "11" %} - cuda-cudart-dev {% endif %} diff --git a/conda/recipes/libcudf/conda_build_config.yaml b/conda/recipes/libcudf/conda_build_config.yaml index 53770956ebe..c01178bf732 100644 --- a/conda/recipes/libcudf/conda_build_config.yaml +++ b/conda/recipes/libcudf/conda_build_config.yaml @@ -10,20 +10,17 @@ cuda_compiler: cuda11_compiler: - nvcc -sysroot_version: +c_stdlib: + - sysroot + +c_stdlib_version: - "2.17" cmake_version: - ">=3.26.4" -gbench_version: - - "==1.8.0" - -gtest_version: - - ">=1.13.0" - libarrow_version: - - "==14.0.2" + - "==16.1.0" 
dlpack_version: - ">=0.8,<1.0" diff --git a/conda/recipes/libcudf/install_libcudf_example.sh b/conda/recipes/libcudf/install_libcudf_example.sh index e249688a03b..1a52dec99e3 100644 --- a/conda/recipes/libcudf/install_libcudf_example.sh +++ b/conda/recipes/libcudf/install_libcudf_example.sh @@ -1,4 +1,5 @@ #!/bin/bash -# Copyright (c) 2018-2022, NVIDIA CORPORATION. +# Copyright (c) 2018-2024, NVIDIA CORPORATION. -./cpp/examples/build.sh +# build and install libcudf examples +./cpp/examples/build.sh --install diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index 63eb83084dd..76115362b6c 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -43,7 +43,7 @@ requirements: {% endif %} - cuda-version ={{ cuda_version }} - ninja - - sysroot_{{ target_platform }} {{ sysroot_version }} + - {{ stdlib("c") }} host: - librmm ={{ minor_version }} - libkvikio ={{ minor_version }} @@ -69,9 +69,6 @@ requirements: - librdkafka {{ librdkafka_version }} - fmt {{ fmt_version }} - spdlog {{ spdlog_version }} - - benchmark {{ gbench_version }} - - gtest {{ gtest_version }} - - gmock {{ gtest_version }} - zlib {{ zlib_version }} outputs: @@ -108,8 +105,6 @@ outputs: - librmm ={{ minor_version }} - libkvikio ={{ minor_version }} - dlpack {{ dlpack_version }} - - gtest {{ gtest_version }} - - gmock {{ gtest_version }} test: commands: - test -f $PREFIX/lib/libcudf.so @@ -175,7 +170,7 @@ outputs: {% endif %} - cuda-version ={{ cuda_version }} - ninja - - sysroot_{{ target_platform }} {{ sysroot_version }} + - {{ stdlib("c") }} host: - {{ pin_subpackage('libcudf', exact=True) }} {% if cuda_major == "11" %} @@ -195,7 +190,7 @@ outputs: license: Apache-2.0 license_family: APACHE license_file: LICENSE - summary: libcudf_example library + summary: libcudf example executables - name: libcudf-tests version: {{ version }} script: install_libcudf_tests.sh @@ -221,9 +216,6 @@ outputs: {% else %} - libcurand-dev {% endif %} - - benchmark {{ 
gbench_version }} - - gtest {{ gtest_version }} - - gmock {{ gtest_version }} run: - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} - {{ pin_subpackage('libcudf', exact=True) }} @@ -233,9 +225,6 @@ outputs: {% else %} - libcurand {% endif %} - - benchmark {{ gbench_version }} - - gtest {{ gtest_version }} - - gmock {{ gtest_version }} about: home: https://rapids.ai/ license: Apache-2.0 diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 12837c69e59..1eab51c8827 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -192,6 +192,8 @@ include(cmake/thirdparty/get_cccl.cmake) include(cmake/thirdparty/get_rmm.cmake) # find arrow include(cmake/thirdparty/get_arrow.cmake) +# find flatbuffers +include(cmake/thirdparty/get_flatbuffers.cmake) # find dlpack include(cmake/thirdparty/get_dlpack.cmake) # find cuCollections, should come after including CCCL @@ -210,12 +212,14 @@ include(cmake/thirdparty/get_kvikio.cmake) include(cmake/thirdparty/get_fmt.cmake) # find spdlog include(cmake/thirdparty/get_spdlog.cmake) +# find nanoarrow +include(cmake/thirdparty/get_nanoarrow.cmake) # Workaround until https://github.com/rapidsai/rapids-cmake/issues/176 is resolved if(NOT BUILD_SHARED_LIBS) include("${rapids-cmake-dir}/export/find_package_file.cmake") list(APPEND METADATA_KINDS BUILD INSTALL) - list(APPEND dependencies KvikIO ZLIB nvcomp) + list(APPEND dependencies KvikIO ZLIB nvcomp nanoarrow) if(TARGET cufile::cuFile_interface) list(APPEND dependencies cuFile) endif() @@ -260,6 +264,7 @@ add_library( src/binaryop/compiled/Mod.cu src/binaryop/compiled/Mul.cu src/binaryop/compiled/NullEquals.cu + src/binaryop/compiled/NullNotEquals.cu src/binaryop/compiled/NullLogicalAnd.cu src/binaryop/compiled/NullLogicalOr.cu src/binaryop/compiled/NullMax.cu @@ -344,7 +349,6 @@ add_library( src/groupby/sort/group_replace_nulls.cu src/groupby/sort/group_sum_scan.cu src/groupby/sort/sort_helper.cu - src/hash/hashing.cu src/hash/md5_hash.cu 
src/hash/murmurhash3_x86_32.cu src/hash/murmurhash3_x64_128.cu @@ -353,11 +357,14 @@ add_library( src/hash/sha256_hash.cu src/hash/sha384_hash.cu src/hash/sha512_hash.cu - src/hash/spark_murmurhash3_x86_32.cu src/hash/xxhash_64.cu src/interop/dlpack.cpp src/interop/from_arrow.cu src/interop/to_arrow.cu + src/interop/to_arrow_device.cu + src/interop/from_arrow_device.cu + src/interop/to_arrow_schema.cpp + src/interop/to_arrow_utilities.cpp src/interop/detail/arrow_allocator.cpp src/io/avro/avro.cpp src/io/avro/avro_gpu.cu @@ -391,8 +398,9 @@ add_library( src/io/orc/dict_enc.cu src/io/orc/orc.cpp src/io/orc/reader_impl.cu + src/io/orc/reader_impl_chunking.cu + src/io/orc/reader_impl_decode.cu src/io/orc/reader_impl_helpers.cpp - src/io/orc/reader_impl_preprocess.cu src/io/orc/stats_enc.cu src/io/orc/stripe_data.cu src/io/orc/stripe_enc.cu @@ -424,7 +432,9 @@ add_library( src/io/text/bgzip_utils.cpp src/io/text/multibyte_split.cu src/io/utilities/arrow_io_source.cpp + src/io/utilities/base64_utilities.cpp src/io/utilities/column_buffer.cpp + src/io/utilities/column_buffer_strings.cu src/io/utilities/config_utils.cpp src/io/utilities/data_casting.cu src/io/utilities/data_sink.cpp @@ -450,7 +460,6 @@ add_library( src/join/mixed_join_semi.cu src/join/mixed_join_size_kernel.cu src/join/mixed_join_size_kernel_nulls.cu - src/join/mixed_join_size_kernels_semi.cu src/join/semi_join.cu src/json/json_path.cu src/lists/contains.cu @@ -584,12 +593,14 @@ add_library( src/strings/filling/fill.cu src/strings/filter_chars.cu src/strings/like.cu + src/strings/merge/merge.cu src/strings/padding.cu src/strings/regex/regcomp.cpp src/strings/regex/regexec.cpp src/strings/regex/regex_program.cpp src/strings/repeat_strings.cu src/strings/replace/backref_re.cu + src/strings/replace/find_replace.cu src/strings/replace/multi.cu src/strings/replace/multi_re.cu src/strings/replace/replace.cu @@ -734,6 +745,8 @@ target_include_directories( "$" "$" PRIVATE "$" + "$" + "$" INTERFACE "$" ) @@ -781,8 
+794,8 @@ add_dependencies(cudf jitify_preprocess_run) target_link_libraries( cudf PUBLIC ${ARROW_LIBRARIES} CCCL::CCCL rmm::rmm - PRIVATE $ cuco::cuco ZLIB::ZLIB nvcomp::nvcomp kvikio::kvikio - $ + PRIVATE $ cuco::cuco ZLIB::ZLIB nvcomp::nvcomp + kvikio::kvikio $ nanoarrow ) # Add Conda library, and include paths if specified @@ -842,14 +855,12 @@ if(CUDF_BUILD_TESTUTIL) add_library(cudf::cudftest_default_stream ALIAS cudftest_default_stream) - # Needs to be static so that we support usage of static builds of gtest which doesn't compile with - # fPIC enabled and therefore can't be embedded into shared libraries. add_library( - cudftestutil STATIC + cudftestutil SHARED tests/io/metadata_utilities.cpp - tests/utilities/base_fixture.cpp tests/utilities/column_utilities.cu tests/utilities/debug_utilities.cu + tests/utilities/random_seed.cpp tests/utilities/table_utilities.cu tests/utilities/tdigest_utilities.cu ) @@ -874,8 +885,8 @@ if(CUDF_BUILD_TESTUTIL) target_link_libraries( cudftestutil - PUBLIC GTest::gmock GTest::gtest Threads::Threads cudf cudftest_default_stream - PRIVATE $ + PUBLIC Threads::Threads cudf cudftest_default_stream + PRIVATE GTest::gmock GTest::gtest $ ) target_include_directories( @@ -954,7 +965,7 @@ endif() if(CUDF_BUILD_BENCHMARKS) # Find or install GoogleBench include(${rapids-cmake-dir}/cpm/gbench.cmake) - rapids_cpm_gbench() + rapids_cpm_gbench(BUILD_STATIC) # Find or install nvbench include(cmake/thirdparty/get_nvbench.cmake) diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index c82e475dece..10f645dfec0 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -25,7 +25,7 @@ target_compile_options( target_link_libraries( cudf_datagen PUBLIC GTest::gmock GTest::gtest benchmark::benchmark nvbench::nvbench Threads::Threads cudf - cudftestutil nvtx3-cpp + cudftestutil nvtx3::nvtx3-cpp PRIVATE $ ) @@ -40,7 +40,7 @@ target_include_directories( # Use an OBJECT library so we only compile these 
helper source files only once add_library( - cudf_benchmark_common OBJECT "${CUDF_SOURCE_DIR}/tests/utilities/base_fixture.cpp" + cudf_benchmark_common OBJECT "${CUDF_SOURCE_DIR}/tests/utilities/random_seed.cpp" synchronization/synchronization.cpp io/cuio_common.cpp ) target_link_libraries(cudf_benchmark_common PRIVATE cudf_datagen $) @@ -208,8 +208,9 @@ ConfigureNVBench( ) # ################################################################################################## -# * reduction benchmark --------------------------------------------------------------------------- +# * replace benchmark --------------------------------------------------------------------------- ConfigureBench(REPLACE_BENCH replace/clamp.cpp replace/nans.cpp) +ConfigureNVBench(REPLACE_NVBENCH replace/nulls.cpp) # ################################################################################################## # * filling benchmark ----------------------------------------------------------------------------- @@ -235,7 +236,9 @@ ConfigureNVBench(HASHING_NVBENCH hashing/hash.cpp) # ################################################################################################## # * merge benchmark ------------------------------------------------------------------------------- ConfigureBench(MERGE_BENCH merge/merge.cpp) -ConfigureNVBench(MERGE_NVBENCH merge/merge_structs.cpp merge/merge_lists.cpp) +ConfigureNVBench( + MERGE_NVBENCH merge/merge_lists.cpp merge/merge_structs.cpp merge/merge_strings.cpp +) # ################################################################################################## # * null_mask benchmark --------------------------------------------------------------------------- @@ -253,6 +256,11 @@ ConfigureNVBench( PARQUET_READER_NVBENCH io/parquet/parquet_reader_input.cpp io/parquet/parquet_reader_options.cpp ) +# ################################################################################################## +# * parquet multithread reader benchmark +# 
---------------------------------------------------------------------- +ConfigureNVBench(PARQUET_MULTITHREAD_READER_NVBENCH io/parquet/parquet_reader_multithread.cpp) + # ################################################################################################## # * orc reader benchmark -------------------------------------------------------------------------- ConfigureNVBench(ORC_READER_NVBENCH io/orc/orc_reader_input.cpp io/orc/orc_reader_options.cpp) @@ -298,7 +306,6 @@ ConfigureBench( string/copy.cu string/factory.cu string/filter.cpp - string/find.cpp string/repeat_strings.cpp string/replace.cpp string/slice.cpp @@ -315,6 +322,7 @@ ConfigureNVBench( string/copy_range.cpp string/count.cpp string/extract.cpp + string/find.cpp string/gather.cpp string/join_strings.cpp string/lengths.cpp @@ -327,9 +335,10 @@ ConfigureNVBench( # ################################################################################################## # * json benchmark ------------------------------------------------------------------- -ConfigureBench(JSON_BENCH json/json.cu) +ConfigureNVBench(JSON_NVBENCH json/json.cu) ConfigureNVBench(FST_NVBENCH io/fst.cu) ConfigureNVBench(JSON_READER_NVBENCH io/json/nested_json.cpp io/json/json_reader_input.cpp) +ConfigureNVBench(JSON_READER_OPTION_NVBENCH io/json/json_reader_option.cpp) ConfigureNVBench(JSON_WRITER_NVBENCH io/json/json_writer.cpp) # ################################################################################################## @@ -337,6 +346,16 @@ ConfigureNVBench(JSON_WRITER_NVBENCH io/json/json_writer.cpp) ConfigureNVBench(MULTIBYTE_SPLIT_NVBENCH io/text/multibyte_split.cpp) target_link_libraries(MULTIBYTE_SPLIT_NVBENCH PRIVATE ZLIB::ZLIB) +# ################################################################################################## +# * decimal benchmark +# --------------------------------------------------------------------------------- +ConfigureNVBench(DECIMAL_NVBENCH decimal/convert_floating.cpp) + +# 
################################################################################################## +# * reshape benchmark +# --------------------------------------------------------------------------------- +ConfigureNVBench(RESHAPE_NVBENCH reshape/interleave.cpp) + add_custom_target( run_benchmarks DEPENDS CUDF_BENCHMARKS diff --git a/cpp/benchmarks/binaryop/compiled_binaryop.cpp b/cpp/benchmarks/binaryop/compiled_binaryop.cpp index a1131df4472..7086a61c7c5 100644 --- a/cpp/benchmarks/binaryop/compiled_binaryop.cpp +++ b/cpp/benchmarks/binaryop/compiled_binaryop.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -111,5 +111,6 @@ BINARYOP_BENCHMARK_DEFINE(decimal32, decimal32, NOT_EQUAL, bool BINARYOP_BENCHMARK_DEFINE(timestamp_s, timestamp_s, LESS, bool); BINARYOP_BENCHMARK_DEFINE(timestamp_ms, timestamp_s, GREATER, bool); BINARYOP_BENCHMARK_DEFINE(duration_ms, duration_ns, NULL_EQUALS, bool); +BINARYOP_BENCHMARK_DEFINE(duration_ms, duration_ns, NULL_NOT_EQUALS, bool); BINARYOP_BENCHMARK_DEFINE(decimal32, decimal32, NULL_MAX, decimal32); BINARYOP_BENCHMARK_DEFINE(timestamp_D, timestamp_s, NULL_MIN, timestamp_s); diff --git a/cpp/benchmarks/common/generate_input.cu b/cpp/benchmarks/common/generate_input.cu index 9857aac4473..6df2cb44adc 100644 --- a/cpp/benchmarks/common/generate_input.cu +++ b/cpp/benchmarks/common/generate_input.cu @@ -324,10 +324,11 @@ struct random_value_fn()>> { distribution_fn dist; std::optional scale; - random_value_fn(distribution_params const& desc) + random_value_fn(distribution_params const& desc) : lower_bound{desc.lower_bound}, upper_bound{desc.upper_bound}, - dist{make_distribution(desc.id, desc.lower_bound, desc.upper_bound)} + dist{make_distribution(desc.id, lower_bound, upper_bound)}, + scale{desc.scale} { } diff --git 
a/cpp/benchmarks/common/generate_input.hpp b/cpp/benchmarks/common/generate_input.hpp index 31dc2673d70..68d3dc492f5 100644 --- a/cpp/benchmarks/common/generate_input.hpp +++ b/cpp/benchmarks/common/generate_input.hpp @@ -182,9 +182,17 @@ struct distribution_params -struct distribution_params()>> {}; +struct distribution_params()>> { + distribution_id id; + typename T::rep lower_bound; + typename T::rep upper_bound; + std::optional scale; +}; /** * @brief Returns a vector of types, corresponding to the input type or a type group. @@ -226,7 +234,7 @@ class data_profile { cudf::type_id::INT32, {distribution_id::GEOMETRIC, 0, 64}, 2}; distribution_params struct_dist_desc{ {cudf::type_id::INT32, cudf::type_id::FLOAT32, cudf::type_id::STRING}, 2}; - std::map> decimal_params; + std::map> decimal_params; double bool_probability_true = 0.5; std::optional null_probability = 0.01; @@ -300,16 +308,21 @@ class data_profile { } template ()>* = nullptr> - distribution_params get_distribution_params() const + distribution_params get_distribution_params() const { using rep = typename T::rep; auto it = decimal_params.find(cudf::type_to_id()); if (it == decimal_params.end()) { auto const range = default_range(); - return distribution_params{default_distribution_id(), range.first, range.second}; + auto const scale = std::optional{}; + return distribution_params{ + default_distribution_id(), range.first, range.second, scale}; } else { auto& desc = it->second; - return {desc.id, static_cast(desc.lower_bound), static_cast(desc.upper_bound)}; + return {desc.id, + static_cast(desc.lower_bound), + static_cast(desc.upper_bound), + desc.scale}; } } @@ -359,6 +372,23 @@ class data_profile { } } + // Users should pass integral values for bounds when setting the parameters for fixed-point. + // Otherwise the call with have no effect. 
+ template , T>* = nullptr> + void set_distribution_params(Type_enum type_or_group, + distribution_id dist, + T lower_bound, + T upper_bound, + numeric::scale_type scale) + { + for (auto tid : get_type_or_group(static_cast(type_or_group))) { + decimal_params[tid] = { + dist, static_cast<__int128_t>(lower_bound), static_cast<__int128_t>(upper_bound), scale}; + } + } + template (), T>* = nullptr> void set_distribution_params(Type_enum type_or_group, distribution_id dist, diff --git a/cpp/benchmarks/copying/shift.cu b/cpp/benchmarks/copying/shift.cu index e1169e3bcd6..efc385cf10b 100644 --- a/cpp/benchmarks/copying/shift.cu +++ b/cpp/benchmarks/copying/shift.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,11 +21,13 @@ #include #include +#include + template > std::unique_ptr make_scalar( - T value = 0, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + T value = 0, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) { auto s = new ScalarType(value, true, stream, mr); return std::unique_ptr(s); diff --git a/cpp/benchmarks/decimal/convert_floating.cpp b/cpp/benchmarks/decimal/convert_floating.cpp new file mode 100644 index 00000000000..a367036c494 --- /dev/null +++ b/cpp/benchmarks/decimal/convert_floating.cpp @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include + +#include + +#include + +// This benchmark compares the cost of converting decimal <--> floating point +template +void bench_cast_decimal(nvbench::state& state, nvbench::type_list) +{ + static constexpr bool is_input_floating = std::is_floating_point_v; + static constexpr bool is_output_floating = std::is_floating_point_v; + + static constexpr bool is_double = + std::is_same_v || std::is_same_v; + static constexpr bool is_32bit = + std::is_same_v || std::is_same_v; + static constexpr bool is_128bit = std::is_same_v || + std::is_same_v; + + // Skip floating --> floating and decimal --> decimal + if constexpr (is_input_floating == is_output_floating) { + state.skip("Meaningless conversion."); + return; + } + + // Skip float <--> dec128 + if constexpr (!is_double && is_128bit) { + state.skip("Ignoring float <--> dec128."); + return; + } + + // Get settings + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const exp_mode = state.get_int64("exp_range"); + + // Exponent range: Range size is 10^6 + // These probe the edges of the float and double ranges, as well as more common values + int const exp_min_array[] = {-307, -37, -14, -3, 8, 31, 301}; + int const exp_range_size = 6; + int const exp_min = exp_min_array[exp_mode]; + int const exp_max = exp_min + exp_range_size; + + // With exp range size of 6, decimal output (generated or casted-to) has 7 digits of precision + int const extra_digits_precision = 1; + + // Exclude end range of double from float test + if (!is_double && ((exp_mode == 
0) || (exp_mode == 6))) { + state.skip("Range beyond end of float tests."); + return; + } + + // The current float <--> decimal conversion algorithm is limited + static constexpr bool is_64bit = !is_32bit && !is_128bit; + if (is_32bit && (exp_mode != 3)) { + state.skip("Decimal32 conversion only works up to scale factors of 10^9."); + return; + } + if (is_64bit && ((exp_mode < 2) || (exp_mode > 4))) { + state.skip("Decimal64 conversion only works up to scale factors of 10^18."); + return; + } + if (is_128bit && ((exp_mode == 0) || (exp_mode == 6))) { + state.skip("Decimal128 conversion only works up to scale factors of 10^38."); + return; + } + + // Type IDs + auto const input_id = cudf::type_to_id(); + auto const output_id = cudf::type_to_id(); + + // Create data profile and scale + auto const [output_scale, profile] = [&]() { + if constexpr (is_input_floating) { + // Range for generated floating point values + auto get_pow10 = [](auto exp10) { + return std::pow(static_cast(10), static_cast(exp10)); + }; + InputType const floating_range_min = get_pow10(exp_min); + InputType const floating_range_max = get_pow10(exp_max); + + // With exp range size of 6, output has 7 decimal digits of precision + auto const decimal_output_scale = exp_min - extra_digits_precision; + + // Input distribution + data_profile const profile = data_profile_builder().distribution( + input_id, distribution_id::NORMAL, floating_range_min, floating_range_max); + + return std::pair{decimal_output_scale, profile}; + + } else { // Generating decimals + + using decimal_rep_type = typename InputType::rep; + + // For exp range size 6 and precision 7, generates ints between 10 and 10^7, + // with scale factor of: exp_max - 7. This matches floating point generation. 
+ int const digits_precision = exp_range_size + extra_digits_precision; + auto const decimal_input_scale = numeric::scale_type{exp_max - digits_precision}; + + // Range for generated integer values + auto get_pow10 = [](auto exp10) { + return numeric::detail::ipow(exp10); + }; + auto const decimal_range_min = get_pow10(digits_precision - exp_range_size); + auto const decimal_range_max = get_pow10(digits_precision); + + // Input distribution + data_profile const profile = data_profile_builder().distribution(input_id, + distribution_id::NORMAL, + decimal_range_min, + decimal_range_max, + decimal_input_scale); + + return std::pair{0, profile}; + } + }(); + + // Generate input data + auto const input_col = create_random_column(input_id, row_count{num_rows}, profile); + auto const input_view = input_col->view(); + + // Output type + auto const output_type = + !is_input_floating ? cudf::data_type(output_id) : cudf::data_type(output_id, output_scale); + + // Stream + auto stream = cudf::get_default_stream(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + + // Run benchmark + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch&) { cudf::cast(input_view, output_type); }); + + // Throughput statistics + state.add_element_count(num_rows); + state.add_global_memory_reads(num_rows); + state.add_global_memory_writes(num_rows); +} + +// Data types +using data_types = + nvbench::type_list; + +NVBENCH_BENCH_TYPES(bench_cast_decimal, NVBENCH_TYPE_AXES(data_types, data_types)) + .set_name("decimal_floating_conversion") + .set_type_axes_names({"InputType", "OutputType"}) + .add_int64_power_of_two_axis("num_rows", {28}) + .add_int64_axis("exp_range", nvbench::range(0, 6)); diff --git a/cpp/benchmarks/fixture/benchmark_fixture.hpp b/cpp/benchmarks/fixture/benchmark_fixture.hpp index adde0ae1720..8c8d6756b00 100644 --- a/cpp/benchmarks/fixture/benchmark_fixture.hpp +++ b/cpp/benchmarks/fixture/benchmark_fixture.hpp @@ -120,6 +120,7 @@ class 
memory_stats_logger { } private: + // TODO change to resource_ref once set_current_device_resource supports it rmm::mr::device_memory_resource* existing_mr; rmm::mr::statistics_resource_adaptor statistics_mr; }; diff --git a/cpp/benchmarks/fixture/nvbench_fixture.hpp b/cpp/benchmarks/fixture/nvbench_fixture.hpp index 4e4eec3547f..ac0cab4071b 100644 --- a/cpp/benchmarks/fixture/nvbench_fixture.hpp +++ b/cpp/benchmarks/fixture/nvbench_fixture.hpp @@ -45,6 +45,8 @@ static std::string cuio_host_mem_param{ * Initializes the default memory resource to use the RMM pool device resource. */ struct nvbench_base_fixture { + using host_pooled_mr_t = rmm::mr::pool_memory_resource; + inline auto make_cuda() { return std::make_shared(); } inline auto make_pool() @@ -90,12 +92,14 @@ struct nvbench_base_fixture { inline rmm::host_async_resource_ref make_cuio_host_pinned_pool() { - using host_pooled_mr = rmm::mr::pool_memory_resource; - static std::shared_ptr mr = std::make_shared( - std::make_shared().get(), - size_t{1} * 1024 * 1024 * 1024); + if (!this->host_pooled_mr) { + // Don't store in static, as the CUDA context may be destroyed before static destruction + this->host_pooled_mr = std::make_shared( + std::make_shared().get(), + size_t{1} * 1024 * 1024 * 1024); + } - return *mr; + return *this->host_pooled_mr; } inline rmm::host_async_resource_ref create_cuio_host_memory_resource(std::string const& mode) @@ -126,9 +130,16 @@ struct nvbench_base_fixture { std::cout << "CUIO host memory resource = " << cuio_host_mode << "\n"; } + ~nvbench_base_fixture() + { + // Ensure the the pool is freed before the CUDA context is destroyed: + cudf::io::set_host_memory_resource(this->make_cuio_host_pinned()); + } + std::shared_ptr mr; std::string rmm_mode{"pool"}; + std::shared_ptr host_pooled_mr; std::string cuio_host_mode{"pinned"}; }; diff --git a/cpp/benchmarks/fixture/nvbench_main.cpp b/cpp/benchmarks/fixture/nvbench_main.cpp index f46cb11a6c3..5dfd67b1c54 100644 --- 
a/cpp/benchmarks/fixture/nvbench_main.cpp +++ b/cpp/benchmarks/fixture/nvbench_main.cpp @@ -15,29 +15,44 @@ */ #include -#define NVBENCH_ENVIRONMENT cudf::nvbench_base_fixture #include +#include #include +namespace cudf { + // strip off the rmm_mode and cuio_host_mem parameters before passing the // remaining arguments to nvbench::option_parser -#undef NVBENCH_MAIN_PARSE -#define NVBENCH_MAIN_PARSE(argc, argv) \ - nvbench::option_parser parser; \ - std::vector m_args; \ - for (int i = 0; i < argc; ++i) { \ - std::string arg = argv[i]; \ - if (arg == cudf::detail::rmm_mode_param) { \ - i += 2; \ - } else if (arg == cudf::detail::cuio_host_mem_param) { \ - i += 2; \ - } else { \ - m_args.push_back(arg); \ - } \ - } \ - parser.parse(m_args) +void benchmark_arg_handler(std::vector& args) +{ + std::vector _cudf_tmp_args; + + for (std::size_t i = 0; i < args.size(); ++i) { + std::string arg = args[i]; + if (arg == cudf::detail::rmm_mode_param) { + i++; // skip the next argument + } else if (arg == cudf::detail::cuio_host_mem_param) { + i++; // skip the next argument + } else { + _cudf_tmp_args.push_back(arg); + } + } + + args = _cudf_tmp_args; +} + +} // namespace cudf + +// Install arg handler +#undef NVBENCH_MAIN_CUSTOM_ARGS_HANDLER +#define NVBENCH_MAIN_CUSTOM_ARGS_HANDLER(args) cudf::benchmark_arg_handler(args) + +// Global fixture setup: +#undef NVBENCH_MAIN_INITIALIZE_CUSTOM_POST +#define NVBENCH_MAIN_INITIALIZE_CUSTOM_POST(argc, argv) \ + [[maybe_unused]] auto env_state = cudf::nvbench_base_fixture(argc, argv); // this declares/defines the main() function using the definitions above NVBENCH_MAIN diff --git a/cpp/benchmarks/io/cuio_common.hpp b/cpp/benchmarks/io/cuio_common.hpp index 3d5be41e25f..6e0b32219ce 100644 --- a/cpp/benchmarks/io/cuio_common.hpp +++ b/cpp/benchmarks/io/cuio_common.hpp @@ -39,6 +39,10 @@ class cuio_source_sink_pair { // delete the temporary file std::remove(file_name.c_str()); } + // move constructor + 
cuio_source_sink_pair(cuio_source_sink_pair&& ss) = default; + cuio_source_sink_pair& operator=(cuio_source_sink_pair&& ss) = default; + /** * @brief Created a source info of the set type * diff --git a/cpp/benchmarks/io/json/json_reader_option.cpp b/cpp/benchmarks/io/json/json_reader_option.cpp new file mode 100644 index 00000000000..378134a2010 --- /dev/null +++ b/cpp/benchmarks/io/json/json_reader_option.cpp @@ -0,0 +1,244 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +#include +#include +#include + +#include + +// Size of the data in the benchmark dataframe; chosen to be low enough to allow benchmarks to +// run on most GPUs, but large enough to allow highest throughput +constexpr size_t data_size = 512 << 20; +constexpr cudf::size_type num_cols = 64; + +template +void BM_json_read_options(nvbench::state& state, nvbench::type_list>) +{ + constexpr auto json_lines_bool = JsonLines == json_lines::YES; + + cuio_source_sink_pair source_sink(io_type::HOST_BUFFER); + auto const data_types = get_type_or_group({static_cast(data_type::INTEGRAL), + static_cast(data_type::FLOAT), + static_cast(data_type::DECIMAL), + static_cast(data_type::STRING), + static_cast(data_type::LIST), + static_cast(data_type::STRUCT)}); + + auto const tbl = create_random_table( + cycle_dtypes(data_types, num_cols), table_size_bytes{data_size}, data_profile_builder()); + auto const view = tbl->view(); + cudf::io::json_writer_options const write_opts = + cudf::io::json_writer_options::builder(source_sink.make_sink_info(), view) + .lines(json_lines_bool) + .na_rep("null") + .rows_per_chunk(100'000); + cudf::io::write_json(write_opts); + + cudf::io::json_reader_options read_options = + cudf::io::json_reader_options::builder(source_sink.make_source_info()).lines(json_lines_bool); + + auto mem_stats_logger = cudf::memory_stats_logger(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec( + nvbench::exec_tag::sync | nvbench::exec_tag::timer, [&](nvbench::launch& launch, auto& timer) { + try_drop_l3_cache(); + timer.start(); + auto const result = cudf::io::read_json(read_options); + auto const num_rows_read = result.tbl->num_rows(); + auto const num_cols_read = result.tbl->num_columns(); + timer.stop(); + CUDF_EXPECTS(num_rows_read == view.num_rows(), "Benchmark did not read the entire table"); + CUDF_EXPECTS(num_cols_read == num_cols, "Unexpected number of 
columns"); + }); + + auto const elapsed_time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); + auto const data_processed = data_size * num_cols / view.num_columns(); + state.add_element_count(static_cast(data_processed) / elapsed_time, "bytes_per_second"); + state.add_buffer_size( + mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); + state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size"); +} + +template +void BM_jsonlines_read_options(nvbench::state& state, + nvbench::type_list, + nvbench::enum_type, + nvbench::enum_type, + nvbench::enum_type, + nvbench::enum_type>) +{ + constexpr auto normalize_single_quotes_bool = + NormalizeSingleQuotes == normalize_single_quotes::YES; + constexpr auto normalize_whitespace_bool = NormalizeWhitespace == normalize_whitespace::YES; + constexpr auto mixed_types_as_string_bool = MixedTypesAsString == mixed_types_as_string::YES; + constexpr auto recovery_mode_enum = RecoveryMode == recovery_mode::RECOVER_WITH_NULL + ? 
cudf::io::json_recovery_mode_t::RECOVER_WITH_NULL + : cudf::io::json_recovery_mode_t::FAIL; + size_t const num_chunks = state.get_int64("num_chunks"); + if (num_chunks > 1 && RowSelection == row_selection::ALL) { + state.skip( + "No point running the same benchmark multiple times for different num_chunks when all rows " + "are being selected anyway"); + return; + } + + cuio_source_sink_pair source_sink(io_type::HOST_BUFFER); + auto const data_types = get_type_or_group({static_cast(data_type::INTEGRAL), + static_cast(data_type::FLOAT), + static_cast(data_type::DECIMAL), + static_cast(data_type::STRING), + static_cast(data_type::LIST), + static_cast(data_type::STRUCT)}); + + auto const tbl = create_random_table( + cycle_dtypes(data_types, num_cols), table_size_bytes{data_size}, data_profile_builder()); + auto const view = tbl->view(); + cudf::io::json_writer_options const write_opts = + cudf::io::json_writer_options::builder(source_sink.make_sink_info(), view) + .lines(true) + .na_rep("null") + .rows_per_chunk(100'000); + cudf::io::write_json(write_opts); + + cudf::io::json_reader_options read_options = + cudf::io::json_reader_options::builder(source_sink.make_source_info()) + .lines(true) + .normalize_single_quotes(normalize_single_quotes_bool) + .normalize_whitespace(normalize_whitespace_bool) + .mixed_types_as_string(mixed_types_as_string_bool) + .recovery_mode(recovery_mode_enum); + + size_t const chunk_size = cudf::util::div_rounding_up_safe(source_sink.size(), num_chunks); + auto mem_stats_logger = cudf::memory_stats_logger(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec( + nvbench::exec_tag::sync | nvbench::exec_tag::timer, [&](nvbench::launch& launch, auto& timer) { + try_drop_l3_cache(); + cudf::size_type num_rows_read = 0; + cudf::size_type num_cols_read = 0; + timer.start(); + switch (RowSelection) { + case row_selection::ALL: { + auto const result = cudf::io::read_json(read_options); + 
num_rows_read = result.tbl->num_rows(); + num_cols_read = result.tbl->num_columns(); + break; + } + case row_selection::BYTE_RANGE: { + for (uint64_t chunk = 0; chunk < num_chunks; chunk++) { + read_options.set_byte_range_offset(chunk * chunk_size); + read_options.set_byte_range_size(chunk_size); + auto const result = cudf::io::read_json(read_options); + num_rows_read += result.tbl->num_rows(); + num_cols_read = result.tbl->num_columns(); + if (num_cols_read) + CUDF_EXPECTS(num_cols_read == num_cols, "Unexpected number of columns"); + } + break; + } + default: CUDF_FAIL("Unsupported row selection method"); + } + timer.stop(); + CUDF_EXPECTS(num_rows_read == view.num_rows(), "Benchmark did not read the entire table"); + }); + + auto const elapsed_time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); + auto const data_processed = data_size * num_cols / view.num_columns(); + state.add_element_count(static_cast(data_processed) / elapsed_time, "bytes_per_second"); + state.add_buffer_size( + mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); + state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size"); +} + +NVBENCH_BENCH_TYPES(BM_jsonlines_read_options, + NVBENCH_TYPE_AXES(nvbench::enum_type_list, + nvbench::enum_type_list, + nvbench::enum_type_list, + nvbench::enum_type_list, + nvbench::enum_type_list)) + .set_name("jsonlines_reader_normalize_single_quotes") + .set_type_axes_names({"row_selection", + "normalize_single_quotes", + "normalize_whitespace", + "mixed_types_as_string", + "recovery_mode"}) + .set_min_samples(6) + .add_int64_axis("num_chunks", nvbench::range(1, 1, 1)); + +NVBENCH_BENCH_TYPES( + BM_jsonlines_read_options, + NVBENCH_TYPE_AXES(nvbench::enum_type_list, + nvbench::enum_type_list, + nvbench::enum_type_list, + nvbench::enum_type_list, + nvbench::enum_type_list)) + .set_name("jsonlines_reader_normalize_whitespace") + .set_type_axes_names({"row_selection", + 
"normalize_single_quotes", + "normalize_whitespace", + "mixed_types_as_string", + "recovery_mode"}) + .set_min_samples(6) + .add_int64_axis("num_chunks", nvbench::range(1, 1, 1)); + +NVBENCH_BENCH_TYPES( + BM_jsonlines_read_options, + NVBENCH_TYPE_AXES(nvbench::enum_type_list, + nvbench::enum_type_list, + nvbench::enum_type_list, + nvbench::enum_type_list, + nvbench::enum_type_list)) + .set_name("jsonlines_reader_mixed_types_as_string") + .set_type_axes_names({"row_selection", + "normalize_single_quotes", + "normalize_whitespace", + "mixed_types_as_string", + "recovery_mode"}) + .set_min_samples(6) + .add_int64_axis("num_chunks", nvbench::range(1, 1, 1)); + +NVBENCH_BENCH_TYPES( + BM_jsonlines_read_options, + NVBENCH_TYPE_AXES(nvbench::enum_type_list, + nvbench::enum_type_list, + nvbench::enum_type_list, + nvbench::enum_type_list, + nvbench::enum_type_list)) + .set_name("jsonlines_reader_row_selection") + .set_type_axes_names({"row_selection", + "normalize_single_quotes", + "normalize_whitespace", + "mixed_types_as_string", + "recovery_mode"}) + .set_min_samples(6) + .add_int64_axis("num_chunks", nvbench::range(1, 5, 1)); + +NVBENCH_BENCH_TYPES(BM_json_read_options, + NVBENCH_TYPE_AXES(nvbench::enum_type_list)) + .set_name("json_reader") + .set_type_axes_names({"json_lines"}) + .set_min_samples(6); diff --git a/cpp/benchmarks/io/nvbench_helpers.hpp b/cpp/benchmarks/io/nvbench_helpers.hpp index dd96f6fa4cd..8b79912c7ee 100644 --- a/cpp/benchmarks/io/nvbench_helpers.hpp +++ b/cpp/benchmarks/io/nvbench_helpers.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -169,3 +169,68 @@ NVBENCH_DECLARE_ENUM_TYPE_STRINGS( } }, [](auto) { return std::string{}; }) + +enum class json_lines : bool { YES, NO }; + +enum class normalize_single_quotes : bool { YES, NO }; + +enum class normalize_whitespace : bool { YES, NO }; + +enum class mixed_types_as_string : bool { YES, NO }; + +enum class recovery_mode : bool { FAIL, RECOVER_WITH_NULL }; + +NVBENCH_DECLARE_ENUM_TYPE_STRINGS( + json_lines, + [](auto value) { + switch (value) { + case json_lines::YES: return "YES"; + case json_lines::NO: return "NO"; + default: return "Unknown"; + } + }, + [](auto) { return std::string{}; }) + +NVBENCH_DECLARE_ENUM_TYPE_STRINGS( + normalize_single_quotes, + [](auto value) { + switch (value) { + case normalize_single_quotes::YES: return "YES"; + case normalize_single_quotes::NO: return "NO"; + default: return "Unknown"; + } + }, + [](auto) { return std::string{}; }) + +NVBENCH_DECLARE_ENUM_TYPE_STRINGS( + normalize_whitespace, + [](auto value) { + switch (value) { + case normalize_whitespace::YES: return "YES"; + case normalize_whitespace::NO: return "NO"; + default: return "Unknown"; + } + }, + [](auto) { return std::string{}; }) + +NVBENCH_DECLARE_ENUM_TYPE_STRINGS( + mixed_types_as_string, + [](auto value) { + switch (value) { + case mixed_types_as_string::YES: return "YES"; + case mixed_types_as_string::NO: return "NO"; + default: return "Unknown"; + } + }, + [](auto) { return std::string{}; }) + +NVBENCH_DECLARE_ENUM_TYPE_STRINGS( + recovery_mode, + [](auto value) { + switch (value) { + case recovery_mode::FAIL: return "FAIL"; + case recovery_mode::RECOVER_WITH_NULL: return "RECOVER_WITH_NULL"; + default: return "Unknown"; + } + }, + [](auto) { return std::string{}; }) diff --git a/cpp/benchmarks/io/orc/orc_reader_input.cpp b/cpp/benchmarks/io/orc/orc_reader_input.cpp index fdb7dbe59b8..b7c214a8374 100644 --- a/cpp/benchmarks/io/orc/orc_reader_input.cpp +++ b/cpp/benchmarks/io/orc/orc_reader_input.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 
2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,31 +24,59 @@ #include +namespace { + // Size of the data in the benchmark dataframe; chosen to be low enough to allow benchmarks to // run on most GPUs, but large enough to allow highest throughput -constexpr int64_t data_size = 512 << 20; constexpr cudf::size_type num_cols = 64; +constexpr std::size_t data_size = 512 << 20; +constexpr std::size_t Mbytes = 1024 * 1024; +template void orc_read_common(cudf::size_type num_rows_to_read, cuio_source_sink_pair& source_sink, nvbench::state& state) { - cudf::io::orc_reader_options read_opts = - cudf::io::orc_reader_options::builder(source_sink.make_source_info()); + auto const read_opts = + cudf::io::orc_reader_options::builder(source_sink.make_source_info()).build(); auto mem_stats_logger = cudf::memory_stats_logger(); // init stats logger state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); - state.exec( - nvbench::exec_tag::sync | nvbench::exec_tag::timer, [&](nvbench::launch& launch, auto& timer) { - try_drop_l3_cache(); - - timer.start(); - auto const result = cudf::io::read_orc(read_opts); - timer.stop(); - CUDF_EXPECTS(result.tbl->num_columns() == num_cols, "Unexpected number of columns"); - CUDF_EXPECTS(result.tbl->num_rows() == num_rows_to_read, "Unexpected number of rows"); - }); + if constexpr (is_chunked_read) { + state.exec( + nvbench::exec_tag::sync | nvbench::exec_tag::timer, [&](nvbench::launch&, auto& timer) { + try_drop_l3_cache(); + auto const output_limit_MB = + static_cast(state.get_int64("chunk_read_limit_MB")); + auto const read_limit_MB = static_cast(state.get_int64("pass_read_limit_MB")); + + auto reader = + cudf::io::chunked_orc_reader(output_limit_MB * Mbytes, read_limit_MB * Mbytes, read_opts); + cudf::size_type num_rows{0}; + + 
timer.start(); + do { + auto chunk = reader.read_chunk(); + num_rows += chunk.tbl->num_rows(); + } while (reader.has_next()); + timer.stop(); + + CUDF_EXPECTS(num_rows == num_rows_to_read, "Unexpected number of rows"); + }); + } else { // not is_chunked_read + state.exec( + nvbench::exec_tag::sync | nvbench::exec_tag::timer, [&](nvbench::launch&, auto& timer) { + try_drop_l3_cache(); + + timer.start(); + auto const result = cudf::io::read_orc(read_opts); + timer.stop(); + + CUDF_EXPECTS(result.tbl->num_columns() == num_cols, "Unexpected number of columns"); + CUDF_EXPECTS(result.tbl->num_rows() == num_rows_to_read, "Unexpected number of rows"); + }); + } auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); state.add_element_count(static_cast(data_size) / time, "bytes_per_second"); @@ -57,6 +85,8 @@ void orc_read_common(cudf::size_type num_rows_to_read, state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size"); } +} // namespace + template void BM_orc_read_data(nvbench::state& state, nvbench::type_list, nvbench::enum_type>) @@ -79,13 +109,11 @@ void BM_orc_read_data(nvbench::state& state, return view.num_rows(); }(); - orc_read_common(num_rows_written, source_sink, state); + orc_read_common(num_rows_written, source_sink, state); } -template -void BM_orc_read_io_compression( - nvbench::state& state, - nvbench::type_list, nvbench::enum_type>) +template +void orc_read_io_compression(nvbench::state& state) { auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL_SIGNED), static_cast(data_type::FLOAT), @@ -95,15 +123,21 @@ void BM_orc_read_io_compression( static_cast(data_type::LIST), static_cast(data_type::STRUCT)}); - cudf::size_type const cardinality = state.get_int64("cardinality"); - cudf::size_type const run_length = state.get_int64("run_length"); + auto const [cardinality, run_length] = [&]() -> std::pair { + if constexpr (chunked_read) { + return {0, 4}; + } else { + return 
{static_cast(state.get_int64("cardinality")), + static_cast(state.get_int64("run_length"))}; + } + }(); cuio_source_sink_pair source_sink(IOType); auto const num_rows_written = [&]() { auto const tbl = create_random_table( cycle_dtypes(d_type, num_cols), table_size_bytes{data_size}, - data_profile_builder().cardinality(cardinality).avg_run_length(run_length)); + data_profile_builder{}.cardinality(cardinality).avg_run_length(run_length)); auto const view = tbl->view(); cudf::io::orc_writer_options opts = @@ -113,7 +147,23 @@ void BM_orc_read_io_compression( return view.num_rows(); }(); - orc_read_common(num_rows_written, source_sink, state); + orc_read_common(num_rows_written, source_sink, state); +} + +template +void BM_orc_read_io_compression( + nvbench::state& state, + nvbench::type_list, nvbench::enum_type>) +{ + return orc_read_io_compression(state); +} + +template +void BM_orc_chunked_read_io_compression(nvbench::state& state, + nvbench::type_list>) +{ + // Only run benchmark using HOST_BUFFER IO. 
+ return orc_read_io_compression(state); } using d_type_list = nvbench::enum_type_list +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include + +#include + +// TODO: remove this once pinned/pooled is enabled by default in cuIO +void set_cuio_host_pinned_pool() +{ + using host_pooled_mr = rmm::mr::pool_memory_resource; + static std::shared_ptr mr = std::make_shared( + std::make_shared().get(), 256ul * 1024 * 1024); + cudf::io::set_host_memory_resource(*mr); +} + +size_t get_num_reads(nvbench::state const& state) { return state.get_int64("num_threads"); } + +size_t get_read_size(nvbench::state const& state) +{ + auto const num_reads = get_num_reads(state); + return state.get_int64("total_data_size") / num_reads; +} + +std::string get_label(std::string const& test_name, nvbench::state const& state) +{ + auto const num_cols = state.get_int64("num_cols"); + size_t const read_size_mb = get_read_size(state) / (1024 * 1024); + return {test_name + ", " + std::to_string(num_cols) + " columns, " + + std::to_string(state.get_int64("num_threads")) + " threads " + " (" + + std::to_string(read_size_mb) + " MB each)"}; +} + +std::tuple, size_t, size_t> write_file_data( + nvbench::state& state, std::vector const& d_types) +{ + cudf::size_type const cardinality = state.get_int64("cardinality"); + cudf::size_type const run_length = state.get_int64("run_length"); + cudf::size_type const num_cols = state.get_int64("num_cols"); + size_t const num_files = get_num_reads(state); + size_t const per_file_data_size = get_read_size(state); + + std::vector source_sink_vector; + + size_t total_file_size = 0; + + for (size_t i = 0; i < num_files; ++i) { + cuio_source_sink_pair source_sink{cudf::io::io_type::HOST_BUFFER}; + + auto const tbl = create_random_table( + cycle_dtypes(d_types, num_cols), + table_size_bytes{per_file_data_size}, + data_profile_builder().cardinality(cardinality).avg_run_length(run_length)); + 
auto const view = tbl->view(); + + cudf::io::parquet_writer_options write_opts = + cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view) + .compression(cudf::io::compression_type::SNAPPY) + .max_page_size_rows(50000) + .max_page_size_bytes(1024 * 1024); + + cudf::io::write_parquet(write_opts); + total_file_size += source_sink.size(); + + source_sink_vector.push_back(std::move(source_sink)); + } + + return {std::move(source_sink_vector), total_file_size, num_files}; +} + +void BM_parquet_multithreaded_read_common(nvbench::state& state, + std::vector const& d_types, + std::string const& label) +{ + size_t const data_size = state.get_int64("total_data_size"); + auto const num_threads = state.get_int64("num_threads"); + + set_cuio_host_pinned_pool(); + + auto streams = cudf::detail::fork_streams(cudf::get_default_stream(), num_threads); + cudf::detail::thread_pool threads(num_threads); + + auto [source_sink_vector, total_file_size, num_files] = write_file_data(state, d_types); + + auto mem_stats_logger = cudf::memory_stats_logger(); + + nvtxRangePushA(("(read) " + label).c_str()); + state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer, + [&](nvbench::launch& launch, auto& timer) { + auto read_func = [&](int index) { + auto const stream = streams[index % num_threads]; + auto& source_sink = source_sink_vector[index]; + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(source_sink.make_source_info()); + cudf::io::read_parquet(read_opts, stream, rmm::mr::get_current_device_resource()); + }; + + threads.paused = true; + for (size_t i = 0; i < num_files; ++i) { + threads.submit(read_func, i); + } + timer.start(); + threads.paused = false; + threads.wait_for_tasks(); + cudf::detail::join_streams(streams, cudf::get_default_stream()); + timer.stop(); + }); + nvtxRangePop(); + + auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); + state.add_element_count(static_cast(data_size) 
/ time, "bytes_per_second"); + state.add_buffer_size( + mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); + state.add_buffer_size(total_file_size, "encoded_file_size", "encoded_file_size"); +} + +void BM_parquet_multithreaded_read_mixed(nvbench::state& state) +{ + auto label = get_label("mixed", state); + nvtxRangePushA(label.c_str()); + BM_parquet_multithreaded_read_common( + state, {cudf::type_id::INT32, cudf::type_id::DECIMAL64, cudf::type_id::STRING}, label); + nvtxRangePop(); +} + +void BM_parquet_multithreaded_read_fixed_width(nvbench::state& state) +{ + auto label = get_label("fixed width", state); + nvtxRangePushA(label.c_str()); + BM_parquet_multithreaded_read_common(state, {cudf::type_id::INT32}, label); + nvtxRangePop(); +} + +void BM_parquet_multithreaded_read_string(nvbench::state& state) +{ + auto label = get_label("string", state); + nvtxRangePushA(label.c_str()); + BM_parquet_multithreaded_read_common(state, {cudf::type_id::STRING}, label); + nvtxRangePop(); +} + +void BM_parquet_multithreaded_read_list(nvbench::state& state) +{ + auto label = get_label("list", state); + nvtxRangePushA(label.c_str()); + BM_parquet_multithreaded_read_common(state, {cudf::type_id::LIST}, label); + nvtxRangePop(); +} + +void BM_parquet_multithreaded_read_chunked_common(nvbench::state& state, + std::vector const& d_types, + std::string const& label) +{ + size_t const data_size = state.get_int64("total_data_size"); + auto const num_threads = state.get_int64("num_threads"); + size_t const input_limit = state.get_int64("input_limit"); + size_t const output_limit = state.get_int64("output_limit"); + + set_cuio_host_pinned_pool(); + + auto streams = cudf::detail::fork_streams(cudf::get_default_stream(), num_threads); + cudf::detail::thread_pool threads(num_threads); + auto [source_sink_vector, total_file_size, num_files] = write_file_data(state, d_types); + + auto mem_stats_logger = cudf::memory_stats_logger(); + + nvtxRangePushA(("(read) " + 
label).c_str()); + std::vector chunks; + state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer, + [&](nvbench::launch& launch, auto& timer) { + auto read_func = [&](int index) { + auto const stream = streams[index % num_threads]; + auto& source_sink = source_sink_vector[index]; + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(source_sink.make_source_info()); + // divide chunk limits by number of threads so the number of chunks produced is the + // same for all cases. this seems better than the alternative, which is to keep the + // limits the same. if we do that, as the number of threads goes up, the number of + // chunks goes down - so are we actually benchmarking the same thing in that case? + auto reader = cudf::io::chunked_parquet_reader( + output_limit / num_threads, input_limit / num_threads, read_opts, stream); + + // read all the chunks + do { + auto table = reader.read_chunk(); + } while (reader.has_next()); + }; + + threads.paused = true; + for (size_t i = 0; i < num_files; ++i) { + threads.submit(read_func, i); + } + timer.start(); + threads.paused = false; + threads.wait_for_tasks(); + cudf::detail::join_streams(streams, cudf::get_default_stream()); + timer.stop(); + }); + nvtxRangePop(); + + auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); + state.add_element_count(static_cast(data_size) / time, "bytes_per_second"); + state.add_buffer_size( + mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); + state.add_buffer_size(total_file_size, "encoded_file_size", "encoded_file_size"); +} + +void BM_parquet_multithreaded_read_chunked_mixed(nvbench::state& state) +{ + auto label = get_label("mixed", state); + nvtxRangePushA(label.c_str()); + BM_parquet_multithreaded_read_chunked_common( + state, {cudf::type_id::INT32, cudf::type_id::DECIMAL64, cudf::type_id::STRING}, label); + nvtxRangePop(); +} + +void 
BM_parquet_multithreaded_read_chunked_fixed_width(nvbench::state& state) +{ + auto label = get_label("mixed", state); + nvtxRangePushA(label.c_str()); + BM_parquet_multithreaded_read_chunked_common(state, {cudf::type_id::INT32}, label); + nvtxRangePop(); +} + +void BM_parquet_multithreaded_read_chunked_string(nvbench::state& state) +{ + auto label = get_label("string", state); + nvtxRangePushA(label.c_str()); + BM_parquet_multithreaded_read_chunked_common(state, {cudf::type_id::STRING}, label); + nvtxRangePop(); +} + +void BM_parquet_multithreaded_read_chunked_list(nvbench::state& state) +{ + auto label = get_label("list", state); + nvtxRangePushA(label.c_str()); + BM_parquet_multithreaded_read_chunked_common(state, {cudf::type_id::LIST}, label); + nvtxRangePop(); +} + +// mixed data types: fixed width and strings +NVBENCH_BENCH(BM_parquet_multithreaded_read_mixed) + .set_name("parquet_multithreaded_read_decode_mixed") + .set_min_samples(4) + .add_int64_axis("cardinality", {1000}) + .add_int64_axis("total_data_size", {512 * 1024 * 1024, 1024 * 1024 * 1024}) + .add_int64_axis("num_threads", {1, 2, 4, 8}) + .add_int64_axis("num_cols", {4}) + .add_int64_axis("run_length", {8}); + +NVBENCH_BENCH(BM_parquet_multithreaded_read_fixed_width) + .set_name("parquet_multithreaded_read_decode_fixed_width") + .set_min_samples(4) + .add_int64_axis("cardinality", {1000}) + .add_int64_axis("total_data_size", {512 * 1024 * 1024, 1024 * 1024 * 1024}) + .add_int64_axis("num_threads", {1, 2, 4, 8}) + .add_int64_axis("num_cols", {4}) + .add_int64_axis("run_length", {8}); + +NVBENCH_BENCH(BM_parquet_multithreaded_read_string) + .set_name("parquet_multithreaded_read_decode_string") + .set_min_samples(4) + .add_int64_axis("cardinality", {1000}) + .add_int64_axis("total_data_size", {512 * 1024 * 1024, 1024 * 1024 * 1024}) + .add_int64_axis("num_threads", {1, 2, 4, 8}) + .add_int64_axis("num_cols", {4}) + .add_int64_axis("run_length", {8}); + 
+NVBENCH_BENCH(BM_parquet_multithreaded_read_list) + .set_name("parquet_multithreaded_read_decode_list") + .set_min_samples(4) + .add_int64_axis("cardinality", {1000}) + .add_int64_axis("total_data_size", {512 * 1024 * 1024, 1024 * 1024 * 1024}) + .add_int64_axis("num_threads", {1, 2, 4, 8}) + .add_int64_axis("num_cols", {4}) + .add_int64_axis("run_length", {8}); + +// mixed data types: fixed width, strings +NVBENCH_BENCH(BM_parquet_multithreaded_read_chunked_mixed) + .set_name("parquet_multithreaded_read_decode_chunked_mixed") + .set_min_samples(4) + .add_int64_axis("cardinality", {1000}) + .add_int64_axis("total_data_size", {512 * 1024 * 1024, 1024 * 1024 * 1024}) + .add_int64_axis("num_threads", {1, 2, 4, 8}) + .add_int64_axis("num_cols", {4}) + .add_int64_axis("run_length", {8}) + .add_int64_axis("input_limit", {640 * 1024 * 1024}) + .add_int64_axis("output_limit", {640 * 1024 * 1024}); + +NVBENCH_BENCH(BM_parquet_multithreaded_read_chunked_fixed_width) + .set_name("parquet_multithreaded_read_decode_chunked_fixed_width") + .set_min_samples(4) + .add_int64_axis("cardinality", {1000}) + .add_int64_axis("total_data_size", {512 * 1024 * 1024, 1024 * 1024 * 1024}) + .add_int64_axis("num_threads", {1, 2, 4, 8}) + .add_int64_axis("num_cols", {4}) + .add_int64_axis("run_length", {8}) + .add_int64_axis("input_limit", {640 * 1024 * 1024}) + .add_int64_axis("output_limit", {640 * 1024 * 1024}); + +NVBENCH_BENCH(BM_parquet_multithreaded_read_chunked_string) + .set_name("parquet_multithreaded_read_decode_chunked_string") + .set_min_samples(4) + .add_int64_axis("cardinality", {1000}) + .add_int64_axis("total_data_size", {512 * 1024 * 1024, 1024 * 1024 * 1024}) + .add_int64_axis("num_threads", {1, 2, 4, 8}) + .add_int64_axis("num_cols", {4}) + .add_int64_axis("run_length", {8}) + .add_int64_axis("input_limit", {640 * 1024 * 1024}) + .add_int64_axis("output_limit", {640 * 1024 * 1024}); + +NVBENCH_BENCH(BM_parquet_multithreaded_read_chunked_list) + 
.set_name("parquet_multithreaded_read_decode_chunked_list") + .set_min_samples(4) + .add_int64_axis("cardinality", {1000}) + .add_int64_axis("total_data_size", {512 * 1024 * 1024, 1024 * 1024 * 1024}) + .add_int64_axis("num_threads", {1, 2, 4, 8}) + .add_int64_axis("num_cols", {4}) + .add_int64_axis("run_length", {8}) + .add_int64_axis("input_limit", {640 * 1024 * 1024}) + .add_int64_axis("output_limit", {640 * 1024 * 1024}); diff --git a/cpp/benchmarks/join/conditional_join.cu b/cpp/benchmarks/join/conditional_join.cu index d721de0e8fd..d95fc0a5b59 100644 --- a/cpp/benchmarks/join/conditional_join.cu +++ b/cpp/benchmarks/join/conditional_join.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,126 +16,102 @@ #include -template +template class ConditionalJoin : public cudf::benchmark {}; // For compatibility with the shared logic for equality (hash) joins, all of // the join lambdas defined by these macros accept a null_equality parameter // but ignore it (don't forward it to the underlying join implementation) // because conditional joins do not use this parameter. 
-#define CONDITIONAL_INNER_JOIN_BENCHMARK_DEFINE(name, key_type, payload_type, nullable) \ - BENCHMARK_TEMPLATE_DEFINE_F(ConditionalJoin, name, key_type, payload_type) \ - (::benchmark::State & st) \ - { \ - auto join = [](cudf::table_view const& left, \ - cudf::table_view const& right, \ - cudf::ast::operation binary_pred, \ - cudf::null_equality compare_nulls) { \ - return cudf::conditional_inner_join(left, right, binary_pred); \ - }; \ - BM_join(st, join); \ +#define CONDITIONAL_INNER_JOIN_BENCHMARK_DEFINE(name, Key, Nullable) \ + BENCHMARK_TEMPLATE_DEFINE_F(ConditionalJoin, name, Key) \ + (::benchmark::State & st) \ + { \ + auto join = [](cudf::table_view const& left, \ + cudf::table_view const& right, \ + cudf::ast::operation binary_pred, \ + cudf::null_equality compare_nulls) { \ + return cudf::conditional_inner_join(left, right, binary_pred); \ + }; \ + BM_join(st, join); \ } -CONDITIONAL_INNER_JOIN_BENCHMARK_DEFINE(conditional_inner_join_32bit, int32_t, int32_t, false); -CONDITIONAL_INNER_JOIN_BENCHMARK_DEFINE(conditional_inner_join_64bit, int64_t, int64_t, false); -CONDITIONAL_INNER_JOIN_BENCHMARK_DEFINE(conditional_inner_join_32bit_nulls, int32_t, int32_t, true); -CONDITIONAL_INNER_JOIN_BENCHMARK_DEFINE(conditional_inner_join_64bit_nulls, int64_t, int64_t, true); - -#define CONDITIONAL_LEFT_JOIN_BENCHMARK_DEFINE(name, key_type, payload_type, nullable) \ - BENCHMARK_TEMPLATE_DEFINE_F(ConditionalJoin, name, key_type, payload_type) \ - (::benchmark::State & st) \ - { \ - auto join = [](cudf::table_view const& left, \ - cudf::table_view const& right, \ - cudf::ast::operation binary_pred, \ - cudf::null_equality compare_nulls) { \ - return cudf::conditional_left_join(left, right, binary_pred); \ - }; \ - BM_join(st, join); \ +CONDITIONAL_INNER_JOIN_BENCHMARK_DEFINE(conditional_inner_join_32bit, int32_t, false); +CONDITIONAL_INNER_JOIN_BENCHMARK_DEFINE(conditional_inner_join_64bit, int64_t, false); 
+CONDITIONAL_INNER_JOIN_BENCHMARK_DEFINE(conditional_inner_join_32bit_nulls, int32_t, true); +CONDITIONAL_INNER_JOIN_BENCHMARK_DEFINE(conditional_inner_join_64bit_nulls, int64_t, true); + +#define CONDITIONAL_LEFT_JOIN_BENCHMARK_DEFINE(name, Key, Nullable) \ + BENCHMARK_TEMPLATE_DEFINE_F(ConditionalJoin, name, Key) \ + (::benchmark::State & st) \ + { \ + auto join = [](cudf::table_view const& left, \ + cudf::table_view const& right, \ + cudf::ast::operation binary_pred, \ + cudf::null_equality compare_nulls) { \ + return cudf::conditional_left_join(left, right, binary_pred); \ + }; \ + BM_join(st, join); \ } -CONDITIONAL_LEFT_JOIN_BENCHMARK_DEFINE(conditional_left_join_32bit, int32_t, int32_t, false); -CONDITIONAL_LEFT_JOIN_BENCHMARK_DEFINE(conditional_left_join_64bit, int64_t, int64_t, false); -CONDITIONAL_LEFT_JOIN_BENCHMARK_DEFINE(conditional_left_join_32bit_nulls, int32_t, int32_t, true); -CONDITIONAL_LEFT_JOIN_BENCHMARK_DEFINE(conditional_left_join_64bit_nulls, int64_t, int64_t, true); - -#define CONDITIONAL_FULL_JOIN_BENCHMARK_DEFINE(name, key_type, payload_type, nullable) \ - BENCHMARK_TEMPLATE_DEFINE_F(ConditionalJoin, name, key_type, payload_type) \ - (::benchmark::State & st) \ - { \ - auto join = [](cudf::table_view const& left, \ - cudf::table_view const& right, \ - cudf::ast::operation binary_pred, \ - cudf::null_equality compare_nulls) { \ - return cudf::conditional_full_join(left, right, binary_pred); \ - }; \ - BM_join(st, join); \ +CONDITIONAL_LEFT_JOIN_BENCHMARK_DEFINE(conditional_left_join_32bit, int32_t, false); +CONDITIONAL_LEFT_JOIN_BENCHMARK_DEFINE(conditional_left_join_64bit, int64_t, false); +CONDITIONAL_LEFT_JOIN_BENCHMARK_DEFINE(conditional_left_join_32bit_nulls, int32_t, true); +CONDITIONAL_LEFT_JOIN_BENCHMARK_DEFINE(conditional_left_join_64bit_nulls, int64_t, true); + +#define CONDITIONAL_FULL_JOIN_BENCHMARK_DEFINE(name, Key, Nullable) \ + BENCHMARK_TEMPLATE_DEFINE_F(ConditionalJoin, name, Key) \ + (::benchmark::State & st) \ + { \ + 
auto join = [](cudf::table_view const& left, \ + cudf::table_view const& right, \ + cudf::ast::operation binary_pred, \ + cudf::null_equality compare_nulls) { \ + return cudf::conditional_full_join(left, right, binary_pred); \ + }; \ + BM_join(st, join); \ } -CONDITIONAL_FULL_JOIN_BENCHMARK_DEFINE(conditional_full_join_32bit, int32_t, int32_t, false); -CONDITIONAL_FULL_JOIN_BENCHMARK_DEFINE(conditional_full_join_64bit, int64_t, int64_t, false); -CONDITIONAL_FULL_JOIN_BENCHMARK_DEFINE(conditional_full_join_32bit_nulls, int32_t, int32_t, true); -CONDITIONAL_FULL_JOIN_BENCHMARK_DEFINE(conditional_full_join_64bit_nulls, int64_t, int64_t, true); - -#define CONDITIONAL_LEFT_ANTI_JOIN_BENCHMARK_DEFINE(name, key_type, payload_type, nullable) \ - BENCHMARK_TEMPLATE_DEFINE_F(ConditionalJoin, name, key_type, payload_type) \ - (::benchmark::State & st) \ - { \ - auto join = [](cudf::table_view const& left, \ - cudf::table_view const& right, \ - cudf::ast::operation binary_pred, \ - cudf::null_equality compare_nulls) { \ - return cudf::conditional_left_anti_join(left, right, binary_pred); \ - }; \ - BM_join(st, join); \ +CONDITIONAL_FULL_JOIN_BENCHMARK_DEFINE(conditional_full_join_32bit, int32_t, false); +CONDITIONAL_FULL_JOIN_BENCHMARK_DEFINE(conditional_full_join_64bit, int64_t, false); +CONDITIONAL_FULL_JOIN_BENCHMARK_DEFINE(conditional_full_join_32bit_nulls, int32_t, true); +CONDITIONAL_FULL_JOIN_BENCHMARK_DEFINE(conditional_full_join_64bit_nulls, int64_t, true); + +#define CONDITIONAL_LEFT_ANTI_JOIN_BENCHMARK_DEFINE(name, Key, Nullable) \ + BENCHMARK_TEMPLATE_DEFINE_F(ConditionalJoin, name, Key) \ + (::benchmark::State & st) \ + { \ + auto join = [](cudf::table_view const& left, \ + cudf::table_view const& right, \ + cudf::ast::operation binary_pred, \ + cudf::null_equality compare_nulls) { \ + return cudf::conditional_left_anti_join(left, right, binary_pred); \ + }; \ + BM_join(st, join); \ } -CONDITIONAL_LEFT_ANTI_JOIN_BENCHMARK_DEFINE(conditional_left_anti_join_32bit, - 
int32_t, - int32_t, - false); -CONDITIONAL_LEFT_ANTI_JOIN_BENCHMARK_DEFINE(conditional_left_anti_join_64bit, - int64_t, - int64_t, - false); -CONDITIONAL_LEFT_ANTI_JOIN_BENCHMARK_DEFINE(conditional_left_anti_join_32bit_nulls, - int32_t, - int32_t, - true); -CONDITIONAL_LEFT_ANTI_JOIN_BENCHMARK_DEFINE(conditional_left_anti_join_64bit_nulls, - int64_t, - int64_t, - true); - -#define CONDITIONAL_LEFT_SEMI_JOIN_BENCHMARK_DEFINE(name, key_type, payload_type, nullable) \ - BENCHMARK_TEMPLATE_DEFINE_F(ConditionalJoin, name, key_type, payload_type) \ - (::benchmark::State & st) \ - { \ - auto join = [](cudf::table_view const& left, \ - cudf::table_view const& right, \ - cudf::ast::operation binary_pred, \ - cudf::null_equality compare_nulls) { \ - return cudf::conditional_left_semi_join(left, right, binary_pred); \ - }; \ - BM_join(st, join); \ +CONDITIONAL_LEFT_ANTI_JOIN_BENCHMARK_DEFINE(conditional_left_anti_join_32bit, int32_t, false); +CONDITIONAL_LEFT_ANTI_JOIN_BENCHMARK_DEFINE(conditional_left_anti_join_64bit, int64_t, false); +CONDITIONAL_LEFT_ANTI_JOIN_BENCHMARK_DEFINE(conditional_left_anti_join_32bit_nulls, int32_t, true); +CONDITIONAL_LEFT_ANTI_JOIN_BENCHMARK_DEFINE(conditional_left_anti_join_64bit_nulls, int64_t, true); + +#define CONDITIONAL_LEFT_SEMI_JOIN_BENCHMARK_DEFINE(name, Key, Nullable) \ + BENCHMARK_TEMPLATE_DEFINE_F(ConditionalJoin, name, Key) \ + (::benchmark::State & st) \ + { \ + auto join = [](cudf::table_view const& left, \ + cudf::table_view const& right, \ + cudf::ast::operation binary_pred, \ + cudf::null_equality compare_nulls) { \ + return cudf::conditional_left_semi_join(left, right, binary_pred); \ + }; \ + BM_join(st, join); \ } -CONDITIONAL_LEFT_SEMI_JOIN_BENCHMARK_DEFINE(conditional_left_semi_join_32bit, - int32_t, - int32_t, - false); -CONDITIONAL_LEFT_SEMI_JOIN_BENCHMARK_DEFINE(conditional_left_semi_join_64bit, - int64_t, - int64_t, - false); -CONDITIONAL_LEFT_SEMI_JOIN_BENCHMARK_DEFINE(conditional_left_semi_join_32bit_nulls, - 
int32_t, - int32_t, - true); -CONDITIONAL_LEFT_SEMI_JOIN_BENCHMARK_DEFINE(conditional_left_semi_join_64bit_nulls, - int64_t, - int64_t, - true); +CONDITIONAL_LEFT_SEMI_JOIN_BENCHMARK_DEFINE(conditional_left_semi_join_32bit, int32_t, false); +CONDITIONAL_LEFT_SEMI_JOIN_BENCHMARK_DEFINE(conditional_left_semi_join_64bit, int64_t, false); +CONDITIONAL_LEFT_SEMI_JOIN_BENCHMARK_DEFINE(conditional_left_semi_join_32bit_nulls, int32_t, true); +CONDITIONAL_LEFT_SEMI_JOIN_BENCHMARK_DEFINE(conditional_left_semi_join_64bit_nulls, int64_t, true); // inner join ----------------------------------------------------------------------- BENCHMARK_REGISTER_F(ConditionalJoin, conditional_inner_join_32bit) diff --git a/cpp/benchmarks/join/distinct_join.cu b/cpp/benchmarks/join/distinct_join.cu index 4a68ee3878e..af8fa1f9d94 100644 --- a/cpp/benchmarks/join/distinct_join.cu +++ b/cpp/benchmarks/join/distinct_join.cu @@ -16,12 +16,10 @@ #include "join_common.hpp" -template +template void distinct_inner_join(nvbench::state& state, - nvbench::type_list>) + nvbench::type_list>) { - skip_helper(state); - auto join = [](cudf::table_view const& build_input, cudf::table_view const& probe_input, cudf::null_equality compare_nulls, @@ -35,15 +33,13 @@ void distinct_inner_join(nvbench::state& state, return hj_obj.inner_join(stream); }; - BM_join(state, join); + BM_join(state, join); } -template +template void distinct_left_join(nvbench::state& state, - nvbench::type_list>) + nvbench::type_list>) { - skip_helper(state); - auto join = [](cudf::table_view const& build_input, cudf::table_view const& probe_input, cudf::null_equality compare_nulls, @@ -57,65 +53,18 @@ void distinct_left_join(nvbench::state& state, return hj_obj.left_join(stream); }; - BM_join(state, join); + BM_join(state, join); } -// inner join ----------------------------------------------------------------------- NVBENCH_BENCH_TYPES(distinct_inner_join, - NVBENCH_TYPE_AXES(nvbench::type_list, - nvbench::type_list, - 
nvbench::enum_type_list)) - .set_name("distinct_inner_join_32bit") - .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) - .add_int64_axis("Build Table Size", {100'000, 10'000'000, 80'000'000, 100'000'000}) - .add_int64_axis("Probe Table Size", - {100'000, 400'000, 10'000'000, 40'000'000, 100'000'000, 240'000'000}); - -NVBENCH_BENCH_TYPES(distinct_inner_join, - NVBENCH_TYPE_AXES(nvbench::type_list, - nvbench::type_list, - nvbench::enum_type_list)) - .set_name("distinct_inner_join_64bit") - .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) - .add_int64_axis("Build Table Size", {40'000'000, 50'000'000}) - .add_int64_axis("Probe Table Size", {50'000'000, 120'000'000}); - -NVBENCH_BENCH_TYPES(distinct_inner_join, - NVBENCH_TYPE_AXES(nvbench::type_list, - nvbench::type_list, - nvbench::enum_type_list)) - .set_name("distinct_inner_join_32bit_nulls") - .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) - .add_int64_axis("Build Table Size", {100'000, 10'000'000, 80'000'000, 100'000'000}) - .add_int64_axis("Probe Table Size", - {100'000, 400'000, 10'000'000, 40'000'000, 100'000'000, 240'000'000}); - -NVBENCH_BENCH_TYPES(distinct_inner_join, - NVBENCH_TYPE_AXES(nvbench::type_list, - nvbench::type_list, - nvbench::enum_type_list)) - .set_name("distinct_inner_join_64bit_nulls") - .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) - .add_int64_axis("Build Table Size", {40'000'000, 50'000'000}) - .add_int64_axis("Probe Table Size", {50'000'000, 120'000'000}); - -// left join ------------------------------------------------------------------------ -NVBENCH_BENCH_TYPES(distinct_left_join, - NVBENCH_TYPE_AXES(nvbench::type_list, - nvbench::type_list, - nvbench::enum_type_list)) - .set_name("distinct_left_join_32bit") - .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) - .add_int64_axis("Build Table Size", {100'000, 10'000'000, 80'000'000, 100'000'000}) - .add_int64_axis("Probe Table Size", - {100'000, 400'000, 10'000'000, 
40'000'000, 100'000'000, 240'000'000}); + NVBENCH_TYPE_AXES(JOIN_KEY_TYPE_RANGE, JOIN_NULLABLE_RANGE)) + .set_name("distinct_inner_join") + .set_type_axes_names({"Key", "Nullable"}) + .add_int64_axis("left_size", JOIN_SIZE_RANGE) + .add_int64_axis("right_size", JOIN_SIZE_RANGE); -NVBENCH_BENCH_TYPES(distinct_left_join, - NVBENCH_TYPE_AXES(nvbench::type_list, - nvbench::type_list, - nvbench::enum_type_list)) - .set_name("distinct_left_join_32bit_nulls") - .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) - .add_int64_axis("Build Table Size", {100'000, 10'000'000, 80'000'000, 100'000'000}) - .add_int64_axis("Probe Table Size", - {100'000, 400'000, 10'000'000, 40'000'000, 100'000'000, 240'000'000}); +NVBENCH_BENCH_TYPES(distinct_left_join, NVBENCH_TYPE_AXES(JOIN_KEY_TYPE_RANGE, JOIN_NULLABLE_RANGE)) + .set_name("distinct_left_join") + .set_type_axes_names({"Key", "Nullable"}) + .add_int64_axis("left_size", JOIN_SIZE_RANGE) + .add_int64_axis("right_size", JOIN_SIZE_RANGE); diff --git a/cpp/benchmarks/join/generate_input_tables.cuh b/cpp/benchmarks/join/generate_input_tables.cuh index 93401f01026..f7984b29d6b 100644 --- a/cpp/benchmarks/join/generate_input_tables.cuh +++ b/cpp/benchmarks/join/generate_input_tables.cuh @@ -16,6 +16,7 @@ #pragma once +#include #include #include #include @@ -34,7 +35,7 @@ CUDF_KERNEL void init_curand(curandState* state, int const nstates) { - int ithread = threadIdx.x + blockIdx.x * blockDim.x; + int ithread = cudf::detail::grid_1d::global_thread_id(); if (ithread < nstates) { curand_init(1234ULL, ithread, 0, state + ithread); } } @@ -46,13 +47,14 @@ CUDF_KERNEL void init_build_tbl(key_type* const build_tbl, curandState* state, int const num_states) { - auto const start_idx = blockIdx.x * blockDim.x + threadIdx.x; - auto const stride = blockDim.x * gridDim.x; + auto const start_idx = cudf::detail::grid_1d::global_thread_id(); + auto const stride = cudf::detail::grid_1d::grid_stride(); assert(start_idx < num_states); 
curandState localState = state[start_idx]; - for (size_type idx = start_idx; idx < build_tbl_size; idx += stride) { + for (cudf::thread_index_type tidx = start_idx; tidx < build_tbl_size; tidx += stride) { + auto const idx = static_cast(tidx); double const x = curand_uniform_double(&localState); build_tbl[idx] = static_cast(x * (build_tbl_size / multiplicity)); @@ -71,13 +73,14 @@ CUDF_KERNEL void init_probe_tbl(key_type* const probe_tbl, curandState* state, int const num_states) { - auto const start_idx = blockIdx.x * blockDim.x + threadIdx.x; - auto const stride = blockDim.x * gridDim.x; + auto const start_idx = cudf::detail::grid_1d::global_thread_id(); + auto const stride = cudf::detail::grid_1d::grid_stride(); assert(start_idx < num_states); curandState localState = state[start_idx]; - for (size_type idx = start_idx; idx < probe_tbl_size; idx += stride) { + for (cudf::thread_index_type tidx = start_idx; tidx < probe_tbl_size; tidx += stride) { + auto const idx = static_cast(tidx); key_type val; double x = curand_uniform_double(&localState); diff --git a/cpp/benchmarks/join/join.cu b/cpp/benchmarks/join/join.cu index 1c02a4488ac..c4a39da4662 100644 --- a/cpp/benchmarks/join/join.cu +++ b/cpp/benchmarks/join/join.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,12 +16,10 @@ #include -template +template void nvbench_inner_join(nvbench::state& state, - nvbench::type_list>) + nvbench::type_list>) { - skip_helper(state); - auto join = [](cudf::table_view const& left_input, cudf::table_view const& right_input, cudf::null_equality compare_nulls, @@ -33,15 +31,12 @@ void nvbench_inner_join(nvbench::state& state, return hj_obj.inner_join(right_input, std::nullopt, stream); }; - BM_join(state, join); + BM_join(state, join); } -template -void nvbench_left_join(nvbench::state& state, - nvbench::type_list>) +template +void nvbench_left_join(nvbench::state& state, nvbench::type_list>) { - skip_helper(state); - auto join = [](cudf::table_view const& left_input, cudf::table_view const& right_input, cudf::null_equality compare_nulls, @@ -53,15 +48,12 @@ void nvbench_left_join(nvbench::state& state, return hj_obj.left_join(right_input, std::nullopt, stream); }; - BM_join(state, join); + BM_join(state, join); } -template -void nvbench_full_join(nvbench::state& state, - nvbench::type_list>) +template +void nvbench_full_join(nvbench::state& state, nvbench::type_list>) { - skip_helper(state); - auto join = [](cudf::table_view const& left_input, cudf::table_view const& right_input, cudf::null_equality compare_nulls, @@ -73,122 +65,23 @@ void nvbench_full_join(nvbench::state& state, return hj_obj.full_join(right_input, std::nullopt, stream); }; - BM_join(state, join); + BM_join(state, join); } -// inner join ----------------------------------------------------------------------- -NVBENCH_BENCH_TYPES(nvbench_inner_join, - NVBENCH_TYPE_AXES(nvbench::type_list, - nvbench::type_list, - nvbench::enum_type_list)) - .set_name("inner_join_32bit") - .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) - .add_int64_axis("Build Table Size", {100'000, 10'000'000, 80'000'000, 100'000'000}) - .add_int64_axis("Probe Table Size", - {100'000, 400'000, 10'000'000, 40'000'000, 100'000'000, 240'000'000}); - -NVBENCH_BENCH_TYPES(nvbench_inner_join, 
- NVBENCH_TYPE_AXES(nvbench::type_list, - nvbench::type_list, - nvbench::enum_type_list)) - .set_name("inner_join_64bit") - .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) - .add_int64_axis("Build Table Size", {40'000'000, 50'000'000}) - .add_int64_axis("Probe Table Size", {50'000'000, 120'000'000}); - -NVBENCH_BENCH_TYPES(nvbench_inner_join, - NVBENCH_TYPE_AXES(nvbench::type_list, - nvbench::type_list, - nvbench::enum_type_list)) - .set_name("inner_join_32bit_nulls") - .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) - .add_int64_axis("Build Table Size", {100'000, 10'000'000, 80'000'000, 100'000'000}) - .add_int64_axis("Probe Table Size", - {100'000, 400'000, 10'000'000, 40'000'000, 100'000'000, 240'000'000}); - -NVBENCH_BENCH_TYPES(nvbench_inner_join, - NVBENCH_TYPE_AXES(nvbench::type_list, - nvbench::type_list, - nvbench::enum_type_list)) - .set_name("inner_join_64bit_nulls") - .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) - .add_int64_axis("Build Table Size", {40'000'000, 50'000'000}) - .add_int64_axis("Probe Table Size", {50'000'000, 120'000'000}); - -// left join ------------------------------------------------------------------------ -NVBENCH_BENCH_TYPES(nvbench_left_join, - NVBENCH_TYPE_AXES(nvbench::type_list, - nvbench::type_list, - nvbench::enum_type_list)) - .set_name("left_join_32bit") - .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) - .add_int64_axis("Build Table Size", {100'000, 10'000'000, 80'000'000, 100'000'000}) - .add_int64_axis("Probe Table Size", - {100'000, 400'000, 10'000'000, 40'000'000, 100'000'000, 240'000'000}); - -NVBENCH_BENCH_TYPES(nvbench_left_join, - NVBENCH_TYPE_AXES(nvbench::type_list, - nvbench::type_list, - nvbench::enum_type_list)) - .set_name("left_join_64bit") - .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) - .add_int64_axis("Build Table Size", {40'000'000, 50'000'000}) - .add_int64_axis("Probe Table Size", {50'000'000, 120'000'000}); - 
-NVBENCH_BENCH_TYPES(nvbench_left_join, - NVBENCH_TYPE_AXES(nvbench::type_list, - nvbench::type_list, - nvbench::enum_type_list)) - .set_name("left_join_32bit_nulls") - .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) - .add_int64_axis("Build Table Size", {100'000, 10'000'000, 80'000'000, 100'000'000}) - .add_int64_axis("Probe Table Size", - {100'000, 400'000, 10'000'000, 40'000'000, 100'000'000, 240'000'000}); - -NVBENCH_BENCH_TYPES(nvbench_left_join, - NVBENCH_TYPE_AXES(nvbench::type_list, - nvbench::type_list, - nvbench::enum_type_list)) - .set_name("left_join_64bit_nulls") - .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) - .add_int64_axis("Build Table Size", {40'000'000, 50'000'000}) - .add_int64_axis("Probe Table Size", {50'000'000, 120'000'000}); - -// full join ------------------------------------------------------------------------ -NVBENCH_BENCH_TYPES(nvbench_full_join, - NVBENCH_TYPE_AXES(nvbench::type_list, - nvbench::type_list, - nvbench::enum_type_list)) - .set_name("full_join_32bit") - .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) - .add_int64_axis("Build Table Size", {100'000, 10'000'000, 80'000'000, 100'000'000}) - .add_int64_axis("Probe Table Size", - {100'000, 400'000, 10'000'000, 40'000'000, 100'000'000, 240'000'000}); - -NVBENCH_BENCH_TYPES(nvbench_full_join, - NVBENCH_TYPE_AXES(nvbench::type_list, - nvbench::type_list, - nvbench::enum_type_list)) - .set_name("full_join_64bit") - .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) - .add_int64_axis("Build Table Size", {40'000'000, 50'000'000}) - .add_int64_axis("Probe Table Size", {50'000'000, 120'000'000}); - -NVBENCH_BENCH_TYPES(nvbench_full_join, - NVBENCH_TYPE_AXES(nvbench::type_list, - nvbench::type_list, - nvbench::enum_type_list)) - .set_name("full_join_32bit_nulls") - .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) - .add_int64_axis("Build Table Size", {100'000, 10'000'000, 80'000'000, 100'000'000}) - 
.add_int64_axis("Probe Table Size", - {100'000, 400'000, 10'000'000, 40'000'000, 100'000'000, 240'000'000}); - -NVBENCH_BENCH_TYPES(nvbench_full_join, - NVBENCH_TYPE_AXES(nvbench::type_list, - nvbench::type_list, - nvbench::enum_type_list)) - .set_name("full_join_64bit_nulls") - .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) - .add_int64_axis("Build Table Size", {40'000'000, 50'000'000}) - .add_int64_axis("Probe Table Size", {50'000'000, 120'000'000}); +NVBENCH_BENCH_TYPES(nvbench_inner_join, NVBENCH_TYPE_AXES(JOIN_KEY_TYPE_RANGE, JOIN_NULLABLE_RANGE)) + .set_name("inner_join") + .set_type_axes_names({"Key", "Nullable"}) + .add_int64_axis("left_size", JOIN_SIZE_RANGE) + .add_int64_axis("right_size", JOIN_SIZE_RANGE); + +NVBENCH_BENCH_TYPES(nvbench_left_join, NVBENCH_TYPE_AXES(JOIN_KEY_TYPE_RANGE, JOIN_NULLABLE_RANGE)) + .set_name("left_join") + .set_type_axes_names({"Key", "Nullable"}) + .add_int64_axis("left_size", JOIN_SIZE_RANGE) + .add_int64_axis("right_size", JOIN_SIZE_RANGE); + +NVBENCH_BENCH_TYPES(nvbench_full_join, NVBENCH_TYPE_AXES(JOIN_KEY_TYPE_RANGE, JOIN_NULLABLE_RANGE)) + .set_name("full_join") + .set_type_axes_names({"Key", "Nullable"}) + .add_int64_axis("left_size", JOIN_SIZE_RANGE) + .add_int64_axis("right_size", JOIN_SIZE_RANGE); diff --git a/cpp/benchmarks/join/join_common.hpp b/cpp/benchmarks/join/join_common.hpp index 9f869ddb1ac..9e23d28b363 100644 --- a/cpp/benchmarks/join/join_common.hpp +++ b/cpp/benchmarks/join/join_common.hpp @@ -41,6 +41,11 @@ #include +using JOIN_KEY_TYPE_RANGE = nvbench::type_list; +using JOIN_NULLABLE_RANGE = nvbench::enum_type_list; + +auto const JOIN_SIZE_RANGE = std::vector{1000, 100'000, 10'000'000}; + struct null75_generator { thrust::minstd_rand engine; thrust::uniform_int_distribution rand_gen; @@ -55,52 +60,42 @@ struct null75_generator { enum class join_t { CONDITIONAL, MIXED, HASH }; -inline void skip_helper(nvbench::state& state) -{ - auto const build_table_size = state.get_int64("Build Table 
Size"); - auto const probe_table_size = state.get_int64("Probe Table Size"); - - if (build_table_size > probe_table_size) { - state.skip("Large build tables are skipped."); - return; - } - - if (build_table_size * 100 <= probe_table_size) { - state.skip("Large probe tables are skipped."); - return; - } -} - -template void BM_join(state_type& state, Join JoinFunc) { - auto const build_table_size = [&]() { + auto const right_size = [&]() { if constexpr (std::is_same_v) { return static_cast(state.range(0)); } if constexpr (std::is_same_v) { - return static_cast(state.get_int64("Build Table Size")); + return static_cast(state.get_int64("right_size")); } }(); - auto const probe_table_size = [&]() { + auto const left_size = [&]() { if constexpr (std::is_same_v) { return static_cast(state.range(1)); } if constexpr (std::is_same_v) { - return static_cast(state.get_int64("Probe Table Size")); + return static_cast(state.get_int64("left_size")); } }(); + if constexpr (std::is_same_v) { + if (right_size > left_size) { + state.skip("Skip large right table"); + return; + } + } + double const selectivity = 0.3; int const multiplicity = 1; // Generate build and probe tables - auto build_random_null_mask = [](int size) { + auto right_random_null_mask = [](int size) { // roughly 75% nulls auto validity = thrust::make_transform_iterator(thrust::make_counting_iterator(0), null75_generator{}); @@ -111,62 +106,62 @@ void BM_join(state_type& state, Join JoinFunc) rmm::mr::get_current_device_resource()); }; - std::unique_ptr build_key_column0 = [&]() { - auto [null_mask, null_count] = build_random_null_mask(build_table_size); - return Nullable ? cudf::make_numeric_column(cudf::data_type(cudf::type_to_id()), - build_table_size, - std::move(null_mask), - null_count) - : cudf::make_numeric_column(cudf::data_type(cudf::type_to_id()), - build_table_size); + std::unique_ptr right_key_column0 = [&]() { + auto [null_mask, null_count] = right_random_null_mask(right_size); + return Nullable + ? 
cudf::make_numeric_column(cudf::data_type(cudf::type_to_id()), + right_size, + std::move(null_mask), + null_count) + : cudf::make_numeric_column(cudf::data_type(cudf::type_to_id()), right_size); }(); - std::unique_ptr probe_key_column0 = [&]() { - auto [null_mask, null_count] = build_random_null_mask(probe_table_size); - return Nullable ? cudf::make_numeric_column(cudf::data_type(cudf::type_to_id()), - probe_table_size, - std::move(null_mask), - null_count) - : cudf::make_numeric_column(cudf::data_type(cudf::type_to_id()), - probe_table_size); + std::unique_ptr left_key_column0 = [&]() { + auto [null_mask, null_count] = right_random_null_mask(left_size); + return Nullable + ? cudf::make_numeric_column(cudf::data_type(cudf::type_to_id()), + left_size, + std::move(null_mask), + null_count) + : cudf::make_numeric_column(cudf::data_type(cudf::type_to_id()), left_size); }(); - generate_input_tables( - build_key_column0->mutable_view().data(), - build_table_size, - probe_key_column0->mutable_view().data(), - probe_table_size, - selectivity, - multiplicity); + // build table is right table, probe table is left table + generate_input_tables(right_key_column0->mutable_view().data(), + right_size, + left_key_column0->mutable_view().data(), + left_size, + selectivity, + multiplicity); - // Copy build_key_column0 and probe_key_column0 into new columns. + // Copy right_key_column0 and left_key_column0 into new columns. // If Nullable, the new columns will be assigned new nullmasks. 
- auto const build_key_column1 = [&]() { - auto col = std::make_unique(build_key_column0->view()); + auto const right_key_column1 = [&]() { + auto col = std::make_unique(right_key_column0->view()); if (Nullable) { - auto [null_mask, null_count] = build_random_null_mask(build_table_size); + auto [null_mask, null_count] = right_random_null_mask(right_size); col->set_null_mask(std::move(null_mask), null_count); } return col; }(); - auto const probe_key_column1 = [&]() { - auto col = std::make_unique(probe_key_column0->view()); + auto const left_key_column1 = [&]() { + auto col = std::make_unique(left_key_column0->view()); if (Nullable) { - auto [null_mask, null_count] = build_random_null_mask(probe_table_size); + auto [null_mask, null_count] = right_random_null_mask(left_size); col->set_null_mask(std::move(null_mask), null_count); } return col; }(); - auto init = cudf::make_fixed_width_scalar(static_cast(0)); - auto build_payload_column = cudf::sequence(build_table_size, *init); - auto probe_payload_column = cudf::sequence(probe_table_size, *init); + auto init = cudf::make_fixed_width_scalar(static_cast(0)); + auto right_payload_column = cudf::sequence(right_size, *init); + auto left_payload_column = cudf::sequence(left_size, *init); CUDF_CHECK_CUDA(0); - cudf::table_view build_table( - {build_key_column0->view(), build_key_column1->view(), *build_payload_column}); - cudf::table_view probe_table( - {probe_key_column0->view(), probe_key_column1->view(), *probe_payload_column}); + cudf::table_view right_table( + {right_key_column0->view(), right_key_column1->view(), *right_payload_column}); + cudf::table_view left_table( + {left_key_column0->view(), left_key_column1->view(), *left_payload_column}); // Setup join parameters and result table [[maybe_unused]] std::vector columns_to_join = {0}; @@ -177,8 +172,8 @@ void BM_join(state_type& state, Join JoinFunc) for (auto _ : state) { cuda_event_timer raii(state, true, cudf::get_default_stream()); - auto result = 
JoinFunc(probe_table.select(columns_to_join), - build_table.select(columns_to_join), + auto result = JoinFunc(left_table.select(columns_to_join), + right_table.select(columns_to_join), cudf::null_equality::UNEQUAL); } } @@ -191,10 +186,10 @@ void BM_join(state_type& state, Join JoinFunc) cudf::ast::operation(cudf::ast::ast_operator::EQUAL, col_ref_left_0, col_ref_right_0); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { rmm::cuda_stream_view stream_view{launch.get_stream()}; - auto result = JoinFunc(probe_table.select(columns_to_join), - build_table.select(columns_to_join), - probe_table.select({1}), - build_table.select({1}), + auto result = JoinFunc(left_table.select(columns_to_join), + right_table.select(columns_to_join), + left_table.select({1}), + right_table.select({1}), left_zero_eq_right_zero, cudf::null_equality::UNEQUAL, stream_view); @@ -203,8 +198,8 @@ void BM_join(state_type& state, Join JoinFunc) if constexpr (join_type == join_t::HASH) { state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { rmm::cuda_stream_view stream_view{launch.get_stream()}; - auto result = JoinFunc(probe_table.select(columns_to_join), - build_table.select(columns_to_join), + auto result = JoinFunc(left_table.select(columns_to_join), + right_table.select(columns_to_join), cudf::null_equality::UNEQUAL, stream_view); }); @@ -223,7 +218,7 @@ void BM_join(state_type& state, Join JoinFunc) cuda_event_timer raii(state, true, cudf::get_default_stream()); auto result = - JoinFunc(probe_table, build_table, left_zero_eq_right_zero, cudf::null_equality::UNEQUAL); + JoinFunc(left_table, right_table, left_zero_eq_right_zero, cudf::null_equality::UNEQUAL); } } } diff --git a/cpp/benchmarks/join/left_join.cu b/cpp/benchmarks/join/left_join.cu index 96bbd1bc58e..3e398e721fa 100644 --- a/cpp/benchmarks/join/left_join.cu +++ b/cpp/benchmarks/join/left_join.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. 
+ * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,42 +16,42 @@ #include -template +template class Join : public cudf::benchmark {}; -#define LEFT_ANTI_JOIN_BENCHMARK_DEFINE(name, key_type, payload_type, nullable) \ - BENCHMARK_TEMPLATE_DEFINE_F(Join, name, key_type, payload_type) \ - (::benchmark::State & st) \ - { \ - auto join = [](cudf::table_view const& left, \ - cudf::table_view const& right, \ - cudf::null_equality compare_nulls) { \ - return cudf::left_anti_join(left, right, compare_nulls); \ - }; \ - BM_join(st, join); \ +#define LEFT_ANTI_JOIN_BENCHMARK_DEFINE(name, Key, Nullable) \ + BENCHMARK_TEMPLATE_DEFINE_F(Join, name, Key) \ + (::benchmark::State & st) \ + { \ + auto join = [](cudf::table_view const& left, \ + cudf::table_view const& right, \ + cudf::null_equality compare_nulls) { \ + return cudf::left_anti_join(left, right, compare_nulls); \ + }; \ + BM_join(st, join); \ } -LEFT_ANTI_JOIN_BENCHMARK_DEFINE(left_anti_join_32bit, int32_t, int32_t, false); -LEFT_ANTI_JOIN_BENCHMARK_DEFINE(left_anti_join_64bit, int64_t, int64_t, false); -LEFT_ANTI_JOIN_BENCHMARK_DEFINE(left_anti_join_32bit_nulls, int32_t, int32_t, true); -LEFT_ANTI_JOIN_BENCHMARK_DEFINE(left_anti_join_64bit_nulls, int64_t, int64_t, true); +LEFT_ANTI_JOIN_BENCHMARK_DEFINE(left_anti_join_32bit, int32_t, false); +LEFT_ANTI_JOIN_BENCHMARK_DEFINE(left_anti_join_64bit, int64_t, false); +LEFT_ANTI_JOIN_BENCHMARK_DEFINE(left_anti_join_32bit_nulls, int32_t, true); +LEFT_ANTI_JOIN_BENCHMARK_DEFINE(left_anti_join_64bit_nulls, int64_t, true); -#define LEFT_SEMI_JOIN_BENCHMARK_DEFINE(name, key_type, payload_type, nullable) \ - BENCHMARK_TEMPLATE_DEFINE_F(Join, name, key_type, payload_type) \ - (::benchmark::State & st) \ - { \ - auto join = [](cudf::table_view const& left, \ - cudf::table_view const& right, \ - cudf::null_equality compare_nulls) { \ - return 
cudf::left_semi_join(left, right, compare_nulls); \ - }; \ - BM_join(st, join); \ +#define LEFT_SEMI_JOIN_BENCHMARK_DEFINE(name, Key, Nullable) \ + BENCHMARK_TEMPLATE_DEFINE_F(Join, name, Key) \ + (::benchmark::State & st) \ + { \ + auto join = [](cudf::table_view const& left, \ + cudf::table_view const& right, \ + cudf::null_equality compare_nulls) { \ + return cudf::left_semi_join(left, right, compare_nulls); \ + }; \ + BM_join(st, join); \ } -LEFT_SEMI_JOIN_BENCHMARK_DEFINE(left_semi_join_32bit, int32_t, int32_t, false); -LEFT_SEMI_JOIN_BENCHMARK_DEFINE(left_semi_join_64bit, int64_t, int64_t, false); -LEFT_SEMI_JOIN_BENCHMARK_DEFINE(left_semi_join_32bit_nulls, int32_t, int32_t, true); -LEFT_SEMI_JOIN_BENCHMARK_DEFINE(left_semi_join_64bit_nulls, int64_t, int64_t, true); +LEFT_SEMI_JOIN_BENCHMARK_DEFINE(left_semi_join_32bit, int32_t, false); +LEFT_SEMI_JOIN_BENCHMARK_DEFINE(left_semi_join_64bit, int64_t, false); +LEFT_SEMI_JOIN_BENCHMARK_DEFINE(left_semi_join_32bit_nulls, int32_t, true); +LEFT_SEMI_JOIN_BENCHMARK_DEFINE(left_semi_join_64bit_nulls, int64_t, true); // left anti-join ------------------------------------------------------------- BENCHMARK_REGISTER_F(Join, left_anti_join_32bit) diff --git a/cpp/benchmarks/join/mixed_join.cu b/cpp/benchmarks/join/mixed_join.cu index 67be4640f84..129ea62e7a6 100644 --- a/cpp/benchmarks/join/mixed_join.cu +++ b/cpp/benchmarks/join/mixed_join.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,12 +16,10 @@ #include -template -void nvbench_mixed_inner_join( - nvbench::state& state, nvbench::type_list>) +template +void nvbench_mixed_inner_join(nvbench::state& state, + nvbench::type_list>) { - skip_helper(state); - auto join = [](cudf::table_view const& left_equality_input, cudf::table_view const& right_equality_input, cudf::table_view const& left_conditional_input, @@ -37,15 +35,13 @@ void nvbench_mixed_inner_join( compare_nulls); }; - BM_join(state, join); + BM_join(state, join); } -template -void nvbench_mixed_left_join( - nvbench::state& state, nvbench::type_list>) +template +void nvbench_mixed_left_join(nvbench::state& state, + nvbench::type_list>) { - skip_helper(state); - auto join = [](cudf::table_view const& left_equality_input, cudf::table_view const& right_equality_input, cudf::table_view const& left_conditional_input, @@ -61,15 +57,13 @@ void nvbench_mixed_left_join( compare_nulls); }; - BM_join(state, join); + BM_join(state, join); } -template -void nvbench_mixed_full_join( - nvbench::state& state, nvbench::type_list>) +template +void nvbench_mixed_full_join(nvbench::state& state, + nvbench::type_list>) { - skip_helper(state); - auto join = [](cudf::table_view const& left_equality_input, cudf::table_view const& right_equality_input, cudf::table_view const& left_conditional_input, @@ -85,15 +79,13 @@ void nvbench_mixed_full_join( compare_nulls); }; - BM_join(state, join); + BM_join(state, join); } -template -void nvbench_mixed_left_semi_join( - nvbench::state& state, nvbench::type_list>) +template +void nvbench_mixed_left_semi_join(nvbench::state& state, + nvbench::type_list>) { - skip_helper(state); - auto join = [](cudf::table_view const& left_equality_input, cudf::table_view const& right_equality_input, cudf::table_view const& left_conditional_input, @@ -109,15 +101,13 @@ void nvbench_mixed_left_semi_join( compare_nulls); }; - BM_join(state, join); + BM_join(state, join); } -template -void nvbench_mixed_left_anti_join( - 
nvbench::state& state, nvbench::type_list>) +template +void nvbench_mixed_left_anti_join(nvbench::state& state, + nvbench::type_list>) { - skip_helper(state); - auto join = [](cudf::table_view const& left_equality_input, cudf::table_view const& right_equality_input, cudf::table_view const& left_conditional_input, @@ -133,200 +123,40 @@ void nvbench_mixed_left_anti_join( compare_nulls); }; - BM_join(state, join); + BM_join(state, join); } -// inner join ----------------------------------------------------------------------- -NVBENCH_BENCH_TYPES(nvbench_mixed_inner_join, - NVBENCH_TYPE_AXES(nvbench::type_list, - nvbench::type_list, - nvbench::enum_type_list)) - .set_name("mixed_inner_join_32bit") - .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) - .add_int64_axis("Build Table Size", {100'000, 10'000'000, 80'000'000, 100'000'000}) - .add_int64_axis("Probe Table Size", - {100'000, 400'000, 10'000'000, 40'000'000, 100'000'000, 240'000'000}); - -NVBENCH_BENCH_TYPES(nvbench_mixed_inner_join, - NVBENCH_TYPE_AXES(nvbench::type_list, - nvbench::type_list, - nvbench::enum_type_list)) - .set_name("mixed_inner_join_64bit") - .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) - .add_int64_axis("Build Table Size", {40'000'000, 50'000'000}) - .add_int64_axis("Probe Table Size", {50'000'000, 120'000'000}); - NVBENCH_BENCH_TYPES(nvbench_mixed_inner_join, - NVBENCH_TYPE_AXES(nvbench::type_list, - nvbench::type_list, - nvbench::enum_type_list)) - .set_name("mixed_inner_join_32bit_nulls") - .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) - .add_int64_axis("Build Table Size", {100'000, 10'000'000, 80'000'000, 100'000'000}) - .add_int64_axis("Probe Table Size", - {100'000, 400'000, 10'000'000, 40'000'000, 100'000'000, 240'000'000}); + NVBENCH_TYPE_AXES(JOIN_KEY_TYPE_RANGE, JOIN_NULLABLE_RANGE)) + .set_name("mixed_inner_join") + .set_type_axes_names({"Key", "Nullable"}) + .add_int64_axis("left_size", JOIN_SIZE_RANGE) + 
.add_int64_axis("right_size", JOIN_SIZE_RANGE); -NVBENCH_BENCH_TYPES(nvbench_mixed_inner_join, - NVBENCH_TYPE_AXES(nvbench::type_list, - nvbench::type_list, - nvbench::enum_type_list)) - .set_name("mixed_inner_join_64bit_nulls") - .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) - .add_int64_axis("Build Table Size", {40'000'000, 50'000'000}) - .add_int64_axis("Probe Table Size", {50'000'000, 120'000'000}); - -// left join ------------------------------------------------------------------------ NVBENCH_BENCH_TYPES(nvbench_mixed_left_join, - NVBENCH_TYPE_AXES(nvbench::type_list, - nvbench::type_list, - nvbench::enum_type_list)) - .set_name("mixed_left_join_32bit") - .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) - .add_int64_axis("Build Table Size", {100'000, 10'000'000, 80'000'000, 100'000'000}) - .add_int64_axis("Probe Table Size", - {100'000, 400'000, 10'000'000, 40'000'000, 100'000'000, 240'000'000}); + NVBENCH_TYPE_AXES(JOIN_KEY_TYPE_RANGE, JOIN_NULLABLE_RANGE)) + .set_name("mixed_left_join") + .set_type_axes_names({"Key", "Nullable"}) + .add_int64_axis("left_size", JOIN_SIZE_RANGE) + .add_int64_axis("right_size", JOIN_SIZE_RANGE); -NVBENCH_BENCH_TYPES(nvbench_mixed_left_join, - NVBENCH_TYPE_AXES(nvbench::type_list, - nvbench::type_list, - nvbench::enum_type_list)) - .set_name("mixed_left_join_64bit") - .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) - .add_int64_axis("Build Table Size", {40'000'000, 50'000'000}) - .add_int64_axis("Probe Table Size", {50'000'000, 120'000'000}); - -NVBENCH_BENCH_TYPES(nvbench_mixed_left_join, - NVBENCH_TYPE_AXES(nvbench::type_list, - nvbench::type_list, - nvbench::enum_type_list)) - .set_name("mixed_left_join_32bit_nulls") - .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) - .add_int64_axis("Build Table Size", {100'000, 10'000'000, 80'000'000, 100'000'000}) - .add_int64_axis("Probe Table Size", - {100'000, 400'000, 10'000'000, 40'000'000, 100'000'000, 240'000'000}); - 
-NVBENCH_BENCH_TYPES(nvbench_mixed_left_join, - NVBENCH_TYPE_AXES(nvbench::type_list, - nvbench::type_list, - nvbench::enum_type_list)) - .set_name("mixed_left_join_64bit_nulls") - .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) - .add_int64_axis("Build Table Size", {40'000'000, 50'000'000}) - .add_int64_axis("Probe Table Size", {50'000'000, 120'000'000}); - -// full join ------------------------------------------------------------------------ NVBENCH_BENCH_TYPES(nvbench_mixed_full_join, - NVBENCH_TYPE_AXES(nvbench::type_list, - nvbench::type_list, - nvbench::enum_type_list)) - .set_name("mixed_full_join_32bit") - .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) - .add_int64_axis("Build Table Size", {100'000, 10'000'000, 80'000'000, 100'000'000}) - .add_int64_axis("Probe Table Size", - {100'000, 400'000, 10'000'000, 40'000'000, 100'000'000, 240'000'000}); - -NVBENCH_BENCH_TYPES(nvbench_mixed_full_join, - NVBENCH_TYPE_AXES(nvbench::type_list, - nvbench::type_list, - nvbench::enum_type_list)) - .set_name("mixed_full_join_64bit") - .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) - .add_int64_axis("Build Table Size", {40'000'000, 50'000'000}) - .add_int64_axis("Probe Table Size", {50'000'000, 120'000'000}); - -NVBENCH_BENCH_TYPES(nvbench_mixed_full_join, - NVBENCH_TYPE_AXES(nvbench::type_list, - nvbench::type_list, - nvbench::enum_type_list)) - .set_name("mixed_full_join_32bit_nulls") - .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) - .add_int64_axis("Build Table Size", {100'000, 10'000'000, 80'000'000, 100'000'000}) - .add_int64_axis("Probe Table Size", - {100'000, 400'000, 10'000'000, 40'000'000, 100'000'000, 240'000'000}); - -NVBENCH_BENCH_TYPES(nvbench_mixed_full_join, - NVBENCH_TYPE_AXES(nvbench::type_list, - nvbench::type_list, - nvbench::enum_type_list)) - .set_name("mixed_full_join_64bit_nulls") - .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) - .add_int64_axis("Build Table Size", 
{40'000'000, 50'000'000}) - .add_int64_axis("Probe Table Size", {50'000'000, 120'000'000}); - -// left semi join ------------------------------------------------------------------------ -NVBENCH_BENCH_TYPES(nvbench_mixed_left_semi_join, - NVBENCH_TYPE_AXES(nvbench::type_list, - nvbench::type_list, - nvbench::enum_type_list)) - .set_name("mixed_left_semi_join_32bit") - .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) - .add_int64_axis("Build Table Size", {100'000, 10'000'000, 80'000'000, 100'000'000}) - .add_int64_axis("Probe Table Size", - {100'000, 400'000, 10'000'000, 40'000'000, 100'000'000, 240'000'000}); - -NVBENCH_BENCH_TYPES(nvbench_mixed_left_semi_join, - NVBENCH_TYPE_AXES(nvbench::type_list, - nvbench::type_list, - nvbench::enum_type_list)) - .set_name("mixed_left_semi_join_64bit") - .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) - .add_int64_axis("Build Table Size", {40'000'000, 50'000'000}) - .add_int64_axis("Probe Table Size", {50'000'000, 120'000'000}); + NVBENCH_TYPE_AXES(JOIN_KEY_TYPE_RANGE, JOIN_NULLABLE_RANGE)) + .set_name("mixed_full_join") + .set_type_axes_names({"Key", "Nullable"}) + .add_int64_axis("left_size", JOIN_SIZE_RANGE) + .add_int64_axis("right_size", JOIN_SIZE_RANGE); NVBENCH_BENCH_TYPES(nvbench_mixed_left_semi_join, - NVBENCH_TYPE_AXES(nvbench::type_list, - nvbench::type_list, - nvbench::enum_type_list)) - .set_name("mixed_left_semi_join_32bit_nulls") - .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) - .add_int64_axis("Build Table Size", {100'000, 10'000'000, 80'000'000, 100'000'000}) - .add_int64_axis("Probe Table Size", - {100'000, 400'000, 10'000'000, 40'000'000, 100'000'000, 240'000'000}); - -NVBENCH_BENCH_TYPES(nvbench_mixed_left_semi_join, - NVBENCH_TYPE_AXES(nvbench::type_list, - nvbench::type_list, - nvbench::enum_type_list)) - .set_name("mixed_left_semi_join_64bit_nulls") - .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) - .add_int64_axis("Build Table Size", 
{40'000'000, 50'000'000}) - .add_int64_axis("Probe Table Size", {50'000'000, 120'000'000}); - -// left anti join ------------------------------------------------------------------------ -NVBENCH_BENCH_TYPES(nvbench_mixed_left_anti_join, - NVBENCH_TYPE_AXES(nvbench::type_list, - nvbench::type_list, - nvbench::enum_type_list)) - .set_name("mixed_left_anti_join_32bit") - .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) - .add_int64_axis("Build Table Size", {100'000, 10'000'000, 80'000'000, 100'000'000}) - .add_int64_axis("Probe Table Size", - {100'000, 400'000, 10'000'000, 40'000'000, 100'000'000, 240'000'000}); - -NVBENCH_BENCH_TYPES(nvbench_mixed_left_anti_join, - NVBENCH_TYPE_AXES(nvbench::type_list, - nvbench::type_list, - nvbench::enum_type_list)) - .set_name("mixed_left_anti_join_64bit") - .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) - .add_int64_axis("Build Table Size", {40'000'000, 50'000'000}) - .add_int64_axis("Probe Table Size", {50'000'000, 120'000'000}); - -NVBENCH_BENCH_TYPES(nvbench_mixed_left_anti_join, - NVBENCH_TYPE_AXES(nvbench::type_list, - nvbench::type_list, - nvbench::enum_type_list)) - .set_name("mixed_left_anti_join_32bit_nulls") - .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) - .add_int64_axis("Build Table Size", {100'000, 10'000'000, 80'000'000, 100'000'000}) - .add_int64_axis("Probe Table Size", - {100'000, 400'000, 10'000'000, 40'000'000, 100'000'000, 240'000'000}); + NVBENCH_TYPE_AXES(JOIN_KEY_TYPE_RANGE, JOIN_NULLABLE_RANGE)) + .set_name("mixed_left_semi_join") + .set_type_axes_names({"Key", "Nullable"}) + .add_int64_axis("left_size", JOIN_SIZE_RANGE) + .add_int64_axis("right_size", JOIN_SIZE_RANGE); NVBENCH_BENCH_TYPES(nvbench_mixed_left_anti_join, - NVBENCH_TYPE_AXES(nvbench::type_list, - nvbench::type_list, - nvbench::enum_type_list)) - .set_name("mixed_left_anti_join_64bit_nulls") - .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) - .add_int64_axis("Build Table Size", 
{40'000'000, 50'000'000}) - .add_int64_axis("Probe Table Size", {50'000'000, 120'000'000}); + NVBENCH_TYPE_AXES(JOIN_KEY_TYPE_RANGE, JOIN_NULLABLE_RANGE)) + .set_name("mixed_left_anti_join") + .set_type_axes_names({"Key", "Nullable"}) + .add_int64_axis("left_size", JOIN_SIZE_RANGE) + .add_int64_axis("right_size", JOIN_SIZE_RANGE); diff --git a/cpp/benchmarks/json/json.cu b/cpp/benchmarks/json/json.cu index a54d7d48dc4..06b793bf5f1 100644 --- a/cpp/benchmarks/json/json.cu +++ b/cpp/benchmarks/json/json.cu @@ -15,8 +15,6 @@ */ #include -#include -#include #include @@ -28,9 +26,7 @@ #include #include -#include - -class JsonPath : public cudf::benchmark {}; +#include std::vector const Books{ R"json({ @@ -77,10 +73,9 @@ struct json_benchmark_row_builder { cudf::column_device_view const d_book_pct; // Book percentage cudf::column_device_view const d_misc_order; // Misc-Store order cudf::column_device_view const d_store_order; // Books-Bicycles order - int32_t* d_offsets{}; + cudf::size_type* d_sizes{}; char* d_chars{}; - thrust::minstd_rand rng{5236}; - thrust::uniform_int_distribution dist{}; + cudf::detail::input_offsetalator d_offsets; // internal data structure for {bytes, out_ptr} with operator+= struct bytes_and_ptr { @@ -98,12 +93,10 @@ struct json_benchmark_row_builder { cudf::size_type num_items, bytes_and_ptr& output_str) { - using param_type = thrust::uniform_int_distribution::param_type; - dist.param(param_type{0, d_books_bicycles[this_idx].size() - 1}); cudf::string_view comma(",\n", 2); for (int i = 0; i < num_items; i++) { if (i > 0) { output_str += comma; } - int idx = dist(rng); + int idx = threadIdx.x % d_books_bicycles[this_idx].size(); auto item = d_books_bicycles[this_idx].element(idx); output_str += item; } @@ -155,7 +148,7 @@ struct json_benchmark_row_builder { output_str += Misc; } output_str += brace2; - if (!output_str.ptr) d_offsets[idx] = output_str.bytes; + if (!output_str.ptr) { d_sizes[idx] = output_str.bytes; } } }; @@ -182,41 +175,42 @@ 
auto build_json_string_column(int desired_bytes, int num_rows) return cudf::make_strings_column(num_rows, std::move(offsets), chars.release(), 0, {}); } -void BM_case(benchmark::State& state, std::string query_arg) +static std::string queries[] = {"$", + "$.store", + "$.store.book", + "$.store.*", + "$.store.book[*]", + "$.store.book[*].category", + "$.store['bicycle']", + "$.store.book[*]['isbn']", + "$.store.bicycle[1]"}; + +static void bench_query(nvbench::state& state) { srand(5236); - int num_rows = state.range(0); - int desired_bytes = state.range(1); + + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const desired_bytes = static_cast(state.get_int64("bytes")); + auto const query = state.get_int64("query"); + auto const json_path = queries[query]; + + auto const stream = cudf::get_default_stream(); auto input = build_json_string_column(desired_bytes, num_rows); cudf::strings_column_view scv(input->view()); - size_t num_chars = scv.chars_size(cudf::get_default_stream()); + size_t num_chars = scv.chars_size(stream); - std::string json_path(query_arg); - - for (auto _ : state) { - cuda_event_timer raii(state, true); - auto result = cudf::get_json_object(scv, json_path); - CUDF_CUDA_TRY(cudaStreamSynchronize(0)); - } + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + // This isn't strictly 100% accurate. a given query isn't necessarily + // going to visit every single incoming character but in spirit it does. + state.add_global_memory_reads(num_chars); - // this isn't strictly 100% accurate. a given query isn't necessarily - // going to visit every single incoming character. but in spirit it does. 
- state.SetBytesProcessed(state.iterations() * num_chars); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + [[maybe_unused]] auto result = cudf::get_json_object(scv, json_path); + }); } -#define JSON_BENCHMARK_DEFINE(name, query) \ - BENCHMARK_DEFINE_F(JsonPath, name)(::benchmark::State & state) { BM_case(state, query); } \ - BENCHMARK_REGISTER_F(JsonPath, name) \ - ->ArgsProduct({{100, 1000, 100000, 400000}, {300, 600, 4096}}) \ - ->UseManualTime() \ - ->Unit(benchmark::kMillisecond); - -JSON_BENCHMARK_DEFINE(query0, "$"); -JSON_BENCHMARK_DEFINE(query1, "$.store"); -JSON_BENCHMARK_DEFINE(query2, "$.store.book"); -JSON_BENCHMARK_DEFINE(query3, "$.store.*"); -JSON_BENCHMARK_DEFINE(query4, "$.store.book[*]"); -JSON_BENCHMARK_DEFINE(query5, "$.store.book[*].category"); -JSON_BENCHMARK_DEFINE(query6, "$.store['bicycle']"); -JSON_BENCHMARK_DEFINE(query7, "$.store.book[*]['isbn']"); -JSON_BENCHMARK_DEFINE(query8, "$.store.bicycle[1]"); +NVBENCH_BENCH(bench_query) + .set_name("json_path") + .add_int64_axis("bytes", {300, 600, 4096}) + .add_int64_axis("num_rows", {100, 1000, 100000, 400000}) + .add_int64_axis("query", {0, 1, 2, 3, 4, 5, 6, 7, 8}); diff --git a/cpp/benchmarks/merge/merge_strings.cpp b/cpp/benchmarks/merge/merge_strings.cpp new file mode 100644 index 00000000000..3d0f1865490 --- /dev/null +++ b/cpp/benchmarks/merge/merge_strings.cpp @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include +#include + +#include + +void nvbench_merge_strings(nvbench::state& state) +{ + auto stream = cudf::get_default_stream(); + + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const row_width = static_cast(state.get_int64("row_width")); + if (static_cast(2 * num_rows) * static_cast(row_width) >= + static_cast(std::numeric_limits::max())) { + state.skip("Skip benchmarks greater than size_type limit"); + } + + data_profile const table_profile = + data_profile_builder() + .distribution(cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width) + .no_validity(); + auto const source_tables = create_random_table( + {cudf::type_id::STRING, cudf::type_id::STRING}, row_count{num_rows}, table_profile); + + auto const sorted_lhs = cudf::sort(cudf::table_view({source_tables->view().column(0)})); + auto const sorted_rhs = cudf::sort(cudf::table_view({source_tables->view().column(1)})); + auto const lhs = sorted_lhs->view().column(0); + auto const rhs = sorted_rhs->view().column(0); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + auto chars_size = cudf::strings_column_view(lhs).chars_size(stream) + + cudf::strings_column_view(rhs).chars_size(stream); + state.add_global_memory_reads(chars_size); // all bytes are read + state.add_global_memory_writes(chars_size); // all bytes are written + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + [[maybe_unused]] auto result = cudf::merge( + {cudf::table_view({lhs}), cudf::table_view({rhs})}, {0}, {cudf::order::ASCENDING}); + }); +} + +NVBENCH_BENCH(nvbench_merge_strings) + .set_name("merge_strings") + .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048, 4096}) + .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}); diff --git a/cpp/benchmarks/replace/nulls.cpp 
b/cpp/benchmarks/replace/nulls.cpp new file mode 100644 index 00000000000..ccd00050789 --- /dev/null +++ b/cpp/benchmarks/replace/nulls.cpp @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +static void replace_nulls(nvbench::state& state) +{ + auto const n_rows = static_cast(state.get_int64("num_rows")); + auto const max_width = static_cast(state.get_int64("row_width")); + + if (static_cast(n_rows) * static_cast(max_width) >= + static_cast(std::numeric_limits::max())) { + state.skip("Skip benchmarks greater than size_type limit"); + } + + data_profile const table_profile = data_profile_builder().distribution( + cudf::type_id::STRING, distribution_id::NORMAL, 0, max_width); + + auto const input_table = create_random_table( + {cudf::type_id::STRING, cudf::type_id::STRING}, row_count{n_rows}, table_profile); + auto const input = input_table->view().column(0); + auto const repl = input_table->view().column(1); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + auto chars_size = cudf::strings_column_view(input).chars_size(cudf::get_default_stream()); + state.add_global_memory_reads(chars_size); // all bytes are read; + state.add_global_memory_writes(chars_size); + + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { auto result = 
cudf::replace_nulls(input, repl); }); +} + +NVBENCH_BENCH(replace_nulls) + .set_name("replace_nulls") + .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048}) + .add_int64_axis("num_rows", {32768, 262144, 2097152, 16777216}); diff --git a/cpp/benchmarks/reshape/interleave.cpp b/cpp/benchmarks/reshape/interleave.cpp new file mode 100644 index 00000000000..4499e34af77 --- /dev/null +++ b/cpp/benchmarks/reshape/interleave.cpp @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include +#include + +#include + +static void bench_interleave(nvbench::state& state) +{ + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const row_width = static_cast(state.get_int64("row_width")); + auto const num_cols = static_cast(state.get_int64("columns")); + + if (static_cast(num_rows) * static_cast(row_width) * num_cols >= + static_cast(std::numeric_limits::max())) { + state.skip("Skip benchmarks greater than size_type limit"); + } + + data_profile const str_profile = data_profile_builder().distribution( + cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + std::vector types(num_cols, cudf::type_id::STRING); + auto const source_table = create_random_table(types, row_count{num_rows}, str_profile); + + auto const source_view = source_table->view(); + auto const stream = cudf::get_default_stream(); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + auto chars_size = cudf::strings_column_view(source_view.column(0)).chars_size(stream) + + cudf::strings_column_view(source_view.column(1)).chars_size(stream); + state.add_global_memory_reads(chars_size); // all bytes are read + state.add_global_memory_writes(chars_size); // all bytes are written + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + [[maybe_unused]] auto result = cudf::interleave_columns(source_view); + }); +} + +NVBENCH_BENCH(bench_interleave) + .set_name("interleave_strings") + .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024}) + .add_int64_axis("num_rows", {32768, 262144, 2097152, 16777216}) + .add_int64_axis("columns", {2, 10, 100}); diff --git a/cpp/benchmarks/string/case.cpp b/cpp/benchmarks/string/case.cpp index a7db972d39f..cd4d3ca964b 100644 --- a/cpp/benchmarks/string/case.cpp +++ b/cpp/benchmarks/string/case.cpp @@ -75,5 +75,5 @@ void bench_case(nvbench::state& state) NVBENCH_BENCH(bench_case) .set_name("case") .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 
2048}) - .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}) + .add_int64_axis("num_rows", {32768, 262144, 2097152, 16777216}) .add_string_axis("encoding", {"ascii", "utf8"}); diff --git a/cpp/benchmarks/string/contains.cpp b/cpp/benchmarks/string/contains.cpp index 6d839c1de64..ae6c8b844c8 100644 --- a/cpp/benchmarks/string/contains.cpp +++ b/cpp/benchmarks/string/contains.cpp @@ -80,7 +80,7 @@ std::unique_ptr build_input_column(cudf::size_type n_rows, } // longer pattern lengths demand more working memory per string -std::string patterns[] = {"^\\d+ [a-z]+", "[A-Z ]+\\d+ +\\d+[A-Z]+\\d+$"}; +std::string patterns[] = {"^\\d+ [a-z]+", "[A-Z ]+\\d+ +\\d+[A-Z]+\\d+$", "5W43"}; static void bench_contains(nvbench::state& state) { @@ -114,4 +114,4 @@ NVBENCH_BENCH(bench_contains) .add_int64_axis("row_width", {32, 64, 128, 256, 512}) .add_int64_axis("num_rows", {32768, 262144, 2097152, 16777216}) .add_int64_axis("hit_rate", {50, 100}) // percentage - .add_int64_axis("pattern", {0, 1}); + .add_int64_axis("pattern", {0, 1, 2}); diff --git a/cpp/benchmarks/string/count.cpp b/cpp/benchmarks/string/count.cpp index a656010dca5..f964bc5d224 100644 --- a/cpp/benchmarks/string/count.cpp +++ b/cpp/benchmarks/string/count.cpp @@ -25,10 +25,13 @@ #include +static std::string patterns[] = {"\\d+", "a"}; + static void bench_count(nvbench::state& state) { - auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const row_width = static_cast(state.get_int64("row_width")); + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const row_width = static_cast(state.get_int64("row_width")); + auto const pattern_index = static_cast(state.get_int64("pattern")); if (static_cast(num_rows) * static_cast(row_width) >= static_cast(std::numeric_limits::max())) { @@ -41,7 +44,7 @@ static void bench_count(nvbench::state& state) create_random_table({cudf::type_id::STRING}, row_count{num_rows}, table_profile); cudf::strings_column_view 
input(table->view().column(0)); - std::string pattern = "\\d+"; + auto const pattern = patterns[pattern_index]; auto prog = cudf::strings::regex_program::create(pattern); @@ -59,4 +62,5 @@ static void bench_count(nvbench::state& state) NVBENCH_BENCH(bench_count) .set_name("count") .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048}) - .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}); + .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}) + .add_int64_axis("pattern", {0, 1}); diff --git a/cpp/benchmarks/string/find.cpp b/cpp/benchmarks/string/find.cpp index e866092f3a3..a9c620e4bf0 100644 --- a/cpp/benchmarks/string/find.cpp +++ b/cpp/benchmarks/string/find.cpp @@ -16,78 +16,75 @@ #include #include -#include #include +#include #include +#include #include #include #include #include -#include +#include -enum FindAPI { find, find_multi, contains, starts_with, ends_with }; +std::unique_ptr build_input_column(cudf::size_type n_rows, + cudf::size_type row_width, + int32_t hit_rate); -class StringFindScalar : public cudf::benchmark {}; - -static void BM_find_scalar(benchmark::State& state, FindAPI find_api) +static void bench_find_string(nvbench::state& state) { - cudf::size_type const n_rows{static_cast(state.range(0))}; - cudf::size_type const max_str_length{static_cast(state.range(1))}; - data_profile const profile = data_profile_builder().distribution( - cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); - auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile); - cudf::strings_column_view input(column->view()); - cudf::string_scalar target("+"); - cudf::test::strings_column_wrapper targets({"+", "-"}); + auto const n_rows = static_cast(state.get_int64("num_rows")); + auto const row_width = static_cast(state.get_int64("row_width")); + auto const hit_rate = static_cast(state.get_int64("hit_rate")); + auto const api = state.get_string("api"); - for (auto _ : state) { - 
cuda_event_timer raii(state, true, cudf::get_default_stream()); - switch (find_api) { - case find: cudf::strings::find(input, target); break; - case find_multi: - cudf::strings::find_multiple(input, cudf::strings_column_view(targets)); - break; - case contains: cudf::strings::contains(input, target); break; - case starts_with: cudf::strings::starts_with(input, target); break; - case ends_with: cudf::strings::ends_with(input, target); break; - } + if (static_cast(n_rows) * static_cast(row_width) >= + static_cast(std::numeric_limits::max())) { + state.skip("Skip benchmarks greater than size_type limit"); } - state.SetBytesProcessed(state.iterations() * input.chars_size(cudf::get_default_stream())); -} + auto const stream = cudf::get_default_stream(); + auto const col = build_input_column(n_rows, row_width, hit_rate); + auto const input = cudf::strings_column_view(col->view()); -static void generate_bench_args(benchmark::internal::Benchmark* b) -{ - int const min_rows = 1 << 12; - int const max_rows = 1 << 24; - int const row_mult = 8; - int const min_rowlen = 1 << 5; - int const max_rowlen = 1 << 13; - int const len_mult = 2; - for (int row_count = min_rows; row_count <= max_rows; row_count *= row_mult) { - for (int rowlen = min_rowlen; rowlen <= max_rowlen; rowlen *= len_mult) { - // avoid generating combinations that exceed the cudf column limit - size_t total_chars = static_cast(row_count) * rowlen; - if (total_chars < static_cast(std::numeric_limits::max())) { - b->Args({row_count, rowlen}); - } - } + std::vector h_targets({"5W", "5W43", "0987 5W43"}); + cudf::string_scalar target(h_targets[2]); + cudf::test::strings_column_wrapper targets(h_targets.begin(), h_targets.end()); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + auto const chars_size = input.chars_size(stream); + state.add_element_count(chars_size, "chars_size"); + state.add_global_memory_reads(chars_size); + if (api.substr(0, 4) == "find") { + 
state.add_global_memory_writes(input.size()); + } else { + state.add_global_memory_writes(input.size()); } -} -#define STRINGS_BENCHMARK_DEFINE(name) \ - BENCHMARK_DEFINE_F(StringFindScalar, name) \ - (::benchmark::State & st) { BM_find_scalar(st, name); } \ - BENCHMARK_REGISTER_F(StringFindScalar, name) \ - ->Apply(generate_bench_args) \ - ->UseManualTime() \ - ->Unit(benchmark::kMillisecond); + if (api == "find") { + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { cudf::strings::find(input, target); }); + } else if (api == "find_multi") { + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + cudf::strings::find_multiple(input, cudf::strings_column_view(targets)); + }); + } else if (api == "contains") { + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { cudf::strings::contains(input, target); }); + } else if (api == "starts_with") { + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { cudf::strings::starts_with(input, target); }); + } else if (api == "ends_with") { + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { cudf::strings::ends_with(input, target); }); + } +} -STRINGS_BENCHMARK_DEFINE(find) -STRINGS_BENCHMARK_DEFINE(find_multi) -STRINGS_BENCHMARK_DEFINE(contains) -STRINGS_BENCHMARK_DEFINE(starts_with) -STRINGS_BENCHMARK_DEFINE(ends_with) +NVBENCH_BENCH(bench_find_string) + .set_name("find_string") + .add_string_axis("api", {"find", "find_multi", "contains", "starts_with", "ends_with"}) + .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024}) + .add_int64_axis("num_rows", {260'000, 1'953'000, 16'777'216}) + .add_int64_axis("hit_rate", {20, 80}); // percentage diff --git a/cpp/benchmarks/type_dispatcher/type_dispatcher.cu b/cpp/benchmarks/type_dispatcher/type_dispatcher.cu index 161328ae088..3aff75d840e 100644 --- a/cpp/benchmarks/type_dispatcher/type_dispatcher.cu +++ b/cpp/benchmarks/type_dispatcher/type_dispatcher.cu @@ -60,13 +60,15 @@ constexpr int 
block_size = 256; template CUDF_KERNEL void no_dispatching_kernel(T** A, cudf::size_type n_rows, cudf::size_type n_cols) { - using F = Functor; - cudf::size_type index = blockIdx.x * blockDim.x + threadIdx.x; - while (index < n_rows) { + using F = Functor; + auto tidx = cudf::detail::grid_1d::global_thread_id(); + auto const stride = cudf::detail::grid_1d::grid_stride(); + while (tidx < n_rows) { + auto const index = static_cast(tidx); for (int c = 0; c < n_cols; c++) { A[c][index] = F::f(A[c][index]); } - index += blockDim.x * gridDim.x; + tidx += stride; } } @@ -74,12 +76,14 @@ CUDF_KERNEL void no_dispatching_kernel(T** A, cudf::size_type n_rows, cudf::size template CUDF_KERNEL void host_dispatching_kernel(cudf::mutable_column_device_view source_column) { - using F = Functor; - T* A = source_column.data(); - cudf::size_type index = blockIdx.x * blockDim.x + threadIdx.x; - while (index < source_column.size()) { - A[index] = F::f(A[index]); - index += blockDim.x * gridDim.x; + using F = Functor; + T* A = source_column.data(); + auto tidx = cudf::detail::grid_1d::global_thread_id(); + auto const stride = cudf::detail::grid_1d::grid_stride(); + while (tidx < source_column.size()) { + auto const index = static_cast(tidx); + A[index] = F::f(A[index]); + tidx += stride; } } @@ -127,14 +131,15 @@ template CUDF_KERNEL void device_dispatching_kernel(cudf::mutable_table_device_view source) { cudf::size_type const n_rows = source.num_rows(); - cudf::size_type index = threadIdx.x + blockIdx.x * blockDim.x; - - while (index < n_rows) { + auto tidx = cudf::detail::grid_1d::global_thread_id(); + auto const stride = cudf::detail::grid_1d::grid_stride(); + while (tidx < n_rows) { + auto const index = static_cast(tidx); for (cudf::size_type i = 0; i < source.num_columns(); i++) { cudf::type_dispatcher( source.column(i).type(), RowHandle{}, source.column(i), index); } - index += blockDim.x * gridDim.x; + tidx += stride; } // while } diff --git a/cpp/cmake/Modules/ConfigureCUDA.cmake 
b/cpp/cmake/Modules/ConfigureCUDA.cmake index f79e4c37228..f75b5aef7af 100644 --- a/cpp/cmake/Modules/ConfigureCUDA.cmake +++ b/cpp/cmake/Modules/ConfigureCUDA.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2018-2022, NVIDIA CORPORATION. +# Copyright (c) 2018-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -25,6 +25,11 @@ else() list(APPEND CUDF_CUDA_FLAGS -Werror=cross-execution-space-call) endif() list(APPEND CUDF_CUDA_FLAGS -Xcompiler=-Wall,-Werror,-Wno-error=deprecated-declarations) +# This warning needs to be suppressed because some parts of cudf instantiate templated CCCL +# functions in contexts where the resulting instantiations would have internal linkage (e.g. in +# anonymous namespaces). In such contexts, the visibility attribute on the template is ignored, and +# the compiler issues a warning. This is not a problem and will be fixed in future versions of CCCL. 
+list(APPEND CUDF_CUDA_FLAGS -diag-suppress=1407) if(DISABLE_DEPRECATION_WARNINGS) list(APPEND CUDF_CXX_FLAGS -Wno-deprecated-declarations) diff --git a/cpp/cmake/Modules/JitifyPreprocessKernels.cmake b/cpp/cmake/Modules/JitifyPreprocessKernels.cmake index 8c4e2b47fca..752c2028350 100644 --- a/cpp/cmake/Modules/JitifyPreprocessKernels.cmake +++ b/cpp/cmake/Modules/JitifyPreprocessKernels.cmake @@ -23,8 +23,9 @@ target_link_libraries(jitify_preprocess PUBLIC ${CMAKE_DL_LIBS}) function(jit_preprocess_files) cmake_parse_arguments(ARG "" "SOURCE_DIRECTORY" "FILES" ${ARGN}) - foreach(inc IN LISTS libcudacxx_raw_includes) - list(APPEND libcudacxx_includes "-I${inc}") + set(includes) + foreach(inc IN LISTS libcudacxx_raw_includes CUDAToolkit_INCLUDE_DIRS) + list(APPEND includes "-I${inc}") endforeach() foreach(ARG_FILE ${ARG_FILES}) set(ARG_OUTPUT ${CUDF_GENERATED_INCLUDE_DIR}/include/jit_preprocessed_files/${ARG_FILE}.jit.hpp) @@ -44,8 +45,7 @@ function(jit_preprocess_files) $ ${ARG_FILE} -o ${CUDF_GENERATED_INCLUDE_DIR}/include/jit_preprocessed_files -i -m -std=c++17 -remove-unused-globals -D_FILE_OFFSET_BITS=64 -D__CUDACC_RTC__ -I${CUDF_SOURCE_DIR}/include - -I${CUDF_SOURCE_DIR}/src ${libcudacxx_includes} -I${CUDAToolkit_INCLUDE_DIRS} - --no-preinclude-workarounds --no-replace-pragma-once + -I${CUDF_SOURCE_DIR}/src ${includes} --no-preinclude-workarounds --no-replace-pragma-once COMMENT "Custom command to JIT-compile files." ) endforeach() diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake index 114a1f98a68..0afdc526981 100644 --- a/cpp/cmake/thirdparty/get_arrow.cmake +++ b/cpp/cmake/thirdparty/get_arrow.cmake @@ -26,13 +26,20 @@ include_guard(GLOBAL) # pyarrow. function(find_libarrow_in_python_wheel PYARROW_VERSION) string(REPLACE "." ";" PYARROW_VER_COMPONENTS "${PYARROW_VERSION}") - list(GET PYARROW_VER_COMPONENTS 0 PYARROW_SO_VER) - # The soname for Arrow libraries is constructed using the major version plus "00". 
Note that, - # although it may seem like it due to Arrow almost exclusively releasing new major versions (i.e. - # `${MINOR_VERSION}${PATCH_VERSION}` is almost always equivalent to "00"), - # the soname is not generated by concatenating the major, minor, and patch versions into a single - # version number soname, just `${MAJOR_VERSION}00` - set(PYARROW_LIB "libarrow.so.${PYARROW_SO_VER}00") + list(GET PYARROW_VER_COMPONENTS 0 PYARROW_MAJOR_VER) + list(GET PYARROW_VER_COMPONENTS 1 PYARROW_MINOR_VER) + + # Ensure that the major and minor versions are two digits long + string(LENGTH ${PYARROW_MAJOR_VER} PYARROW_MAJOR_LENGTH) + string(LENGTH ${PYARROW_MINOR_VER} PYARROW_MINOR_LENGTH) + if(${PYARROW_MAJOR_LENGTH} EQUAL 1) + set(PYARROW_MAJOR_VER "0${PYARROW_MAJOR_VER}") + endif() + if(${PYARROW_MINOR_LENGTH} EQUAL 1) + set(PYARROW_MINOR_VER "0${PYARROW_MINOR_VER}") + endif() + + set(PYARROW_LIB "libarrow.so.${PYARROW_MAJOR_VER}${PYARROW_MINOR_VER}") string( APPEND @@ -68,37 +75,6 @@ list(POP_BACK CMAKE_PREFIX_PATH) find_package(Arrow ${PYARROW_VERSION} MODULE REQUIRED GLOBAL) add_library(arrow_shared ALIAS Arrow::Arrow) - # When using the libarrow inside a wheel, whether or not libcudf may be built using the new C++11 - # ABI is dependent on whether the libarrow inside the wheel was compiled using that ABI because we - # need the arrow library that we bundle in cudf to be ABI-compatible with the one inside pyarrow. - # We determine what options to use by checking the glibc version on the current system, which is - # also how pip determines which manylinux-versioned pyarrow wheel to install. Note that tests will - # not build successfully without also propagating these options to builds of GTest. Similarly, - # benchmarks will not work without updating GBench (and possibly NVBench) builds. We are currently - # ignoring these limitations since we don't anticipate using this feature except for building - # wheels. 
- enable_language(C) - execute_process( - COMMAND ${CMAKE_C_COMPILER} -print-file-name=libc.so.6 - OUTPUT_VARIABLE GLIBC_EXECUTABLE - OUTPUT_STRIP_TRAILING_WHITESPACE - ) - execute_process( - COMMAND ${GLIBC_EXECUTABLE} - OUTPUT_VARIABLE GLIBC_OUTPUT - OUTPUT_STRIP_TRAILING_WHITESPACE - ) - string(REGEX MATCH "stable release version ([0-9]+\\.[0-9]+)" GLIBC_VERSION ${GLIBC_OUTPUT}) - string(REPLACE "stable release version " "" GLIBC_VERSION ${GLIBC_VERSION}) - string(REPLACE "." ";" GLIBC_VERSION_LIST ${GLIBC_VERSION}) - list(GET GLIBC_VERSION_LIST 1 GLIBC_VERSION_MINOR) - if(GLIBC_VERSION_MINOR LESS 28) - target_compile_options( - Arrow::Arrow INTERFACE "$<$:-D_GLIBCXX_USE_CXX11_ABI=0>" - "$<$:-Xcompiler=-D_GLIBCXX_USE_CXX11_ABI=0>" - ) - endif() - rapids_export_package(BUILD Arrow cudf-exports) rapids_export_package(INSTALL Arrow cudf-exports) endfunction() @@ -334,7 +310,20 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB " ) endif() - + rapids_cmake_install_lib_dir(lib_dir) + if(TARGET arrow_static) + get_target_property(interface_libs arrow_static INTERFACE_LINK_LIBRARIES) + # The `arrow_static` library is leaking a dependency on the object libraries it was built with + # we need to remove this from the interface, since keeping them around would cause duplicate + # symbols and CMake export errors + if(interface_libs MATCHES "arrow_array" AND interface_libs MATCHES "arrow_compute") + string(REPLACE "BUILD_INTERFACE:" "BUILD_LOCAL_INTERFACE:" interface_libs + "${interface_libs}" + ) + set_target_properties(arrow_static PROPERTIES INTERFACE_LINK_LIBRARIES "${interface_libs}") + get_target_property(interface_libs arrow_static INTERFACE_LINK_LIBRARIES) + endif() + endif() rapids_export( BUILD Arrow VERSION ${VERSION} @@ -441,7 +430,7 @@ if(NOT DEFINED CUDF_VERSION_Arrow) set(CUDF_VERSION_Arrow # This version must be kept in sync with the libarrow version pinned for builds in # dependencies.yaml. 
- 14.0.2 + 16.1.0 CACHE STRING "The version of Arrow to find (or build)" ) endif() diff --git a/cpp/cmake/thirdparty/get_flatbuffers.cmake b/cpp/cmake/thirdparty/get_flatbuffers.cmake new file mode 100644 index 00000000000..b0ece38b8ef --- /dev/null +++ b/cpp/cmake/thirdparty/get_flatbuffers.cmake @@ -0,0 +1,33 @@ +# ============================================================================= +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +# Use CPM to find or clone flatbuffers +function(find_and_configure_flatbuffers VERSION) + + rapids_cpm_find( + flatbuffers ${VERSION} + GLOBAL_TARGETS flatbuffers + CPM_ARGS + GIT_REPOSITORY https://github.com/google/flatbuffers.git + GIT_TAG v${VERSION} + GIT_SHALLOW TRUE + ) + + rapids_export_find_package_root( + BUILD flatbuffers "${flatbuffers_BINARY_DIR}" EXPORT_SET cudf-exports + ) + +endfunction() + +find_and_configure_flatbuffers(24.3.25) diff --git a/cpp/cmake/thirdparty/get_gtest.cmake b/cpp/cmake/thirdparty/get_gtest.cmake index cfb219448f1..10e6b026d9a 100644 --- a/cpp/cmake/thirdparty/get_gtest.cmake +++ b/cpp/cmake/thirdparty/get_gtest.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. 
# # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -17,22 +17,7 @@ function(find_and_configure_gtest) include(${rapids-cmake-dir}/cpm/gtest.cmake) # Find or install GoogleTest - rapids_cpm_gtest(BUILD_EXPORT_SET cudf-testing-exports INSTALL_EXPORT_SET cudf-testing-exports) - - if(GTest_ADDED) - rapids_export( - BUILD GTest - VERSION ${GTest_VERSION} - EXPORT_SET GTestTargets - GLOBAL_TARGETS gtest gmock gtest_main gmock_main - NAMESPACE GTest:: - ) - - include("${rapids-cmake-dir}/export/find_package_root.cmake") - rapids_export_find_package_root( - BUILD GTest [=[${CMAKE_CURRENT_LIST_DIR}]=] EXPORT_SET cudf-testing-exports - ) - endif() + rapids_cpm_gtest(BUILD_STATIC) endfunction() diff --git a/cpp/cmake/thirdparty/get_nanoarrow.cmake b/cpp/cmake/thirdparty/get_nanoarrow.cmake new file mode 100644 index 00000000000..025bff7d8f0 --- /dev/null +++ b/cpp/cmake/thirdparty/get_nanoarrow.cmake @@ -0,0 +1,32 @@ +# ============================================================================= +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +# This function finds nanoarrow and sets any additional necessary environment variables. 
+function(find_and_configure_nanoarrow) + # Currently we need to always build nanoarrow so we don't pickup a previous installed version + set(CPM_DOWNLOAD_nanoarrow ON) + rapids_cpm_find( + nanoarrow 0.5.0 + GLOBAL_TARGETS nanoarrow + CPM_ARGS + GIT_REPOSITORY https://github.com/apache/arrow-nanoarrow.git + GIT_TAG 11e73a8c85b45e3d49c8c541b4e1497a649fe03c + GIT_SHALLOW FALSE + OPTIONS "BUILD_SHARED_LIBS OFF" "NANOARROW_NAMESPACE cudf" + ) + set_target_properties(nanoarrow PROPERTIES POSITION_INDEPENDENT_CODE ON) + rapids_export_find_package_root(BUILD nanoarrow "${nanoarrow_BINARY_DIR}" EXPORT_SET cudf-exports) +endfunction() + +find_and_configure_nanoarrow() diff --git a/cpp/cmake/thirdparty/get_nvbench.cmake b/cpp/cmake/thirdparty/get_nvbench.cmake index bbd22693ba4..84c27dd9d56 100644 --- a/cpp/cmake/thirdparty/get_nvbench.cmake +++ b/cpp/cmake/thirdparty/get_nvbench.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -18,9 +18,6 @@ function(find_and_configure_nvbench) include(${rapids-cmake-dir}/cpm/nvbench.cmake) include(${rapids-cmake-dir}/cpm/package_override.cmake) - set(cudf_patch_dir "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/patches") - rapids_cpm_package_override("${cudf_patch_dir}/nvbench_override.json") - rapids_cpm_nvbench(BUILD_STATIC) endfunction() diff --git a/cpp/cmake/thirdparty/get_nvtx.cmake b/cpp/cmake/thirdparty/get_nvtx.cmake index c722c4f70f1..e236d586522 100644 --- a/cpp/cmake/thirdparty/get_nvtx.cmake +++ b/cpp/cmake/thirdparty/get_nvtx.cmake @@ -12,16 +12,14 @@ # the License. 
# ============================================================================= -# This function finds NVTX and sets any additional necessary environment variables. +# Need to call rapids_cpm_nvtx3 to get support for an installed version of nvtx3 and to support +# installing it ourselves function(find_and_configure_nvtx) - rapids_cpm_find( - NVTX3 3.1.0 - GLOBAL_TARGETS nvtx3-c nvtx3-cpp - CPM_ARGS - GIT_REPOSITORY https://github.com/NVIDIA/NVTX.git - GIT_TAG v3.1.0 - GIT_SHALLOW TRUE SOURCE_SUBDIR c - ) + include(${rapids-cmake-dir}/cpm/nvtx3.cmake) + + # Find or install nvtx3 + rapids_cpm_nvtx3(BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-exports) + endfunction() find_and_configure_nvtx() diff --git a/cpp/cmake/thirdparty/patches/cccl_override.json b/cpp/cmake/thirdparty/patches/cccl_override.json index 68fc8979c46..059f713e7a5 100644 --- a/cpp/cmake/thirdparty/patches/cccl_override.json +++ b/cpp/cmake/thirdparty/patches/cccl_override.json @@ -18,20 +18,45 @@ "issue" : "thrust::copy introduced a change in behavior that causes failures with cudaErrorInvalidValue.", "fixed_in" : "" }, + { + "file" : "${current_json_dir}/revert_pr_211_cccl_2.5.0.diff", + "issue" : "thrust::copy introduced a change in behavior that causes failures with cudaErrorInvalidValue.", + "fixed_in" : "" + }, + { + "file": "cccl/kernel_pointer_hiding.diff", + "issue": "Hide APIs that accept kernel pointers [https://github.com/NVIDIA/cccl/pull/1395]", + "fixed_in": "2.4" + }, { "file" : "${current_json_dir}/thrust_disable_64bit_dispatching.diff", "issue" : "Remove 64bit dispatching as not needed by libcudf and results in compiling twice as many kernels [https://github.com/rapidsai/cudf/pull/11437]", "fixed_in" : "" }, + { + "file" : "${current_json_dir}/thrust_disable_64bit_dispatching_cccl_2.5.0.diff", + "issue" : "Remove 64bit dispatching as not needed by libcudf and results in compiling twice as many kernels 
[https://github.com/rapidsai/cudf/pull/11437]", + "fixed_in" : "" + }, { "file" : "${current_json_dir}/thrust_faster_sort_compile_times.diff", "issue" : "Improve Thrust sort compile times by not unrolling loops for inlined comparators [https://github.com/rapidsai/cudf/pull/10577]", "fixed_in" : "" }, + { + "file" : "${current_json_dir}/thrust_faster_sort_compile_times_cccl_2.5.0.diff", + "issue" : "Improve Thrust sort compile times by not unrolling loops for inlined comparators [https://github.com/rapidsai/cudf/pull/10577]", + "fixed_in" : "" + }, { "file" : "${current_json_dir}/thrust_faster_scan_compile_times.diff", "issue" : "Improve Thrust scan compile times by reducing the number of kernels generated [https://github.com/rapidsai/cudf/pull/8183]", "fixed_in" : "" + }, + { + "file" : "${current_json_dir}/thrust_faster_scan_compile_times_cccl_2.5.0.diff", + "issue" : "Improve Thrust scan compile times by reducing the number of kernels generated [https://github.com/rapidsai/cudf/pull/8183]", + "fixed_in" : "" } ] } diff --git a/cpp/cmake/thirdparty/patches/nvbench_global_setup.diff b/cpp/cmake/thirdparty/patches/nvbench_global_setup.diff deleted file mode 100644 index 04f96f49b48..00000000000 --- a/cpp/cmake/thirdparty/patches/nvbench_global_setup.diff +++ /dev/null @@ -1,29 +0,0 @@ -diff --git a/nvbench/main.cuh b/nvbench/main.cuh -index 0ba82d7..cca5273 100644 ---- a/nvbench/main.cuh -+++ b/nvbench/main.cuh -@@ -54,6 +54,16 @@ - // clang-format on - #endif - -+#ifndef NVBENCH_ENVIRONMENT -+namespace nvbench { -+struct no_environment -+{ -+ no_environment(int, char const *const *) {} -+}; -+} -+#define NVBENCH_ENVIRONMENT nvbench::no_environment -+#endif -+ - #define NVBENCH_MAIN_PARSE(argc, argv) \ - nvbench::option_parser parser; \ - parser.parse(argc, argv) -@@ -77,6 +87,7 @@ - printer.set_total_state_count(total_states); \ - \ - printer.set_completed_state_count(0); \ -+ [[maybe_unused]] auto env_state = 
NVBENCH_ENVIRONMENT(argc, argv); \ - for (auto &bench_ptr : benchmarks) \ - { \ - bench_ptr->set_printer(printer); \ diff --git a/cpp/cmake/thirdparty/patches/nvbench_override.json b/cpp/cmake/thirdparty/patches/nvbench_override.json deleted file mode 100644 index ad9b19c29c1..00000000000 --- a/cpp/cmake/thirdparty/patches/nvbench_override.json +++ /dev/null @@ -1,14 +0,0 @@ - -{ - "packages" : { - "nvbench" : { - "patches" : [ - { - "file" : "${current_json_dir}/nvbench_global_setup.diff", - "issue" : "Fix add support for global setup to initialize RMM in nvbench [https://github.com/NVIDIA/nvbench/pull/123]", - "fixed_in" : "" - } - ] - } - } -} diff --git a/cpp/cmake/thirdparty/patches/revert_pr_211_cccl_2.5.0.diff b/cpp/cmake/thirdparty/patches/revert_pr_211_cccl_2.5.0.diff new file mode 100644 index 00000000000..27ff16744f5 --- /dev/null +++ b/cpp/cmake/thirdparty/patches/revert_pr_211_cccl_2.5.0.diff @@ -0,0 +1,47 @@ +diff --git a/thrust/thrust/system/cuda/detail/internal/copy_device_to_device.h b/thrust/thrust/system/cuda/detail/internal/copy_device_to_device.h +index 046eb83c0..8047c9701 100644 +--- a/thrust/thrust/system/cuda/detail/internal/copy_device_to_device.h ++++ b/thrust/thrust/system/cuda/detail/internal/copy_device_to_device.h +@@ -53,41 +53,15 @@ namespace cuda_cub + + namespace __copy + { +-template +-OutputIt THRUST_RUNTIME_FUNCTION device_to_device( +- execution_policy& policy, InputIt first, InputIt last, OutputIt result, thrust::detail::true_type) +-{ +- typedef typename thrust::iterator_traits::value_type InputTy; +- const auto n = thrust::distance(first, last); +- if (n > 0) +- { +- cudaError status; +- status = trivial_copy_device_to_device( +- policy, +- reinterpret_cast(thrust::raw_pointer_cast(&*result)), +- reinterpret_cast(thrust::raw_pointer_cast(&*first)), +- n); +- cuda_cub::throw_on_error(status, "__copy:: D->D: failed"); +- } +- +- return result + n; +-} + + template + OutputIt THRUST_RUNTIME_FUNCTION device_to_device( 
+- execution_policy& policy, InputIt first, InputIt last, OutputIt result, thrust::detail::false_type) ++ execution_policy& policy, InputIt first, InputIt last, OutputIt result) + { + typedef typename thrust::iterator_traits::value_type InputTy; + return cuda_cub::transform(policy, first, last, result, thrust::identity()); + } + +-template +-OutputIt THRUST_RUNTIME_FUNCTION +-device_to_device(execution_policy& policy, InputIt first, InputIt last, OutputIt result) +-{ +- return device_to_device( +- policy, first, last, result, typename is_indirectly_trivially_relocatable_to::type()); +-} + } // namespace __copy + + } // namespace cuda_cub diff --git a/cpp/cmake/thirdparty/patches/thrust_disable_64bit_dispatching_cccl_2.5.0.diff b/cpp/cmake/thirdparty/patches/thrust_disable_64bit_dispatching_cccl_2.5.0.diff new file mode 100644 index 00000000000..6ae1e1c917b --- /dev/null +++ b/cpp/cmake/thirdparty/patches/thrust_disable_64bit_dispatching_cccl_2.5.0.diff @@ -0,0 +1,25 @@ +diff --git a/thrust/thrust/system/cuda/detail/dispatch.h b/thrust/thrust/system/cuda/detail/dispatch.h +index 2a3cc4e33..8fb337b26 100644 +--- a/thrust/thrust/system/cuda/detail/dispatch.h ++++ b/thrust/thrust/system/cuda/detail/dispatch.h +@@ -44,8 +44,7 @@ + } \ + else \ + { \ +- auto THRUST_PP_CAT2(count, _fixed) = static_cast(count); \ +- status = call arguments; \ ++ throw std::runtime_error("THRUST_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \ + } + + /** +@@ -66,9 +65,7 @@ + } \ + else \ + { \ +- auto THRUST_PP_CAT2(count1, _fixed) = static_cast(count1); \ +- auto THRUST_PP_CAT2(count2, _fixed) = static_cast(count2); \ +- status = call arguments; \ ++ throw std::runtime_error("THRUST_DOUBLE_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \ + } + /** + * Dispatch between 32-bit and 64-bit index based versions of the same algorithm diff --git a/cpp/cmake/thirdparty/patches/thrust_faster_scan_compile_times_cccl_2.5.0.diff 
b/cpp/cmake/thirdparty/patches/thrust_faster_scan_compile_times_cccl_2.5.0.diff new file mode 100644 index 00000000000..fee46046194 --- /dev/null +++ b/cpp/cmake/thirdparty/patches/thrust_faster_scan_compile_times_cccl_2.5.0.diff @@ -0,0 +1,39 @@ +diff --git a/cub/cub/device/dispatch/dispatch_radix_sort.cuh b/cub/cub/device/dispatch/dispatch_radix_sort.cuh +index 0606485bb..dbb99ff13 100644 +--- a/cub/cub/device/dispatch/dispatch_radix_sort.cuh ++++ b/cub/cub/device/dispatch/dispatch_radix_sort.cuh +@@ -1085,7 +1085,7 @@ struct DeviceRadixSortPolicy + }; + + /// SM60 (GP100) +- struct Policy600 : ChainedPolicy<600, Policy600, Policy500> ++ struct Policy600 : ChainedPolicy<600, Policy600, Policy600> + { + enum + { +diff --git a/cub/cub/device/dispatch/dispatch_reduce.cuh b/cub/cub/device/dispatch/dispatch_reduce.cuh +index f39613adb..75bd16ff9 100644 +--- a/cub/cub/device/dispatch/dispatch_reduce.cuh ++++ b/cub/cub/device/dispatch/dispatch_reduce.cuh +@@ -488,7 +488,7 @@ struct DeviceReducePolicy + }; + + /// SM60 +- struct Policy600 : ChainedPolicy<600, Policy600, Policy350> ++ struct Policy600 : ChainedPolicy<600, Policy600, Policy600> + { + static constexpr int threads_per_block = 256; + static constexpr int items_per_thread = 16; +diff --git a/cub/cub/device/dispatch/tuning/tuning_scan.cuh b/cub/cub/device/dispatch/tuning/tuning_scan.cuh +index 419908c4e..6ab0840e1 100644 +--- a/cub/cub/device/dispatch/tuning/tuning_scan.cuh ++++ b/cub/cub/device/dispatch/tuning/tuning_scan.cuh +@@ -339,7 +339,7 @@ struct DeviceScanPolicy + /// SM600 + struct Policy600 + : DefaultTuning +- , ChainedPolicy<600, Policy600, Policy520> ++ , ChainedPolicy<600, Policy600, Policy600> + {}; + + /// SM800 diff --git a/cpp/cmake/thirdparty/patches/thrust_faster_sort_compile_times_cccl_2.5.0.diff b/cpp/cmake/thirdparty/patches/thrust_faster_sort_compile_times_cccl_2.5.0.diff new file mode 100644 index 00000000000..cb0cc55f4d2 --- /dev/null +++ 
b/cpp/cmake/thirdparty/patches/thrust_faster_sort_compile_times_cccl_2.5.0.diff @@ -0,0 +1,39 @@ +diff --git a/cub/cub/block/block_merge_sort.cuh b/cub/cub/block/block_merge_sort.cuh +index eb76ebb0b..c6c529a50 100644 +--- a/cub/cub/block/block_merge_sort.cuh ++++ b/cub/cub/block/block_merge_sort.cuh +@@ -95,7 +95,7 @@ _CCCL_DEVICE _CCCL_FORCEINLINE void SerialMerge( + KeyT key1 = keys_shared[keys1_beg]; + KeyT key2 = keys_shared[keys2_beg]; + +-#pragma unroll ++#pragma unroll 1 + for (int item = 0; item < ITEMS_PER_THREAD; ++item) + { + bool p = (keys2_beg < keys2_end) && ((keys1_beg >= keys1_end) || compare_op(key2, key1)); +@@ -376,7 +376,7 @@ public: + // + KeyT max_key = oob_default; + +-#pragma unroll ++#pragma unroll 1 + for (int item = 1; item < ITEMS_PER_THREAD; ++item) + { + if (ITEMS_PER_THREAD * linear_tid + item < valid_items) +diff --git a/cub/cub/thread/thread_sort.cuh b/cub/cub/thread/thread_sort.cuh +index 7d9e8622f..da5627306 100644 +--- a/cub/cub/thread/thread_sort.cuh ++++ b/cub/cub/thread/thread_sort.cuh +@@ -87,10 +87,10 @@ StableOddEvenSort(KeyT (&keys)[ITEMS_PER_THREAD], ValueT (&items)[ITEMS_PER_THRE + { + constexpr bool KEYS_ONLY = ::cuda::std::is_same::value; + +-#pragma unroll ++#pragma unroll 1 + for (int i = 0; i < ITEMS_PER_THREAD; ++i) + { +-#pragma unroll ++#pragma unroll 1 + for (int j = 1 & i; j < ITEMS_PER_THREAD - 1; j += 2) + { + if (compare_op(keys[j + 1], keys[j])) diff --git a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md index 8188c466312..ff80c2daab8 100644 --- a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md +++ b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md @@ -1,4 +1,4 @@ -# libcudf C++ Developer Guide {#DEVELOPER_GUIDE} +# libcudf C++ Developer Guide This document serves as a guide for contributors to libcudf C++ code. Developers should also refer to these additional files for further documentation of libcudf best practices. 
@@ -84,7 +84,7 @@ prefixed with an underscore. ```c++ template -void algorithm_function(int x, rmm::cuda_stream_view s, rmm::device_memory_resource* mr) +void algorithm_function(int x, rmm::cuda_stream_view s, rmm::device_async_resource_ref mr) { ... } @@ -194,9 +194,10 @@ and produce `unique_ptr`s to owning objects as output. For example, std::unique_ptr sort(table_view const& input); ``` -## rmm::device_memory_resource +## Memory Resources -libcudf allocates all device memory via RMM memory resources (MR). See the +libcudf allocates all device memory via RMM memory resources (MR) or CUDA MRs. Either type +can be passed to libcudf functions via `rmm::device_async_resource_ref` parameters. See the [RMM documentation](https://github.com/rapidsai/rmm/blob/main/README.md) for details. ### Current Device Memory Resource @@ -206,6 +207,27 @@ RMM provides a "default" memory resource for each device that can be accessed an respectively. All memory resource parameters should be defaulted to use the return value of `rmm::mr::get_current_device_resource()`. +### Resource Refs + +Memory resources are passed via resource ref parameters. A resource ref is a memory resource wrapper +that enables consumers to specify properties of resources that they expect. These are defined +in the `cuda::mr` namespace of libcu++, but RMM provides some convenience wrappers in +`rmm/resource_ref.hpp`: + - `rmm::device_resource_ref` accepts a memory resource that provides synchronous allocation + of device-accessible memory. + - `rmm::device_async_resource_ref` accepts a memory resource that provides stream-ordered allocation + of device-accessible memory. + - `rmm::host_resource_ref` accepts a memory resource that provides synchronous allocation of host- + accessible memory. + - `rmm::host_async_resource_ref` accepts a memory resource that provides stream-ordered allocation + of host-accessible memory. 
+ - `rmm::host_device_resource_ref` accepts a memory resource that provides synchronous allocation of + host- and device-accessible memory. + - `rmm::host_async_resource_ref` accepts a memory resource that provides stream-ordered allocation + of host- and device-accessible memory. + +See the libcu++ [docs on `resource_ref`](https://nvidia.github.io/cccl/libcudacxx/extended_api/memory_resource/resource_ref.html) for more information. + ## cudf::column `cudf::column` is a core owning data structure in libcudf. Most libcudf public APIs produce either @@ -519,23 +541,23 @@ how device memory is allocated. ### Output Memory -Any libcudf API that allocates memory that is *returned* to a user must accept a pointer to a -`device_memory_resource` as the last parameter. Inside the API, this memory resource must be used -to allocate any memory for returned objects. It should therefore be passed into functions whose -outputs will be returned. Example: +Any libcudf API that allocates memory that is *returned* to a user must accept a +`rmm::device_async_resource_ref` as the last parameter. Inside the API, this memory resource must +be used to allocate any memory for returned objects. It should therefore be passed into functions +whose outputs will be returned. Example: ```c++ // Returned `column` contains newly allocated memory, // therefore the API must accept a memory resource pointer std::unique_ptr returns_output_memory( - ..., rmm::device_memory_resource * mr = rmm::mr::get_current_device_resource()); + ..., rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); // This API does not allocate any new *output* memory, therefore // a memory resource is unnecessary void does_not_allocate_output_memory(...); ``` -This rule automatically applies to all detail APIs that allocates memory. Any detail API may be +This rule automatically applies to all detail APIs that allocate memory. 
Any detail API may be called by any public API, and therefore could be allocating memory that is returned to the user. To support such uses cases, all detail APIs allocating memory resources should accept an `mr` parameter. Callers are responsible for either passing through a provided `mr` or @@ -549,7 +571,7 @@ obtained from `rmm::mr::get_current_device_resource()` for temporary memory allo ```c++ rmm::device_buffer some_function( - ..., rmm::mr::device_memory_resource mr * = rmm::mr::get_current_device_resource()) { + ..., rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) { rmm::device_buffer returned_buffer(..., mr); // Returned buffer uses the passed in MR ... rmm::device_buffer temporary_buffer(...); // Temporary buffer uses default MR @@ -561,11 +583,11 @@ rmm::device_buffer some_function( ### Memory Management libcudf code generally eschews raw pointers and direct memory allocation. Use RMM classes built to -use `device_memory_resource`s for device memory allocation with automated lifetime management. +use memory resources for device memory allocation with automated lifetime management. #### rmm::device_buffer Allocates a specified number of bytes of untyped, uninitialized device memory using a -`device_memory_resource`. If no resource is explicitly provided, uses +memory resource. If no `rmm::device_async_resource_ref` is explicitly provided, it uses `rmm::mr::get_current_device_resource()`. `rmm::device_buffer` is movable and copyable on a stream. A copy performs a deep copy of the @@ -806,7 +828,7 @@ This iterator returns the validity of the underlying element (`true` or `false`) The proliferation of data types supported by libcudf can result in long compile times. One area where compile time was a problem is in types used to store indices, which can be any integer type. 
-The "Indexalator", or index-normalizing iterator (`include/cudf/detail/indexalator.cuh`), can be +The "indexalator", or index-normalizing iterator (`include/cudf/detail/indexalator.cuh`), can be used for index types (integers) without requiring a type-specific instance. It can be used for any iterator interface for reading an array of integer values of type `int8`, `int16`, `int32`, `int64`, `uint8`, `uint16`, `uint32`, or `uint64`. Reading specific elements always returns a @@ -834,6 +856,41 @@ thrust::lower_bound(rmm::exec_policy(stream), thrust::less()); ``` +### Offset-normalizing iterators + +Like the [indexalator](#index-normalizing-iterators), +the "offsetalator", or offset-normalizing iterator (`include/cudf/detail/offsetalator.cuh`), can be +used for offset column types (`INT32` or `INT64` only) without requiring a type-specific instance. +This is helpful when reading or building [strings columns](#strings-columns). +The normalized type is `int64` which means an `input_offsetsalator` will return `int64` type values +for both `INT32` and `INT64` offsets columns. +Likewise, an `output_offselator` can accept `int64` type values to store into either an +`INT32` or `INT64` output offsets column created appropriately. + +Use the `cudf::detail::offsetalator_factory` to create an appropriate input or output iterator from an offsets column_view. 
+Example input iterator usage: + +```c++ + // convert the sizes to offsets + auto [offsets, char_bytes] = cudf::strings::detail::make_offsets_child_column( + output_sizes.begin(), output_sizes.end(), stream, mr); + auto d_offsets = + cudf::detail::offsetalator_factory::make_input_iterator(offsets->view()); + // use d_offsets to address the output row bytes +``` + +Example output iterator usage: + +```c++ + // create offsets column as either INT32 or INT64 depending on the number of bytes + auto offsets_column = cudf::strings::detail::create_offsets_child_column(total_bytes, + offsets_count, + stream, mr); + auto d_offsets = + cudf::detail::offsetalator_factory::make_output_iterator(offsets_column->mutable_view()); + // write appropriate offset values to d_offsets +``` + ## Namespaces ### External @@ -921,13 +978,14 @@ Use the `CUDF_EXPECTS` macro to enforce runtime conditions necessary for correct Example usage: ```c++ -CUDF_EXPECTS(lhs.type() == rhs.type(), "Column type mismatch"); +CUDF_EXPECTS(cudf::have_same_types(lhs, rhs), "Type mismatch", cudf::data_type_error); ``` The first argument is the conditional expression expected to resolve to `true` under normal -conditions. If the conditional evaluates to `false`, then an error has occurred and an instance of -`cudf::logic_error` is thrown. The second argument to `CUDF_EXPECTS` is a short description of the -error that has occurred and is used for the exception's `what()` message. +conditions. The second argument to `CUDF_EXPECTS` is a short description of the error that has +occurred and is used for the exception's `what()` message. If the conditional evaluates to +`false`, then an error has occurred and an instance of the exception class in the third argument +(or the default, `cudf::logic_error`) is thrown. There are times where a particular code path, if reached, should indicate an error no matter what. For example, often the `default` case of a `switch` statement represents an invalid alternative. 
@@ -1026,6 +1084,12 @@ types such as numeric types and timestamps/durations, adding support for nested Enabling an algorithm differently for different types uses either template specialization or SFINAE, as discussed in [Specializing Type-Dispatched Code Paths](#specializing-type-dispatched-code-paths). +## Comparing Data Types + +When comparing the data types of two columns or scalars, do not directly compare +`a.type() == b.type()`. Nested types such as lists of structs of integers will not be handled +properly if only the top level type is compared. Instead, use the `cudf::have_same_types` function. + # Type Dispatcher libcudf stores data (for columns and scalars) "type erased" in `void*` device memory. This @@ -1212,18 +1276,20 @@ This is related to [Arrow's "Variable-Size List" memory layout](https://arrow.ap Strings are represented as a column with a data device buffer and a child offsets column. The parent column's type is `STRING` and its data holds all the characters across all the strings packed together -but its size represents the number of strings in the column, and its null mask represents the -validity of each string. To summarize, the strings column children are: - -1. A non-nullable column of [`size_type`](#cudfsize_type) elements that indicates the offset to the beginning of each - string in a dense data buffer of all characters. +but its size represents the number of strings in the column and its null mask represents the +validity of each string. -With this representation, `data[offsets[i]]` is the first character of string `i`, and the -size of string `i` is given by `offsets[i+1] - offsets[i]`. The following image shows an example of -this compound column representation of strings. +The strings column contains a single, non-nullable child column +of offset elements that indicates the byte position offset to the beginning of each +string in the dense data buffer of all characters. 
With this representation, `data[offsets[i]]` is the +first character of string `i`, and the size of string `i` is given by `offsets[i+1] - offsets[i]`. +The following image shows an example of this compound column representation of strings. ![strings](strings.png) +The type of the offsets column is either `INT32` or `INT64` depending on the number of bytes in the data buffer. +See [`cudf::strings_view`](#cudfstrings_column_view-and-cudfstring_view) for more information on processing individual string rows. + ## Structs columns A struct is a nested data type with a set of child columns each representing an individual field @@ -1266,7 +1332,7 @@ struct column's layout is as follows. (Note that null masks should be read from } ``` -The last struct row (index 3) is not null, but has a null value in the INT32 field. Also, row 2 of +The last struct row (index 3) is not null, but has a null value in the `INT32` field. Also, row 2 of the struct column is null, making its corresponding fields also null. Therefore, bit 2 is unset in the null masks of both struct fields. @@ -1322,18 +1388,27 @@ libcudf provides view types for nested column types as well as for the data elem ### cudf::strings_column_view and cudf::string_view -`cudf::strings_column_view` is a view of a strings column, like `cudf::column_view` is a view of -any `cudf::column`. `cudf::string_view` is a view of a single string, and therefore -`cudf::string_view` is the data type of a `cudf::column` of type `STRING` just like `int32_t` is the -data type for a `cudf::column` of type [`size_type`](#cudfsize_type). As its name implies, this is a -read-only object instance that points to device memory inside the strings column. It's lifespan is -the same (or less) as the column it views. +A `cudf::strings_column_view` wraps a strings column and contains a parent +`cudf::column_view` as a view of the strings column and an offsets `cudf::column_view` +which is a child of the parent. 
+The parent view contains the offset, size, and validity mask for the strings column. +The offsets view is non-nullable with `offset()==0` and its own size. +Since the offset column type can be either `INT32` or `INT64` it is useful to use the +offset normalizing iterators [offsetalator](#offset-normalizing-iterators) to access individual offset values. + +A `cudf::string_view` is a view of a single string and therefore +is the data type of a `cudf::column` of type `STRING` just like `int32_t` is the +data type for a `cudf::column` of type `INT32`. As its name implies, this is a +read-only object instance that points to device memory inside the strings column. +Its lifespan is the same (or less) as the column it views. +An individual strings column row and a `cudf::string_view` is limited to [`size_type`](#cudfsize_type) bytes. Use the `column_device_view::element` method to access an individual row element. Like any other column, do not call `element()` on a row that is null. ```c++ - cudf::column_device_view d_strings; + cudf::strings_column_view scv; + auto d_strings = cudf::column_device_view::create(scv.parent(), stream); ... if( d_strings.is_valid(row_index) ) { string_view d_str = d_strings.element(row_index); @@ -1341,27 +1416,27 @@ column, do not call `element()` on a row that is null. } ``` -A null string is not the same as an empty string. Use the `string_scalar` class if you need an +A null string is not the same as an empty string. Use the `cudf::string_scalar` class if you need an instance of a class object to represent a null string. -The `string_view` contains comparison operators `<,>,==,<=,>=` that can be used in many cudf -functions like `sort` without string-specific code. The data for a `string_view` instance is +The `cudf::string_view` contains comparison operators `<,>,==,<=,>=` that can be used in many cudf +functions like `sort` without string-specific code. 
The data for a `cudf::string_view` instance is required to be [UTF-8](#utf-8) and all operators and methods expect this encoding. Unless documented otherwise, position and length parameters are specified in characters and not bytes. The class also -includes a `string_view::const_iterator` which can be used to navigate through individual characters +includes a `cudf::string_view::const_iterator` which can be used to navigate through individual characters within the string. -`cudf::type_dispatcher` dispatches to the `string_view` data type when invoked on a `STRING` column. +`cudf::type_dispatcher` dispatches to the `cudf::string_view` data type when invoked on a `STRING` column. #### UTF-8 The libcudf strings column only supports UTF-8 encoding for strings data. [UTF-8](https://en.wikipedia.org/wiki/UTF-8) is a variable-length character encoding wherein each character can be 1-4 bytes. This means the length of a string is not the same as its size in bytes. -For this reason, it is recommended to use the `string_view` class to access these characters for +For this reason, it is recommended to use the `cudf::string_view` class to access these characters for most operations. -The `string_view.cuh` header also includes some utility methods for reading and writing +The `cudf/strings/detail/utf8.hpp` header also includes some utility methods for reading and writing (`to_char_utf8/from_char_utf8`) individual UTF-8 characters to/from byte arrays. ### cudf::lists_column_view and cudf::lists_view @@ -1384,3 +1459,25 @@ cuIO is a component of libcudf that provides GPU-accelerated reading and writing formats commonly used in data analytics, including CSV, Parquet, ORC, Avro, and JSON_Lines. // TODO: add more detail and move to a separate file. + +# Debugging Tips + +Here are some tools that can help with debugging libcudf (besides printf of course): +1. 
`cuda-gdb`\ + Follow the instructions in the [Contributor to cuDF guide](../../../CONTRIBUTING.md#debugging-cudf) to build + and run libcudf with debug symbols. +2. `compute-sanitizer`\ + The [CUDA Compute Sanitizer](https://docs.nvidia.com/compute-sanitizer/ComputeSanitizer/index.html) + tool can be used to locate many CUDA reported errors by providing a call stack + close to where the error occurs even with a non-debug build. The sanitizer includes various + tools including `memcheck`, `racecheck`, and `initcheck` as well as others. + The `racecheck` and `initcheck` have been known to produce false positives. +3. `cudf::test::print()`\ + The `print()` utility can be called within a gtest to output the data in a `cudf::column_view`. + More information is available in the [Testing Guide](TESTING.md#printing-and-accessing-column-data) +4. GCC Address Sanitizer\ + The GCC ASAN can also be used by adding the `-fsanitize=address` compiler flag. + There is a compatibility issue with the CUDA runtime that can be worked around by setting + environment variable `ASAN_OPTIONS=protect_shadow_gap=0` before running the executable. + Note that the CUDA `compute-sanitizer` can also be used with GCC ASAN by setting the + environment variable `ASAN_OPTIONS=protect_shadow_gap=0,alloc_dealloc_mismatch=0`. diff --git a/cpp/doxygen/developer_guide/TESTING.md b/cpp/doxygen/developer_guide/TESTING.md index a4ffe0f575b..9c86be5a55d 100644 --- a/cpp/doxygen/developer_guide/TESTING.md +++ b/cpp/doxygen/developer_guide/TESTING.md @@ -455,10 +455,19 @@ Column comparison functions in the `cudf::test::detail` namespace should **NOT** ### Printing and accessing column data -`include/cudf_test/column_utilities.hpp` defines various functions and overloads for printing +The `` header defines various functions and overloads for printing columns (`print`), converting column data to string (`to_string`, `to_strings`), and copying data to -the host (`to_host`). - +the host (`to_host`). 
For example, to print a `cudf::column_view` contents or `column_wrapper` instance +to the console use the `cudf::test::print()`: +```cpp + cudf::test::fixed_width_column_wrapper input({1,2,3,4}); + auto splits = cudf::split(input,{2}); + cudf::test::print(input); + cudf::test::print(splits.front()); +``` +Fixed-width and strings columns output as comma-separated entries including null rows. +Nested columns are also supported and output includes the offsets and data children as well as +the null mask bits. ## Validating Stream Usage diff --git a/cpp/examples/basic/CMakeLists.txt b/cpp/examples/basic/CMakeLists.txt index 759a43b5627..a3fe699667a 100644 --- a/cpp/examples/basic/CMakeLists.txt +++ b/cpp/examples/basic/CMakeLists.txt @@ -1,7 +1,13 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. cmake_minimum_required(VERSION 3.26.4) +include(../set_cuda_architecture.cmake) + +# initialize cuda architecture +rapids_cuda_init_architectures(basic_example) +rapids_cuda_set_architectures(RAPIDS) + project( basic_example VERSION 0.0.1 @@ -14,3 +20,6 @@ include(../fetch_dependencies.cmake) add_executable(basic_example src/process_csv.cpp) target_link_libraries(basic_example PRIVATE cudf::cudf) target_compile_features(basic_example PRIVATE cxx_std_17) + +install(TARGETS basic_example DESTINATION bin/examples/libcudf) +install(FILES ${CMAKE_CURRENT_LIST_DIR}/4stock_5day.csv DESTINATION bin/examples/libcudf) diff --git a/cpp/examples/build.sh b/cpp/examples/build.sh index 001cdeec694..bde6ef7d69c 100755 --- a/cpp/examples/build.sh +++ b/cpp/examples/build.sh @@ -1,14 +1,41 @@ #!/bin/bash -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. 
# libcudf examples build script +set -euo pipefail + # Parallelism control PARALLEL_LEVEL=${PARALLEL_LEVEL:-4} +# Installation disabled by default +INSTALL_EXAMPLES=false + +# Check for -i or --install flags to enable installation +ARGS=$(getopt -o i --long install -- "$@") +eval set -- "$ARGS" +while [ : ]; do + case "$1" in + -i | --install) + INSTALL_EXAMPLES=true + shift + ;; + --) shift; + break + ;; + esac +done # Root of examples EXAMPLES_DIR=$(dirname "$(realpath "$0")") + +# Set up default libcudf build directory and install prefix if conda build +if [ "${CONDA_BUILD:-"0"}" == "1" ]; then + LIB_BUILD_DIR="${LIB_BUILD_DIR:-${SRC_DIR/cpp/build}}" + INSTALL_PREFIX="${INSTALL_PREFIX:-${PREFIX}}" +fi + +# libcudf build directory LIB_BUILD_DIR=${LIB_BUILD_DIR:-$(readlink -f "${EXAMPLES_DIR}/../build")} ################################################################################ @@ -23,8 +50,13 @@ build_example() { cmake -S ${example_dir} -B ${build_dir} -Dcudf_ROOT="${LIB_BUILD_DIR}" # Build cmake --build ${build_dir} -j${PARALLEL_LEVEL} + # Install if needed + if [ "$INSTALL_EXAMPLES" = true ]; then + cmake --install ${build_dir} --prefix ${INSTALL_PREFIX:-${example_dir}/install} + fi } build_example basic build_example strings build_example nested_types +build_example parquet_io diff --git a/cpp/examples/fetch_dependencies.cmake b/cpp/examples/fetch_dependencies.cmake index a03f84ae142..851405caf55 100644 --- a/cpp/examples/fetch_dependencies.cmake +++ b/cpp/examples/fetch_dependencies.cmake @@ -11,7 +11,10 @@ # or implied. See the License for the specific language governing permissions and limitations under # the License. 
# ============================================================================= -set(CPM_DOWNLOAD_VERSION v0.35.3) + +include(${CMAKE_CURRENT_LIST_DIR}/versions.cmake) + +set(CPM_DOWNLOAD_VERSION v0.38.5) file( DOWNLOAD https://github.com/cpm-cmake/CPM.cmake/releases/download/${CPM_DOWNLOAD_VERSION}/get_cpm.cmake @@ -19,9 +22,11 @@ file( ) include(${CMAKE_BINARY_DIR}/cmake/get_cpm.cmake) -set(CUDF_TAG branch-24.04) +# find or build it via CPM CPMFindPackage( - NAME cudf GIT_REPOSITORY https://github.com/rapidsai/cudf + NAME cudf + FIND_PACKAGE_ARGUMENTS "PATHS ${cudf_ROOT} ${cudf_ROOT}/latest" GIT_REPOSITORY + https://github.com/rapidsai/cudf GIT_TAG ${CUDF_TAG} GIT_SHALLOW TRUE diff --git a/cpp/examples/nested_types/CMakeLists.txt b/cpp/examples/nested_types/CMakeLists.txt index cb9430db237..8a900f6b5ae 100644 --- a/cpp/examples/nested_types/CMakeLists.txt +++ b/cpp/examples/nested_types/CMakeLists.txt @@ -1,7 +1,13 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. cmake_minimum_required(VERSION 3.26.4) +include(../set_cuda_architecture.cmake) + +# initialize cuda architecture +rapids_cuda_init_architectures(nested_types) +rapids_cuda_set_architectures(RAPIDS) + project( nested_types VERSION 0.0.1 @@ -14,3 +20,6 @@ include(../fetch_dependencies.cmake) add_executable(deduplication deduplication.cpp) target_link_libraries(deduplication PRIVATE cudf::cudf) target_compile_features(deduplication PRIVATE cxx_std_17) + +install(TARGETS deduplication DESTINATION bin/examples/libcudf) +install(FILES ${CMAKE_CURRENT_LIST_DIR}/example.json DESTINATION bin/examples/libcudf) diff --git a/cpp/examples/parquet_io/CMakeLists.txt b/cpp/examples/parquet_io/CMakeLists.txt new file mode 100644 index 00000000000..d8e9205ffd4 --- /dev/null +++ b/cpp/examples/parquet_io/CMakeLists.txt @@ -0,0 +1,25 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +cmake_minimum_required(VERSION 3.26.4) + +include(../set_cuda_architecture.cmake) + +# initialize cuda architecture +rapids_cuda_init_architectures(parquet_io) +rapids_cuda_set_architectures(RAPIDS) + +project( + parquet_io + VERSION 0.0.1 + LANGUAGES CXX CUDA +) + +include(../fetch_dependencies.cmake) + +# Configure your project here +add_executable(parquet_io parquet_io.cpp) +target_link_libraries(parquet_io PRIVATE cudf::cudf) +target_compile_features(parquet_io PRIVATE cxx_std_17) + +install(TARGETS parquet_io DESTINATION bin/examples/libcudf) +install(FILES ${CMAKE_CURRENT_LIST_DIR}/example.parquet DESTINATION bin/examples/libcudf) diff --git a/cpp/examples/parquet_io/example.parquet b/cpp/examples/parquet_io/example.parquet new file mode 100644 index 00000000000..f0fb5319cb0 Binary files /dev/null and b/cpp/examples/parquet_io/example.parquet differ diff --git a/cpp/examples/parquet_io/parquet_io.cpp b/cpp/examples/parquet_io/parquet_io.cpp new file mode 100644 index 00000000000..8be17db3781 --- /dev/null +++ b/cpp/examples/parquet_io/parquet_io.cpp @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "parquet_io.hpp" + +/** + * @file parquet_io.cpp + * @brief Demonstrates usage of the libcudf APIs to read and write + * parquet file format with different encodings and compression types + * + * The following encoding and compression ztypes are demonstrated: + * Encoding Types: DEFAULT, DICTIONARY, PLAIN, DELTA_BINARY_PACKED, + * DELTA_LENGTH_BYTE_ARRAY, DELTA_BYTE_ARRAY + * + * Compression Types: NONE, AUTO, SNAPPY, LZ4, ZSTD + * + */ + +/** + * @brief Read parquet input from file + * + * @param filepath path to input parquet file + * @return cudf::io::table_with_metadata + */ +cudf::io::table_with_metadata read_parquet(std::string filepath) +{ + auto source_info = cudf::io::source_info(filepath); + auto builder = cudf::io::parquet_reader_options::builder(source_info); + auto options = builder.build(); + return cudf::io::read_parquet(options); +} + +/** + * @brief Write parquet output to file + * + * @param input table to write + * @param metadata metadata of input table read by parquet reader + * @param filepath path to output parquet file + * @param stats_level optional page size stats level + */ +void write_parquet(cudf::table_view input, + cudf::io::table_metadata metadata, + std::string filepath, + cudf::io::column_encoding encoding, + cudf::io::compression_type compression, + std::optional stats_level) +{ + // write the data for inspection + auto sink_info = cudf::io::sink_info(filepath); + auto builder = cudf::io::parquet_writer_options::builder(sink_info, input); + auto table_metadata = cudf::io::table_input_metadata{metadata}; + + std::for_each(table_metadata.column_metadata.begin(), + table_metadata.column_metadata.end(), + [=](auto& col_meta) { col_meta.set_encoding(encoding); }); + + builder.metadata(table_metadata); + auto options = builder.build(); + options.set_compression(compression); + // Either use the input stats level or don't write stats + 
options.set_stats_level(stats_level.value_or(cudf::io::statistics_freq::STATISTICS_NONE)); + + // write parquet data + cudf::io::write_parquet(options); +} + +/** + * @brief Main for nested_types examples + * + * Command line parameters: + * 1. parquet input file name/path (default: "example.parquet") + * 2. parquet output file name/path (default: "output.parquet") + * 3. encoding type for columns (default: "DELTA_BINARY_PACKED") + * 4. compression type (default: "ZSTD") + * 5. optional: use page size stats metadata (default: "NO") + * + * Example invocation from directory `cudf/cpp/examples/parquet_io`: + * ./build/parquet_io example.parquet output.parquet DELTA_BINARY_PACKED ZSTD + * + */ +int main(int argc, char const** argv) +{ + std::string input_filepath; + std::string output_filepath; + cudf::io::column_encoding encoding; + cudf::io::compression_type compression; + std::optional page_stats; + + switch (argc) { + case 1: + input_filepath = "example.parquet"; + output_filepath = "output.parquet"; + encoding = get_encoding_type("DELTA_BINARY_PACKED"); + compression = get_compression_type("ZSTD"); + break; + case 6: page_stats = get_page_size_stats(argv[5]); [[fallthrough]]; + case 5: + input_filepath = argv[1]; + output_filepath = argv[2]; + encoding = get_encoding_type(argv[3]); + compression = get_compression_type(argv[4]); + break; + default: + throw std::runtime_error( + "Either provide all command-line arguments, or none to use defaults\n"); + } + + // Create and use a memory pool + bool is_pool_used = true; + auto resource = create_memory_resource(is_pool_used); + rmm::mr::set_current_device_resource(resource.get()); + + // Read input parquet file + // We do not want to time the initial read time as it may include + // time for nvcomp, cufile loading and RMM growth + std::cout << std::endl << "Reading " << input_filepath << "..." 
<< std::endl; + std::cout << "Note: Not timing the initial parquet read as it may include\n" + "times for nvcomp, cufile loading and RMM growth." + << std::endl + << std::endl; + auto [input, metadata] = read_parquet(input_filepath); + + // Status string to indicate if page stats are set to be written or not + auto page_stat_string = (page_stats.has_value()) ? "page stats" : "no page stats"; + // Write parquet file with the specified encoding and compression + std::cout << "Writing " << output_filepath << " with encoding, compression and " + << page_stat_string << ".." << std::endl; + + // `timer` is automatically started here + Timer timer; + write_parquet(input->view(), metadata, output_filepath, encoding, compression, page_stats); + timer.print_elapsed_millis(); + + // Read the parquet file written with encoding and compression + std::cout << "Reading " << output_filepath << "..." << std::endl; + + // Reset the timer + timer.reset(); + auto [transcoded_input, transcoded_metadata] = read_parquet(output_filepath); + timer.print_elapsed_millis(); + + // Check for validity + try { + // Left anti-join the original and transcoded tables + // identical tables should not throw an exception and + // return an empty indices vector + auto const indices = cudf::left_anti_join( + input->view(), transcoded_input->view(), cudf::null_equality::EQUAL, resource.get()); + + // No exception thrown, check indices + auto const valid = indices->size() == 0; + std::cout << "Transcoding valid: " << std::boolalpha << valid << std::endl; + } catch (std::exception& e) { + std::cerr << e.what() << std::endl << std::endl; + std::cout << "Transcoding valid: false" << std::endl; + } + + return 0; +} diff --git a/cpp/examples/parquet_io/parquet_io.hpp b/cpp/examples/parquet_io/parquet_io.hpp new file mode 100644 index 00000000000..d2fc359a2fe --- /dev/null +++ b/cpp/examples/parquet_io/parquet_io.hpp @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/** + * @brief Create memory resource for libcudf functions + * + * @param pool Whether to use a pool memory resource. + * @return Memory resource instance + */ +std::shared_ptr create_memory_resource(bool is_pool_used) +{ + auto cuda_mr = std::make_shared(); + if (is_pool_used) { + return rmm::mr::make_owning_wrapper( + cuda_mr, rmm::percent_of_free_device_memory(50)); + } + return cuda_mr; +} + +/** + * @brief Get encoding type from the keyword + * + * @param name encoding keyword name + * @return corresponding column encoding type + */ +[[nodiscard]] cudf::io::column_encoding get_encoding_type(std::string name) +{ + using encoding_type = cudf::io::column_encoding; + + static const std::unordered_map map = { + {"DEFAULT", encoding_type::USE_DEFAULT}, + {"DICTIONARY", encoding_type::DICTIONARY}, + {"PLAIN", encoding_type::PLAIN}, + {"DELTA_BINARY_PACKED", encoding_type::DELTA_BINARY_PACKED}, + {"DELTA_LENGTH_BYTE_ARRAY", encoding_type::DELTA_LENGTH_BYTE_ARRAY}, + {"DELTA_BYTE_ARRAY", encoding_type::DELTA_BYTE_ARRAY}, + }; + + std::transform(name.begin(), name.end(), name.begin(), ::toupper); + if (map.find(name) != map.end()) { return map.at(name); } + throw std::invalid_argument("FATAL: " + std::string(name) + + " is not a valid encoding type.\n\n" + 
"Available encoding types: DEFAULT, DICTIONARY, PLAIN,\n" + "DELTA_BINARY_PACKED, DELTA_LENGTH_BYTE_ARRAY,\n" + "DELTA_BYTE_ARRAY\n" + "\n" + "Exiting...\n"); +} + +/** + * @brief Get compression type from the keyword + * + * @param name compression keyword name + * @return corresponding compression type + */ +[[nodiscard]] cudf::io::compression_type get_compression_type(std::string name) +{ + using compression_type = cudf::io::compression_type; + + static const std::unordered_map map = { + {"NONE", compression_type::NONE}, + {"AUTO", compression_type::AUTO}, + {"SNAPPY", compression_type::SNAPPY}, + {"LZ4", compression_type::LZ4}, + {"ZSTD", compression_type::ZSTD}}; + + std::transform(name.begin(), name.end(), name.begin(), ::toupper); + if (map.find(name) != map.end()) { return map.at(name); } + throw std::invalid_argument("FATAL: " + std::string(name) + + " is not a valid compression type.\n\n" + "Available compression_type types: NONE, AUTO, SNAPPY,\n" + "LZ4, ZSTD\n" + "\n" + "Exiting...\n"); +} + +/** + * @brief Get the optional page size stat frequency from they keyword + * + * @param use_stats keyword affirmation string such as: Y, T, YES, TRUE, ON + * @return optional page statistics frequency set to full (STATISTICS_COLUMN) + */ +[[nodiscard]] std::optional get_page_size_stats(std::string use_stats) +{ + std::transform(use_stats.begin(), use_stats.end(), use_stats.begin(), ::toupper); + + // Check if the input string matches to any of the following + if (not use_stats.compare("ON") or not use_stats.compare("TRUE") or + not use_stats.compare("YES") or not use_stats.compare("Y") or not use_stats.compare("T")) { + // Full column and offset indices - STATISTICS_COLUMN + return std::make_optional(cudf::io::statistics_freq::STATISTICS_COLUMN); + } + + return std::nullopt; +} + +/** + * @brief Light-weight timer for parquet reader and writer instrumentation + * + * Timer object constructed from std::chrono, instrumenting at microseconds + * precision. 
Can display elapsed durations at milli and micro second + * scales. Timer starts at object construction. + */ +class Timer { + public: + using micros = std::chrono::microseconds; + using millis = std::chrono::milliseconds; + + Timer() { reset(); } + void reset() { start_time = std::chrono::high_resolution_clock::now(); } + auto elapsed() { return (std::chrono::high_resolution_clock::now() - start_time); } + void print_elapsed_micros() + { + std::cout << "Elapsed Time: " << std::chrono::duration_cast(elapsed()).count() + << "us\n\n"; + } + void print_elapsed_millis() + { + std::cout << "Elapsed Time: " << std::chrono::duration_cast(elapsed()).count() + << "ms\n\n"; + } + + private: + using time_point_t = std::chrono::time_point; + time_point_t start_time; +}; diff --git a/cpp/examples/set_cuda_architecture.cmake b/cpp/examples/set_cuda_architecture.cmake new file mode 100644 index 00000000000..bed6cd2f357 --- /dev/null +++ b/cpp/examples/set_cuda_architecture.cmake @@ -0,0 +1,28 @@ +# ============================================================================= +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. 
+# ============================================================================= + +include(${CMAKE_CURRENT_LIST_DIR}/versions.cmake) + +if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/libcudf_cpp_examples_RAPIDS.cmake) + file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/${CUDF_TAG}/RAPIDS.cmake + ${CMAKE_CURRENT_BINARY_DIR}/libcudf_cpp_examples_RAPIDS.cmake + ) +endif() +include(${CMAKE_CURRENT_BINARY_DIR}/libcudf_cpp_examples_RAPIDS.cmake) + +include(rapids-cmake) +include(rapids-cpm) +include(rapids-cuda) +include(rapids-export) +include(rapids-find) diff --git a/cpp/examples/strings/CMakeLists.txt b/cpp/examples/strings/CMakeLists.txt index c90fa9dde16..a5654870544 100644 --- a/cpp/examples/strings/CMakeLists.txt +++ b/cpp/examples/strings/CMakeLists.txt @@ -1,7 +1,13 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. cmake_minimum_required(VERSION 3.26.4) +include(../set_cuda_architecture.cmake) + +# initialize cuda architecture +rapids_cuda_init_architectures(strings_examples) +rapids_cuda_set_architectures(RAPIDS) + project( strings_examples VERSION 0.0.1 @@ -12,22 +18,27 @@ include(../fetch_dependencies.cmake) list(APPEND CUDF_CUDA_FLAGS --expt-extended-lambda --expt-relaxed-constexpr) -# add_executable(libcudf_apis libcudf_apis.cpp) target_compile_features(libcudf_apis PRIVATE cxx_std_17) target_link_libraries(libcudf_apis PRIVATE cudf::cudf nvToolsExt) +install(TARGETS libcudf_apis DESTINATION bin/examples/libcudf) add_executable(custom_with_malloc custom_with_malloc.cu) target_compile_features(custom_with_malloc PRIVATE cxx_std_17) target_compile_options(custom_with_malloc PRIVATE "$<$:${CUDF_CUDA_FLAGS}>") target_link_libraries(custom_with_malloc PRIVATE cudf::cudf nvToolsExt) +install(TARGETS custom_with_malloc DESTINATION bin/examples/libcudf) add_executable(custom_prealloc custom_prealloc.cu) target_compile_features(custom_prealloc PRIVATE cxx_std_17) target_compile_options(custom_prealloc 
PRIVATE "$<$:${CUDF_CUDA_FLAGS}>") target_link_libraries(custom_prealloc PRIVATE cudf::cudf nvToolsExt) +install(TARGETS custom_prealloc DESTINATION bin/examples/libcudf) add_executable(custom_optimized custom_optimized.cu) target_compile_features(custom_optimized PRIVATE cxx_std_17) target_compile_options(custom_optimized PRIVATE "$<$:${CUDF_CUDA_FLAGS}>") target_link_libraries(custom_optimized PRIVATE cudf::cudf nvToolsExt) +install(TARGETS custom_optimized DESTINATION bin/examples/libcudf) + +install(FILES ${CMAKE_CURRENT_LIST_DIR}/names.csv DESTINATION bin/examples/libcudf) diff --git a/cpp/examples/strings/common.hpp b/cpp/examples/strings/common.hpp index 0dbe6fe2b7b..65a9c100c7c 100644 --- a/cpp/examples/strings/common.hpp +++ b/cpp/examples/strings/common.hpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -110,7 +111,8 @@ int main(int argc, char const** argv) std::chrono::duration elapsed = std::chrono::steady_clock::now() - st; std::cout << "Wall time: " << elapsed.count() << " seconds\n"; - std::cout << "Output size " << result->view().child(1).size() << " bytes\n"; + auto const scv = cudf::strings_column_view(result->view()); + std::cout << "Output size " << scv.chars_size(rmm::cuda_stream_default) << " bytes\n"; return 0; } diff --git a/cpp/examples/strings/custom_optimized.cu b/cpp/examples/strings/custom_optimized.cu index cefa3346150..62ca19a5ca9 100644 --- a/cpp/examples/strings/custom_optimized.cu +++ b/cpp/examples/strings/custom_optimized.cu @@ -153,8 +153,12 @@ std::unique_ptr redact_strings(cudf::column_view const& names, redact_kernel<<>>( *d_names, *d_visibilities, offsets.data(), chars.data()); - // create column from offsets and chars vectors (no copy is performed) - auto result = cudf::make_strings_column(names.size(), std::move(offsets), chars.release(), {}, 0); + // create column from offsets vector (move only) + auto offsets_column = std::make_unique(std::move(offsets), rmm::device_buffer{}, 0); + + // 
create column for chars vector (no copy is performed) + auto result = cudf::make_strings_column( + names.size(), std::move(offsets_column), chars.release(), 0, rmm::device_buffer{}); // wait for all of the above to finish stream.synchronize(); diff --git a/cpp/examples/versions.cmake b/cpp/examples/versions.cmake new file mode 100644 index 00000000000..dff66b4d7d8 --- /dev/null +++ b/cpp/examples/versions.cmake @@ -0,0 +1,15 @@ +# ============================================================================= +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +set(CUDF_TAG branch-24.06) diff --git a/cpp/include/cudf/ast/detail/expression_parser.hpp b/cpp/include/cudf/ast/detail/expression_parser.hpp index a36a831a7aa..38f7ac5291f 100644 --- a/cpp/include/cudf/ast/detail/expression_parser.hpp +++ b/cpp/include/cudf/ast/detail/expression_parser.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -21,6 +21,8 @@ #include #include +#include + #include #include @@ -118,7 +120,7 @@ class expression_parser { std::optional> right, bool has_nulls, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) : _left{left}, _right{right}, _expression_count{0}, @@ -139,7 +141,7 @@ class expression_parser { cudf::table_view const& table, bool has_nulls, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) : expression_parser(expr, table, {}, has_nulls, stream, mr) { } @@ -240,7 +242,7 @@ class expression_parser { data_pointers.push_back(v.data()); } - void move_to_device(rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) + void move_to_device(rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { std::vector sizes; std::vector data_pointers; diff --git a/cpp/include/cudf/binaryop.hpp b/cpp/include/cudf/binaryop.hpp index 9df4b4eb00f..5e41a871f32 100644 --- a/cpp/include/cudf/binaryop.hpp +++ b/cpp/include/cudf/binaryop.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -20,6 +20,7 @@ #include #include +#include #include @@ -76,6 +77,8 @@ enum class binary_operator : int32_t { GREATER_EQUAL, ///< operator >= NULL_EQUALS, ///< Returns true when both operands are null; false when one is null; the ///< result of equality when both are non-null + NULL_NOT_EQUALS, ///< Returns false when both operands are null; true when one is null; the + ///< result of inequality when both are non-null NULL_MAX, ///< Returns max of operands when both are non-null; returns the non-null ///< operand when one is null; or invalid when both are null NULL_MIN, ///< Returns min of operands when both are non-null; returns the non-null @@ -116,8 +119,8 @@ std::unique_ptr binary_operation( column_view const& rhs, binary_operator op, data_type output_type, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Performs a binary operation between a column and a scalar. @@ -147,8 +150,8 @@ std::unique_ptr binary_operation( scalar const& rhs, binary_operator op, data_type output_type, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Performs a binary operation between two columns. 
@@ -177,8 +180,8 @@ std::unique_ptr binary_operation( column_view const& rhs, binary_operator op, data_type output_type, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Performs a binary operation between two columns using a @@ -208,8 +211,8 @@ std::unique_ptr binary_operation( column_view const& rhs, std::string const& ptx, data_type output_type, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Computes the `scale` for a `fixed_point` number based on given binary operator `op` @@ -249,8 +252,8 @@ namespace binops { std::pair scalar_col_valid_mask_and( column_view const& col, scalar const& s, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); namespace compiled { namespace detail { diff --git a/cpp/include/cudf/column/column.hpp b/cpp/include/cudf/column/column.hpp index 023e58c5300..22db25bdc83 100644 --- a/cpp/include/cudf/column/column.hpp +++ b/cpp/include/cudf/column/column.hpp @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -63,8 +64,8 @@ class column { * @param mr Device memory resource to use for all device memory allocations */ column(column const& other, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view 
stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Move the contents from `other` to create a new column. @@ -141,8 +142,8 @@ class column { * @param mr Device memory resource to use for all device memory allocations */ explicit column(column_view view, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns the column's logical element type diff --git a/cpp/include/cudf/column/column_factories.hpp b/cpp/include/cudf/column/column_factories.hpp index 96322159f0f..dc4700576e6 100644 --- a/cpp/include/cudf/column/column_factories.hpp +++ b/cpp/include/cudf/column/column_factories.hpp @@ -23,6 +23,7 @@ #include #include +#include #include @@ -75,9 +76,9 @@ std::unique_ptr make_empty_column(type_id id); std::unique_ptr make_numeric_column( data_type type, size_type size, - mask_state state = mask_state::UNALLOCATED, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + mask_state state = mask_state::UNALLOCATED, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct column with sufficient uninitialized storage to hold `size` elements of the @@ -102,8 +103,8 @@ std::unique_ptr make_numeric_column( size_type size, B&& null_mask, size_type null_count, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) { CUDF_EXPECTS(is_numeric(type), 
"Invalid, non-numeric type."); return std::make_unique(type, @@ -133,9 +134,9 @@ std::unique_ptr make_numeric_column( std::unique_ptr make_fixed_point_column( data_type type, size_type size, - mask_state state = mask_state::UNALLOCATED, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + mask_state state = mask_state::UNALLOCATED, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct column with sufficient uninitialized storage to hold `size` elements of the @@ -159,8 +160,8 @@ std::unique_ptr make_fixed_point_column( size_type size, B&& null_mask, size_type null_count, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) { CUDF_EXPECTS(is_fixed_point(type), "Invalid, non-fixed_point type."); return std::make_unique(type, @@ -191,9 +192,9 @@ std::unique_ptr make_fixed_point_column( std::unique_ptr make_timestamp_column( data_type type, size_type size, - mask_state state = mask_state::UNALLOCATED, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + mask_state state = mask_state::UNALLOCATED, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct column with sufficient uninitialized storage to hold `size` elements of the @@ -218,8 +219,8 @@ std::unique_ptr make_timestamp_column( size_type size, B&& null_mask, size_type null_count, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = 
rmm::mr::get_current_device_resource()) + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) { CUDF_EXPECTS(is_timestamp(type), "Invalid, non-timestamp type."); return std::make_unique(type, @@ -250,9 +251,9 @@ std::unique_ptr make_timestamp_column( std::unique_ptr make_duration_column( data_type type, size_type size, - mask_state state = mask_state::UNALLOCATED, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + mask_state state = mask_state::UNALLOCATED, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct column with sufficient uninitialized storage to hold `size` elements of the @@ -277,8 +278,8 @@ std::unique_ptr make_duration_column( size_type size, B&& null_mask, size_type null_count, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) { CUDF_EXPECTS(is_duration(type), "Invalid, non-duration type."); return std::make_unique(type, @@ -309,9 +310,9 @@ std::unique_ptr make_duration_column( std::unique_ptr make_fixed_width_column( data_type type, size_type size, - mask_state state = mask_state::UNALLOCATED, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + mask_state state = mask_state::UNALLOCATED, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct column with sufficient uninitialized storage to hold `size` elements of the @@ -336,8 +337,8 @@ 
std::unique_ptr make_fixed_width_column( size_type size, B&& null_mask, size_type null_count, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) { CUDF_EXPECTS(is_fixed_width(type), "Invalid, non-fixed-width type."); if (is_timestamp(type)) { @@ -375,8 +376,8 @@ std::unique_ptr make_fixed_width_column( */ std::unique_ptr make_strings_column( cudf::device_span const> strings, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct a STRING type column given a device span of string_view. @@ -407,8 +408,8 @@ std::unique_ptr make_strings_column( std::unique_ptr make_strings_column( cudf::device_span string_views, string_view const null_placeholder, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct a STRING type column given offsets column, chars columns, and null mask and null @@ -495,8 +496,8 @@ std::unique_ptr make_lists_column( std::unique_ptr child_column, size_type null_count, rmm::device_buffer&& null_mask, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct a STRUCT column using specified child 
columns as members. @@ -526,8 +527,8 @@ std::unique_ptr make_structs_column( std::vector>&& child_columns, size_type null_count, rmm::device_buffer&& null_mask, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct a column with size elements that are all equal to the given scalar. @@ -546,8 +547,8 @@ std::unique_ptr make_structs_column( std::unique_ptr make_column_from_scalar( scalar const& s, size_type size, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct a dictionary column with size elements that are all equal to the given scalar. @@ -566,8 +567,8 @@ std::unique_ptr make_column_from_scalar( std::unique_ptr make_dictionary_from_scalar( scalar const& s, size_type size, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace cudf diff --git a/cpp/include/cudf/concatenate.hpp b/cpp/include/cudf/concatenate.hpp index 9ee55275a5e..e7b55a2e6d0 100644 --- a/cpp/include/cudf/concatenate.hpp +++ b/cpp/include/cudf/concatenate.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -21,6 +21,7 @@ #include #include +#include #include @@ -46,8 +47,8 @@ namespace cudf { */ rmm::device_buffer concatenate_masks( host_span views, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Concatenates multiple columns into a single column @@ -63,8 +64,8 @@ rmm::device_buffer concatenate_masks( */ std::unique_ptr concatenate( host_span columns_to_concat, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Columns of `tables_to_concat` are concatenated vertically to return a @@ -92,8 +93,8 @@ std::unique_ptr concatenate( */ std::unique_ptr
concatenate( host_span tables_to_concat, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace cudf diff --git a/cpp/include/cudf/contiguous_split.hpp b/cpp/include/cudf/contiguous_split.hpp index 1bbbf73bd5d..0d4f20d1ef2 100644 --- a/cpp/include/cudf/contiguous_split.hpp +++ b/cpp/include/cudf/contiguous_split.hpp @@ -19,6 +19,8 @@ #include #include +#include + #include #include @@ -119,7 +121,7 @@ struct packed_table { std::vector contiguous_split( cudf::table_view const& input, std::vector const& splits, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); namespace detail { struct contiguous_split_state; @@ -196,7 +198,7 @@ class chunked_pack { explicit chunked_pack( cudf::table_view const& input, std::size_t user_buffer_size, - rmm::mr::device_memory_resource* temp_mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref temp_mr = rmm::mr::get_current_device_resource()); /** * @brief Destructor that will be implemented as default. 
Declared with definition here because @@ -261,7 +263,7 @@ class chunked_pack { [[nodiscard]] static std::unique_ptr create( cudf::table_view const& input, std::size_t user_buffer_size, - rmm::mr::device_memory_resource* temp_mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref temp_mr = rmm::mr::get_current_device_resource()); private: // internal state of contiguous split @@ -281,7 +283,7 @@ class chunked_pack { * and device memory respectively */ packed_columns pack(cudf::table_view const& input, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Produce the metadata used for packing a table stored in a contiguous buffer. diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp index b2cde82fada..b17cafb05ab 100644 --- a/cpp/include/cudf/copying.hpp +++ b/cpp/include/cudf/copying.hpp @@ -25,6 +25,7 @@ #include #include +#include #include #include @@ -84,9 +85,9 @@ enum class out_of_bounds_policy : bool { std::unique_ptr
gather( table_view const& source_table, column_view const& gather_map, - out_of_bounds_policy bounds_policy = out_of_bounds_policy::DONT_CHECK, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + out_of_bounds_policy bounds_policy = out_of_bounds_policy::DONT_CHECK, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Reverses the rows within a table. @@ -105,8 +106,8 @@ std::unique_ptr
gather( */ std::unique_ptr
reverse( table_view const& source_table, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Reverses the elements of a column @@ -125,8 +126,8 @@ std::unique_ptr
reverse( */ std::unique_ptr reverse( column_view const& source_column, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Scatters the rows of the source table into a copy of the target table @@ -174,8 +175,8 @@ std::unique_ptr
scatter( table_view const& source, column_view const& scatter_map, table_view const& target, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Scatters a row of scalar values into a copy of the target table @@ -217,8 +218,8 @@ std::unique_ptr
scatter( std::vector> const& source, column_view const& indices, table_view const& target, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Indicates when to allocate a mask, based on an existing mask. @@ -253,6 +254,8 @@ std::unique_ptr empty_like(scalar const& input); * If the `mask_alloc` allocates a validity mask that mask is also uninitialized * and the validity bits and the null count should be set by the caller. * + * @throws cudf::data_type_error if input type is not of fixed width. + * * @param input Immutable view of input column to emulate * @param mask_alloc Optional, Policy for allocating null mask. Defaults to RETAIN * @param mr Device memory resource used to allocate the returned column's device memory @@ -262,9 +265,9 @@ std::unique_ptr empty_like(scalar const& input); */ std::unique_ptr allocate_like( column_view const& input, - mask_allocation_policy mask_alloc = mask_allocation_policy::RETAIN, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + mask_allocation_policy mask_alloc = mask_allocation_policy::RETAIN, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Creates an uninitialized new column of the specified size and same type as the `input`. 
@@ -285,9 +288,9 @@ std::unique_ptr allocate_like( std::unique_ptr allocate_like( column_view const& input, size_type size, - mask_allocation_policy mask_alloc = mask_allocation_policy::RETAIN, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + mask_allocation_policy mask_alloc = mask_allocation_policy::RETAIN, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Creates a table of empty columns with the same types as the `input_table` @@ -360,6 +363,7 @@ void copy_range_in_place(column_view const& source, * * @throws std::out_of_range for any invalid range. * @throws cudf::data_type_error if @p target and @p source have different types. + * @throws cudf::data_type_error if the data type is not fixed width, string, or dictionary * * @param source The column to copy from inside the range * @param target The column to copy from outside the range @@ -377,8 +381,8 @@ std::unique_ptr copy_range( size_type source_begin, size_type source_end, size_type target_begin, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Creates a new column by shifting all values by an offset. 
@@ -421,8 +425,8 @@ std::unique_ptr shift( column_view const& input, size_type offset, scalar const& fill_value, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Slices a `column_view` into a set of `column_view`s according to a set of indices. @@ -624,8 +628,8 @@ std::unique_ptr copy_if_else( column_view const& lhs, column_view const& rhs, column_view const& boolean_mask, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a new column, where each element is selected from either @p lhs or @@ -650,8 +654,8 @@ std::unique_ptr copy_if_else( scalar const& lhs, column_view const& rhs, column_view const& boolean_mask, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a new column, where each element is selected from either @p lhs or @@ -676,8 +680,8 @@ std::unique_ptr copy_if_else( column_view const& lhs, scalar const& rhs, column_view const& boolean_mask, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a new column, where each element is selected from either @p lhs or @@ -700,8 
+704,8 @@ std::unique_ptr copy_if_else( scalar const& lhs, scalar const& rhs, column_view const& boolean_mask, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Scatters rows from the input table to rows of the output corresponding @@ -744,8 +748,8 @@ std::unique_ptr
boolean_mask_scatter( table_view const& input, table_view const& target, column_view const& boolean_mask, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Scatters scalar values to rows of the output corresponding @@ -783,8 +787,8 @@ std::unique_ptr
boolean_mask_scatter( std::vector> const& input, table_view const& target, column_view const& boolean_mask, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Get the element at specified index from a column @@ -803,8 +807,8 @@ std::unique_ptr
boolean_mask_scatter( std::unique_ptr get_element( column_view const& input, size_type index, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Indicates whether a row can be sampled more than once. @@ -848,7 +852,7 @@ std::unique_ptr
sample( sample_with_replacement replacement = sample_with_replacement::FALSE, int64_t const seed = 0, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Checks if a column or its descendants have non-empty null rows @@ -964,8 +968,8 @@ bool may_have_nonempty_nulls(column_view const& input); */ std::unique_ptr purge_nonempty_nulls( column_view const& input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ } // namespace cudf diff --git a/cpp/include/cudf/datetime.hpp b/cpp/include/cudf/datetime.hpp index 44736ca0762..06b7d24f6cd 100644 --- a/cpp/include/cudf/datetime.hpp +++ b/cpp/include/cudf/datetime.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -19,6 +19,7 @@ #include #include +#include #include @@ -47,7 +48,7 @@ namespace datetime { */ std::unique_ptr extract_year( cudf::column_view const& column, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Extracts month from any datetime type and returns an int16_t @@ -61,7 +62,7 @@ std::unique_ptr extract_year( */ std::unique_ptr extract_month( cudf::column_view const& column, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Extracts day from any datetime type and returns an int16_t @@ -75,7 +76,7 @@ std::unique_ptr extract_month( */ std::unique_ptr extract_day( cudf::column_view const& column, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Extracts a weekday from any datetime type and returns an int16_t @@ -89,7 +90,7 @@ std::unique_ptr extract_day( */ std::unique_ptr extract_weekday( cudf::column_view const& column, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Extracts hour from any datetime type and returns an int16_t @@ -103,7 +104,7 @@ std::unique_ptr extract_weekday( */ std::unique_ptr extract_hour( cudf::column_view const& column, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Extracts minute from any datetime type and returns an int16_t @@ -117,7 +118,7 @@ std::unique_ptr extract_hour( */ std::unique_ptr extract_minute( cudf::column_view const& column, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + 
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Extracts second from any datetime type and returns an int16_t @@ -131,7 +132,7 @@ std::unique_ptr extract_minute( */ std::unique_ptr extract_second( cudf::column_view const& column, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Extracts millisecond fraction from any datetime type and returns an int16_t @@ -148,7 +149,7 @@ std::unique_ptr extract_second( */ std::unique_ptr extract_millisecond_fraction( cudf::column_view const& column, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Extracts microsecond fraction from any datetime type and returns an int16_t @@ -165,7 +166,7 @@ std::unique_ptr extract_millisecond_fraction( */ std::unique_ptr extract_microsecond_fraction( cudf::column_view const& column, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Extracts nanosecond fraction from any datetime type and returns an int16_t @@ -182,7 +183,7 @@ std::unique_ptr extract_microsecond_fraction( */ std::unique_ptr extract_nanosecond_fraction( cudf::column_view const& column, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group /** @@ -203,7 +204,7 @@ std::unique_ptr extract_nanosecond_fraction( */ std::unique_ptr last_day_of_month( cudf::column_view const& column, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Computes the day number since the start of the year 
from the datetime and @@ -217,7 +218,7 @@ std::unique_ptr last_day_of_month( */ std::unique_ptr day_of_year( cudf::column_view const& column, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Adds or subtracts a number of months from the datetime type and returns a @@ -252,7 +253,7 @@ std::unique_ptr day_of_year( std::unique_ptr add_calendrical_months( cudf::column_view const& timestamps, cudf::column_view const& months, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Adds or subtracts a number of months from the datetime type and returns a @@ -287,7 +288,7 @@ std::unique_ptr add_calendrical_months( std::unique_ptr add_calendrical_months( cudf::column_view const& timestamps, cudf::scalar const& months, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Check if the year of the given date is a leap year @@ -304,7 +305,7 @@ std::unique_ptr add_calendrical_months( */ std::unique_ptr is_leap_year( cudf::column_view const& column, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Extract the number of days in the month @@ -320,7 +321,7 @@ std::unique_ptr is_leap_year( */ std::unique_ptr days_in_month( cudf::column_view const& column, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns the quarter of the date @@ -336,7 +337,7 @@ std::unique_ptr days_in_month( */ std::unique_ptr extract_quarter( cudf::column_view const& column, - rmm::mr::device_memory_resource* 
mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Fixed frequencies supported by datetime rounding functions ceil, floor, round. @@ -365,7 +366,7 @@ enum class rounding_frequency : int32_t { std::unique_ptr ceil_datetimes( cudf::column_view const& column, rounding_frequency freq, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Round datetimes down to the nearest multiple of the given frequency. @@ -380,7 +381,7 @@ std::unique_ptr ceil_datetimes( std::unique_ptr floor_datetimes( cudf::column_view const& column, rounding_frequency freq, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Round datetimes to the nearest multiple of the given frequency. @@ -395,7 +396,7 @@ std::unique_ptr floor_datetimes( std::unique_ptr round_datetimes( cudf::column_view const& column, rounding_frequency freq, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group diff --git a/cpp/include/cudf/detail/binaryop.hpp b/cpp/include/cudf/detail/binaryop.hpp index e5609568d10..de1fde8bc96 100644 --- a/cpp/include/cudf/detail/binaryop.hpp +++ b/cpp/include/cudf/detail/binaryop.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2023, NVIDIA CORPORATION. + * Copyright (c) 2018-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ #include #include +#include namespace cudf { //! 
Inner interfaces and implementations @@ -26,7 +27,7 @@ namespace detail { /** * @copydoc cudf::binary_operation(column_view const&, column_view const&, - * std::string const&, data_type, rmm::mr::device_memory_resource *) + * std::string const&, data_type, rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ @@ -35,11 +36,11 @@ std::unique_ptr binary_operation(column_view const& lhs, std::string const& ptx, data_type output_type, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::binary_operation(scalar const&, column_view const&, binary_operator, - * data_type, rmm::mr::device_memory_resource *) + * data_type, rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ @@ -48,11 +49,11 @@ std::unique_ptr binary_operation(scalar const& lhs, binary_operator op, data_type output_type, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::binary_operation(column_view const&, scalar const&, binary_operator, - * data_type, rmm::mr::device_memory_resource *) + * data_type, rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ @@ -61,11 +62,11 @@ std::unique_ptr binary_operation(column_view const& lhs, binary_operator op, data_type output_type, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::binary_operation(column_view const&, column_view const&, - * binary_operator, data_type, rmm::mr::device_memory_resource *) + * binary_operator, data_type, rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ @@ -74,6 +75,6 @@ std::unique_ptr binary_operation(column_view const& lhs, binary_operator op, data_type output_type, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/calendrical_month_sequence.cuh b/cpp/include/cudf/detail/calendrical_month_sequence.cuh index 59fb6758973..a9cf54e29b8 100644 --- a/cpp/include/cudf/detail/calendrical_month_sequence.cuh +++ b/cpp/include/cudf/detail/calendrical_month_sequence.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,6 +25,7 @@ #include #include +#include #include #include @@ -38,7 +39,7 @@ struct calendrical_month_sequence_functor { scalar const& input, size_type months, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { // Return empty column if n = 0 if (n == 0) return cudf::make_empty_column(input.type()); diff --git a/cpp/include/cudf/detail/concatenate.hpp b/cpp/include/cudf/detail/concatenate.hpp index 442814bc4fd..3e039175542 100644 --- a/cpp/include/cudf/detail/concatenate.hpp +++ b/cpp/include/cudf/detail/concatenate.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,6 +22,7 @@ #include #include +#include #include @@ -29,22 +30,22 @@ namespace cudf { //! 
Inner interfaces and implementations namespace detail { /** - * @copydoc cudf::concatenate(host_span,rmm::mr::device_memory_resource*) + * @copydoc cudf::concatenate(host_span,rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr concatenate(host_span columns_to_concat, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** - * @copydoc cudf::concatenate(host_span,rmm::mr::device_memory_resource*) + * @copydoc cudf::concatenate(host_span,rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr
concatenate(host_span tables_to_concat, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/concatenate_masks.hpp b/cpp/include/cudf/detail/concatenate_masks.hpp index e7086ea17a5..dd2fb471a7d 100644 --- a/cpp/include/cudf/detail/concatenate_masks.hpp +++ b/cpp/include/cudf/detail/concatenate_masks.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,6 +22,7 @@ #include #include #include +#include namespace cudf { //! Inner interfaces and implementations @@ -59,13 +60,13 @@ size_type concatenate_masks(host_span views, rmm::cuda_stream_view stream); /** - * @copydoc cudf::concatenate_masks(host_span, rmm::mr::device_memory_resource*) + * @copydoc cudf::concatenate_masks(host_span, rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ rmm::device_buffer concatenate_masks(host_span views, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/contiguous_split.hpp b/cpp/include/cudf/detail/contiguous_split.hpp index d9a35470b7d..de00b61cdca 100644 --- a/cpp/include/cudf/detail/contiguous_split.hpp +++ b/cpp/include/cudf/detail/contiguous_split.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -21,6 +21,7 @@ #include #include +#include namespace cudf { namespace detail { @@ -33,7 +34,7 @@ namespace detail { std::vector contiguous_split(cudf::table_view const& input, std::vector const& splits, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::pack @@ -42,7 +43,7 @@ std::vector contiguous_split(cudf::table_view const& input, **/ packed_columns pack(cudf::table_view const& input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); // opaque implementation of `metadata_builder` since it needs to use // `serialized_column`, which is only defined in pack.cpp diff --git a/cpp/include/cudf/detail/copy.hpp b/cpp/include/cudf/detail/copy.hpp index 115822163c3..f7430eb090d 100644 --- a/cpp/include/cudf/detail/copy.hpp +++ b/cpp/include/cudf/detail/copy.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2023, NVIDIA CORPORATION. + * Copyright (c) 2018-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,6 +24,7 @@ #include #include +#include #include @@ -123,7 +124,7 @@ std::vector split(table_view const& input, /** * @copydoc cudf::shift(column_view const&,size_type,scalar const&, - * rmm::mr::device_memory_resource*) + * rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ @@ -131,7 +132,7 @@ std::unique_ptr shift(column_view const& input, size_type offset, scalar const& fill_value, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Performs segmented shifts for specified values. 
@@ -171,11 +172,11 @@ std::unique_ptr segmented_shift(column_view const& segmented_values, size_type offset, scalar const& fill_value, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::allocate_like(column_view const&, size_type, mask_allocation_policy, - * rmm::mr::device_memory_resource*) + * rmm::device_async_resource_ref) * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ @@ -183,11 +184,11 @@ std::unique_ptr allocate_like(column_view const& input, size_type size, mask_allocation_policy mask_alloc, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::copy_if_else( column_view const&, column_view const&, - * column_view const&, rmm::mr::device_memory_resource*) + * column_view const&, rmm::device_async_resource_ref) * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ @@ -195,11 +196,11 @@ std::unique_ptr copy_if_else(column_view const& lhs, column_view const& rhs, column_view const& boolean_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::copy_if_else( scalar const&, column_view const&, - * column_view const&, rmm::mr::device_memory_resource*) + * column_view const&, rmm::device_async_resource_ref) * * @param[in] stream CUDA stream used for device memory operations and kernel launches. 
*/ @@ -207,11 +208,11 @@ std::unique_ptr copy_if_else(scalar const& lhs, column_view const& rhs, column_view const& boolean_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::copy_if_else( column_view const&, scalar const&, - * column_view const&, rmm::mr::device_memory_resource*) + * column_view const&, rmm::device_async_resource_ref) * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ @@ -219,11 +220,11 @@ std::unique_ptr copy_if_else(column_view const& lhs, scalar const& rhs, column_view const& boolean_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::copy_if_else( scalar const&, scalar const&, - * column_view const&, rmm::mr::device_memory_resource*) + * column_view const&, rmm::device_async_resource_ref) * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ @@ -231,7 +232,7 @@ std::unique_ptr copy_if_else(scalar const& lhs, scalar const& rhs, column_view const& boolean_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::sample @@ -243,7 +244,7 @@ std::unique_ptr
sample(table_view const& input, sample_with_replacement replacement, int64_t const seed, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::get_element @@ -253,7 +254,7 @@ std::unique_ptr
sample(table_view const& input, std::unique_ptr get_element(column_view const& input, size_type index, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::has_nonempty_nulls @@ -276,7 +277,7 @@ bool may_have_nonempty_nulls(column_view const& input, rmm::cuda_stream_view str */ std::unique_ptr purge_nonempty_nulls(column_view const& input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/copy_if.cuh b/cpp/include/cudf/detail/copy_if.cuh index 3af050a5da6..c98057d077a 100644 --- a/cpp/include/cudf/detail/copy_if.cuh +++ b/cpp/include/cudf/detail/copy_if.cuh @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -239,7 +240,7 @@ struct scatter_gather_functor { Filter filter, cudf::size_type per_thread, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto output_column = cudf::detail::allocate_like( input, output_size, cudf::mask_allocation_policy::RETAIN, stream, mr); @@ -286,7 +287,7 @@ struct scatter_gather_functor { Filter filter, cudf::size_type, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { rmm::device_uvector indices(output_size, stream); @@ -325,7 +326,7 @@ template std::unique_ptr
copy_if(table_view const& input, Filter filter, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); diff --git a/cpp/include/cudf/detail/copy_if_else.cuh b/cpp/include/cudf/detail/copy_if_else.cuh index 6162fa5ecf1..8418e279ce7 100644 --- a/cpp/include/cudf/detail/copy_if_else.cuh +++ b/cpp/include/cudf/detail/copy_if_else.cuh @@ -23,6 +23,7 @@ #include #include +#include #include #include @@ -44,29 +45,30 @@ __launch_bounds__(block_size) CUDF_KERNEL mutable_column_device_view out, size_type* __restrict__ const valid_count) { - size_type const tid = threadIdx.x + blockIdx.x * block_size; - int const warp_id = tid / warp_size; - size_type const warps_per_grid = gridDim.x * block_size / warp_size; + auto tidx = cudf::detail::grid_1d::global_thread_id(); + auto const stride = cudf::detail::grid_1d::grid_stride(); + int const warp_id = tidx / cudf::detail::warp_size; + size_type const warps_per_grid = gridDim.x * block_size / cudf::detail::warp_size; // begin/end indices for the column data - size_type begin = 0; - size_type end = out.size(); + size_type const begin = 0; + size_type const end = out.size(); // warp indices. since 1 warp == 32 threads == sizeof(bitmask_type) * 8, // each warp will process one (32 bit) of the validity mask via // __ballot_sync() - size_type warp_begin = cudf::word_index(begin); - size_type warp_end = cudf::word_index(end - 1); + size_type const warp_begin = cudf::word_index(begin); + size_type const warp_end = cudf::word_index(end - 1); // lane id within the current warp constexpr size_type leader_lane{0}; - int const lane_id = threadIdx.x % warp_size; + int const lane_id = threadIdx.x % cudf::detail::warp_size; size_type warp_valid_count{0}; // current warp. size_type warp_cur = warp_begin + warp_id; - size_type index = tid; while (warp_cur <= warp_end) { + auto const index = static_cast(tidx); auto const opt_value = (index < end) ? (filter(index) ? 
lhs[index] : rhs[index]) : thrust::nullopt; if (opt_value) { out.element(index) = static_cast(*opt_value); } @@ -84,7 +86,7 @@ __launch_bounds__(block_size) CUDF_KERNEL // next grid warp_cur += warps_per_grid; - index += block_size * gridDim.x; + tidx += stride; } if (has_nulls) { @@ -152,13 +154,13 @@ std::unique_ptr copy_if_else(bool nullable, FilterFn filter, cudf::data_type output_type, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { // This is the type of the thrust::optional element in the passed iterators using Element = typename thrust::iterator_traits::value_type::value_type; size_type size = std::distance(lhs_begin, lhs_end); - size_type num_els = cudf::util::round_up_safe(size, warp_size); + size_type num_els = cudf::util::round_up_safe(size, cudf::detail::warp_size); constexpr int block_size = 256; cudf::detail::grid_1d grid{num_els, block_size, 1}; diff --git a/cpp/include/cudf/detail/copy_range.cuh b/cpp/include/cudf/detail/copy_range.cuh index 9f8b0f8b619..1b3b2056c6c 100644 --- a/cpp/include/cudf/detail/copy_range.cuh +++ b/cpp/include/cudf/detail/copy_range.cuh @@ -27,6 +27,7 @@ #include #include +#include #include #include @@ -203,7 +204,7 @@ std::unique_ptr copy_range(column_view const& source, size_type source_end, size_type target_begin, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/datetime.hpp b/cpp/include/cudf/detail/datetime.hpp index c5160958165..a93c06d4371 100644 --- a/cpp/include/cudf/detail/datetime.hpp +++ b/cpp/include/cudf/detail/datetime.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -19,156 +19,158 @@ #include #include +#include + #include namespace cudf { namespace datetime { namespace detail { /** - * @copydoc cudf::extract_year(cudf::column_view const&, rmm::mr::device_memory_resource *) + * @copydoc cudf::extract_year(cudf::column_view const&, rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr extract_year(cudf::column_view const& column, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** - * @copydoc cudf::extract_month(cudf::column_view const&, rmm::mr::device_memory_resource *) + * @copydoc cudf::extract_month(cudf::column_view const&, rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr extract_month(cudf::column_view const& column, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** - * @copydoc cudf::extract_day(cudf::column_view const&, rmm::mr::device_memory_resource *) + * @copydoc cudf::extract_day(cudf::column_view const&, rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr extract_day(cudf::column_view const& column, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** - * @copydoc cudf::extract_weekday(cudf::column_view const&, rmm::mr::device_memory_resource *) + * @copydoc cudf::extract_weekday(cudf::column_view const&, rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ std::unique_ptr extract_weekday(cudf::column_view const& column, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** - * @copydoc cudf::extract_hour(cudf::column_view const&, rmm::mr::device_memory_resource *) + * @copydoc cudf::extract_hour(cudf::column_view const&, rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr extract_hour(cudf::column_view const& column, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** - * @copydoc cudf::extract_minute(cudf::column_view const&, rmm::mr::device_memory_resource *) + * @copydoc cudf::extract_minute(cudf::column_view const&, rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr extract_minute(cudf::column_view const& column, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** - * @copydoc cudf::extract_second(cudf::column_view const&, rmm::mr::device_memory_resource *) + * @copydoc cudf::extract_second(cudf::column_view const&, rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr extract_second(cudf::column_view const& column, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::extract_millisecond_fraction(cudf::column_view const&, - * rmm::mr::device_memory_resource *) + * rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ std::unique_ptr extract_millisecond_fraction(cudf::column_view const& column, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::extract_microsecond_fraction(cudf::column_view const&, - * rmm::mr::device_memory_resource *) + * rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr extract_microsecond_fraction(cudf::column_view const& column, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::extract_nanosecond_fraction(cudf::column_view const&, - * rmm::mr::device_memory_resource *) + * rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr extract_nanosecond_fraction(cudf::column_view const& column, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** - * @copydoc cudf::last_day_of_month(cudf::column_view const&, rmm::mr::device_memory_resource *) + * @copydoc cudf::last_day_of_month(cudf::column_view const&, rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr last_day_of_month(cudf::column_view const& column, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** - * @copydoc cudf::day_of_year(cudf::column_view const&, rmm::mr::device_memory_resource *) + * @copydoc cudf::day_of_year(cudf::column_view const&, rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ std::unique_ptr day_of_year(cudf::column_view const& column, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::add_calendrical_months(cudf::column_view const&, cudf::column_view const&, - * rmm::mr::device_memory_resource *) + * rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr add_calendrical_months(cudf::column_view const& timestamps, cudf::column_view const& months, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::add_calendrical_months(cudf::column_view const&, cudf::scalar const&, - * rmm::mr::device_memory_resource *) + * rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr add_calendrical_months(cudf::column_view const& timestamps, cudf::scalar const& months, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** - * @copydoc cudf::is_leap_year(cudf::column_view const&, rmm::mr::device_memory_resource *) + * @copydoc cudf::is_leap_year(cudf::column_view const&, rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ std::unique_ptr is_leap_year(cudf::column_view const& column, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); std::unique_ptr extract_quarter(cudf::column_view const& column, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace datetime diff --git a/cpp/include/cudf/detail/distinct_hash_join.cuh b/cpp/include/cudf/detail/distinct_hash_join.cuh index e874151ed36..de3d23e9470 100644 --- a/cpp/include/cudf/detail/distinct_hash_join.cuh +++ b/cpp/include/cudf/detail/distinct_hash_join.cuh @@ -21,6 +21,7 @@ #include #include +#include #include @@ -84,16 +85,10 @@ struct hasher_adapter { template struct distinct_hash_join { private: - /// Row equality type for nested columns - using nested_row_equal = cudf::experimental::row::equality::strong_index_comparator_adapter< - cudf::experimental::row::equality::device_row_comparator>; - /// Row equality type for flat columns - using flat_row_equal = cudf::experimental::row::equality::strong_index_comparator_adapter< - cudf::experimental::row::equality::device_row_comparator>; - /// Device row equal type - using d_equal_type = - std::conditional_t; + using d_equal_type = cudf::experimental::row::equality::strong_index_comparator_adapter< + cudf::experimental::row::equality::device_row_comparator>; using hasher = hasher_adapter>; using probing_scheme_type = cuco::linear_probing<1, hasher>; using cuco_storage_type = cuco::storage<1>; @@ -148,12 +143,12 @@ struct distinct_hash_join { */ std::pair>, std::unique_ptr>> - inner_join(rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) const; + inner_join(rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const; /** * @copydoc cudf::distinct_hash_join::left_join */ std::unique_ptr> left_join( - rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) const; + rmm::cuda_stream_view stream, 
rmm::device_async_resource_ref mr) const; }; } // namespace cudf::detail diff --git a/cpp/include/cudf/detail/fill.hpp b/cpp/include/cudf/detail/fill.hpp index caaccfb4851..6996cda6974 100644 --- a/cpp/include/cudf/detail/fill.hpp +++ b/cpp/include/cudf/detail/fill.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ #include #include +#include #include @@ -48,7 +49,7 @@ std::unique_ptr fill(column_view const& input, size_type end, scalar const& value, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/gather.cuh b/cpp/include/cudf/detail/gather.cuh index 6492aa23e80..c9d350ce983 100644 --- a/cpp/include/cudf/detail/gather.cuh +++ b/cpp/include/cudf/detail/gather.cuh @@ -38,6 +38,7 @@ #include #include +#include #include #include @@ -174,7 +175,7 @@ struct column_gatherer { MapIterator gather_map_end, bool nullify_out_of_bounds, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { column_gatherer_impl gatherer{}; @@ -214,7 +215,7 @@ struct column_gatherer_impl { MapItType gather_map_end, bool nullify_out_of_bounds, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { if (true == nullify_out_of_bounds) { return cudf::strings::detail::gather( @@ -334,7 +335,7 @@ struct column_gatherer_impl { MapItRoot gather_map_end, bool nullify_out_of_bounds, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { lists_column_view list(column); auto gather_map_size = std::distance(gather_map_begin, gather_map_end); @@ -397,7 +398,7 @@ struct column_gatherer_impl { 
MapItType gather_map_end, bool nullify_out_of_bounds, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { dictionary_column_view dictionary(source_column); auto output_count = std::distance(gather_map_begin, gather_map_end); @@ -448,7 +449,7 @@ struct column_gatherer_impl { MapItRoot gather_map_end, bool nullify_out_of_bounds, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto const gather_map_size = std::distance(gather_map_begin, gather_map_end); if (gather_map_size == 0) { return empty_like(column); } @@ -554,7 +555,7 @@ void gather_bitmask(table_view const& source, std::vector>& target, gather_bitmask_op op, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { if (target.empty()) { return; } @@ -652,7 +653,7 @@ std::unique_ptr
gather(table_view const& source_table, MapIterator gather_map_end, out_of_bounds_policy bounds_policy, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { std::vector> destination_columns; diff --git a/cpp/include/cudf/detail/gather.hpp b/cpp/include/cudf/detail/gather.hpp index 034eb6c1282..36824f56895 100644 --- a/cpp/include/cudf/detail/gather.hpp +++ b/cpp/include/cudf/detail/gather.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,6 +23,7 @@ #include #include +#include #include @@ -66,12 +67,12 @@ std::unique_ptr
gather(table_view const& source_table, out_of_bounds_policy bounds_policy, negative_index_policy neg_indices, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::detail::gather(table_view const&,column_view const&,table_view * const&,cudf::out_of_bounds_policy,cudf::detail::negative_index_policy,rmm::cuda_stream_view, - * rmm::mr::device_memory_resource*) + * rmm::device_async_resource_ref) * * @throws cudf::logic_error if `gather_map` span size is larger than max of `size_type`. */ @@ -80,7 +81,7 @@ std::unique_ptr
gather(table_view const& source_table, out_of_bounds_policy bounds_policy, negative_index_policy neg_indices, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/groupby.hpp b/cpp/include/cudf/detail/groupby.hpp index 0afa69be1a3..5a8c9b0a27f 100644 --- a/cpp/include/cudf/detail/groupby.hpp +++ b/cpp/include/cudf/detail/groupby.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #include #include +#include #include #include @@ -45,7 +46,7 @@ std::pair, std::vector> groupby( host_span requests, null_policy include_null_keys, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace hash } // namespace detail diff --git a/cpp/include/cudf/detail/groupby/group_replace_nulls.hpp b/cpp/include/cudf/detail/groupby/group_replace_nulls.hpp index e081a626c75..389c7952875 100644 --- a/cpp/include/cudf/detail/groupby/group_replace_nulls.hpp +++ b/cpp/include/cudf/detail/groupby/group_replace_nulls.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -23,6 +23,7 @@ #include #include +#include namespace cudf { namespace groupby { namespace detail { @@ -40,7 +41,7 @@ std::unique_ptr group_replace_nulls(cudf::column_view const& grouped_val device_span group_labels, cudf::replace_policy replace_policy, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace groupby diff --git a/cpp/include/cudf/detail/groupby/sort_helper.hpp b/cpp/include/cudf/detail/groupby/sort_helper.hpp index 7b386eb5f03..567efedb9b2 100644 --- a/cpp/include/cudf/detail/groupby/sort_helper.hpp +++ b/cpp/include/cudf/detail/groupby/sort_helper.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,6 +23,7 @@ #include #include +#include namespace cudf { namespace groupby { @@ -87,7 +88,7 @@ struct sort_groupby_helper { */ std::unique_ptr sorted_values(column_view const& values, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Groups a column of values according to `keys` @@ -101,7 +102,7 @@ struct sort_groupby_helper { */ std::unique_ptr grouped_values(column_view const& values, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Get a table of sorted unique keys @@ -109,7 +110,7 @@ struct sort_groupby_helper { * @return a new table in which each row is a unique row in the sorted key table. */ std::unique_ptr
unique_keys(rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Get a table of sorted keys @@ -117,7 +118,7 @@ struct sort_groupby_helper { * @return a new table containing the sorted keys. */ std::unique_ptr
sorted_keys(rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Get the number of groups in `keys` diff --git a/cpp/include/cudf/detail/hash_reduce_by_row.cuh b/cpp/include/cudf/detail/hash_reduce_by_row.cuh index 1df6848c575..dfe79646167 100644 --- a/cpp/include/cudf/detail/hash_reduce_by_row.cuh +++ b/cpp/include/cudf/detail/hash_reduce_by_row.cuh @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -124,7 +125,7 @@ rmm::device_uvector hash_reduce_by_row( ReduceFuncBuilder func_builder, OutputType init, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto const map_dview = map.get_device_view(); auto const row_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_input); diff --git a/cpp/include/cudf/detail/interop.hpp b/cpp/include/cudf/detail/interop.hpp index 296b68d22a9..5b2b9b5e69d 100644 --- a/cpp/include/cudf/detail/interop.hpp +++ b/cpp/include/cudf/detail/interop.hpp @@ -23,6 +23,8 @@ #pragma nv_diag_suppress 611 #pragma nv_diag_suppress 2810 #endif +#include + #include #ifdef __CUDACC__ #pragma nv_diag_default 611 @@ -47,7 +49,7 @@ namespace detail { */ std::unique_ptr
from_dlpack(DLManagedTensor const* managed_tensor, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::to_dlpack @@ -56,7 +58,7 @@ std::unique_ptr
from_dlpack(DLManagedTensor const* managed_tensor, */ DLManagedTensor* to_dlpack(table_view const& input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); // Creating arrow as per given type_id and buffer arguments template @@ -127,19 +129,19 @@ std::shared_ptr to_arrow(cudf::scalar const& input, arrow::MemoryPool* ar_mr); /** * @copydoc cudf::from_arrow(arrow::Table const& input_table, rmm::cuda_stream_view stream, - * rmm::mr::device_memory_resource* mr) + * rmm::device_async_resource_ref mr) */ std::unique_ptr
from_arrow(arrow::Table const& input_table, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::from_arrow(arrow::Scalar const& input, rmm::cuda_stream_view stream, - * rmm::mr::device_memory_resource* mr) + * rmm::device_async_resource_ref mr) */ std::unique_ptr from_arrow(arrow::Scalar const& input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Return a maximum precision for a given type. diff --git a/cpp/include/cudf/detail/join.hpp b/cpp/include/cudf/detail/join.hpp index 27d14874bce..aabfff746ea 100644 --- a/cpp/include/cudf/detail/join.hpp +++ b/cpp/include/cudf/detail/join.hpp @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -105,7 +106,7 @@ struct hash_join { inner_join(cudf::table_view const& probe, std::optional output_size, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const; + rmm::device_async_resource_ref mr) const; /** * @copydoc cudf::hash_join::left_join @@ -115,7 +116,7 @@ struct hash_join { left_join(cudf::table_view const& probe, std::optional output_size, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const; + rmm::device_async_resource_ref mr) const; /** * @copydoc cudf::hash_join::full_join @@ -125,7 +126,7 @@ struct hash_join { full_join(cudf::table_view const& probe, std::optional output_size, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const; + rmm::device_async_resource_ref mr) const; /** * @copydoc cudf::hash_join::inner_join_size @@ -144,7 +145,7 @@ struct hash_join { */ std::size_t full_join_size(cudf::table_view const& probe, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const; + rmm::device_async_resource_ref mr) const; private: /** @@ -169,7 +170,7 @@ struct hash_join { join_kind join, std::optional output_size, rmm::cuda_stream_view stream, - 
rmm::mr::device_memory_resource* mr) const; + rmm::device_async_resource_ref mr) const; /** * @copydoc cudf::detail::hash_join::probe_join_indices @@ -184,7 +185,7 @@ struct hash_join { join_kind join, std::optional output_size, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const; + rmm::device_async_resource_ref mr) const; }; } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/label_bins.hpp b/cpp/include/cudf/detail/label_bins.hpp index 50eeba58cdd..9f6dcce448d 100644 --- a/cpp/include/cudf/detail/label_bins.hpp +++ b/cpp/include/cudf/detail/label_bins.hpp @@ -25,6 +25,7 @@ #include #include #include +#include namespace cudf { @@ -40,7 +41,7 @@ namespace detail { /** * @copydoc cudf::label_bins(column_view const& input, column_view const& left_edges, inclusive * left_inclusive, column_view const& right_edges, inclusive right_inclusive, rmm::cuda_stream_view, - * rmm::mr::device_memory_resource* mr) + * rmm::device_async_resource_ref mr) * * @param stream Stream view on which to allocate resources and queue execution. */ @@ -50,7 +51,7 @@ std::unique_ptr label_bins(column_view const& input, column_view const& right_edges, inclusive right_inclusive, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** @} */ // end of group } // namespace detail diff --git a/cpp/include/cudf/detail/merge.hpp b/cpp/include/cudf/detail/merge.hpp index 2167a484214..837eda0d7b5 100644 --- a/cpp/include/cudf/detail/merge.hpp +++ b/cpp/include/cudf/detail/merge.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2023, NVIDIA CORPORATION. + * Copyright (c) 2018-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -17,6 +17,7 @@ #pragma once #include +#include #include @@ -45,7 +46,7 @@ using index_vector = rmm::device_uvector; * std::vector const& key_cols, * std::vector const& column_order, * std::vector const& null_precedence, - * rmm::mr::device_memory_resource* mr) + * rmm::device_async_resource_ref mr) * * @param stream CUDA stream used for device memory operations and kernel launches */ @@ -54,7 +55,7 @@ std::unique_ptr merge(std::vector const& tables_to_merg std::vector const& column_order, std::vector const& null_precedence, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/normalizing_iterator.cuh b/cpp/include/cudf/detail/normalizing_iterator.cuh index 8f90afc3e57..32df13104e0 100644 --- a/cpp/include/cudf/detail/normalizing_iterator.cuh +++ b/cpp/include/cudf/detail/normalizing_iterator.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -204,8 +204,8 @@ struct alignas(16) base_normalator { private: struct integer_sizeof_fn { - template ())> - CUDF_HOST_DEVICE constexpr std::size_t operator()() const + template ())> + CUDF_HOST_DEVICE std::size_t operator()() const { #ifndef __CUDA_ARCH__ CUDF_FAIL("only integral types are supported"); @@ -213,8 +213,8 @@ struct alignas(16) base_normalator { CUDF_UNREACHABLE("only integral types are supported"); #endif } - template ())> - CUDF_HOST_DEVICE constexpr std::size_t operator()() const noexcept + template ())> + CUDF_HOST_DEVICE std::size_t operator()() const noexcept { return sizeof(T); } diff --git a/cpp/include/cudf/detail/null_mask.cuh b/cpp/include/cudf/detail/null_mask.cuh index db373f47a01..e62675cbc8c 100644 --- a/cpp/include/cudf/detail/null_mask.cuh +++ b/cpp/include/cudf/detail/null_mask.cuh @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -110,7 +111,7 @@ CUDF_KERNEL void offset_bitmask_binop(Binop op, /** * @copydoc bitmask_binop(Binop op, host_span, host_span - * const, size_type, rmm::mr::device_memory_resource *) + * const, size_type, rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches */ @@ -120,7 +121,7 @@ std::pair bitmask_binop(Binop op, host_span masks_begin_bits, size_type mask_size_bits, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto dest_mask = rmm::device_buffer{bitmask_allocation_size_bytes(mask_size_bits), stream, mr}; auto null_count = @@ -163,7 +164,7 @@ size_type inplace_bitmask_binop(Binop op, CUDF_EXPECTS(std::all_of(masks.begin(), masks.end(), [](auto p) { return p != nullptr; }), "Mask pointer cannot be null"); - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource(); rmm::device_scalar d_counter{0, stream, mr}; rmm::device_uvector d_masks(masks.size(), stream, mr); 
rmm::device_uvector d_begin_bits(masks_begin_bits.size(), stream, mr); @@ -282,7 +283,7 @@ rmm::device_uvector segmented_count_bits(bitmask_type const* bitmask, OffsetIterator last_bit_indices_begin, count_bits_policy count_bits, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto const num_ranges = static_cast(std::distance(first_bit_indices_begin, first_bit_indices_end)); @@ -541,7 +542,7 @@ std::pair segmented_null_mask_reduction( null_policy null_handling, std::optional valid_initial_value, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto const segments_begin = thrust::make_zip_iterator(first_bit_indices_begin, last_bit_indices_begin); diff --git a/cpp/include/cudf/detail/null_mask.hpp b/cpp/include/cudf/detail/null_mask.hpp index 74e2ccd2ea1..04d8d663acb 100644 --- a/cpp/include/cudf/detail/null_mask.hpp +++ b/cpp/include/cudf/detail/null_mask.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ #include #include +#include #include @@ -28,14 +29,14 @@ namespace cudf { namespace detail { /** - * @copydoc cudf::create_null_mask(size_type, mask_state, rmm::mr::device_memory_resource*) + * @copydoc cudf::create_null_mask(size_type, mask_state, rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ rmm::device_buffer create_null_mask(size_type size, mask_state state, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::set_null_mask(bitmask_type*, size_type, size_type, bool) @@ -194,7 +195,7 @@ std::vector segmented_null_count(bitmask_type const* bitmask, /** * @copydoc cudf::copy_bitmask(bitmask_type const*, size_type, size_type, - *rmm::mr::device_memory_resource*) + *rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ @@ -202,20 +203,20 @@ rmm::device_buffer copy_bitmask(bitmask_type const* mask, size_type begin_bit, size_type end_bit, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** - * @copydoc cudf::copy_bitmask(column_view const& view, rmm::mr::device_memory_resource*) + * @copydoc cudf::copy_bitmask(column_view const& view, rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ rmm::device_buffer copy_bitmask(column_view const& view, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc bitmask_and(host_span, host_span const, - * size_type, rmm::mr::device_memory_resource *) + * size_type, rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches */ @@ -223,7 +224,7 @@ std::pair bitmask_and(host_span masks_begin_bits, size_type mask_size_bits, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::bitmask_and @@ -232,7 +233,7 @@ std::pair bitmask_and(host_span bitmask_and(table_view const& view, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::bitmask_or @@ -241,7 +242,7 @@ std::pair bitmask_and(table_view const& view, */ std::pair bitmask_or(table_view const& view, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Performs a bitwise AND of the specified bitmasks, @@ -274,7 +275,7 @@ cudf::size_type inplace_bitmask_and(device_span dest_mask, void set_all_valid_null_masks(column_view const& input, column& output, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail diff --git a/cpp/include/cudf/detail/quantiles.hpp b/cpp/include/cudf/detail/quantiles.hpp index ac37d923d85..6c188d2ca68 100644 --- a/cpp/include/cudf/detail/quantiles.hpp +++ b/cpp/include/cudf/detail/quantiles.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -20,6 +20,7 @@ #include #include +#include namespace cudf { namespace detail { @@ -35,7 +36,7 @@ std::unique_ptr quantile(column_view const& input, column_view const& ordered_indices, bool exact, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::quantiles() @@ -49,18 +50,18 @@ std::unique_ptr
quantiles(table_view const& input, std::vector const& column_order, std::vector const& null_precedence, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::percentile_approx(tdigest_column_view const&, column_view const&, - * rmm::mr::device_memory_resource*) + * rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr percentile_approx(tdigest::tdigest_column_view const& input, column_view const& percentiles, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/repeat.hpp b/cpp/include/cudf/detail/repeat.hpp index 883d5d158fb..abb9e45a95c 100644 --- a/cpp/include/cudf/detail/repeat.hpp +++ b/cpp/include/cudf/detail/repeat.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #include #include +#include #include @@ -28,7 +29,7 @@ namespace detail { /** * @copydoc cudf::repeat(table_view const&, column_view const&, bool, - * rmm::mr::device_memory_resource*) + * rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ @@ -36,18 +37,18 @@ std::unique_ptr
repeat(table_view const& input_table, column_view const& count, bool check_count, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::repeat(table_view const&, size_type, - * rmm::mr::device_memory_resource*) + * rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr
repeat(table_view const& input_table, size_type count, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/replace.hpp b/cpp/include/cudf/detail/replace.hpp index da83f7b285d..46203bdf2f0 100644 --- a/cpp/include/cudf/detail/replace.hpp +++ b/cpp/include/cudf/detail/replace.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2023, NVIDIA CORPORATION. + * Copyright (c) 2018-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #include #include +#include #include @@ -27,58 +28,58 @@ namespace cudf { namespace detail { /** * @copydoc cudf::replace_nulls(column_view const&, column_view const&, - * rmm::mr::device_memory_resource*) + * rmm::device_async_resource_ref) * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr replace_nulls(column_view const& input, cudf::column_view const& replacement, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::replace_nulls(column_view const&, scalar const&, - * rmm::mr::device_memory_resource*) + * rmm::device_async_resource_ref) * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr replace_nulls(column_view const& input, scalar const& replacement, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::replace_nulls(column_view const&, replace_policy const&, - * rmm::mr::device_memory_resource*) + * rmm::device_async_resource_ref) * * @param[in] stream CUDA stream used for device memory operations and kernel launches. 
*/ std::unique_ptr replace_nulls(column_view const& input, replace_policy const& replace_policy, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::replace_nans(column_view const&, column_view const&, - * rmm::mr::device_memory_resource*) + * rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr replace_nans(column_view const& input, column_view const& replacement, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::replace_nans(column_view const&, scalar const&, - * rmm::mr::device_memory_resource*) + * rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr replace_nans(column_view const& input, scalar const& replacement, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::find_and_replace_all @@ -89,7 +90,7 @@ std::unique_ptr find_and_replace_all(column_view const& input_col, column_view const& values_to_replace, column_view const& replacement_values, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::normalize_nans_and_zeros @@ -98,7 +99,7 @@ std::unique_ptr find_and_replace_all(column_view const& input_col, */ std::unique_ptr normalize_nans_and_zeros(column_view const& input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/reshape.hpp b/cpp/include/cudf/detail/reshape.hpp index 5ab53690a23..7a1c3d6c4f0 100644 --- a/cpp/include/cudf/detail/reshape.hpp +++ b/cpp/include/cudf/detail/reshape.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. 
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #include #include +#include #include @@ -33,7 +34,7 @@ namespace detail { std::unique_ptr
tile(table_view const& input, size_type count, rmm::cuda_stream_view, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::interleave_columns @@ -42,7 +43,7 @@ std::unique_ptr
tile(table_view const& input, */ std::unique_ptr interleave_columns(table_view const& input, rmm::cuda_stream_view, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/rolling.hpp b/cpp/include/cudf/detail/rolling.hpp index da90217c254..ea6f38c421c 100644 --- a/cpp/include/cudf/detail/rolling.hpp +++ b/cpp/include/cudf/detail/rolling.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,6 +22,7 @@ #include #include +#include #include @@ -35,7 +36,7 @@ namespace detail { * column_view const& following_window, * size_type min_periods, * rolling_aggregation const& agg, - * rmm::mr::device_memory_resource* mr) + * rmm::device_async_resource_ref mr) * * @param stream CUDA stream used for device memory operations and kernel launches. */ @@ -45,7 +46,7 @@ std::unique_ptr rolling_window(column_view const& input, size_type min_periods, rolling_aggregation const& agg, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/round.hpp b/cpp/include/cudf/detail/round.hpp index cdfc7caef37..1a9c5c82c65 100644 --- a/cpp/include/cudf/detail/round.hpp +++ b/cpp/include/cudf/detail/round.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #include #include +#include namespace cudf { //! 
Inner interfaces and implementations @@ -27,7 +28,7 @@ namespace detail { /** * @copydoc cudf::round(column_view const&, int32_t, rounding_method, - * rmm::mr::device_memory_resource*) + * rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ @@ -35,7 +36,7 @@ std::unique_ptr round(column_view const& input, int32_t decimal_places, rounding_method method, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/scan.hpp b/cpp/include/cudf/detail/scan.hpp index f4b2d51d0cb..54c25d0157c 100644 --- a/cpp/include/cudf/detail/scan.hpp +++ b/cpp/include/cudf/detail/scan.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ #include #include +#include namespace cudf { namespace detail { @@ -50,7 +51,7 @@ std::unique_ptr scan_exclusive(column_view const& input, scan_aggregation const& agg, null_policy null_handling, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Computes the inclusive scan of a column. @@ -76,7 +77,7 @@ std::unique_ptr scan_inclusive(column_view const& input, scan_aggregation const& agg, null_policy null_handling, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Generate row ranks for a column. @@ -88,7 +89,7 @@ std::unique_ptr scan_inclusive(column_view const& input, */ std::unique_ptr inclusive_rank_scan(column_view const& order_by, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Generate row dense ranks for a column. 
@@ -100,7 +101,7 @@ std::unique_ptr inclusive_rank_scan(column_view const& order_by, */ std::unique_ptr inclusive_dense_rank_scan(column_view const& order_by, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Generate row ONE_NORMALIZED percent ranks for a column. @@ -113,7 +114,7 @@ std::unique_ptr inclusive_dense_rank_scan(column_view const& order_by, * @return rank values. */ std::unique_ptr inclusive_one_normalized_percent_rank_scan( - column_view const& order_by, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); + column_view const& order_by, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh index dbf7bfa9527..80bc87731ca 100644 --- a/cpp/include/cudf/detail/scatter.cuh +++ b/cpp/include/cudf/detail/scatter.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -29,10 +29,13 @@ #include #include #include +#include #include +#include #include #include +#include #include #include @@ -145,7 +148,7 @@ struct column_scatterer_impl(target, stream, mr); auto result_view = result->mutable_view(); @@ -170,7 +173,7 @@ struct column_scatterer_impl { MapIterator scatter_map_end, column_view const& target, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const + rmm::device_async_resource_ref mr) const { auto d_column = column_device_view::create(source, stream); auto const begin = d_column->begin(); @@ -187,7 +190,7 @@ struct column_scatterer_impl { MapIterator scatter_map_end, column_view const& target, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const + rmm::device_async_resource_ref mr) const { return cudf::lists::detail::scatter( source, scatter_map_begin, scatter_map_end, target, stream, mr); @@ -202,7 +205,7 @@ struct column_scatterer_impl { MapIterator scatter_map_end, column_view const& target_in, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const + rmm::device_async_resource_ref mr) const { if (target_in.is_empty()) // empty begets empty return make_empty_column(type_id::DICTIONARY32); @@ -212,8 +215,9 @@ struct column_scatterer_impl { // check the keys match dictionary_column_view const source(source_in); dictionary_column_view const target(target_in); - CUDF_EXPECTS(source.keys().type() == target.keys().type(), - "scatter dictionary keys must be the same type"); + CUDF_EXPECTS(cudf::have_same_types(source.keys(), target.keys()), + "scatter dictionary keys must be the same type", + cudf::data_type_error); // first combine keys so both dictionaries have the same set auto target_matched = dictionary::detail::add_keys(target, source.keys(), stream, mr); @@ -261,7 +265,7 @@ struct column_scatterer { MapIterator scatter_map_end, column_view const& target, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const + 
rmm::device_async_resource_ref mr) const { column_scatterer_impl scatterer{}; return scatterer(source, scatter_map_begin, scatter_map_end, target, stream, mr); @@ -276,7 +280,7 @@ struct column_scatterer_impl { MapItRoot scatter_map_end, column_view const& target, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const + rmm::device_async_resource_ref mr) const { CUDF_EXPECTS(source.num_children() == target.num_children(), "Scatter source and target are not of the same type."); @@ -391,7 +395,7 @@ std::unique_ptr
scatter(table_view const& source, MapIterator scatter_map_end, table_view const& target, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); diff --git a/cpp/include/cudf/detail/scatter.hpp b/cpp/include/cudf/detail/scatter.hpp index 94c795f31b2..95ed6af8c3c 100644 --- a/cpp/include/cudf/detail/scatter.hpp +++ b/cpp/include/cudf/detail/scatter.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,6 +22,7 @@ #include #include +#include #include @@ -63,11 +64,11 @@ std::unique_ptr
scatter(table_view const& source, column_view const& scatter_map, table_view const& target, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::detail::scatter(table_view const&,column_view const&,table_view - * const&,bool,rmm::cuda_stream_view,rmm::mr::device_memory_resource*) + * const&,bool,rmm::cuda_stream_view,rmm::device_async_resource_ref) * * @throws cudf::logic_error if `scatter_map` span size is larger than max of `size_type`. */ @@ -75,7 +76,7 @@ std::unique_ptr
scatter(table_view const& source, device_span const scatter_map, table_view const& target, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Scatters a row of scalar values into a copy of the target table @@ -110,13 +111,13 @@ std::unique_ptr
scatter(std::vector> column_view const& indices, table_view const& target, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::boolean_mask_scatter( table_view const& source, table_view const& target, * column_view const& boolean_mask, - * rmm::mr::device_memory_resource *mr) + * rmm::device_async_resource_ref mr) * * @param stream CUDA stream used for device memory operations and kernel launches. */ @@ -124,14 +125,14 @@ std::unique_ptr
boolean_mask_scatter(table_view const& source, table_view const& target, column_view const& boolean_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::boolean_mask_scatter( * std::vector> const& source, * table_view const& target, * column_view const& boolean_mask, - * rmm::mr::device_memory_resource *mr) + * rmm::device_async_resource_ref mr) * * @param stream CUDA stream used for device memory operations and kernel launches. */ @@ -140,7 +141,7 @@ std::unique_ptr
boolean_mask_scatter( table_view const& target, column_view const& boolean_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/search.hpp b/cpp/include/cudf/detail/search.hpp index 4277baf3edd..e60b18f4c8d 100644 --- a/cpp/include/cudf/detail/search.hpp +++ b/cpp/include/cudf/detail/search.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,6 +23,7 @@ #include #include +#include namespace cudf::detail { /** @@ -35,7 +36,7 @@ std::unique_ptr lower_bound(table_view const& haystack, std::vector const& column_order, std::vector const& null_precedence, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::upper_bound @@ -47,24 +48,24 @@ std::unique_ptr upper_bound(table_view const& haystack, std::vector const& column_order, std::vector const& null_precedence, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** - * @copydoc cudf::contains(column_view const&, scalar const&, rmm::mr::device_memory_resource*) + * @copydoc cudf::contains(column_view const&, scalar const&, rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ bool contains(column_view const& haystack, scalar const& needle, rmm::cuda_stream_view stream); /** - * @copydoc cudf::contains(column_view const&, column_view const&, rmm::mr::device_memory_resource*) + * @copydoc cudf::contains(column_view const&, column_view const&, rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ std::unique_ptr contains(column_view const& haystack, column_view const& needles, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Check if rows in the given `needles` table exist in the `haystack` table. @@ -96,6 +97,6 @@ rmm::device_uvector contains(table_view const& haystack, null_equality compare_nulls, nan_equality compare_nans, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace cudf::detail diff --git a/cpp/include/cudf/detail/sequence.hpp b/cpp/include/cudf/detail/sequence.hpp index 6f2a43b54de..a18a9d3b200 100644 --- a/cpp/include/cudf/detail/sequence.hpp +++ b/cpp/include/cudf/detail/sequence.hpp @@ -21,12 +21,13 @@ #include #include +#include namespace cudf { namespace detail { /** * @copydoc cudf::sequence(size_type size, scalar const& init, scalar const& step, - * rmm::mr::device_memory_resource* mr = + * rmm::device_async_resource_ref mr = *rmm::mr::get_current_device_resource()) * * @param stream CUDA stream used for device memory operations and kernel launches. @@ -35,11 +36,11 @@ std::unique_ptr sequence(size_type size, scalar const& init, scalar const& step, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::sequence(size_type size, scalar const& init, - rmm::mr::device_memory_resource* mr = + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) * * @param stream CUDA stream used for device memory operations and kernel launches. 
@@ -47,13 +48,13 @@ std::unique_ptr sequence(size_type size, std::unique_ptr sequence(size_type size, scalar const& init, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::calendrical_month_sequence(size_type size, * scalar const& init, * size_type months, - * rmm::mr::device_memory_resource* mr) + * rmm::device_async_resource_ref mr) * * @param stream CUDA stream used for device memory operations and kernel launches. */ @@ -61,7 +62,7 @@ std::unique_ptr calendrical_month_sequence(size_type size, scalar const& init, size_type months, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/sizes_to_offsets_iterator.cuh b/cpp/include/cudf/detail/sizes_to_offsets_iterator.cuh index 08917bfce24..63e4fca8915 100644 --- a/cpp/include/cudf/detail/sizes_to_offsets_iterator.cuh +++ b/cpp/include/cudf/detail/sizes_to_offsets_iterator.cuh @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -300,7 +301,7 @@ std::pair, size_type> make_offsets_child_column( InputIterator begin, InputIterator end, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto count = static_cast(std::distance(begin, end)); auto offsets_column = make_numeric_column( diff --git a/cpp/include/cudf/detail/sorting.hpp b/cpp/include/cudf/detail/sorting.hpp index 97cc054da57..4ddba38a7e9 100644 --- a/cpp/include/cudf/detail/sorting.hpp +++ b/cpp/include/cudf/detail/sorting.hpp @@ -21,6 +21,7 @@ #include #include +#include #include #include @@ -37,7 +38,7 @@ std::unique_ptr sorted_order(table_view const& input, std::vector const& column_order, std::vector const& null_precedence, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::stable_sorted_order 
@@ -48,7 +49,7 @@ std::unique_ptr stable_sorted_order(table_view const& input, std::vector const& column_order, std::vector const& null_precedence, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::sort_by_key @@ -60,7 +61,7 @@ std::unique_ptr
sort_by_key(table_view const& values, std::vector const& column_order, std::vector const& null_precedence, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::rank @@ -74,7 +75,7 @@ std::unique_ptr rank(column_view const& input, null_order null_precedence, bool percentage, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::stable_sort_by_key @@ -86,7 +87,7 @@ std::unique_ptr
stable_sort_by_key(table_view const& values, std::vector const& column_order, std::vector const& null_precedence, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::segmented_sorted_order @@ -98,7 +99,7 @@ std::unique_ptr segmented_sorted_order(table_view const& keys, std::vector const& column_order, std::vector const& null_precedence, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::stable_segmented_sorted_order @@ -111,7 +112,7 @@ std::unique_ptr stable_segmented_sorted_order( std::vector const& column_order, std::vector const& null_precedence, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::segmented_sort_by_key @@ -124,7 +125,7 @@ std::unique_ptr
segmented_sort_by_key(table_view const& values, std::vector const& column_order, std::vector const& null_precedence, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::stable_segmented_sort_by_key @@ -137,7 +138,7 @@ std::unique_ptr
stable_segmented_sort_by_key(table_view const& values, std::vector const& column_order, std::vector const& null_precedence, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::sort @@ -148,7 +149,7 @@ std::unique_ptr
sort(table_view const& values, std::vector const& column_order, std::vector const& null_precedence, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::stable_sort @@ -159,7 +160,7 @@ std::unique_ptr
stable_sort(table_view const& values, std::vector const& column_order, std::vector const& null_precedence, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/stream_compaction.hpp b/cpp/include/cudf/detail/stream_compaction.hpp index 7f366c06a1c..e2974789ea1 100644 --- a/cpp/include/cudf/detail/stream_compaction.hpp +++ b/cpp/include/cudf/detail/stream_compaction.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,12 +23,13 @@ #include #include +#include namespace cudf { namespace detail { /** * @copydoc cudf::drop_nulls(table_view const&, std::vector const&, - * cudf::size_type, rmm::mr::device_memory_resource*) + * cudf::size_type, rmm::device_async_resource_ref) * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ @@ -36,11 +37,11 @@ std::unique_ptr
drop_nulls(table_view const& input, std::vector const& keys, cudf::size_type keep_threshold, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::drop_nans(table_view const&, std::vector const&, - * cudf::size_type, rmm::mr::device_memory_resource*) + * cudf::size_type, rmm::device_async_resource_ref) * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ @@ -48,7 +49,7 @@ std::unique_ptr
drop_nans(table_view const& input, std::vector const& keys, cudf::size_type keep_threshold, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::apply_boolean_mask @@ -58,7 +59,7 @@ std::unique_ptr
drop_nans(table_view const& input, std::unique_ptr
apply_boolean_mask(table_view const& input, column_view const& boolean_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::unique @@ -70,7 +71,7 @@ std::unique_ptr
unique(table_view const& input, duplicate_keep_option keep, null_equality nulls_equal, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::distinct @@ -83,7 +84,7 @@ std::unique_ptr
distinct(table_view const& input, null_equality nulls_equal, nan_equality nans_equal, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::stable_distinct @@ -96,7 +97,7 @@ std::unique_ptr
stable_distinct(table_view const& input, null_equality nulls_equal, nan_equality nans_equal, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::distinct_indices @@ -108,7 +109,7 @@ rmm::device_uvector distinct_indices(table_view const& input, null_equality nulls_equal, nan_equality nans_equal, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::unique_count(column_view const&, null_policy, nan_policy) diff --git a/cpp/include/cudf/detail/structs/utilities.hpp b/cpp/include/cudf/detail/structs/utilities.hpp index c0a79142cef..e736514ac29 100644 --- a/cpp/include/cudf/detail/structs/utilities.hpp +++ b/cpp/include/cudf/detail/structs/utilities.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,6 +23,7 @@ #include #include +#include namespace cudf::structs::detail { @@ -175,7 +176,7 @@ class flattened_table { std::vector const& null_precedence, column_nullability nullability, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Superimpose nulls from a given null mask into the input column, using bitwise AND. @@ -197,7 +198,7 @@ class flattened_table { size_type null_count, std::unique_ptr&& input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Push down nulls from the given input column into its children columns, using bitwise AND. @@ -222,7 +223,7 @@ class flattened_table { * to be kept alive. 
*/ [[nodiscard]] std::pair push_down_nulls( - column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); + column_view const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); /** * @brief Push down nulls from columns of the input table into their children columns, using @@ -249,7 +250,7 @@ class flattened_table { * to be kept alive. */ [[nodiscard]] std::pair push_down_nulls( - table_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); + table_view const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); /** * @brief Checks if a column or any of its children is a struct column with structs that are null. diff --git a/cpp/include/cudf/detail/tdigest/tdigest.hpp b/cpp/include/cudf/detail/tdigest/tdigest.hpp index b529d4a2c53..bfd12c18fff 100644 --- a/cpp/include/cudf/detail/tdigest/tdigest.hpp +++ b/cpp/include/cudf/detail/tdigest/tdigest.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ #include #include +#include namespace cudf { namespace tdigest { @@ -70,7 +71,7 @@ std::unique_ptr group_tdigest(column_view const& values, size_type num_groups, int max_centroids, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Merges tdigests within the same group to generate a new tdigest. @@ -113,7 +114,7 @@ std::unique_ptr group_merge_tdigest(column_view const& values, size_type num_groups, int max_centroids, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Create a tdigest column from its constituent components. 
@@ -139,7 +140,7 @@ std::unique_ptr make_tdigest_column(size_type num_rows, std::unique_ptr&& min_values, std::unique_ptr&& max_values, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Create an empty tdigest column. @@ -152,7 +153,7 @@ std::unique_ptr make_tdigest_column(size_type num_rows, * @returns An empty tdigest column. */ std::unique_ptr make_empty_tdigest_column(rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Create an empty tdigest scalar. @@ -165,7 +166,7 @@ std::unique_ptr make_empty_tdigest_column(rmm::cuda_stream_view stream, * @returns An empty tdigest scalar. */ std::unique_ptr make_empty_tdigest_scalar(rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Generate a tdigest scalar from a set of numeric input values. @@ -199,7 +200,7 @@ std::unique_ptr make_empty_tdigest_scalar(rmm::cuda_stream_view stream, std::unique_ptr reduce_tdigest(column_view const& values, int max_centroids, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Merges multiple tdigest columns to generate a new tdigest scalar. @@ -233,7 +234,7 @@ std::unique_ptr reduce_tdigest(column_view const& values, std::unique_ptr reduce_merge_tdigest(column_view const& input, int max_centroids, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace tdigest diff --git a/cpp/include/cudf/detail/timezone.hpp b/cpp/include/cudf/detail/timezone.hpp index f7f97c0a7c2..037164aa297 100644 --- a/cpp/include/cudf/detail/timezone.hpp +++ b/cpp/include/cudf/detail/timezone.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,12 +18,13 @@ #include #include +#include namespace cudf::detail { /** * @copydoc cudf::make_timezone_transition_table(std::optional, std::string_view, - * rmm::mr::device_memory_resource*) + * rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ @@ -31,6 +32,6 @@ std::unique_ptr
make_timezone_transition_table( std::optional tzif_dir, std::string_view timezone_name, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); } // namespace cudf::detail diff --git a/cpp/include/cudf/detail/transform.hpp b/cpp/include/cudf/detail/transform.hpp index 965fea84860..47e13fa2e5e 100644 --- a/cpp/include/cudf/detail/transform.hpp +++ b/cpp/include/cudf/detail/transform.hpp @@ -21,6 +21,7 @@ #include #include +#include namespace cudf { namespace detail { @@ -34,7 +35,7 @@ std::unique_ptr transform(column_view const& input, data_type output_type, bool is_ptx, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::compute_column @@ -44,7 +45,7 @@ std::unique_ptr transform(column_view const& input, std::unique_ptr compute_column(table_view const& table, ast::expression const& expr, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::nans_to_nulls @@ -52,7 +53,7 @@ std::unique_ptr compute_column(table_view const& table, * @param stream CUDA stream used for device memory operations and kernel launches. */ std::pair, size_type> nans_to_nulls( - column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); + column_view const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); /** * @copydoc cudf::bools_to_mask @@ -60,7 +61,7 @@ std::pair, size_type> nans_to_nulls( * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ std::pair, cudf::size_type> bools_to_mask( - column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); + column_view const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); /** * @copydoc cudf::encode @@ -68,7 +69,7 @@ std::pair, cudf::size_type> bools_to_mask( * @param stream CUDA stream used for device memory operations and kernel launches. */ std::pair, std::unique_ptr> encode( - cudf::table_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); + cudf::table_view const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); /** * @copydoc cudf::one_hot_encode @@ -78,7 +79,7 @@ std::pair, std::unique_ptr> encode( std::pair, table_view> one_hot_encode(column_view const& input, column_view const& categories, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::mask_to_bools @@ -89,7 +90,7 @@ std::unique_ptr mask_to_bools(bitmask_type const* null_mask, size_type begin_bit, size_type end_bit, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::row_bit_count @@ -98,7 +99,7 @@ std::unique_ptr mask_to_bools(bitmask_type const* null_mask, */ std::unique_ptr row_bit_count(table_view const& t, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::segmented_row_bit_count @@ -108,7 +109,7 @@ std::unique_ptr row_bit_count(table_view const& t, std::unique_ptr segmented_row_bit_count(table_view const& t, size_type segment_length, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/transpose.hpp b/cpp/include/cudf/detail/transpose.hpp index d0be51860b2..1f8effc8103 100644 --- 
a/cpp/include/cudf/detail/transpose.hpp +++ b/cpp/include/cudf/detail/transpose.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #include #include +#include namespace cudf { namespace detail { @@ -30,7 +31,7 @@ namespace detail { */ std::pair, table_view> transpose(table_view const& input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/unary.hpp b/cpp/include/cudf/detail/unary.hpp index 12f864de572..5245cfdf079 100644 --- a/cpp/include/cudf/detail/unary.hpp +++ b/cpp/include/cudf/detail/unary.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2023, NVIDIA CORPORATION. + * Copyright (c) 2018-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -22,6 +22,7 @@ #include #include +#include #include @@ -50,7 +51,7 @@ std::unique_ptr true_if(InputIterator begin, size_type size, Predicate p, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto output = make_numeric_column(data_type(type_id::BOOL8), size, mask_state::UNALLOCATED, stream, mr); @@ -68,14 +69,14 @@ std::unique_ptr true_if(InputIterator begin, std::unique_ptr unary_operation(cudf::column_view const& input, cudf::unary_operator op, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::is_valid */ std::unique_ptr is_valid(cudf::column_view const& input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::cast @@ -83,21 +84,21 @@ std::unique_ptr is_valid(cudf::column_view const& input, std::unique_ptr cast(column_view const& input, data_type type, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::is_nan */ std::unique_ptr is_nan(cudf::column_view const& input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::is_not_nan */ std::unique_ptr is_not_nan(cudf::column_view const& input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/utilities/cuda.cuh b/cpp/include/cudf/detail/utilities/cuda.cuh index 86c85ca8d06..f1775c6d6d7 100644 --- a/cpp/include/cudf/detail/utilities/cuda.cuh +++ b/cpp/include/cudf/detail/utilities/cuda.cuh @@ -93,6 +93,19 @@ class grid_1d { return global_thread_id(threadIdx.x, blockIdx.x, blockDim.x); } + /** + * @brief Returns the global thread index of the current thread in a 1D grid. 
+ * + * @tparam num_threads_per_block The number of threads per block + * + * @return thread_index_type The global thread index + */ + template + static __device__ thread_index_type global_thread_id() + { + return global_thread_id(threadIdx.x, blockIdx.x, num_threads_per_block); + } + /** * @brief Returns the stride of a 1D grid. * @@ -115,6 +128,19 @@ class grid_1d { * @return thread_index_type The number of threads in the grid. */ static __device__ thread_index_type grid_stride() { return grid_stride(blockDim.x, gridDim.x); } + + /** + * @brief Returns the stride of the current 1D grid. + * + * @tparam num_threads_per_block The number of threads per block + * + * @return thread_index_type The number of threads in the grid. + */ + template + static __device__ thread_index_type grid_stride() + { + return grid_stride(num_threads_per_block, gridDim.x); + } }; /** diff --git a/cpp/include/cudf/detail/utilities/rmm_host_vector.hpp b/cpp/include/cudf/detail/utilities/rmm_host_vector.hpp index 858501877b0..6901a19473e 100644 --- a/cpp/include/cudf/detail/utilities/rmm_host_vector.hpp +++ b/cpp/include/cudf/detail/utilities/rmm_host_vector.hpp @@ -109,30 +109,6 @@ class rmm_host_allocator { { } - /** - * @brief Copy constructor - */ - rmm_host_allocator(rmm_host_allocator const& other) = default; - - /** - * @brief Move constructor - */ - rmm_host_allocator(rmm_host_allocator&& other) = default; - - /** - * @brief Assignment operator - */ - rmm_host_allocator& operator=(rmm_host_allocator const& other) - { - mr = other.mr; - return *this; - } - - /** - * @brief rmm_host_allocator's null destructor does nothing. - */ - inline ~rmm_host_allocator() {} - /** * @brief This method allocates storage for objects in host memory. * @@ -183,7 +159,10 @@ class rmm_host_allocator { * @param x The other \p rmm_host_allocator of interest. * @return This method always returns \c true. 
*/ - inline bool operator==(rmm_host_allocator const& x) const { return x.mr == mr; } + inline bool operator==(rmm_host_allocator const& x) const + { + return x.mr == mr && x.stream == stream; + } /** * @brief This method tests this \p rmm_host_allocator for inequality diff --git a/cpp/include/cudf/detail/utilities/stream_pool.hpp b/cpp/include/cudf/detail/utilities/stream_pool.hpp index 19ef26a10cb..e19cc3ec2f7 100644 --- a/cpp/include/cudf/detail/utilities/stream_pool.hpp +++ b/cpp/include/cudf/detail/utilities/stream_pool.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -81,6 +81,11 @@ class cuda_stream_pool { */ cuda_stream_pool* create_global_cuda_stream_pool(); +/** + * @brief Get the global stream pool. + */ +cuda_stream_pool& global_cuda_stream_pool(); + /** * @brief Acquire a set of `cuda_stream_view` objects and synchronize them to an event on another * stream. diff --git a/cpp/include/cudf/detail/utilities/vector_factories.hpp b/cpp/include/cudf/detail/utilities/vector_factories.hpp index 90ad98741ad..293a4096c57 100644 --- a/cpp/include/cudf/detail/utilities/vector_factories.hpp +++ b/cpp/include/cudf/detail/utilities/vector_factories.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -28,6 +28,7 @@ #include #include #include +#include #include @@ -50,7 +51,7 @@ namespace detail { template rmm::device_uvector make_zeroed_device_uvector_async(std::size_t size, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { rmm::device_uvector ret(size, stream, mr); CUDF_CUDA_TRY(cudaMemsetAsync(ret.data(), 0, size * sizeof(T), stream.value())); @@ -71,7 +72,7 @@ rmm::device_uvector make_zeroed_device_uvector_async(std::size_t size, template rmm::device_uvector make_zeroed_device_uvector_sync(std::size_t size, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { rmm::device_uvector ret(size, stream, mr); CUDF_CUDA_TRY(cudaMemsetAsync(ret.data(), 0, size * sizeof(T), stream.value())); @@ -94,7 +95,7 @@ rmm::device_uvector make_zeroed_device_uvector_sync(std::size_t size, template rmm::device_uvector make_device_uvector_async(host_span source_data, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { rmm::device_uvector ret(source_data.size(), stream, mr); CUDF_CUDA_TRY(cudaMemcpyAsync(ret.data(), @@ -123,7 +124,7 @@ template < std::enable_if_t< std::is_convertible_v>>* = nullptr> rmm::device_uvector make_device_uvector_async( - Container const& c, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) + Container const& c, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { return make_device_uvector_async(host_span{c}, stream, mr); } @@ -143,7 +144,7 @@ rmm::device_uvector make_device_uvector_async( template rmm::device_uvector make_device_uvector_async(device_span source_data, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { rmm::device_uvector ret(source_data.size(), stream, mr); CUDF_CUDA_TRY(cudaMemcpyAsync(ret.data(), @@ -172,7 +173,7 @@ template < std::enable_if_t< std::is_convertible_v>>* = nullptr> 
rmm::device_uvector make_device_uvector_async( - Container const& c, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) + Container const& c, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { return make_device_uvector_async( device_span{c}, stream, mr); @@ -193,7 +194,7 @@ rmm::device_uvector make_device_uvector_async( template rmm::device_uvector make_device_uvector_sync(host_span source_data, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto ret = make_device_uvector_async(source_data, stream, mr); stream.synchronize(); @@ -218,7 +219,7 @@ template < std::enable_if_t< std::is_convertible_v>>* = nullptr> rmm::device_uvector make_device_uvector_sync( - Container const& c, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) + Container const& c, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { return make_device_uvector_sync(host_span{c}, stream, mr); } @@ -238,7 +239,7 @@ rmm::device_uvector make_device_uvector_sync( template rmm::device_uvector make_device_uvector_sync(device_span source_data, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto ret = make_device_uvector_async(source_data, stream, mr); stream.synchronize(); @@ -263,7 +264,7 @@ template < std::enable_if_t< std::is_convertible_v>>* = nullptr> rmm::device_uvector make_device_uvector_sync( - Container const& c, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) + Container const& c, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { return make_device_uvector_sync(device_span{c}, stream, mr); } diff --git a/cpp/include/cudf/detail/valid_if.cuh b/cpp/include/cudf/detail/valid_if.cuh index d0073177445..64a3c4edf78 100644 --- a/cpp/include/cudf/detail/valid_if.cuh +++ b/cpp/include/cudf/detail/valid_if.cuh @@ -25,6 +25,7 @@ #include #include +#include #include @@ -49,8 +50,8 @@ 
CUDF_KERNEL void valid_if_kernel( { constexpr size_type leader_lane{0}; auto const lane_id{threadIdx.x % warp_size}; - auto i = cudf::detail::grid_1d::global_thread_id(); - auto const stride = cudf::detail::grid_1d::grid_stride(); + auto i = cudf::detail::grid_1d::global_thread_id(); + auto const stride = cudf::detail::grid_1d::grid_stride(); size_type warp_valid_count{0}; auto active_mask = __ballot_sync(0xFFFF'FFFFu, i < size); @@ -90,7 +91,7 @@ std::pair valid_if(InputIterator begin, InputIterator end, Predicate p, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(begin <= end, "Invalid range."); diff --git a/cpp/include/cudf/dictionary/detail/concatenate.hpp b/cpp/include/cudf/dictionary/detail/concatenate.hpp index d74429484ce..55f3825b3ec 100644 --- a/cpp/include/cudf/dictionary/detail/concatenate.hpp +++ b/cpp/include/cudf/dictionary/detail/concatenate.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ #include #include +#include namespace cudf { namespace dictionary { @@ -39,7 +40,7 @@ namespace detail { */ std::unique_ptr concatenate(host_span columns, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace dictionary diff --git a/cpp/include/cudf/dictionary/detail/encode.hpp b/cpp/include/cudf/dictionary/detail/encode.hpp index 2aad7dd80ed..3b5a3bbab56 100644 --- a/cpp/include/cudf/dictionary/detail/encode.hpp +++ b/cpp/include/cudf/dictionary/detail/encode.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ #include #include +#include namespace cudf { namespace dictionary { @@ -54,7 +55,7 @@ namespace detail { std::unique_ptr encode(column_view const& column, data_type indices_type, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Create a column by gathering the keys from the provided @@ -73,7 +74,7 @@ std::unique_ptr encode(column_view const& column, */ std::unique_ptr decode(dictionary_column_view const& dictionary_column, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Return minimal integer type for the given number of elements. diff --git a/cpp/include/cudf/dictionary/detail/merge.hpp b/cpp/include/cudf/dictionary/detail/merge.hpp index cad495d0097..c4229690ff5 100644 --- a/cpp/include/cudf/dictionary/detail/merge.hpp +++ b/cpp/include/cudf/dictionary/detail/merge.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -20,6 +20,7 @@ #include #include +#include namespace cudf { namespace dictionary { @@ -44,7 +45,7 @@ std::unique_ptr merge(dictionary_column_view const& lcol, dictionary_column_view const& rcol, cudf::detail::index_vector const& row_order, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace dictionary diff --git a/cpp/include/cudf/dictionary/detail/replace.hpp b/cpp/include/cudf/dictionary/detail/replace.hpp index 0778baa84d6..81a91d57169 100644 --- a/cpp/include/cudf/dictionary/detail/replace.hpp +++ b/cpp/include/cudf/dictionary/detail/replace.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ #include #include +#include namespace cudf { namespace dictionary { @@ -42,7 +43,7 @@ namespace detail { std::unique_ptr replace_nulls(dictionary_column_view const& input, dictionary_column_view const& replacement, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Create a new dictionary column by replacing nulls with a @@ -59,7 +60,7 @@ std::unique_ptr replace_nulls(dictionary_column_view const& input, std::unique_ptr replace_nulls(dictionary_column_view const& input, scalar const& replacement, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace dictionary diff --git a/cpp/include/cudf/dictionary/detail/search.hpp b/cpp/include/cudf/dictionary/detail/search.hpp index 62059306b9a..2563b96b214 100644 --- a/cpp/include/cudf/dictionary/detail/search.hpp +++ b/cpp/include/cudf/dictionary/detail/search.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. 
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #include #include +#include namespace cudf { namespace dictionary { @@ -27,14 +28,14 @@ namespace detail { /** * @copydoc cudf::dictionary::get_index(dictionary_column_view const&,scalar - * const&,rmm::mr::device_memory_resource*) + * const&,rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr get_index(dictionary_column_view const& dictionary, scalar const& key, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Get the index for a key if it were added to the given dictionary. @@ -58,7 +59,7 @@ std::unique_ptr get_index(dictionary_column_view const& dictionary, std::unique_ptr get_insert_index(dictionary_column_view const& dictionary, scalar const& key, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace dictionary diff --git a/cpp/include/cudf/dictionary/detail/update_keys.hpp b/cpp/include/cudf/dictionary/detail/update_keys.hpp index 6fd743ad526..e8486a80afc 100644 --- a/cpp/include/cudf/dictionary/detail/update_keys.hpp +++ b/cpp/include/cudf/dictionary/detail/update_keys.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -22,6 +22,7 @@ #include #include +#include namespace cudf { namespace dictionary { @@ -35,7 +36,7 @@ namespace detail { std::unique_ptr add_keys(dictionary_column_view const& dictionary_column, column_view const& new_keys, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::dictionary::remove_keys(dictionary_column_view const&,column_view @@ -46,7 +47,7 @@ std::unique_ptr add_keys(dictionary_column_view const& dictionary_column std::unique_ptr remove_keys(dictionary_column_view const& dictionary_column, column_view const& keys_to_remove, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::dictionary::remove_unused_keys(dictionary_column_view @@ -56,7 +57,7 @@ std::unique_ptr remove_keys(dictionary_column_view const& dictionary_col */ std::unique_ptr remove_unused_keys(dictionary_column_view const& dictionary_column, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::dictionary::set_keys(dictionary_column_view @@ -67,7 +68,7 @@ std::unique_ptr remove_unused_keys(dictionary_column_view const& diction std::unique_ptr set_keys(dictionary_column_view const& dictionary_column, column_view const& keys, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc @@ -78,7 +79,7 @@ std::unique_ptr set_keys(dictionary_column_view const& dictionary_column std::vector> match_dictionaries( cudf::host_span input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Create new dictionaries that have keys merged from dictionary columns @@ -100,9 +101,7 @@ std::vector> match_dictionaries( * @return New dictionary columns and updated cudf::table_views. 
*/ std::pair>, std::vector> match_dictionaries( - std::vector tables, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + std::vector tables, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); } // namespace detail } // namespace dictionary diff --git a/cpp/include/cudf/dictionary/dictionary_factories.hpp b/cpp/include/cudf/dictionary/dictionary_factories.hpp index 821981ad148..7cdfa3bf9e5 100644 --- a/cpp/include/cudf/dictionary/dictionary_factories.hpp +++ b/cpp/include/cudf/dictionary/dictionary_factories.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ #include #include +#include namespace cudf { /** @@ -65,8 +66,8 @@ namespace cudf { std::unique_ptr make_dictionary_column( column_view const& keys_column, column_view const& indices_column, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct a dictionary column by taking ownership of the provided keys @@ -117,8 +118,8 @@ std::unique_ptr make_dictionary_column(std::unique_ptr keys_colu std::unique_ptr make_dictionary_column( std::unique_ptr keys_column, std::unique_ptr indices_column, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace cudf diff --git a/cpp/include/cudf/dictionary/encode.hpp b/cpp/include/cudf/dictionary/encode.hpp index 
959b785bf87..768e2be2b0d 100644 --- a/cpp/include/cudf/dictionary/encode.hpp +++ b/cpp/include/cudf/dictionary/encode.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #include #include +#include namespace cudf { namespace dictionary { @@ -59,9 +60,9 @@ namespace dictionary { */ std::unique_ptr encode( column_view const& column, - data_type indices_type = data_type{type_id::UINT32}, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + data_type indices_type = data_type{type_id::UINT32}, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Create a column by gathering the keys from the provided @@ -80,8 +81,8 @@ std::unique_ptr encode( */ std::unique_ptr decode( dictionary_column_view const& dictionary_column, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace dictionary diff --git a/cpp/include/cudf/dictionary/search.hpp b/cpp/include/cudf/dictionary/search.hpp index 1b72cf42acd..1dff6dc1d5d 100644 --- a/cpp/include/cudf/dictionary/search.hpp +++ b/cpp/include/cudf/dictionary/search.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -19,6 +19,7 @@ #include #include +#include namespace cudf { namespace dictionary { @@ -44,8 +45,8 @@ namespace dictionary { std::unique_ptr get_index( dictionary_column_view const& dictionary, scalar const& key, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace dictionary diff --git a/cpp/include/cudf/dictionary/update_keys.hpp b/cpp/include/cudf/dictionary/update_keys.hpp index 40504c22edd..ce7057359a1 100644 --- a/cpp/include/cudf/dictionary/update_keys.hpp +++ b/cpp/include/cudf/dictionary/update_keys.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -20,6 +20,7 @@ #include #include +#include namespace cudf { namespace dictionary { @@ -59,8 +60,8 @@ namespace dictionary { std::unique_ptr add_keys( dictionary_column_view const& dictionary_column, column_view const& new_keys, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Create a new dictionary column by removing the specified keys @@ -91,8 +92,8 @@ std::unique_ptr add_keys( std::unique_ptr remove_keys( dictionary_column_view const& dictionary_column, column_view const& keys_to_remove, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Create a new dictionary column by removing any keys @@ -113,8 +114,8 @@ std::unique_ptr remove_keys( */ std::unique_ptr remove_unused_keys( dictionary_column_view const& dictionary_column, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Create a new dictionary column by applying only the specified keys @@ -147,8 +148,8 @@ std::unique_ptr remove_unused_keys( std::unique_ptr set_keys( dictionary_column_view const& dictionary_column, column_view const& keys, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = 
rmm::mr::get_current_device_resource()); /** * @brief Create new dictionaries that have keys merged from the input dictionaries. @@ -163,8 +164,8 @@ std::unique_ptr set_keys( */ std::vector> match_dictionaries( cudf::host_span input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace dictionary diff --git a/cpp/include/cudf/filling.hpp b/cpp/include/cudf/filling.hpp index 1268f488919..90139e8634a 100644 --- a/cpp/include/cudf/filling.hpp +++ b/cpp/include/cudf/filling.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #include #include +#include #include @@ -91,8 +92,8 @@ std::unique_ptr fill( size_type begin, size_type end, scalar const& value, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Repeat rows of a Table. @@ -125,8 +126,8 @@ std::unique_ptr fill( std::unique_ptr
repeat( table_view const& input_table, column_view const& count, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Repeat rows of a Table. @@ -150,8 +151,8 @@ std::unique_ptr
repeat( std::unique_ptr
repeat( table_view const& input_table, size_type count, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Fills a column with a sequence of value specified by an initial value and a step. @@ -181,8 +182,8 @@ std::unique_ptr sequence( size_type size, scalar const& init, scalar const& step, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Fills a column with a sequence of value specified by an initial value and a step of 1. @@ -208,8 +209,8 @@ std::unique_ptr sequence( std::unique_ptr sequence( size_type size, scalar const& init, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Generate a sequence of timestamps beginning at `init` and incrementing by `months` for @@ -239,8 +240,8 @@ std::unique_ptr calendrical_month_sequence( size_type size, scalar const& init, size_type months, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace cudf diff --git a/cpp/include/cudf/fixed_point/fixed_point.hpp b/cpp/include/cudf/fixed_point/fixed_point.hpp index 4445af6c5a8..e39d75757e8 100644 --- 
a/cpp/include/cudf/fixed_point/fixed_point.hpp +++ b/cpp/include/cudf/fixed_point/fixed_point.hpp @@ -67,18 +67,6 @@ constexpr inline auto is_supported_representation_type() cuda::std::is_same_v; } -/** - * @brief Returns `true` if the value type is supported for constructing a `fixed_point` - * - * @tparam T The construction value type - * @return `true` if the value type is supported to construct a `fixed_point` type - */ -template -constexpr inline auto is_supported_construction_value_type() -{ - return cuda::std::is_integral() || cuda::std::is_floating_point_v; -} - /** @} */ // end of group // Helper functions for `fixed_point` type @@ -222,23 +210,8 @@ class fixed_point { scale_type _scale; public: - using rep = Rep; ///< The representation type - - /** - * @brief Constructor that will perform shifting to store value appropriately (from floating point - * types) - * - * @tparam T The floating point type that you are constructing from - * @param value The value that will be constructed from - * @param scale The exponent that is applied to Rad to perform shifting - */ - template () && - is_supported_representation_type()>* = nullptr> - CUDF_HOST_DEVICE inline explicit fixed_point(T const& value, scale_type const& scale) - : _value{static_cast(detail::shift(value, scale))}, _scale{scale} - { - } + using rep = Rep; ///< The representation type + static constexpr auto rad = Rad; ///< The base /** * @brief Constructor that will perform shifting to store value appropriately (from integral @@ -249,7 +222,7 @@ class fixed_point { * @param scale The exponent that is applied to Rad to perform shifting */ template () && + typename cuda::std::enable_if_t && is_supported_representation_type()>* = nullptr> CUDF_HOST_DEVICE inline explicit fixed_point(T const& value, scale_type const& scale) // `value` is cast to `Rep` to avoid overflow in cases where @@ -275,8 +248,7 @@ class fixed_point { * @tparam T The value type being constructing from * @param value The value that will 
be constructed from */ - template ()>* = nullptr> + template >* = nullptr> CUDF_HOST_DEVICE inline fixed_point(T const& value) : _value{static_cast(value)}, _scale{scale_type{0}} { @@ -288,19 +260,6 @@ class fixed_point { */ CUDF_HOST_DEVICE inline fixed_point() : _scale{scale_type{0}} {} - /** - * @brief Explicit conversion operator for casting to floating point types - * - * @tparam U The floating point type that is being explicitly converted to - * @return The `fixed_point` number in base 10 (aka human readable format) - */ - template >* = nullptr> - explicit constexpr operator U() const - { - return detail::shift(static_cast(_value), scale_type{-_scale}); - } - /** * @brief Explicit conversion operator for casting to integral types * diff --git a/cpp/include/cudf/groupby.hpp b/cpp/include/cudf/groupby.hpp index 1c31e8777a8..831ef68ed15 100644 --- a/cpp/include/cudf/groupby.hpp +++ b/cpp/include/cudf/groupby.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,6 +25,7 @@ #include #include +#include #include #include @@ -184,17 +185,17 @@ class groupby { */ std::pair, std::vector> aggregate( host_span requests, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** - * @copydoc aggregate(host_span, rmm::mr::device_memory_resource*) + * @copydoc aggregate(host_span, rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ std::pair, std::vector> aggregate( host_span requests, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Performs grouped scans on the specified values. * @@ -248,7 +249,7 @@ class groupby { */ std::pair, std::vector> scan( host_span requests, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Performs grouped shifts for specified values. @@ -304,7 +305,7 @@ class groupby { table_view const& values, host_span offsets, std::vector> const& fill_values, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief The grouped data corresponding to a groupby operation on a set of values. @@ -332,8 +333,8 @@ class groupby { * returned groups * @return A `groups` object representing grouped keys and values */ - groups get_groups(cudf::table_view values = {}, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + groups get_groups(cudf::table_view values = {}, + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Performs grouped replace nulls on @p value @@ -373,7 +374,7 @@ class groupby { std::pair, std::unique_ptr
> replace_nulls( table_view const& values, host_span replace_policies, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); private: table_view _keys; ///< Keys that determine grouping @@ -404,18 +405,18 @@ class groupby { std::pair, std::vector> dispatch_aggregation( host_span requests, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); // Sort-based groupby std::pair, std::vector> sort_aggregate( host_span requests, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); std::pair, std::vector> sort_scan( host_span requests, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); }; /** @} */ } // namespace groupby diff --git a/cpp/include/cudf/hashing.hpp b/cpp/include/cudf/hashing.hpp index 64a78da1803..3c2f6dfe0d5 100644 --- a/cpp/include/cudf/hashing.hpp +++ b/cpp/include/cudf/hashing.hpp @@ -19,6 +19,7 @@ #include #include +#include namespace cudf { @@ -34,42 +35,11 @@ namespace cudf { */ using hash_value_type = uint32_t; -/** - * @brief Identifies the hash function to be used - * - */ -enum class hash_id { - HASH_IDENTITY = 0, ///< Identity hash function that simply returns the key to be hashed - HASH_MURMUR3, ///< Murmur3 hash function - HASH_SPARK_MURMUR3, ///< Spark Murmur3 hash function - HASH_MD5 ///< MD5 hash function -}; - /** * @brief The default seed value for hash functions */ static constexpr uint32_t DEFAULT_HASH_SEED = 0; -/** - * @brief Computes the hash value of each row in the input set of columns. 
- * - * @deprecated Since 23.08 - * - * @param input The table of columns to hash - * @param hash_function The hash function enum to use - * @param seed Optional seed value to use for the hash function - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned column's device memory - * - * @returns A column where each row is the hash of a column from the input - */ -[[deprecated]] std::unique_ptr hash( - table_view const& input, - hash_id hash_function = hash_id::HASH_MURMUR3, - uint32_t seed = DEFAULT_HASH_SEED, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - //! Hash APIs namespace hashing { @@ -89,9 +59,9 @@ namespace hashing { */ std::unique_ptr murmurhash3_x86_32( table_view const& input, - uint32_t seed = DEFAULT_HASH_SEED, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + uint32_t seed = DEFAULT_HASH_SEED, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Computes the MurmurHash3 64-bit hash value of each row in the given table @@ -108,30 +78,9 @@ std::unique_ptr murmurhash3_x86_32( */ std::unique_ptr
murmurhash3_x64_128( table_view const& input, - uint64_t seed = DEFAULT_HASH_SEED, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Computes the MurmurHash3 32-bit hash value of each row in the given table - * - * @deprecated Since 24.04 - * - * This function computes the hash similar to MurmurHash3_x86_32 with special processing - * to match Spark's implementation results. - * - * @param input The table of columns to hash - * @param seed Optional seed value to use for the hash function - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned column's device memory - * - * @returns A column where each row is the hash of a row from the input - */ -[[deprecated]] std::unique_ptr spark_murmurhash3_x86_32( - table_view const& input, - uint32_t seed = DEFAULT_HASH_SEED, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + uint64_t seed = DEFAULT_HASH_SEED, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Computes the MD5 hash value of each row in the given table @@ -144,8 +93,8 @@ std::unique_ptr
murmurhash3_x64_128( */ std::unique_ptr md5( table_view const& input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Computes the SHA-1 hash value of each row in the given table @@ -158,8 +107,8 @@ std::unique_ptr md5( */ std::unique_ptr sha1( table_view const& input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Computes the SHA-224 hash value of each row in the given table @@ -172,8 +121,8 @@ std::unique_ptr sha1( */ std::unique_ptr sha224( table_view const& input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Computes the SHA-256 hash value of each row in the given table @@ -186,8 +135,8 @@ std::unique_ptr sha224( */ std::unique_ptr sha256( table_view const& input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Computes the SHA-384 hash value of each row in the given table @@ -200,8 +149,8 @@ std::unique_ptr sha256( */ std::unique_ptr sha384( table_view const& input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = 
rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Computes the SHA-512 hash value of each row in the given table @@ -214,8 +163,8 @@ std::unique_ptr sha384( */ std::unique_ptr sha512( table_view const& input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Computes the XXHash_64 hash value of each row in the given table @@ -231,9 +180,9 @@ std::unique_ptr sha512( */ std::unique_ptr xxhash_64( table_view const& input, - uint64_t seed = DEFAULT_HASH_SEED, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + uint64_t seed = DEFAULT_HASH_SEED, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); } // namespace hashing diff --git a/cpp/include/cudf/hashing/detail/hashing.hpp b/cpp/include/cudf/hashing/detail/hashing.hpp index eaeb5d6b068..77266ceb48f 100644 --- a/cpp/include/cudf/hashing/detail/hashing.hpp +++ b/cpp/include/cudf/hashing/detail/hashing.hpp @@ -19,6 +19,7 @@ #include #include +#include #include #include @@ -30,46 +31,41 @@ namespace detail { std::unique_ptr murmurhash3_x86_32(table_view const& input, uint32_t seed, rmm::cuda_stream_view, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); std::unique_ptr
murmurhash3_x64_128(table_view const& input, uint64_t seed, rmm::cuda_stream_view, - rmm::mr::device_memory_resource* mr); - -std::unique_ptr spark_murmurhash3_x86_32(table_view const& input, - uint32_t seed, - rmm::cuda_stream_view, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); std::unique_ptr md5(table_view const& input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); std::unique_ptr sha1(table_view const& input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); std::unique_ptr sha224(table_view const& input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); std::unique_ptr sha256(table_view const& input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); std::unique_ptr sha384(table_view const& input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); std::unique_ptr sha512(table_view const& input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); std::unique_ptr xxhash_64(table_view const& input, uint64_t seed, rmm::cuda_stream_view, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /* Copyright 2005-2014 Daniel James. * diff --git a/cpp/include/cudf/interop.hpp b/cpp/include/cudf/interop.hpp index 2ee6f19614d..bb05a622f40 100644 --- a/cpp/include/cudf/interop.hpp +++ b/cpp/include/cudf/interop.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -23,6 +23,8 @@ #pragma nv_diag_suppress 611 #pragma nv_diag_suppress 2810 #endif +#include + #include #ifdef __CUDACC__ #pragma nv_diag_default 611 @@ -34,11 +36,16 @@ #include #include #include +#include #include struct DLManagedTensor; +struct ArrowDeviceArray; + +struct ArrowSchema; + namespace cudf { /** * @addtogroup interop_dlpack @@ -65,7 +72,7 @@ namespace cudf { */ std::unique_ptr
from_dlpack( DLManagedTensor const* managed_tensor, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Convert a cudf table into a DLPack DLTensor @@ -87,7 +94,7 @@ std::unique_ptr
from_dlpack( */ DLManagedTensor* to_dlpack( table_view const& input, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group @@ -162,6 +169,159 @@ std::shared_ptr to_arrow(cudf::scalar const& input, column_metadata const& metadata = {}, rmm::cuda_stream_view stream = cudf::get_default_stream(), arrow::MemoryPool* ar_mr = arrow::default_memory_pool()); + +/** + * @brief typedef for a unique_ptr to an ArrowSchema with custom deleter + * + */ +using unique_schema_t = std::unique_ptr; + +/** + * @brief typedef for a unique_ptr to an ArrowDeviceArray with a custom deleter + * + */ +using unique_device_array_t = std::unique_ptr; + +/** + * @brief Create ArrowSchema from cudf table and metadata + * + * Populates and returns an ArrowSchema C struct using a table and metadata. + * + * @note For decimals, since the precision is not stored for them in libcudf, + * decimals will be converted to an Arrow decimal128 which has the widest precision that cudf + * decimal type supports. For example, `numeric::decimal32` will be converted to Arrow decimal128 + * with the precision of 9 which is the maximum precision for 32-bit types. Similarly, + * `numeric::decimal128` will be converted to Arrow decimal128 with the precision of 38. + * + * @param input Table to create a schema from + * @param metadata Contains the hierarchy of names of columns and children + * @return ArrowSchema generated from `input` + */ +unique_schema_t to_arrow_schema(cudf::table_view const& input, + cudf::host_span metadata); + +/** + * @brief Create `ArrowDeviceArray` from cudf table and metadata + * + * Populates the C struct ArrowDeviceArray without performing copies if possible. + * This maintains the data on the GPU device and gives ownership of the table + * and its buffers to the ArrowDeviceArray struct. 
+ * + * After calling this function, the release callback on the returned ArrowDeviceArray + * must be called to clean up the memory. + * + * @note For decimals, since the precision is not stored for them in libcudf + * it will be converted to an Arrow decimal128 with the widest-precision the cudf decimal type + * supports. For example, numeric::decimal32 will be converted to Arrow decimal128 of the precision + * 9 which is the maximum precision for 32-bit types. Similarly, numeric::decimal128 will be + * converted to Arrow decimal128 of the precision 38. + * + * @note Copies will be performed in the cases where cudf differs from Arrow + * such as in the representation of bools (Arrow uses a bitmap, cudf uses 1-byte per value). + * + * @param table Input table, ownership of the data will be moved to the result + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used for any allocations during conversion + * @return ArrowDeviceArray which will have ownership of the GPU data, consumer must call release + */ +unique_device_array_t to_arrow_device( + cudf::table&& table, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Create `ArrowDeviceArray` from cudf column and metadata + * + * Populates the C struct ArrowDeviceArray without performing copies if possible. + * This maintains the data on the GPU device and gives ownership of the table + * and its buffers to the ArrowDeviceArray struct. + * + * After calling this function, the release callback on the returned ArrowDeviceArray + * must be called to clean up the memory. + * + * @note For decimals, since the precision is not stored for them in libcudf + * it will be converted to an Arrow decimal128 with the widest-precision the cudf decimal type + * supports. 
For example, numeric::decimal32 will be converted to Arrow decimal128 of the precision + * 9 which is the maximum precision for 32-bit types. Similar, numeric::decimal128 will be + * converted to Arrow decimal128 of the precision 38. + * + * @note Copies will be performed in the cases where cudf differs from Arrow such as + * in the representation of bools (Arrow uses a bitmap, cudf uses 1 byte per value). + * + * @param col Input column, ownership of the data will be moved to the result + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used for any allocations during conversion + * @return ArrowDeviceArray which will have ownership of the GPU data + */ +unique_device_array_t to_arrow_device( + cudf::column&& col, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Create `ArrowDeviceArray` from a table view + * + * Populates the C struct ArrowDeviceArray performing copies only if necessary. + * This wraps the data on the GPU device and gives a view of the table data + * to the ArrowDeviceArray struct. If the caller frees the data referenced by + * the table_view, using the returned object results in undefined behavior. + * + * After calling this function, the release callback on the returned ArrowDeviceArray + * must be called to clean up any memory created during conversion. + * + * @note For decimals, since the precision is not stored for them in libcudf + * it will be converted to an Arrow decimal128 with the widest-precision the cudf decimal type + * supports. For example, numeric::decimal32 will be converted to Arrow decimal128 of the precision + * 9 which is the maximum precision for 32-bit types. Similarly, numeric::decimal128 will be + * converted to Arrow decimal128 of the precision 38. 
+ * + * Copies will be performed in the cases where cudf differs from Arrow: + * - BOOL8: Arrow uses a bitmap and cudf uses 1 byte per value + * - DECIMAL32 and DECIMAL64: Converted to Arrow decimal128 + * - STRING: Arrow expects a single value int32 offset child array for empty strings columns + * + * @param table Input table + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used for any allocations during conversion + * @return ArrowDeviceArray which will have ownership of any copied data + */ +unique_device_array_t to_arrow_device( + cudf::table_view const& table, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Create `ArrowDeviceArray` from a column view + * + * Populates the C struct ArrowDeviceArray performing copies only if necessary. + * This wraps the data on the GPU device and gives a view of the column data + * to the ArrowDeviceArray struct. If the caller frees the data referenced by + * the column_view, using the returned object results in undefined behavior. + * + * After calling this function, the release callback on the returned ArrowDeviceArray + * must be called to clean up any memory created during conversion. + * + * @note For decimals, since the precision is not stored for them in libcudf + * it will be converted to an Arrow decimal128 with the widest-precision the cudf decimal type + * supports. For example, numeric::decimal32 will be converted to Arrow decimal128 of the precision + * 9 which is the maximum precision for 32-bit types. Similar, numeric::decimal128 will be + * converted to Arrow decimal128 of the precision 38. 
+ * + * Copies will be performed in the cases where cudf differs from Arrow: + * - BOOL8: Arrow uses a bitmap and cudf uses 1 byte per value + * - DECIMAL32 and DECIMAL64: Converted to Arrow decimal128 + * - STRING: Arrow expects a single value int32 offset child array for empty strings columns + * + * @param col Input column + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used for any allocations during conversion + * @return ArrowDeviceArray which will have ownership of any copied data + */ +unique_device_array_t to_arrow_device( + cudf::column_view const& col, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + /** * @brief Create `cudf::table` from given arrow Table input * @@ -170,11 +330,10 @@ std::shared_ptr to_arrow(cudf::scalar const& input, * @param mr Device memory resource used to allocate `cudf::table` * @return cudf table generated from given arrow Table */ - std::unique_ptr
from_arrow( arrow::Table const& input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Create `cudf::scalar` from given arrow Scalar input @@ -184,9 +343,132 @@ std::unique_ptr
from_arrow( * @param mr Device memory resource used to allocate `cudf::scalar` * @return cudf scalar generated from given arrow Scalar */ - std::unique_ptr from_arrow( arrow::Scalar const& input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + +/** + * @brief typedef for a vector of owning columns, used for conversion from ArrowDeviceArray + * + */ +using owned_columns_t = std::vector>; + +/** + * @brief functor for a custom deleter to a unique_ptr of table_view + * + * When converting from an ArrowDeviceArray, there are cases where data can't + * be zero-copy (i.e. bools or non-UINT32 dictionary indices). This custom deleter + * is used to maintain ownership over the data allocated since a `cudf::table_view` + * doesn't hold ownership. + */ +template +struct custom_view_deleter { + /** + * @brief Construct a new custom view deleter object + * + * @param owned Vector of owning columns + */ + explicit custom_view_deleter(owned_columns_t&& owned) : owned_mem_{std::move(owned)} {} + + /** + * @brief operator to delete the unique_ptr + * + * @param ptr Pointer to the object to be deleted + */ + void operator()(ViewType* ptr) const { delete ptr; } + + owned_columns_t owned_mem_; ///< Owned columns that must be deleted. +}; + +/** + * @brief typedef for a unique_ptr to a `cudf::table_view` with custom deleter + * + */ +using unique_table_view_t = + std::unique_ptr>; + +/** + * @brief Create `cudf::table_view` from given `ArrowDeviceArray` and `ArrowSchema` + * + * Constructs a non-owning `cudf::table_view` using `ArrowDeviceArray` and `ArrowSchema`, + * data must be accessible to the CUDA device. Because the resulting `cudf::table_view` will + * not own the data, the `ArrowDeviceArray` must be kept alive for the lifetime of the result. 
+ * It is the responsibility of callers to ensure they call the release callback on the + * `ArrowDeviceArray` after it is no longer needed, and that the `cudf::table_view` is not + * accessed after this happens. + * + * @throws cudf::logic_error if device_type is not `ARROW_DEVICE_CUDA`, `ARROW_DEVICE_CUDA_HOST` + * or `ARROW_DEVICE_CUDA_MANAGED` + * + * @throws cudf::data_type_error if the input array is not a struct array, non-struct + * arrays should be passed to `from_arrow_device_column` instead. + * + * @throws cudf::data_type_error if the input arrow data type is not supported. + * + * Each child of the input struct will be the columns of the resulting table_view. + * + * @note The custom deleter used for the unique_ptr to the table_view maintains ownership + * over any memory which is allocated, such as converting boolean columns from the bitmap + * used by Arrow to the 1-byte per value for cudf. + * + * @note If the input `ArrowDeviceArray` contained a non-null sync_event it is assumed + * to be a `cudaEvent_t*` and the passed in stream will have `cudaStreamWaitEvent` called + * on it with the event. This function, however, will not explicitly synchronize on the + * stream. 
+ * + * @param schema `ArrowSchema` pointer to object describing the type of the device array + * @param input `ArrowDeviceArray` pointer to object owning the Arrow data + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to perform any allocations + * @return `cudf::table_view` generated from given Arrow data + */ +unique_table_view_t from_arrow_device( + ArrowSchema const* schema, + ArrowDeviceArray const* input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief typedef for a unique_ptr to a `cudf::column_view` with custom deleter + * + */ +using unique_column_view_t = + std::unique_ptr>; + +/** + * @brief Create `cudf::column_view` from given `ArrowDeviceArray` and `ArrowSchema` + * + * Constructs a non-owning `cudf::column_view` using `ArrowDeviceArray` and `ArrowSchema`, + * data must be accessible to the CUDA device. Because the resulting `cudf::column_view` will + * not own the data, the `ArrowDeviceArray` must be kept alive for the lifetime of the result. + * It is the responsibility of callers to ensure they call the release callback on the + * `ArrowDeviceArray` after it is no longer needed, and that the `cudf::column_view` is not + * accessed after this happens. + * + * @throws cudf::logic_error if device_type is not `ARROW_DEVICE_CUDA`, `ARROW_DEVICE_CUDA_HOST` + * or `ARROW_DEVICE_CUDA_MANAGED` + * + * @throws cudf::data_type_error input arrow data type is not supported. + * + * @note The custom deleter used for the unique_ptr to the table_view maintains ownership + * over any memory which is allocated, such as converting boolean columns from the bitmap + * used by Arrow to the 1-byte per value for cudf. 
+ * + * @note If the input `ArrowDeviceArray` contained a non-null sync_event it is assumed + * to be a `cudaEvent_t*` and the passed in stream will have `cudaStreamWaitEvent` called + * on it with the event. This function, however, will not explicitly synchronize on the + * stream. + * + * @param schema `ArrowSchema` pointer to object describing the type of the device array + * @param input `ArrowDeviceArray` pointer to object owning the Arrow data + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to perform any allocations + * @return `cudf::column_view` generated from given Arrow data + */ +unique_column_view_t from_arrow_device_column( + ArrowSchema const* schema, + ArrowDeviceArray const* input, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/include/cudf/interop/detail/arrow.hpp b/cpp/include/cudf/interop/detail/arrow.hpp new file mode 100644 index 00000000000..8043ecf5422 --- /dev/null +++ b/cpp/include/cudf/interop/detail/arrow.hpp @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +// from Arrow C Device Data Interface +// https://arrow.apache.org/docs/format/CDeviceDataInterface.html +#ifndef ARROW_C_DEVICE_DATA_INTERFACE +#define ARROW_C_DEVICE_DATA_INTERFACE + +// Device type for the allocated memory +typedef int32_t ArrowDeviceType; + +// CPU device, same as using ArrowArray directly +#define ARROW_DEVICE_CPU 1 +// CUDA GPU Device +#define ARROW_DEVICE_CUDA 2 +// Pinned CUDA CPU memory by cudaMallocHost +#define ARROW_DEVICE_CUDA_HOST 3 +// CUDA managed/unified memory allocated by cudaMallocManaged +#define ARROW_DEVICE_CUDA_MANAGED 13 + +struct ArrowDeviceArray { + struct ArrowArray array; + int64_t device_id; + ArrowDeviceType device_type; + void* sync_event; + + // reserved bytes for future expansion + int64_t reserved[3]; +}; + +#endif // ARROW_C_DEVICE_DATA_INTERFACE diff --git a/cpp/include/cudf/io/avro.hpp b/cpp/include/cudf/io/avro.hpp index 89207302850..8bc74eb574c 100644 --- a/cpp/include/cudf/io/avro.hpp +++ b/cpp/include/cudf/io/avro.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -216,7 +217,7 @@ class avro_reader_options_builder { */ table_with_metadata read_avro( avro_reader_options const& options, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace io diff --git a/cpp/include/cudf/io/csv.hpp b/cpp/include/cudf/io/csv.hpp index 435583e805d..a20f75cecd7 100644 --- a/cpp/include/cudf/io/csv.hpp +++ b/cpp/include/cudf/io/csv.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -105,6 +106,9 @@ class csv_reader_options { char _quotechar = '"'; // Whether a quote inside a value is double-quoted bool _doublequote = true; + // Whether to detect quotes surrounded by spaces e.g. ` "data" `. This flag has no effect when + // _doublequote is true + bool _detect_whitespace_around_quotes = false; // Names of columns to read as datetime std::vector _parse_dates_names; // Indexes of columns to read as datetime @@ -374,6 +378,17 @@ class csv_reader_options { */ [[nodiscard]] bool is_enabled_doublequote() const { return _doublequote; } + /** + * @brief Whether to detect quotes surrounded by spaces e.g. ` "data" `. This flag has no + * effect when _doublequote is true + * + * @return `true` if detect_whitespace_around_quotes is enabled + */ + [[nodiscard]] bool is_enabled_detect_whitespace_around_quotes() const + { + return _detect_whitespace_around_quotes; + } + /** * @brief Returns names of columns to read as datetime. * @@ -697,6 +712,14 @@ class csv_reader_options { */ void enable_doublequote(bool val) { _doublequote = val; } + /** + * @brief Sets whether to detect quotes surrounded by spaces e.g. ` "data" `. This flag has no + * effect when _doublequote is true + * + * @param val Boolean value to enable/disable + */ + void enable_detect_whitespace_around_quotes(bool val) { _detect_whitespace_around_quotes = val; } + /** * @brief Sets names of columns to read as datetime. * @@ -1125,6 +1148,19 @@ class csv_reader_options_builder { return *this; } + /** + * @brief Sets whether to detect quotes surrounded by spaces e.g. ` "data" `. 
This flag has no + * effect when _doublequote is true + * + * @param val Boolean value to enable/disable + * @return this for chaining + */ + csv_reader_options_builder& detect_whitespace_around_quotes(bool val) + { + options._detect_whitespace_around_quotes = val; + return *this; + } + /** * @brief Sets names of columns to read as datetime. * @@ -1315,8 +1351,8 @@ class csv_reader_options_builder { */ table_with_metadata read_csv( csv_reader_options options, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group /** @@ -1721,8 +1757,8 @@ class csv_writer_options_builder { * @param mr Device memory resource to use for device memory allocation */ void write_csv(csv_writer_options const& options, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace io diff --git a/cpp/include/cudf/io/detail/avro.hpp b/cpp/include/cudf/io/detail/avro.hpp index fede8e62d9f..fe9f935d2cc 100644 --- a/cpp/include/cudf/io/detail/avro.hpp +++ b/cpp/include/cudf/io/detail/avro.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -20,6 +20,7 @@ #include #include +#include namespace cudf { namespace io { @@ -39,7 +40,7 @@ namespace avro { table_with_metadata read_avro(std::unique_ptr&& source, avro_reader_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace avro } // namespace detail diff --git a/cpp/include/cudf/io/detail/csv.hpp b/cpp/include/cudf/io/detail/csv.hpp index 40ddcf385b0..50c1a7c163d 100644 --- a/cpp/include/cudf/io/detail/csv.hpp +++ b/cpp/include/cudf/io/detail/csv.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ #include #include +#include namespace cudf { namespace io { @@ -38,7 +39,7 @@ namespace csv { table_with_metadata read_csv(std::unique_ptr&& source, csv_reader_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Write an entire dataset to CSV format. 
@@ -55,7 +56,7 @@ void write_csv(data_sink* sink, host_span column_names, csv_writer_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace csv } // namespace detail diff --git a/cpp/include/cudf/io/detail/json.hpp b/cpp/include/cudf/io/detail/json.hpp index 3f7f7e9bb32..540a584908d 100644 --- a/cpp/include/cudf/io/detail/json.hpp +++ b/cpp/include/cudf/io/detail/json.hpp @@ -16,9 +16,11 @@ #pragma once +#include #include #include +#include namespace cudf::io::json::detail { @@ -35,7 +37,7 @@ namespace cudf::io::json::detail { table_with_metadata read_json(host_span> sources, json_reader_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Write an entire dataset to JSON format. @@ -50,27 +52,27 @@ void write_json(data_sink* sink, table_view const& table, json_writer_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Normalize single quotes to double quotes using FST * - * @param inbuf Input device buffer + * @param indata Input device buffer * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource to use for device memory allocation */ -rmm::device_uvector normalize_single_quotes(rmm::device_uvector&& inbuf, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); +void normalize_single_quotes(datasource::owning_buffer>& indata, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); /** * @brief Normalize unquoted whitespace (space and tab characters) using FST * - * @param inbuf Input device buffer + * @param indata Input device buffer * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource to use for device memory allocation */ 
-rmm::device_uvector normalize_whitespace(rmm::device_uvector&& inbuf, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); +void normalize_whitespace(datasource::owning_buffer>& indata, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); } // namespace cudf::io::json::detail diff --git a/cpp/include/cudf/io/detail/orc.hpp b/cpp/include/cudf/io/detail/orc.hpp index 3c1486b60c2..597ddd9cf0a 100644 --- a/cpp/include/cudf/io/detail/orc.hpp +++ b/cpp/include/cudf/io/detail/orc.hpp @@ -23,6 +23,7 @@ #include #include +#include #include #include @@ -37,13 +38,15 @@ class chunked_orc_writer_options; namespace orc::detail { +// Forward declaration of the internal reader class +class reader_impl; + /** * @brief Class to read ORC dataset data into columns. */ class reader { private: - class impl; - std::unique_ptr _impl; + std::unique_ptr _impl; public: /** @@ -57,7 +60,7 @@ class reader { explicit reader(std::vector>&& sources, orc_reader_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Destructor explicitly declared to avoid inlining in header @@ -67,10 +70,63 @@ class reader { /** * @brief Reads the entire dataset. * - * @param options Settings for controlling reading behavior * @return The set of columns along with table metadata */ - table_with_metadata read(orc_reader_options const& options); + table_with_metadata read(); +}; + +/** + * @brief The reader class that supports iterative reading from an array of data sources. 
+ */ +class chunked_reader { + private: + std::unique_ptr _impl; + + public: + /** + * @copydoc cudf::io::chunked_orc_reader::chunked_orc_reader(std::size_t, std::size_t, size_type, + * orc_reader_options const&, rmm::cuda_stream_view, rmm::device_async_resource_ref) + * + * @param sources Input `datasource` objects to read the dataset from + */ + explicit chunked_reader(std::size_t chunk_read_limit, + std::size_t pass_read_limit, + size_type output_row_granularity, + std::vector>&& sources, + orc_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + /** + * @copydoc cudf::io::chunked_orc_reader::chunked_orc_reader(std::size_t, std::size_t, + * orc_reader_options const&, rmm::cuda_stream_view, rmm::device_async_resource_ref) + * + * @param sources Input `datasource` objects to read the dataset from + */ + explicit chunked_reader(std::size_t chunk_read_limit, + std::size_t pass_read_limit, + std::vector>&& sources, + orc_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + + /** + * @brief Destructor explicitly-declared to avoid inlined in header. + * + * Since the declaration of the internal `_impl` object does not exist in this header, this + * destructor needs to be defined in a separate source file which can access to that object's + * declaration. + */ + ~chunked_reader(); + + /** + * @copydoc cudf::io::chunked_orc_reader::has_next + */ + [[nodiscard]] bool has_next() const; + + /** + * @copydoc cudf::io::chunked_orc_reader::read_chunk + */ + [[nodiscard]] table_with_metadata read_chunk() const; }; /** @@ -124,14 +180,7 @@ class writer { * @brief Finishes the chunked/streamed write process. */ void close(); - - /** - * @brief Skip work done in `close()`; should be called if `write()` failed. - * - * Calling skip_close() prevents the writer from writing the (invalid) file footer and the - * postscript. 
- */ - void skip_close(); }; + } // namespace orc::detail } // namespace cudf::io diff --git a/cpp/include/cudf/io/detail/parquet.hpp b/cpp/include/cudf/io/detail/parquet.hpp index 0b8ee9676de..978216d971e 100644 --- a/cpp/include/cudf/io/detail/parquet.hpp +++ b/cpp/include/cudf/io/detail/parquet.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,6 +27,7 @@ #include #include +#include #include #include @@ -65,7 +66,7 @@ class reader { explicit reader(std::vector>&& sources, parquet_reader_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Destructor explicitly-declared to avoid inlined in header @@ -75,11 +76,9 @@ class reader { /** * @brief Reads the dataset as per given options. * - * @param options Settings for controlling reading behavior - * * @return The set of columns along with table metadata */ - table_with_metadata read(parquet_reader_options const& options); + table_with_metadata read(); }; /** @@ -101,6 +100,13 @@ class chunked_reader : private reader { * // Process chunk * } while (reader.has_next()); * + * // Alternatively + * + * while (reader.has_next()) { + * auto const chunk = reader.read_chunk(); + * // Process chunk + * } + * * ``` * * If `chunk_read_limit == 0` (i.e., no output limit), and `pass_read_limit == 0` (no input @@ -110,7 +116,7 @@ class chunked_reader : private reader { * The chunk_read_limit parameter controls the size of the output chunks produces. If the user * specifies 100 MB of data, the reader will attempt to return chunks containing tables that have * a total bytes size (over all columns) of 100 MB or less. This is a soft limit and the code - * will not fail if it cannot satisfy the limit. 
It will make a best-effort atttempt only. + * will not fail if it cannot satisfy the limit. It will make a best-effort attempt only. * * The pass_read_limit parameter controls how much temporary memory is used in the process of * decoding the file. The primary contributor to this memory usage is the uncompressed size of @@ -145,7 +151,7 @@ class chunked_reader : private reader { std::vector>&& sources, parquet_reader_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Destructor explicitly-declared to avoid inlined in header. diff --git a/cpp/include/cudf/io/detail/tokenize_json.hpp b/cpp/include/cudf/io/detail/tokenize_json.hpp index b2ea29a85c3..d08c4e7c65a 100644 --- a/cpp/include/cudf/io/detail/tokenize_json.hpp +++ b/cpp/include/cudf/io/detail/tokenize_json.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -21,6 +21,7 @@ #include #include +#include namespace cudf::io::json { @@ -133,7 +134,7 @@ std::pair, rmm::device_uvector> ge device_span json_in, cudf::io::json_reader_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail diff --git a/cpp/include/cudf/io/json.hpp b/cpp/include/cudf/io/json.hpp index 1f2628deea7..aa4bee4fb5e 100644 --- a/cpp/include/cudf/io/json.hpp +++ b/cpp/include/cudf/io/json.hpp @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -100,6 +101,10 @@ class json_reader_options { bool _lines = false; // Parse mixed types as a string column bool _mixed_types_as_string = false; + // Delimiter separating records in JSON lines + char _delimiter = '\n'; + // Prune columns on read, selected based on the _dtypes option + bool _prune_columns = false; // Bytes to skip from the start size_t _byte_range_offset = 0; @@ -226,6 +231,13 @@ class json_reader_options { return base_padding + num_columns * column_bytes; } + /** + * @brief Returns delimiter separating records in JSON lines + * + * @return Delimiter separating records in JSON lines + */ + char get_delimiter() const { return _delimiter; } + /** * @brief Whether to read the file as a json object per line. * @@ -240,6 +252,17 @@ class json_reader_options { */ bool is_enabled_mixed_types_as_string() const { return _mixed_types_as_string; } + /** + * @brief Whether to prune columns on read, selected based on the @ref set_dtypes option. + * + * When set as true, if the reader options include @ref set_dtypes, then + * the reader will only return those columns which are mentioned in @ref set_dtypes. + * If false, then all columns are returned, independent of the @ref set_dtypes setting. + * + * @return True if column pruning is enabled + */ + bool is_enabled_prune_columns() const { return _prune_columns; } + /** * @brief Whether to parse dates as DD/MM versus MM/DD. 
* @@ -250,9 +273,11 @@ class json_reader_options { /** * @brief Whether the legacy reader should be used. * + * @deprecated Since 24.06 + * * @returns true if the legacy reader will be used, false otherwise */ - bool is_enabled_legacy() const { return _legacy; } + [[deprecated]] bool is_enabled_legacy() const { return _legacy; } /** * @brief Whether the reader should keep quotes of string values. @@ -324,6 +349,30 @@ class json_reader_options { */ void set_byte_range_size(size_type size) { _byte_range_size = size; } + /** + * @brief Set delimiter separating records in JSON lines + * + * @param delimiter Delimiter separating records in JSON lines + */ + void set_delimiter(char delimiter) + { + switch (delimiter) { + case '{': + case '[': + case '}': + case ']': + case ',': + case ':': + case '"': + case '\'': + case '\\': + case ' ': + case '\t': + case '\r': CUDF_FAIL("Unsupported delimiter character.", std::invalid_argument); break; + } + _delimiter = delimiter; + } + /** * @brief Set whether to read the file as a json object per line. * @@ -339,6 +388,17 @@ class json_reader_options { */ void enable_mixed_types_as_string(bool val) { _mixed_types_as_string = val; } + /** + * @brief Set whether to prune columns on read, selected based on the @ref set_dtypes option. + * + * When set as true, if the reader options include @ref set_dtypes, then + * the reader will only return those columns which are mentioned in @ref set_dtypes. + * If false, then all columns are returned, independent of the @ref set_dtypes setting. + * + * @param val Boolean value to enable/disable column pruning + */ + void enable_prune_columns(bool val) { _prune_columns = val; } + /** * @brief Set whether to parse dates as DD/MM versus MM/DD. * @@ -349,9 +409,11 @@ class json_reader_options { /** * @brief Set whether to use the legacy reader. 
* + * @deprecated Since 24.06 + * * @param val Boolean value to enable/disable the legacy reader */ - void enable_legacy(bool val) { _legacy = val; } + [[deprecated]] void enable_legacy(bool val) { _legacy = val; } /** * @brief Set whether the reader should keep quotes of string values. @@ -478,6 +540,18 @@ class json_reader_options_builder { return *this; } + /** + * @brief Set delimiter separating records in JSON lines + * + * @param delimiter Delimiter separating records in JSON lines + * @return this for chaining + */ + json_reader_options_builder& delimiter(char delimiter) + { + options.set_delimiter(delimiter); + return *this; + } + /** * @brief Set whether to read the file as a json object per line. * @@ -503,6 +577,22 @@ class json_reader_options_builder { return *this; } + /** + * @brief Set whether to prune columns on read, selected based on the @ref dtypes option. + * + * When set as true, if the reader options include @ref dtypes, then + * the reader will only return those columns which are mentioned in @ref dtypes. + * If false, then all columns are returned, independent of the @ref dtypes setting. + * + * @param val Boolean value to enable/disable column pruning + * @return this for chaining + */ + json_reader_options_builder& prune_columns(bool val) + { + options._prune_columns = val; + return *this; + } + /** * @brief Set whether to parse dates as DD/MM versus MM/DD. * @@ -518,10 +608,12 @@ class json_reader_options_builder { /** * @brief Set whether to use the legacy reader. 
* + * @deprecated Since 24.06 + * * @param val Boolean value to enable/disable legacy parsing * @return this for chaining */ - json_reader_options_builder& legacy(bool val) + [[deprecated]] json_reader_options_builder& legacy(bool val) { options._legacy = val; return *this; @@ -612,8 +704,8 @@ class json_reader_options_builder { */ table_with_metadata read_json( json_reader_options options, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group @@ -959,8 +1051,8 @@ class json_writer_options_builder { * @param mr Device memory resource to use for device memory allocation */ void write_json(json_writer_options const& options, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace io diff --git a/cpp/include/cudf/io/memory_resource.hpp b/cpp/include/cudf/io/memory_resource.hpp index ea79d6a3029..a36e220ae7b 100644 --- a/cpp/include/cudf/io/memory_resource.hpp +++ b/cpp/include/cudf/io/memory_resource.hpp @@ -18,6 +18,8 @@ #include +#include + namespace cudf::io { /** @@ -41,4 +43,23 @@ rmm::host_async_resource_ref set_host_memory_resource(rmm::host_async_resource_r */ rmm::host_async_resource_ref get_host_memory_resource(); +/** + * @brief Options to configure the default host memory resource + */ +struct host_mr_options { + std::optional pool_size; ///< The size of the pool to use for the default host memory + ///< resource. If not set, the default pool size is used. +}; + +/** + * @brief Configure the size of the default host memory resource. 
+ * + * @throws cudf::logic_error if called after the default host memory resource has been created + * + * @param opts Options to configure the default host memory resource + * @return True if this call successfully configured the host memory resource, false if a + * a resource was already configured. + */ +bool config_default_host_memory_resource(host_mr_options const& opts); + } // namespace cudf::io diff --git a/cpp/include/cudf/io/orc.hpp b/cpp/include/cudf/io/orc.hpp index 5cc9ea81f29..8140f8897b7 100644 --- a/cpp/include/cudf/io/orc.hpp +++ b/cpp/include/cudf/io/orc.hpp @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -57,10 +58,10 @@ class orc_reader_options { // List of individual stripes to read (ignored if empty) std::vector> _stripes; - // Rows to skip from the start; ORC stores the number of rows as uint64_t - uint64_t _skip_rows = 0; + // Rows to skip from the start + int64_t _skip_rows = 0; // Rows to read; `nullopt` is all - std::optional _num_rows; + std::optional _num_rows; // Whether to use row index to speed-up reading bool _use_index = true; @@ -124,7 +125,7 @@ class orc_reader_options { * * @return Number of rows to skip from the start */ - uint64_t get_skip_rows() const { return _skip_rows; } + int64_t get_skip_rows() const { return _skip_rows; } /** * @brief Returns number of row to read. @@ -132,7 +133,7 @@ class orc_reader_options { * @return Number of rows to read; `nullopt` if the option hasn't been set (in which case the file * is read until the end) */ - std::optional const& get_num_rows() const { return _num_rows; } + std::optional const& get_num_rows() const { return _num_rows; } /** * @brief Whether to use row index to speed-up reading. 
@@ -197,10 +198,10 @@ class orc_reader_options { * @throw cudf::logic_error if a negative value is passed * @throw cudf::logic_error if stripes have been previously set */ - void set_skip_rows(uint64_t rows) + void set_skip_rows(int64_t rows) { + CUDF_EXPECTS(rows >= 0, "skip_rows cannot be negative"); CUDF_EXPECTS(rows == 0 or _stripes.empty(), "Can't set both skip_rows along with stripes"); - CUDF_EXPECTS(rows <= std::numeric_limits::max(), "skip_rows is too large"); _skip_rows = rows; } @@ -212,7 +213,7 @@ class orc_reader_options { * @throw cudf::logic_error if a negative value is passed * @throw cudf::logic_error if stripes have been previously set */ - void set_num_rows(size_type nrows) + void set_num_rows(int64_t nrows) { CUDF_EXPECTS(nrows >= 0, "num_rows cannot be negative"); CUDF_EXPECTS(_stripes.empty(), "Can't set both num_rows and stripes"); @@ -270,7 +271,7 @@ class orc_reader_options_builder { * * @param src The source information used to read orc file */ - explicit orc_reader_options_builder(source_info src) : options{std::move(src)} {}; + explicit orc_reader_options_builder(source_info src) : options{std::move(src)} {} /** * @brief Sets names of the column to read. 
@@ -302,7 +303,7 @@ class orc_reader_options_builder { * @param rows Number of rows * @return this for chaining */ - orc_reader_options_builder& skip_rows(uint64_t rows) + orc_reader_options_builder& skip_rows(int64_t rows) { options.set_skip_rows(rows); return *this; @@ -314,7 +315,7 @@ class orc_reader_options_builder { * @param nrows Number of rows * @return this for chaining */ - orc_reader_options_builder& num_rows(size_type nrows) + orc_reader_options_builder& num_rows(int64_t nrows) { options.set_num_rows(nrows); return *this; @@ -402,8 +403,146 @@ class orc_reader_options_builder { */ table_with_metadata read_orc( orc_reader_options const& options, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + +/** + * @brief The chunked orc reader class to read an ORC file iteratively into a series of + * tables, chunk by chunk. + * + * This class is designed to address the reading issue when reading very large ORC files such + * that sizes of their columns exceed the limit that can be stored in cudf columns. By reading the + * file content by chunks using this class, each chunk is guaranteed to have its size stay within + * the given limit. + */ +class chunked_orc_reader { + public: + /** + * @brief Default constructor, this should never be used. + * + * This is added just to satisfy cython. + */ + chunked_orc_reader() = default; + + /** + * @brief Construct the reader from input/output size limits, output row granularity, along with + * other ORC reader options. 
+ * + * The typical usage should be similar to this: + * ``` + * do { + * auto const chunk = reader.read_chunk(); + * // Process chunk + * } while (reader.has_next()); + * + * ``` + * + * If `chunk_read_limit == 0` (i.e., no output limit) and `pass_read_limit == 0` (no temporary + * memory size limit), a call to `read_chunk()` will read the whole data source and return a table + * containing all rows. + * + * The `chunk_read_limit` parameter controls the size of the output table to be returned per + * `read_chunk()` call. If the user specifies a 100 MB limit, the reader will attempt to return + * tables that have a total bytes size (over all columns) of 100 MB or less. + * This is a soft limit and the code will not fail if it cannot satisfy the limit. + * + * The `pass_read_limit` parameter controls how much temporary memory is used in the entire + * process of loading, decompressing and decoding of data. Again, this is also a soft limit and + * the reader will try to make the best effort. + * + * Finally, the parameter `output_row_granularity` controls the changes in row number of the + * output chunk. For each call to `read_chunk()`, with respect to the given `pass_read_limit`, a + * subset of stripes may be loaded, decompressed and decoded into an intermediate table. The + * reader will then subdivide that table into smaller tables for final output using + * `output_row_granularity` as the subdivision step. 
+ * + * @param chunk_read_limit Limit on total number of bytes to be returned per `read_chunk()` call, + * or `0` if there is no limit + * @param pass_read_limit Limit on temporary memory usage for reading the data sources, + * or `0` if there is no limit + * @param output_row_granularity The granularity parameter used for subdividing the decoded + * table for final output + * @param options Settings for controlling reading behaviors + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource to use for device memory allocation + * + * @throw cudf::logic_error if `output_row_granularity` is non-positive + */ + explicit chunked_orc_reader( + std::size_t chunk_read_limit, + std::size_t pass_read_limit, + size_type output_row_granularity, + orc_reader_options const& options, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + + /** + * @brief Construct the reader from input/output size limits along with other ORC reader options. + * + * This constructor implicitly calls the other constructor with `output_row_granularity` set to + * `DEFAULT_OUTPUT_ROW_GRANULARITY` rows. 
+ * + * @param chunk_read_limit Limit on total number of bytes to be returned per `read_chunk()` call, + * or `0` if there is no limit + * @param pass_read_limit Limit on temporary memory usage for reading the data sources, + * or `0` if there is no limit + * @param options Settings for controlling reading behaviors + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource to use for device memory allocation + */ + explicit chunked_orc_reader( + std::size_t chunk_read_limit, + std::size_t pass_read_limit, + orc_reader_options const& options, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + + /** + * @brief Construct the reader from output size limits along with other ORC reader options. + * + * This constructor implicitly calls the other constructor with `pass_read_limit` set to `0` and + * `output_row_granularity` set to `DEFAULT_OUTPUT_ROW_GRANULARITY` rows. + * + * @param chunk_read_limit Limit on total number of bytes to be returned per `read_chunk()` call, + * or `0` if there is no limit + * @param options Settings for controlling reading behaviors + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource to use for device memory allocation + */ + explicit chunked_orc_reader( + std::size_t chunk_read_limit, + orc_reader_options const& options, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + + /** + * @brief Destructor, destroying the internal reader instance. + */ + ~chunked_orc_reader(); + + /** + * @brief Check if there is any data in the given data sources that has not yet been read. 
+ * + * @return A boolean value indicating if there is any data left to read + */ + [[nodiscard]] bool has_next() const; + + /** + * @brief Read a chunk of rows in the given data sources. + * + * The sequence of returned tables, if concatenated by their order, guarantees to form a complete + * dataset as reading the entire given data sources at once. + * + * An empty table will be returned if the given sources are empty, or all the data has + * been read and returned by the previous calls. + * + * @return An output `cudf::table` along with its metadata + */ + [[nodiscard]] table_with_metadata read_chunk() const; + + private: + std::unique_ptr reader; +}; /** @} */ // end of group /** diff --git a/cpp/include/cudf/io/orc_metadata.hpp b/cpp/include/cudf/io/orc_metadata.hpp index 8f3eb1dff3c..35196a19349 100644 --- a/cpp/include/cudf/io/orc_metadata.hpp +++ b/cpp/include/cudf/io/orc_metadata.hpp @@ -154,6 +154,21 @@ struct timestamp_statistics : minmax_statistics { std::optional maximum_nanos; ///< nanoseconds part of the maximum }; +/** + * @brief Variant type for ORC type-specific column statistics. + * + * The variant can hold any of the supported column statistics types. + */ +using statistics_type = std::variant; + //! Orc I/O interfaces namespace orc { // forward declare the type that ProtobufReader uses. 
The `cudf::io::column_statistics` objects, @@ -171,16 +186,7 @@ struct column_statistics; struct column_statistics { std::optional number_of_values; ///< number of statistics std::optional has_null; ///< column has any nulls - std::variant - type_specific_stats; ///< type-specific statistics + statistics_type type_specific_stats; ///< type-specific statistics /** * @brief Construct a new column statistics object diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index dc035db8d39..b2f949cdcee 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -23,6 +23,7 @@ #include #include +#include #include #include @@ -70,6 +71,8 @@ class parquet_reader_options { bool _convert_strings_to_categories = false; // Whether to use PANDAS metadata to load columns bool _use_pandas_metadata = true; + // Whether to read and use ARROW schema + bool _use_arrow_schema = true; // Cast timestamp columns to a specific type data_type _timestamp_type{type_id::EMPTY}; @@ -125,6 +128,13 @@ class parquet_reader_options { */ [[nodiscard]] bool is_enabled_use_pandas_metadata() const { return _use_pandas_metadata; } + /** + * @brief Returns true/false depending whether to use arrow schema while reading. + * + * @return `true` if arrow schema is used while reading + */ + [[nodiscard]] bool is_enabled_use_arrow_schema() const { return _use_arrow_schema; } + /** * @brief Returns optional tree of metadata. * @@ -195,6 +205,31 @@ class parquet_reader_options { /** * @brief Sets AST based filter for predicate pushdown. * + * The filter can utilize cudf::ast::column_name_reference to reference a column by its name, + * even if it's not necessarily present in the requested projected columns. + * To refer to output column indices, you can use cudf::ast::column_reference. + * + * For a parquet with columns ["A", "B", "C", ... 
"X", "Y", "Z"], + * Example 1: with/without column projection + * @code + * use_columns({"A", "X", "Z"}) + * .filter(operation(ast_operator::LESS, column_name_reference{"C"}, literal{100})); + * @endcode + * Column "C" need not be present in output table. + * Example 2: without column projection + * @code + * filter(operation(ast_operator::LESS, column_reference{1}, literal{100})); + * @endcode + * Here, `1` will refer to column "B" because output will contain all columns in + * order ["A", ..., "Z"]. + * Example 3: with column projection + * @code + * use_columns({"A", "Z", "X"}) + * .filter(operation(ast_operator::LESS, column_reference{1}, literal{100})); + * @endcode + * Here, `1` will refer to column "Z" because output will contain 3 columns in + * order ["A", "Z", "X"]. + * * @param filter AST expression to use as filter */ void set_filter(ast::expression const& filter) { _filter = filter; } @@ -213,6 +248,13 @@ class parquet_reader_options { */ void enable_use_pandas_metadata(bool val) { _use_pandas_metadata = val; } + /** + * @brief Sets to enable/disable use of arrow schema to read. + * + * @param val Boolean value whether to use arrow schema + */ + void enable_use_arrow_schema(bool val) { _use_arrow_schema = val; } + /** * @brief Sets reader column schema. * @@ -292,9 +334,7 @@ class parquet_reader_options_builder { } /** - * @brief Sets vector of individual row groups to read. - * - * @param filter Vector of row groups to read + * @copydoc parquet_reader_options::set_filter * @return this for chaining */ parquet_reader_options_builder& filter(ast::expression const& filter) @@ -327,6 +367,18 @@ class parquet_reader_options_builder { return *this; } + /** + * @brief Sets to enable/disable use of arrow schema to read. 
+ * + * @param val Boolean value whether to use arrow schema + * @return this for chaining + */ + parquet_reader_options_builder& use_arrow_schema(bool val) + { + options._use_arrow_schema = val; + return *this; + } + /** * @brief Sets reader metadata. * @@ -409,8 +461,8 @@ class parquet_reader_options_builder { */ table_with_metadata read_parquet( parquet_reader_options const& options, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief The chunked parquet reader class to read Parquet file iteratively in to a series of @@ -446,8 +498,8 @@ class chunked_parquet_reader { chunked_parquet_reader( std::size_t chunk_read_limit, parquet_reader_options const& options, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Constructor for chunked reader. @@ -472,8 +524,8 @@ class chunked_parquet_reader { std::size_t chunk_read_limit, std::size_t pass_read_limit, parquet_reader_options const& options, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Destructor, destroying the internal reader instance. 
@@ -515,6 +567,15 @@ class chunked_parquet_reader { * @file */ +/** + * @brief Struct used to describe column sorting metadata + */ +struct sorting_column { + int column_idx{}; //!< leaf column index within the row group + bool is_descending{false}; //!< true if sort order is descending + bool is_nulls_first{true}; //!< true if nulls come before non-null values +}; + class parquet_writer_options_builder; /** @@ -554,7 +615,7 @@ class parquet_writer_options { // Maximum size of min or max values in column index int32_t _column_index_truncate_length = default_column_index_truncate_length; // When to use dictionary encoding for data - dictionary_policy _dictionary_policy = dictionary_policy::ALWAYS; + dictionary_policy _dictionary_policy = dictionary_policy::ADAPTIVE; // Maximum size of column chunk dictionary (in bytes) size_t _max_dictionary_size = default_max_dictionary_size; // Maximum number of rows in a page fragment @@ -563,6 +624,8 @@ class parquet_writer_options { std::shared_ptr _compression_stats; // write V2 page headers? bool _v2_page_headers = false; + // Which columns in _table are used for sorting + std::optional> _sorting_columns; /** * @brief Constructor from sink and table. @@ -761,6 +824,13 @@ class parquet_writer_options { */ [[nodiscard]] auto is_enabled_write_v2_headers() const { return _v2_page_headers; } + /** + * @brief Returns the sorting_columns. + * + * @return Column sort order metadata + */ + [[nodiscard]] auto const& get_sorting_columns() const { return _sorting_columns; } + /** * @brief Sets partitions. * @@ -892,6 +962,16 @@ class parquet_writer_options { * @param val Boolean value to enable/disable writing of V2 page headers. */ void enable_write_v2_headers(bool val) { _v2_page_headers = val; } + + /** + * @brief Sets sorting columns. 
+ * + * @param sorting_columns Column sort order metadata + */ + void set_sorting_columns(std::vector sorting_columns) + { + _sorting_columns = std::move(sorting_columns); + } }; /** @@ -1066,7 +1146,7 @@ class parquet_writer_options_builder { * dictionary_policy::ALWAYS will allow the use of dictionary encoding even if it will result in * the disabling of compression for columns that would otherwise be compressed. * - * The default value is dictionary_policy::ALWAYS. + * The default value is dictionary_policy::ADAPTIVE. * * @param val policy for dictionary use * @return this for chaining @@ -1143,6 +1223,14 @@ class parquet_writer_options_builder { */ parquet_writer_options_builder& write_v2_headers(bool enabled); + /** + * @brief Sets column sorting metadata to chunked_parquet_writer_options. + * + * @param sorting_columns Column sort order metadata + * @return this for chaining + */ + parquet_writer_options_builder& sorting_columns(std::vector sorting_columns); + /** * @brief move parquet_writer_options member once it's built. */ @@ -1221,7 +1309,7 @@ class chunked_parquet_writer_options { // Maximum size of min or max values in column index int32_t _column_index_truncate_length = default_column_index_truncate_length; // When to use dictionary encoding for data - dictionary_policy _dictionary_policy = dictionary_policy::ALWAYS; + dictionary_policy _dictionary_policy = dictionary_policy::ADAPTIVE; // Maximum size of column chunk dictionary (in bytes) size_t _max_dictionary_size = default_max_dictionary_size; // Maximum number of rows in a page fragment @@ -1230,6 +1318,8 @@ class chunked_parquet_writer_options { std::shared_ptr _compression_stats; // write V2 page headers? bool _v2_page_headers = false; + // Which columns in _table are used for sorting + std::optional> _sorting_columns; /** * @brief Constructor from sink. 
@@ -1384,6 +1474,13 @@ class chunked_parquet_writer_options { */ [[nodiscard]] auto is_enabled_write_v2_headers() const { return _v2_page_headers; } + /** + * @brief Returns the sorting_columns. + * + * @return Column sort order metadata + */ + [[nodiscard]] auto const& get_sorting_columns() const { return _sorting_columns; } + /** * @brief Sets metadata. * @@ -1501,6 +1598,16 @@ class chunked_parquet_writer_options { */ void enable_write_v2_headers(bool val) { _v2_page_headers = val; } + /** + * @brief Sets sorting columns. + * + * @param sorting_columns Column sort order metadata + */ + void set_sorting_columns(std::vector sorting_columns) + { + _sorting_columns = std::move(sorting_columns); + } + /** * @brief creates builder to build chunked_parquet_writer_options. * @@ -1695,7 +1802,7 @@ class chunked_parquet_writer_options_builder { * dictionary_policy::ALWAYS will allow the use of dictionary encoding even if it will result in * the disabling of compression for columns that would otherwise be compressed. * - * The default value is dictionary_policy::ALWAYS. + * The default value is dictionary_policy::ADAPTIVE. * * @param val policy for dictionary use * @return this for chaining @@ -1740,6 +1847,15 @@ class chunked_parquet_writer_options_builder { return *this; } + /** + * @brief Sets column sorting metadata to chunked_parquet_writer_options. + * + * @param sorting_columns Column sort order metadata + * @return this for chaining + */ + chunked_parquet_writer_options_builder& sorting_columns( + std::vector sorting_columns); + /** * @brief move chunked_parquet_writer_options member once it's built. */ diff --git a/cpp/include/cudf/io/parquet_metadata.hpp b/cpp/include/cudf/io/parquet_metadata.hpp index 3149b5b5945..e0c406c180c 100644 --- a/cpp/include/cudf/io/parquet_metadata.hpp +++ b/cpp/include/cudf/io/parquet_metadata.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -59,6 +59,13 @@ enum class TypeKind : int8_t { */ struct parquet_column_schema { public: + /** + * @brief Default constructor. + * + * This has been added since Cython requires a default constructor to create objects on stack. + */ + explicit parquet_column_schema() = default; + /** * @brief constructor * @@ -134,6 +141,13 @@ struct parquet_column_schema { */ struct parquet_schema { public: + /** + * @brief Default constructor. + * + * This has been added since Cython requires a default constructor to create objects on stack. + */ + explicit parquet_schema() = default; + /** * @brief constructor * @@ -165,6 +179,15 @@ class parquet_metadata { public: /// Key-value metadata in the file footer. using key_value_metadata = std::unordered_map; + /// row group metadata from each RowGroup element. + using row_group_metadata = std::unordered_map; + + /** + * @brief Default constructor. + * + * This has been added since Cython requires a default constructor to create objects on stack. 
+ */ + explicit parquet_metadata() = default; /** * @brief constructor @@ -173,15 +196,18 @@ class parquet_metadata { * @param num_rows number of rows * @param num_rowgroups number of row groups * @param file_metadata key-value metadata in the file footer + * @param rg_metadata vector of maps containing metadata for each row group */ parquet_metadata(parquet_schema schema, int64_t num_rows, size_type num_rowgroups, - key_value_metadata file_metadata) + key_value_metadata file_metadata, + std::vector rg_metadata) : _schema{std::move(schema)}, _num_rows{num_rows}, _num_rowgroups{num_rowgroups}, - _file_metadata{std::move(file_metadata)} + _file_metadata{std::move(file_metadata)}, + _rowgroup_metadata{std::move(rg_metadata)} { } @@ -207,6 +233,7 @@ class parquet_metadata { * @return Number of row groups */ [[nodiscard]] auto num_rowgroups() const { return _num_rowgroups; } + /** * @brief Returns the Key value metadata in the file footer. * @@ -214,11 +241,19 @@ class parquet_metadata { */ [[nodiscard]] auto const& metadata() const { return _file_metadata; } + /** + * @brief Returns the row group metadata in the file footer. 
+ * + * @return vector of row group metadata as maps + */ + [[nodiscard]] auto const& rowgroup_metadata() const { return _rowgroup_metadata; } + private: parquet_schema _schema; int64_t _num_rows; size_type _num_rowgroups; key_value_metadata _file_metadata; + std::vector _rowgroup_metadata; }; /** diff --git a/cpp/include/cudf/io/text/detail/tile_state.hpp b/cpp/include/cudf/io/text/detail/tile_state.hpp index d42624aa9b7..aa9185b4983 100644 --- a/cpp/include/cudf/io/text/detail/tile_state.hpp +++ b/cpp/include/cudf/io/text/detail/tile_state.hpp @@ -16,6 +16,8 @@ #pragma once +#include + #include #include @@ -81,7 +83,7 @@ struct scan_tile_state { scan_tile_state(cudf::size_type num_tiles, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) : tile_status(rmm::device_uvector>( num_tiles, stream, mr)), tile_state_partial(rmm::device_uvector(num_tiles, stream, mr)), diff --git a/cpp/include/cudf/io/text/detail/trie.hpp b/cpp/include/cudf/io/text/detail/trie.hpp index 7bb2e4e2ece..e0b9c7635e3 100644 --- a/cpp/include/cudf/io/text/detail/trie.hpp +++ b/cpp/include/cudf/io/text/detail/trie.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -165,7 +166,7 @@ struct trie { */ static trie create(std::string const& pattern, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return create(std::vector{pattern}, stream, mr); @@ -181,7 +182,7 @@ struct trie { */ static trie create(std::vector const& patterns, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { std::vector tokens; std::vector transitions; diff --git a/cpp/include/cudf/io/text/multibyte_split.hpp b/cpp/include/cudf/io/text/multibyte_split.hpp index a7edc9be0e4..7abae7c754b 100644 --- a/cpp/include/cudf/io/text/multibyte_split.hpp +++ b/cpp/include/cudf/io/text/multibyte_split.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -85,18 +86,18 @@ struct parse_options { std::unique_ptr multibyte_split( data_chunk_source const& source, std::string const& delimiter, - parse_options options = {}, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + parse_options options = {}, + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); std::unique_ptr multibyte_split( data_chunk_source const& source, std::string const& delimiter, std::optional byte_range, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); std::unique_ptr multibyte_split(data_chunk_source const& source, std::string const& delimiter, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace text } // namespace io diff --git a/cpp/include/cudf/io/types.hpp b/cpp/include/cudf/io/types.hpp index 64d627483e6..150e997f533 100644 --- a/cpp/include/cudf/io/types.hpp +++ b/cpp/include/cudf/io/types.hpp @@ -113,6 +113,7 @@ enum class column_encoding { ///< valid for BYTE_ARRAY columns) DELTA_BYTE_ARRAY, ///< Use DELTA_BYTE_ARRAY encoding (only valid for ///< BYTE_ARRAY and FIXED_LEN_BYTE_ARRAY columns) + BYTE_STREAM_SPLIT, ///< Use BYTE_STREAM_SPLIT encoding (valid for all fixed width types) // ORC encodings: DIRECT, ///< Use DIRECT encoding DIRECT_V2, ///< Use DIRECT_V2 encoding @@ -235,6 +236,8 @@ enum dictionary_policy { struct column_name_info { std::string name; ///< Column name std::optional is_nullable; ///< Column nullability + std::optional is_binary; ///< Column is binary (i.e. 
not a list) + std::optional type_length; ///< Byte width of data (for fixed length data) std::vector children; ///< Child column names /** @@ -242,9 +245,12 @@ struct column_name_info { * * @param _name Column name * @param _is_nullable True if column is nullable + * @param _is_binary True if column is binary data */ - column_name_info(std::string const& _name, std::optional _is_nullable = std::nullopt) - : name(_name), is_nullable(_is_nullable) + column_name_info(std::string const& _name, + std::optional _is_nullable = std::nullopt, + std::optional _is_binary = std::nullopt) + : name(_name), is_nullable(_is_nullable), is_binary(_is_binary) { } @@ -602,8 +608,10 @@ class column_in_metadata { bool _list_column_is_map = false; bool _use_int96_timestamp = false; bool _output_as_binary = false; + bool _skip_compression = false; std::optional _decimal_precision; std::optional _parquet_field_id; + std::optional _type_length; std::vector children; column_encoding _encoding = column_encoding::USE_DEFAULT; @@ -691,6 +699,19 @@ class column_in_metadata { return *this; } + /** + * @brief Set the data length of the column. Only valid if this column is a + * fixed-length byte array. + * + * @param length The data length to set for this column + * @return this for chaining + */ + column_in_metadata& set_type_length(int32_t length) noexcept + { + _type_length = length; + return *this; + } + /** * @brief Set the parquet field id of this column. * @@ -722,6 +743,19 @@ class column_in_metadata { return *this; } + /** + * @brief Specifies whether this column should not be compressed regardless of the compression + * codec specified for the file. + * + * @param skip If `true` do not compress this column + * @return this for chaining + */ + column_in_metadata& set_skip_compression(bool skip) noexcept + { + _skip_compression = skip; + return *this; + } + /** * @brief Sets the encoding to use for this column. 
 * @@ -811,6 +845,22 @@ */ [[nodiscard]] uint8_t get_decimal_precision() const { return _decimal_precision.value(); } + /** + * @brief Get whether type length has been set for this column + * + * @return Boolean indicating whether type length has been set for this column + */ + [[nodiscard]] bool is_type_length_set() const noexcept { return _type_length.has_value(); } + + /** + * @brief Get the type length that was set for this column. + * + * @throws std::bad_optional_access If type length was not set for this + * column. Check using `is_type_length_set()` first. + * @return The type length that was set for this column + */ + [[nodiscard]] uint8_t get_type_length() const { return _type_length.value(); } + /** + * @brief Get whether parquet field id has been set for this column. + * @@ -844,6 +894,13 @@ */ [[nodiscard]] bool is_enabled_output_as_binary() const noexcept { return _output_as_binary; } + /** + * @brief Get whether to skip compressing this column + * + * @return Boolean indicating whether to skip compression of this column + */ + [[nodiscard]] bool is_enabled_skip_compression() const noexcept { return _skip_compression; } + /** + * @brief Get the encoding that was set for this column. * @@ -910,6 +967,7 @@ struct partition_info { class reader_column_schema { // Whether to read binary data as a string column bool _convert_binary_to_strings{true}; + int32_t _type_length{0}; std::vector children; @@ -975,6 +1033,18 @@ return *this; } + /** + * @brief Sets the length of fixed length data. 
+ * + * @param type_length Size of the data type in bytes + * @return this for chaining + */ + reader_column_schema& set_type_length(int32_t type_length) + { + _type_length = type_length; + return *this; + } + /** * @brief Get whether to encode this column as binary or string data * @@ -985,6 +1055,13 @@ class reader_column_schema { return _convert_binary_to_strings; } + /** + * @brief Get the length in bytes of this fixed length data. + * + * @return The length in bytes of the data type + */ + [[nodiscard]] int32_t get_type_length() const { return _type_length; } + /** * @brief Get the number of child objects * diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp index b7a3129cfec..825f758adbd 100644 --- a/cpp/include/cudf/join.hpp +++ b/cpp/include/cudf/join.hpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -95,8 +96,8 @@ std::pair>, std::unique_ptr>> inner_join(cudf::table_view const& left_keys, cudf::table_view const& right_keys, - null_equality compare_nulls = null_equality::EQUAL, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + null_equality compare_nulls = null_equality::EQUAL, + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a pair of row index vectors corresponding to a @@ -135,8 +136,8 @@ std::pair>, std::unique_ptr>> left_join(cudf::table_view const& left_keys, cudf::table_view const& right_keys, - null_equality compare_nulls = null_equality::EQUAL, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + null_equality compare_nulls = null_equality::EQUAL, + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a pair of row index vectors corresponding to a @@ -174,8 +175,8 @@ std::pair>, std::unique_ptr>> full_join(cudf::table_view const& left_keys, cudf::table_view const& right_keys, - null_equality compare_nulls = null_equality::EQUAL, - 
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + null_equality compare_nulls = null_equality::EQUAL, + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a vector of row indices corresponding to a left semi-join @@ -202,8 +203,8 @@ full_join(cudf::table_view const& left_keys, std::unique_ptr> left_semi_join( cudf::table_view const& left_keys, cudf::table_view const& right_keys, - null_equality compare_nulls = null_equality::EQUAL, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + null_equality compare_nulls = null_equality::EQUAL, + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a vector of row indices corresponding to a left anti join @@ -233,8 +234,8 @@ std::unique_ptr> left_semi_join( std::unique_ptr> left_anti_join( cudf::table_view const& left_keys, cudf::table_view const& right_keys, - null_equality compare_nulls = null_equality::EQUAL, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + null_equality compare_nulls = null_equality::EQUAL, + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Performs a cross join on two tables (`left`, `right`) @@ -261,7 +262,7 @@ std::unique_ptr> left_anti_join( std::unique_ptr cross_join( cudf::table_view const& left, cudf::table_view const& right, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief The enum class to specify if any of the input join tables (`build` table and any later @@ -340,7 +341,7 @@ class hash_join { inner_join(cudf::table_view const& probe, std::optional output_size = {}, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const; + rmm::device_async_resource_ref mr = 
rmm::mr::get_current_device_resource()) const; /** * Returns the row indices that can be used to construct the result of performing @@ -365,7 +366,7 @@ class hash_join { left_join(cudf::table_view const& probe, std::optional output_size = {}, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const; + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) const; /** * Returns the row indices that can be used to construct the result of performing @@ -390,7 +391,7 @@ class hash_join { full_join(cudf::table_view const& probe, std::optional output_size = {}, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const; + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) const; /** * Returns the exact number of matches (rows) when performing an inner join with the specified @@ -441,8 +442,8 @@ class hash_join { */ std::size_t full_join_size( cudf::table_view const& probe, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const; + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) const; private: const std::unique_ptr _impl; @@ -497,8 +498,8 @@ class distinct_hash_join { */ std::pair>, std::unique_ptr>> - inner_join(rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const; + inner_join(rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) const; /** * @brief Returns the build table indices that can be used to construct the result of performing @@ -515,8 +516,8 @@ class distinct_hash_join { * join between two tables 
with `build` and `probe` as the join keys. */ std::unique_ptr> left_join( - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const; + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) const; private: using impl_type = typename cudf::detail::distinct_hash_join; ///< Implementation type @@ -561,12 +562,11 @@ class distinct_hash_join { */ std::pair>, std::unique_ptr>> -conditional_inner_join( - table_view const& left, - table_view const& right, - ast::expression const& binary_predicate, - std::optional output_size = {}, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +conditional_inner_join(table_view const& left, + table_view const& right, + ast::expression const& binary_predicate, + std::optional output_size = {}, + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a pair of row index vectors corresponding to all pairs @@ -611,7 +611,7 @@ conditional_left_join(table_view const& left, table_view const& right, ast::expression const& binary_predicate, std::optional output_size = {}, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a pair of row index vectors corresponding to all pairs @@ -653,7 +653,7 @@ std::pair>, conditional_full_join(table_view const& left, table_view const& right, ast::expression const& binary_predicate, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns an index vector corresponding to all rows in the left table @@ -692,7 +692,7 @@ std::unique_ptr> conditional_left_semi_join( table_view const& right, ast::expression const& 
binary_predicate, std::optional output_size = {}, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns an index vector corresponding to all rows in the left table @@ -731,7 +731,7 @@ std::unique_ptr> conditional_left_anti_join( table_view const& right, ast::expression const& binary_predicate, std::optional output_size = {}, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a pair of row index vectors corresponding to all pairs of @@ -789,7 +789,7 @@ mixed_inner_join( ast::expression const& binary_predicate, null_equality compare_nulls = null_equality::EQUAL, std::optional>> output_size_data = {}, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a pair of row index vectors corresponding to all pairs of @@ -849,7 +849,7 @@ mixed_left_join( ast::expression const& binary_predicate, null_equality compare_nulls = null_equality::EQUAL, std::optional>> output_size_data = {}, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a pair of row index vectors corresponding to all pairs of @@ -909,7 +909,7 @@ mixed_full_join( ast::expression const& binary_predicate, null_equality compare_nulls = null_equality::EQUAL, std::optional>> output_size_data = {}, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns an index vector corresponding to all rows in the left tables @@ -944,9 +944,6 @@ mixed_full_join( * @param right_conditional The right 
table used for the conditional join * @param binary_predicate The condition on which to join * @param compare_nulls Whether or not null values join to each other or not - * @param output_size_data An optional pair of values indicating the exact output size and the - * number of matches for each row in the larger of the two input tables, left or right (may be - * precomputed using the corresponding mixed_full_join_size API). * @param mr Device memory resource used to allocate the returned table and columns' device memory * * @return A pair of vectors [`left_indices`, `right_indices`] that can be used to construct @@ -958,9 +955,8 @@ std::unique_ptr> mixed_left_semi_join( table_view const& left_conditional, table_view const& right_conditional, ast::expression const& binary_predicate, - null_equality compare_nulls = null_equality::EQUAL, - std::optional>> output_size_data = {}, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + null_equality compare_nulls = null_equality::EQUAL, + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns an index vector corresponding to all rows in the left tables @@ -996,9 +992,6 @@ std::unique_ptr> mixed_left_semi_join( * @param right_conditional The right table used for the conditional join * @param binary_predicate The condition on which to join * @param compare_nulls Whether or not null values join to each other or not - * @param output_size_data An optional pair of values indicating the exact output size and the - * number of matches for each row in the larger of the two input tables, left or right (may be - * precomputed using the corresponding mixed_full_join_size API). 
* @param mr Device memory resource used to allocate the returned table and columns' device memory * * @return A pair of vectors [`left_indices`, `right_indices`] that can be used to construct @@ -1010,9 +1003,8 @@ std::unique_ptr> mixed_left_anti_join( table_view const& left_conditional, table_view const& right_conditional, ast::expression const& binary_predicate, - null_equality compare_nulls = null_equality::EQUAL, - std::optional>> output_size_data = {}, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + null_equality compare_nulls = null_equality::EQUAL, + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns the exact number of matches (rows) when performing a @@ -1051,8 +1043,8 @@ std::pair>> mixed_in table_view const& left_conditional, table_view const& right_conditional, ast::expression const& binary_predicate, - null_equality compare_nulls = null_equality::EQUAL, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + null_equality compare_nulls = null_equality::EQUAL, + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns the exact number of matches (rows) when performing a @@ -1091,86 +1083,8 @@ std::pair>> mixed_le table_view const& left_conditional, table_view const& right_conditional, ast::expression const& binary_predicate, - null_equality compare_nulls = null_equality::EQUAL, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Returns the exact number of matches (rows) when performing a mixed - * left semi join between the specified tables where the columns of the - * equality table are equal and the predicate evaluates to true on the - * conditional tables. - * - * If the provided predicate returns NULL for a pair of rows (left, right), - * that pair is not included in the output. 
It is the user's responsibility to - * choose a suitable compare_nulls value AND use appropriate null-safe - * operators in the expression. - * - * @throw cudf::logic_error If the binary predicate outputs a non-boolean result. - * @throw cudf::logic_error If the number of rows in left_equality and left_conditional do not - * match. - * @throw cudf::logic_error If the number of rows in right_equality and right_conditional do not - * match. - * - * @param left_equality The left table used for the equality join - * @param right_equality The right table used for the equality join - * @param left_conditional The left table used for the conditional join - * @param right_conditional The right table used for the conditional join - * @param binary_predicate The condition on which to join - * @param compare_nulls Whether or not null values join to each other or not - * @param mr Device memory resource used to allocate the returned table and columns' device memory - * - * @return A pair containing the size that would result from performing the - * requested join and the number of matches for each row in one of the two - * tables. Which of the two tables is an implementation detail and should not - * be relied upon, simply passed to the corresponding `mixed_left_join` API as - * is. - */ -std::pair>> mixed_left_semi_join_size( - table_view const& left_equality, - table_view const& right_equality, - table_view const& left_conditional, - table_view const& right_conditional, - ast::expression const& binary_predicate, - null_equality compare_nulls = null_equality::EQUAL, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Returns the exact number of matches (rows) when performing a mixed - * left anti join between the specified tables. - * - * If the provided predicate returns NULL for a pair of rows (left, right), - * that pair is not included in the output. 
It is the user's responsibility to - * choose a suitable compare_nulls value AND use appropriate null-safe - * operators in the expression. - * - * @throw cudf::logic_error If the binary predicate outputs a non-boolean result. - * @throw cudf::logic_error If the number of rows in left_equality and left_conditional do not - * match. - * @throw cudf::logic_error If the number of rows in right_equality and right_conditional do not - * match. - * - * @param left_equality The left table used for the equality join - * @param right_equality The right table used for the equality join - * @param left_conditional The left table used for the conditional join - * @param right_conditional The right table used for the conditional join - * @param binary_predicate The condition on which to join - * @param compare_nulls Whether or not null values join to each other or not - * @param mr Device memory resource used to allocate the returned table and columns' device memory - * - * @return A pair containing the size that would result from performing the - * requested join and the number of matches for each row in one of the two - * tables. Which of the two tables is an implementation detail and should not - * be relied upon, simply passed to the corresponding `mixed_left_join` API as - * is. 
- */ -std::pair>> mixed_left_anti_join_size( - table_view const& left_equality, - table_view const& right_equality, - table_view const& left_conditional, - table_view const& right_conditional, - ast::expression const& binary_predicate, - null_equality compare_nulls = null_equality::EQUAL, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + null_equality compare_nulls = null_equality::EQUAL, + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns the exact number of matches (rows) when performing a @@ -1193,7 +1107,7 @@ std::size_t conditional_inner_join_size( table_view const& left, table_view const& right, ast::expression const& binary_predicate, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns the exact number of matches (rows) when performing a @@ -1216,7 +1130,7 @@ std::size_t conditional_left_join_size( table_view const& left, table_view const& right, ast::expression const& binary_predicate, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns the exact number of matches (rows) when performing a @@ -1239,7 +1153,7 @@ std::size_t conditional_left_semi_join_size( table_view const& left, table_view const& right, ast::expression const& binary_predicate, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns the exact number of matches (rows) when performing a @@ -1262,6 +1176,6 @@ std::size_t conditional_left_anti_join_size( table_view const& left, table_view const& right, ast::expression const& binary_predicate, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + 
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace cudf diff --git a/cpp/include/cudf/json/json.hpp b/cpp/include/cudf/json/json.hpp index 944e0c26dd6..385e8e54bdc 100644 --- a/cpp/include/cudf/json/json.hpp +++ b/cpp/include/cudf/json/json.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ #include #include +#include #include @@ -167,9 +168,9 @@ class get_json_object_options { std::unique_ptr get_json_object( cudf::strings_column_view const& col, cudf::string_scalar const& json_path, - get_json_object_options options = get_json_object_options{}, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + get_json_object_options options = get_json_object_options{}, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group } // namespace cudf diff --git a/cpp/include/cudf/labeling/label_bins.hpp b/cpp/include/cudf/labeling/label_bins.hpp index d8ea262dfe1..9091e31a9ea 100644 --- a/cpp/include/cudf/labeling/label_bins.hpp +++ b/cpp/include/cudf/labeling/label_bins.hpp @@ -22,6 +22,7 @@ #include #include +#include namespace cudf { @@ -74,8 +75,8 @@ std::unique_ptr label_bins( inclusive left_inclusive, column_view const& right_edges, inclusive right_inclusive, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace cudf diff 
--git a/cpp/include/cudf/lists/combine.hpp b/cpp/include/cudf/lists/combine.hpp index 0d9c1c157eb..853562acfff 100644 --- a/cpp/include/cudf/lists/combine.hpp +++ b/cpp/include/cudf/lists/combine.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ #include #include +#include namespace cudf { @@ -66,7 +67,7 @@ std::unique_ptr concatenate_rows( table_view const& input, concatenate_null_policy null_policy = concatenate_null_policy::IGNORE, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Concatenating multiple lists on the same row of a lists column into a single list. @@ -97,7 +98,7 @@ std::unique_ptr concatenate_list_elements( column_view const& input, concatenate_null_policy null_policy = concatenate_null_policy::IGNORE, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace lists diff --git a/cpp/include/cudf/lists/contains.hpp b/cpp/include/cudf/lists/contains.hpp index 7cf67ec9205..060882555aa 100644 --- a/cpp/include/cudf/lists/contains.hpp +++ b/cpp/include/cudf/lists/contains.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -19,6 +19,7 @@ #include #include +#include namespace cudf { namespace lists { @@ -49,8 +50,8 @@ namespace lists { std::unique_ptr contains( cudf::lists_column_view const& lists, cudf::scalar const& search_key, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Create a column of `bool` values indicating whether the list rows of the first @@ -73,8 +74,8 @@ std::unique_ptr contains( std::unique_ptr contains( cudf::lists_column_view const& lists, cudf::column_view const& search_keys, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Create a column of `bool` values indicating whether each row in the `lists` column @@ -95,8 +96,8 @@ std::unique_ptr contains( */ std::unique_ptr contains_nulls( cudf::lists_column_view const& lists, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Option to choose whether `index_of()` returns the first or last match @@ -138,9 +139,9 @@ enum class duplicate_find_option : int32_t { std::unique_ptr index_of( cudf::lists_column_view const& lists, cudf::scalar const& search_key, - duplicate_find_option find_option = duplicate_find_option::FIND_FIRST, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + duplicate_find_option 
find_option = duplicate_find_option::FIND_FIRST, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Create a column of values indicating the position of a search key @@ -175,9 +176,9 @@ std::unique_ptr index_of( std::unique_ptr index_of( cudf::lists_column_view const& lists, cudf::column_view const& search_keys, - duplicate_find_option find_option = duplicate_find_option::FIND_FIRST, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + duplicate_find_option find_option = duplicate_find_option::FIND_FIRST, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace lists diff --git a/cpp/include/cudf/lists/count_elements.hpp b/cpp/include/cudf/lists/count_elements.hpp index e4bd0dca9ae..2b9f5aa5607 100644 --- a/cpp/include/cudf/lists/count_elements.hpp +++ b/cpp/include/cudf/lists/count_elements.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -19,6 +19,7 @@ #include #include +#include namespace cudf { namespace lists { @@ -51,8 +52,8 @@ namespace lists { */ std::unique_ptr count_elements( lists_column_view const& input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of lists_elements group diff --git a/cpp/include/cudf/lists/detail/combine.hpp b/cpp/include/cudf/lists/detail/combine.hpp index 4bc45e48a9f..bd4c01bbb4b 100644 --- a/cpp/include/cudf/lists/detail/combine.hpp +++ b/cpp/include/cudf/lists/detail/combine.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,8 @@ #include #include +#include + namespace cudf { namespace lists { namespace detail { @@ -30,7 +32,7 @@ namespace detail { std::unique_ptr concatenate_rows(table_view const& input, concatenate_null_policy null_policy, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::lists::concatenate_list_elements @@ -40,7 +42,7 @@ std::unique_ptr concatenate_rows(table_view const& input, std::unique_ptr concatenate_list_elements(column_view const& input, concatenate_null_policy null_policy, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace lists diff --git a/cpp/include/cudf/lists/detail/concatenate.hpp b/cpp/include/cudf/lists/detail/concatenate.hpp index a1f149d4ccf..d67958ef260 100644 --- a/cpp/include/cudf/lists/detail/concatenate.hpp +++ b/cpp/include/cudf/lists/detail/concatenate.hpp @@ -1,5 +1,5 @@ /* - * 
Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,6 +22,7 @@ #include #include +#include namespace cudf { namespace lists { @@ -45,7 +46,7 @@ namespace detail { */ std::unique_ptr concatenate(host_span columns, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace lists diff --git a/cpp/include/cudf/lists/detail/contains.hpp b/cpp/include/cudf/lists/detail/contains.hpp index 58ec18cb9ef..638cc7afb81 100644 --- a/cpp/include/cudf/lists/detail/contains.hpp +++ b/cpp/include/cudf/lists/detail/contains.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +18,8 @@ #include #include +#include + namespace cudf { namespace lists { namespace detail { @@ -26,49 +28,49 @@ namespace detail { * @copydoc cudf::lists::index_of(cudf::lists_column_view const&, * cudf::scalar const&, * duplicate_find_option, - * rmm::mr::device_memory_resource*) + * rmm::device_async_resource_ref) * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr index_of(cudf::lists_column_view const& lists, cudf::scalar const& search_key, cudf::lists::duplicate_find_option find_option, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::lists::index_of(cudf::lists_column_view const&, * cudf::column_view const&, * duplicate_find_option, - * rmm::mr::device_memory_resource*) + * rmm::device_async_resource_ref) * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ std::unique_ptr index_of(cudf::lists_column_view const& lists, cudf::column_view const& search_keys, cudf::lists::duplicate_find_option find_option, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::lists::contains(cudf::lists_column_view const&, * cudf::scalar const&, - * rmm::mr::device_memory_resource*) + * rmm::device_async_resource_ref) * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr contains(cudf::lists_column_view const& lists, cudf::scalar const& search_key, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::lists::contains(cudf::lists_column_view const&, * cudf::column_view const&, - * rmm::mr::device_memory_resource*) + * rmm::device_async_resource_ref) * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr contains(cudf::lists_column_view const& lists, cudf::column_view const& search_keys, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace lists } // namespace cudf diff --git a/cpp/include/cudf/lists/detail/copying.hpp b/cpp/include/cudf/lists/detail/copying.hpp index 3760294f079..18a70bba5e9 100644 --- a/cpp/include/cudf/lists/detail/copying.hpp +++ b/cpp/include/cudf/lists/detail/copying.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,6 +18,7 @@ #include #include +#include namespace cudf { namespace lists { @@ -46,7 +47,7 @@ std::unique_ptr copy_slice(lists_column_view const& lists, size_type start, size_type end, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace lists diff --git a/cpp/include/cudf/lists/detail/extract.hpp b/cpp/include/cudf/lists/detail/extract.hpp index 013f9b491dd..6f983d44bc9 100644 --- a/cpp/include/cudf/lists/detail/extract.hpp +++ b/cpp/include/cudf/lists/detail/extract.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,29 +18,31 @@ #include #include +#include + namespace cudf { namespace lists { namespace detail { /** * @copydoc cudf::lists::extract_list_element(lists_column_view, size_type, - * rmm::mr::device_memory_resource*) + * rmm::device_async_resource_ref) * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr extract_list_element(lists_column_view lists_column, size_type const index, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::lists::extract_list_element(lists_column_view, column_view const&, - * rmm::mr::device_memory_resource*) + * rmm::device_async_resource_ref) * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ std::unique_ptr extract_list_element(lists_column_view lists_column, column_view const& indices, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace lists diff --git a/cpp/include/cudf/lists/detail/gather.cuh b/cpp/include/cudf/lists/detail/gather.cuh index 03428bc347f..0cd77556f33 100644 --- a/cpp/include/cudf/lists/detail/gather.cuh +++ b/cpp/include/cudf/lists/detail/gather.cuh @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -59,7 +60,7 @@ struct gather_data { * MapItType gather_map, * size_type gather_map_size, * rmm::cuda_stream_view stream, - * rmm::mr::device_memory_resource* mr) + * rmm::device_async_resource_ref mr) * * @param prev_base_offsets The buffer backing the base offsets used in the gather map. We can * free this buffer before allocating the new one to keep peak memory @@ -71,7 +72,7 @@ gather_data make_gather_data(cudf::lists_column_view const& source_column, size_type gather_map_size, rmm::device_uvector&& prev_base_offsets, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { // size of the gather map is the # of output rows size_type output_count = gather_map_size; @@ -252,7 +253,7 @@ gather_data make_gather_data(cudf::lists_column_view const& source_column, MapItType gather_map, size_type gather_map_size, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return make_gather_data( source_column, @@ -278,7 +279,7 @@ gather_data make_gather_data(cudf::lists_column_view const& source_column, std::unique_ptr gather_list_nested(lists_column_view const& list, gather_data& gd, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Gather a leaf column from a hierarchy of list columns. 
@@ -295,13 +296,13 @@ std::unique_ptr gather_list_nested(lists_column_view const& list, std::unique_ptr gather_list_leaf(column_view const& column, gather_data const& gd, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::lists::segmented_gather(lists_column_view const& source_column, * lists_column_view const& gather_map_list, * out_of_bounds_policy bounds_policy, - * rmm::mr::device_memory_resource* mr) + * rmm::device_async_resource_ref mr) * * @param stream CUDA stream on which to execute kernels */ @@ -309,7 +310,7 @@ std::unique_ptr segmented_gather(lists_column_view const& source_column, lists_column_view const& gather_map_list, out_of_bounds_policy bounds_policy, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace lists diff --git a/cpp/include/cudf/lists/detail/interleave_columns.hpp b/cpp/include/cudf/lists/detail/interleave_columns.hpp index a5cf67c95b9..3aff93840a9 100644 --- a/cpp/include/cudf/lists/detail/interleave_columns.hpp +++ b/cpp/include/cudf/lists/detail/interleave_columns.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -19,6 +19,7 @@ #include #include +#include namespace cudf { namespace lists { @@ -47,7 +48,7 @@ namespace detail { std::unique_ptr interleave_columns(table_view const& input, bool has_null_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace lists diff --git a/cpp/include/cudf/lists/detail/lists_column_factories.hpp b/cpp/include/cudf/lists/detail/lists_column_factories.hpp index 7b821a00b0d..192aee8d811 100644 --- a/cpp/include/cudf/lists/detail/lists_column_factories.hpp +++ b/cpp/include/cudf/lists/detail/lists_column_factories.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ #include #include +#include namespace cudf { namespace lists { @@ -38,7 +39,7 @@ namespace detail { std::unique_ptr make_lists_column_from_scalar(list_scalar const& value, size_type size, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Create an empty lists column. @@ -51,7 +52,7 @@ std::unique_ptr make_lists_column_from_scalar(list_scalar const& v */ std::unique_ptr make_empty_lists_column(data_type child_type, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Create a lists column with all null rows. 
@@ -64,7 +65,7 @@ std::unique_ptr make_empty_lists_column(data_type child_type, std::unique_ptr make_all_nulls_lists_column(size_type size, data_type child_type, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace lists diff --git a/cpp/include/cudf/lists/detail/reverse.hpp b/cpp/include/cudf/lists/detail/reverse.hpp index 6e3b952a3b0..d099a0708b9 100644 --- a/cpp/include/cudf/lists/detail/reverse.hpp +++ b/cpp/include/cudf/lists/detail/reverse.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,8 @@ #include +#include + namespace cudf::lists::detail { /** @@ -25,6 +27,6 @@ namespace cudf::lists::detail { */ std::unique_ptr reverse(lists_column_view const& input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace cudf::lists::detail diff --git a/cpp/include/cudf/lists/detail/scatter.cuh b/cpp/include/cudf/lists/detail/scatter.cuh index 5fc52ff1c04..c550ad5b94f 100644 --- a/cpp/include/cudf/lists/detail/scatter.cuh +++ b/cpp/include/cudf/lists/detail/scatter.cuh @@ -30,6 +30,7 @@ #include #include +#include #include #include @@ -53,7 +54,7 @@ rmm::device_uvector list_vector_from_column( IndexIterator index_begin, IndexIterator index_end, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto n_rows = thrust::distance(index_begin, index_end); @@ -98,9 +99,9 @@ std::unique_ptr scatter_impl(rmm::device_uvector cons column_view const& source, column_view const& target, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { - CUDF_EXPECTS(column_types_equal(source, target), "Mismatched 
column types."); + CUDF_EXPECTS(have_same_types(source, target), "Mismatched column types."); auto const child_column_type = lists_column_view(target).child().type(); @@ -177,7 +178,7 @@ std::unique_ptr scatter(column_view const& source, MapIterator scatter_map_end, column_view const& target, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto const num_rows = target.size(); if (num_rows == 0) { return cudf::empty_like(target); } @@ -233,7 +234,7 @@ std::unique_ptr scatter(scalar const& slr, MapIterator scatter_map_end, column_view const& target, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto const num_rows = target.size(); if (num_rows == 0) { return cudf::empty_like(target); } diff --git a/cpp/include/cudf/lists/detail/scatter_helper.cuh b/cpp/include/cudf/lists/detail/scatter_helper.cuh index 605f76871b5..fc44e0bc290 100644 --- a/cpp/include/cudf/lists/detail/scatter_helper.cuh +++ b/cpp/include/cudf/lists/detail/scatter_helper.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -23,6 +23,7 @@ #include #include +#include #include @@ -136,7 +137,7 @@ std::unique_ptr build_lists_child_column_recursive( cudf::lists_column_view const& source_lists_column_view, cudf::lists_column_view const& target_lists_column_view, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace lists diff --git a/cpp/include/cudf/lists/detail/set_operations.hpp b/cpp/include/cudf/lists/detail/set_operations.hpp index 51fc58bee07..8746b1ba62a 100644 --- a/cpp/include/cudf/lists/detail/set_operations.hpp +++ b/cpp/include/cudf/lists/detail/set_operations.hpp @@ -22,6 +22,7 @@ #include #include +#include namespace cudf::lists::detail { @@ -35,7 +36,7 @@ std::unique_ptr have_overlap(lists_column_view const& lhs, null_equality nulls_equal, nan_equality nans_equal, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::list::intersect_distinct @@ -47,7 +48,7 @@ std::unique_ptr intersect_distinct(lists_column_view const& lhs, null_equality nulls_equal, nan_equality nans_equal, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::list::union_distinct @@ -59,7 +60,7 @@ std::unique_ptr union_distinct(lists_column_view const& lhs, null_equality nulls_equal, nan_equality nans_equal, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::list::difference_distinct @@ -71,7 +72,7 @@ std::unique_ptr difference_distinct(lists_column_view const& lhs, null_equality nulls_equal, nan_equality nans_equal, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** @} */ // end of group } // namespace cudf::lists::detail diff --git a/cpp/include/cudf/lists/detail/sorting.hpp b/cpp/include/cudf/lists/detail/sorting.hpp index 
c378ca8cf06..e428ea84ce6 100644 --- a/cpp/include/cudf/lists/detail/sorting.hpp +++ b/cpp/include/cudf/lists/detail/sorting.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ #include #include +#include namespace cudf { namespace lists { @@ -32,7 +33,7 @@ std::unique_ptr sort_lists(lists_column_view const& input, order column_order, null_order null_precedence, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::lists::stable_sort_lists @@ -43,7 +44,7 @@ std::unique_ptr stable_sort_lists(lists_column_view const& input, order column_order, null_order null_precedence, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace lists diff --git a/cpp/include/cudf/lists/detail/stream_compaction.hpp b/cpp/include/cudf/lists/detail/stream_compaction.hpp index 7ab9cf9a343..f5e5b29bc8f 100644 --- a/cpp/include/cudf/lists/detail/stream_compaction.hpp +++ b/cpp/include/cudf/lists/detail/stream_compaction.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -19,19 +19,20 @@ #include #include +#include namespace cudf::lists::detail { /** * @copydoc cudf::lists::apply_boolean_mask(lists_column_view const&, lists_column_view const&, - * rmm::mr::device_memory_resource*) + * rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches */ std::unique_ptr apply_boolean_mask(lists_column_view const& input, lists_column_view const& boolean_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::list::distinct @@ -42,6 +43,6 @@ std::unique_ptr distinct(lists_column_view const& input, null_equality nulls_equal, nan_equality nans_equal, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace cudf::lists::detail diff --git a/cpp/include/cudf/lists/explode.hpp b/cpp/include/cudf/lists/explode.hpp index adf46805855..81d82dcfa09 100644 --- a/cpp/include/cudf/lists/explode.hpp +++ b/cpp/include/cudf/lists/explode.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ #include #include +#include #include @@ -72,7 +73,7 @@ namespace cudf { std::unique_ptr
explode( table_view const& input_table, size_type explode_column_idx, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Explodes a list column's elements and includes a position column. @@ -116,7 +117,7 @@ std::unique_ptr
explode( std::unique_ptr
explode_position( table_view const& input_table, size_type explode_column_idx, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Explodes a list column's elements retaining any null entries or empty lists inside. @@ -158,7 +159,7 @@ std::unique_ptr
explode_position( std::unique_ptr
explode_outer( table_view const& input_table, size_type explode_column_idx, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Explodes a list column's elements retaining any null entries or empty lists and includes a @@ -202,7 +203,7 @@ std::unique_ptr
explode_outer( std::unique_ptr
explode_outer_position( table_view const& input_table, size_type explode_column_idx, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group diff --git a/cpp/include/cudf/lists/extract.hpp b/cpp/include/cudf/lists/extract.hpp index 14c0f59e17d..096d276fcfb 100644 --- a/cpp/include/cudf/lists/extract.hpp +++ b/cpp/include/cudf/lists/extract.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #include #include +#include namespace cudf { namespace lists { @@ -66,8 +67,8 @@ namespace lists { std::unique_ptr extract_list_element( lists_column_view const& lists_column, size_type index, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Create a column where each row is a single element from the corresponding sublist @@ -107,8 +108,8 @@ std::unique_ptr extract_list_element( std::unique_ptr extract_list_element( lists_column_view const& lists_column, column_view const& indices, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace lists diff --git a/cpp/include/cudf/lists/filling.hpp b/cpp/include/cudf/lists/filling.hpp index 3730e16482d..1d840c76bf8 100644 --- a/cpp/include/cudf/lists/filling.hpp +++ 
b/cpp/include/cudf/lists/filling.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ #include #include +#include #include @@ -66,8 +67,8 @@ namespace cudf::lists { std::unique_ptr sequences( column_view const& starts, column_view const& sizes, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Create a lists column in which each row contains a sequence of values specified by a tuple @@ -108,8 +109,8 @@ std::unique_ptr sequences( column_view const& starts, column_view const& steps, column_view const& sizes, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace cudf::lists diff --git a/cpp/include/cudf/lists/gather.hpp b/cpp/include/cudf/lists/gather.hpp index 5e6ab6816e6..a0d79c05098 100644 --- a/cpp/include/cudf/lists/gather.hpp +++ b/cpp/include/cudf/lists/gather.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -21,6 +21,7 @@ #include #include +#include namespace cudf { namespace lists { @@ -73,9 +74,9 @@ namespace lists { std::unique_ptr segmented_gather( lists_column_view const& source_column, lists_column_view const& gather_map_list, - out_of_bounds_policy bounds_policy = out_of_bounds_policy::DONT_CHECK, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + out_of_bounds_policy bounds_policy = out_of_bounds_policy::DONT_CHECK, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace lists diff --git a/cpp/include/cudf/lists/reverse.hpp b/cpp/include/cudf/lists/reverse.hpp index 864cd796f72..34c40c5a3ba 100644 --- a/cpp/include/cudf/lists/reverse.hpp +++ b/cpp/include/cudf/lists/reverse.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ #include #include +#include #include @@ -48,8 +49,8 @@ namespace cudf::lists { */ std::unique_ptr reverse( lists_column_view const& input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group diff --git a/cpp/include/cudf/lists/set_operations.hpp b/cpp/include/cudf/lists/set_operations.hpp index 6fb8989f0bb..b8abfd62461 100644 --- a/cpp/include/cudf/lists/set_operations.hpp +++ b/cpp/include/cudf/lists/set_operations.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ #include #include +#include namespace cudf::lists { /** @@ -59,10 +60,10 @@ namespace cudf::lists { std::unique_ptr have_overlap( lists_column_view const& lhs, lists_column_view const& rhs, - null_equality nulls_equal = null_equality::EQUAL, - nan_equality nans_equal = nan_equality::ALL_EQUAL, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + null_equality nulls_equal = null_equality::EQUAL, + nan_equality nans_equal = nan_equality::ALL_EQUAL, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Create a lists column of distinct elements common to two input lists columns. @@ -96,10 +97,10 @@ std::unique_ptr have_overlap( std::unique_ptr intersect_distinct( lists_column_view const& lhs, lists_column_view const& rhs, - null_equality nulls_equal = null_equality::EQUAL, - nan_equality nans_equal = nan_equality::ALL_EQUAL, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + null_equality nulls_equal = null_equality::EQUAL, + nan_equality nans_equal = nan_equality::ALL_EQUAL, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Create a lists column of distinct elements found in either of two input lists columns. 
@@ -133,10 +134,10 @@ std::unique_ptr intersect_distinct( std::unique_ptr union_distinct( lists_column_view const& lhs, lists_column_view const& rhs, - null_equality nulls_equal = null_equality::EQUAL, - nan_equality nans_equal = nan_equality::ALL_EQUAL, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + null_equality nulls_equal = null_equality::EQUAL, + nan_equality nans_equal = nan_equality::ALL_EQUAL, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Create a lists column of distinct elements found only in the left input column. @@ -170,10 +171,10 @@ std::unique_ptr union_distinct( std::unique_ptr difference_distinct( lists_column_view const& lhs, lists_column_view const& rhs, - null_equality nulls_equal = null_equality::EQUAL, - nan_equality nans_equal = nan_equality::ALL_EQUAL, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + null_equality nulls_equal = null_equality::EQUAL, + nan_equality nans_equal = nan_equality::ALL_EQUAL, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace cudf::lists diff --git a/cpp/include/cudf/lists/sorting.hpp b/cpp/include/cudf/lists/sorting.hpp index 39a52c75a98..78cea191bc5 100644 --- a/cpp/include/cudf/lists/sorting.hpp +++ b/cpp/include/cudf/lists/sorting.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -20,6 +20,7 @@ #include #include +#include namespace cudf { namespace lists { @@ -55,8 +56,8 @@ std::unique_ptr sort_lists( lists_column_view const& source_column, order column_order, null_order null_precedence, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Segmented sort of the elements within a list in each row of a list column using stable @@ -68,8 +69,8 @@ std::unique_ptr stable_sort_lists( lists_column_view const& source_column, order column_order, null_order null_precedence, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace lists diff --git a/cpp/include/cudf/lists/stream_compaction.hpp b/cpp/include/cudf/lists/stream_compaction.hpp index 3ac4f6861ec..31f09d37560 100644 --- a/cpp/include/cudf/lists/stream_compaction.hpp +++ b/cpp/include/cudf/lists/stream_compaction.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -20,6 +20,7 @@ #include #include +#include namespace cudf::lists { @@ -61,8 +62,8 @@ namespace cudf::lists { std::unique_ptr apply_boolean_mask( lists_column_view const& input, lists_column_view const& boolean_mask, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Create a new list column without duplicate elements in each list. @@ -86,10 +87,10 @@ std::unique_ptr apply_boolean_mask( */ std::unique_ptr distinct( lists_column_view const& input, - null_equality nulls_equal = null_equality::EQUAL, - nan_equality nans_equal = nan_equality::ALL_EQUAL, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + null_equality nulls_equal = null_equality::EQUAL, + nan_equality nans_equal = nan_equality::ALL_EQUAL, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group diff --git a/cpp/include/cudf/merge.hpp b/cpp/include/cudf/merge.hpp index 8886ec24bfe..29aa3ffe934 100644 --- a/cpp/include/cudf/merge.hpp +++ b/cpp/include/cudf/merge.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2023, NVIDIA CORPORATION. + * Copyright (c) 2018-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -19,6 +19,7 @@ #include #include +#include #include #include @@ -105,7 +106,7 @@ std::unique_ptr merge( std::vector const& key_cols, std::vector const& column_order, std::vector const& null_precedence = {}, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace cudf diff --git a/cpp/include/cudf/null_mask.hpp b/cpp/include/cudf/null_mask.hpp index 524296e60ca..9e375df140b 100644 --- a/cpp/include/cudf/null_mask.hpp +++ b/cpp/include/cudf/null_mask.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ #include #include +#include #include @@ -89,8 +90,8 @@ size_type num_bitmask_words(size_type number_of_bits); rmm::device_buffer create_null_mask( size_type size, mask_state state, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Sets a pre-allocated bitmask buffer to a given state in the range @@ -132,8 +133,8 @@ rmm::device_buffer copy_bitmask( bitmask_type const* mask, size_type begin_bit, size_type end_bit, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Copies `view`'s bitmask from the bits @@ -149,8 +150,8 @@ rmm::device_buffer copy_bitmask( */ rmm::device_buffer copy_bitmask( column_view const& view, - 
rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Performs bitwise AND of the bitmasks of columns of a table. Returns @@ -166,8 +167,8 @@ rmm::device_buffer copy_bitmask( */ std::pair bitmask_and( table_view const& view, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Performs bitwise OR of the bitmasks of columns of a table. Returns @@ -183,8 +184,8 @@ std::pair bitmask_and( */ std::pair bitmask_or( table_view const& view, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Given a validity bitmask, counts the number of null elements (unset bits) diff --git a/cpp/include/cudf/partitioning.hpp b/cpp/include/cudf/partitioning.hpp index 2c91bdf64f5..9ed56297908 100644 --- a/cpp/include/cudf/partitioning.hpp +++ b/cpp/include/cudf/partitioning.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -21,6 +21,7 @@ #include #include +#include #include #include @@ -33,6 +34,14 @@ namespace cudf { * @brief Column partitioning APIs */ +/** + * @brief Identifies the hash function to be used in hash partitioning + */ +enum class hash_id { + HASH_IDENTITY = 0, ///< Identity hash function that simply returns the key to be hashed + HASH_MURMUR3 ///< Murmur3 hash function +}; + /** * @brief Partitions rows of `t` according to the mapping specified by * `partition_map`. @@ -70,7 +79,7 @@ std::pair, std::vector> partition( table_view const& t, column_view const& partition_map, size_type num_partitions, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Partitions rows from the input table into multiple output tables. @@ -96,10 +105,10 @@ std::pair, std::vector> hash_partition( table_view const& input, std::vector const& columns_to_hash, int num_partitions, - hash_id hash_function = hash_id::HASH_MURMUR3, - uint32_t seed = DEFAULT_HASH_SEED, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + hash_id hash_function = hash_id::HASH_MURMUR3, + uint32_t seed = DEFAULT_HASH_SEED, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Round-robin partition. 
@@ -241,8 +250,8 @@ std::pair, std::vector> hash_partition( std::pair, std::vector> round_robin_partition( table_view const& input, cudf::size_type num_partitions, - cudf::size_type start_partition = 0, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + cudf::size_type start_partition = 0, + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace cudf diff --git a/cpp/include/cudf/quantiles.hpp b/cpp/include/cudf/quantiles.hpp index 1f3c26fa077..a1c98ee4e9d 100644 --- a/cpp/include/cudf/quantiles.hpp +++ b/cpp/include/cudf/quantiles.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,6 +22,7 @@ #include #include +#include namespace cudf { /** @@ -56,10 +57,10 @@ namespace cudf { std::unique_ptr quantile( column_view const& input, std::vector const& q, - interpolation interp = interpolation::LINEAR, - column_view const& ordered_indices = {}, - bool exact = true, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + interpolation interp = interpolation::LINEAR, + column_view const& ordered_indices = {}, + bool exact = true, + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns the rows of the input corresponding to the requested quantiles. @@ -98,7 +99,7 @@ std::unique_ptr
quantiles( cudf::sorted is_input_sorted = sorted::NO, std::vector const& column_order = {}, std::vector const& null_precedence = {}, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Calculate approximate percentiles on an input tdigest column. @@ -125,7 +126,7 @@ std::unique_ptr
quantiles( std::unique_ptr percentile_approx( tdigest::tdigest_column_view const& input, column_view const& percentiles, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace cudf diff --git a/cpp/include/cudf/reduction.hpp b/cpp/include/cudf/reduction.hpp index 52aebeb55e5..52f39925a2d 100644 --- a/cpp/include/cudf/reduction.hpp +++ b/cpp/include/cudf/reduction.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #include #include +#include #include @@ -74,6 +75,7 @@ enum class scan_type : bool { INCLUSIVE, EXCLUSIVE }; * @param col Input column view * @param agg Aggregation operator applied by the reduction * @param output_dtype The output scalar type + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned scalar's device memory * @returns Output scalar with reduce result */ @@ -81,7 +83,8 @@ std::unique_ptr reduce( column_view const& col, reduce_aggregation const& agg, data_type output_dtype, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Computes the reduction of the values in all rows of a column with an initial value @@ -95,6 +98,7 @@ std::unique_ptr reduce( * @param agg Aggregation operator applied by the reduction * @param output_dtype The output scalar type * @param init The initial value of the reduction + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory 
resource used to allocate the returned scalar's device memory * @returns Output scalar with reduce result */ @@ -103,7 +107,8 @@ std::unique_ptr reduce( reduce_aggregation const& agg, data_type output_dtype, std::optional> init, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Compute reduction of each segment in the input column @@ -144,6 +149,7 @@ std::unique_ptr reduce( * @param null_handling If `INCLUDE`, the reduction is valid if all elements in a segment are valid, * otherwise null. If `EXCLUDE`, the reduction is valid if any element in the segment is valid, * otherwise null. + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned scalar's device memory * @returns Output column with results of segmented reduction */ @@ -153,7 +159,8 @@ std::unique_ptr segmented_reduce( segmented_reduce_aggregation const& agg, data_type output_dtype, null_policy null_handling, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Compute reduction of each segment in the input column with an initial value. Only SUM, @@ -168,6 +175,7 @@ std::unique_ptr segmented_reduce( * otherwise null. If `EXCLUDE`, the reduction is valid if any element in the segment is valid, * otherwise null. * @param init The initial value of the reduction + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned scalar's device memory * @returns Output column with results of segmented reduction. 
*/ @@ -178,7 +186,8 @@ std::unique_ptr segmented_reduce( data_type output_dtype, null_policy null_handling, std::optional> init, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Computes the scan of a column. @@ -194,6 +203,7 @@ std::unique_ptr segmented_reduce( * exclusive scan if scan_type::EXCLUSIVE. * @param[in] null_handling Exclude null values when computing the result if null_policy::EXCLUDE. * Include nulls if null_policy::INCLUDE. Any operation with a null results in a null. + * @param[in] stream CUDA stream used for device memory operations and kernel launches * @param[in] mr Device memory resource used to allocate the returned scalar's device memory * @returns Scanned output column */ @@ -201,21 +211,24 @@ std::unique_ptr scan( column_view const& input, scan_aggregation const& agg, scan_type inclusive, - null_policy null_handling = null_policy::EXCLUDE, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + null_policy null_handling = null_policy::EXCLUDE, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Determines the minimum and maximum values of a column. * * * @param col column to compute minmax + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return A std::pair of scalars with the first scalar being the minimum value and the second * scalar being the maximum value of the input column. 
*/ std::pair, std::unique_ptr> minmax( column_view const& col, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group diff --git a/cpp/include/cudf/reduction/detail/histogram.hpp b/cpp/include/cudf/reduction/detail/histogram.hpp index 97c711fda4e..f23c5a14e33 100644 --- a/cpp/include/cudf/reduction/detail/histogram.hpp +++ b/cpp/include/cudf/reduction/detail/histogram.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -42,7 +43,7 @@ namespace cudf::reduction::detail { compute_row_frequencies(table_view const& input, std::optional const& partial_counts, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Create an empty histogram column. 
diff --git a/cpp/include/cudf/reduction/detail/reduction.cuh b/cpp/include/cudf/reduction/detail/reduction.cuh index 9807d4cb4ea..7d1754d86f2 100644 --- a/cpp/include/cudf/reduction/detail/reduction.cuh +++ b/cpp/include/cudf/reduction/detail/reduction.cuh @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -62,7 +63,7 @@ std::unique_ptr reduce(InputIterator d_in, op::simple_op op, std::optional init, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto const binary_op = cudf::detail::cast_functor(op.get_binary_op()); auto const initial_value = init.value_or(op.template get_identity()); @@ -105,7 +106,7 @@ std::unique_ptr reduce(InputIterator d_in, op::simple_op op, std::optional init, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FAIL( "This function should never be called. fixed_point reduce should always go through the reduce " @@ -122,7 +123,7 @@ std::unique_ptr reduce(InputIterator d_in, op::simple_op op, std::optional init, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto const binary_op = cudf::detail::cast_functor(op.get_binary_op()); auto const initial_value = init.value_or(op.template get_identity()); @@ -188,7 +189,7 @@ std::unique_ptr reduce(InputIterator d_in, cudf::size_type valid_count, cudf::size_type ddof, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto const binary_op = cudf::detail::cast_functor(op.get_binary_op()); auto const initial_value = op.template get_identity(); diff --git a/cpp/include/cudf/reduction/detail/reduction.hpp b/cpp/include/cudf/reduction/detail/reduction.hpp index 4cbfb82ae6b..78f90a1e2c9 100644 --- a/cpp/include/cudf/reduction/detail/reduction.hpp +++ b/cpp/include/cudf/reduction/detail/reduction.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA 
CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,13 +20,15 @@ #include #include +#include + #include namespace cudf::reduction::detail { /** * @copydoc cudf::reduce(column_view const&, reduce_aggregation const&, data_type, - * std::optional>, rmm::mr::device_memory_resource*) + * std::optional>, rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ @@ -35,6 +37,6 @@ std::unique_ptr reduce(column_view const& col, data_type output_dtype, std::optional> init, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace cudf::reduction::detail diff --git a/cpp/include/cudf/reduction/detail/reduction_functions.hpp b/cpp/include/cudf/reduction/detail/reduction_functions.hpp index 704332c8e1d..31d465619b9 100644 --- a/cpp/include/cudf/reduction/detail/reduction_functions.hpp +++ b/cpp/include/cudf/reduction/detail/reduction_functions.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -22,6 +22,7 @@ #include #include +#include #include @@ -47,7 +48,7 @@ std::unique_ptr sum(column_view const& col, data_type const output_dtype, std::optional> init, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Computes minimum of elements in input column @@ -67,7 +68,7 @@ std::unique_ptr min(column_view const& col, data_type const output_dtype, std::optional> init, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Computes maximum of elements in input column @@ -87,7 +88,7 @@ std::unique_ptr max(column_view const& col, data_type const output_dtype, std::optional> init, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Computes any of elements in input column is true when typecasted to bool @@ -108,7 +109,7 @@ std::unique_ptr any(column_view const& col, data_type const output_dtype, std::optional> init, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Computes all of elements in input column is true when typecasted to bool @@ -129,7 +130,7 @@ std::unique_ptr all(column_view const& col, data_type const output_dtype, std::optional> init, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Compute frequency for each unique element in the input column. @@ -144,7 +145,7 @@ std::unique_ptr all(column_view const& col, */ std::unique_ptr histogram(column_view const& input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Merge multiple histograms together. 
@@ -156,7 +157,7 @@ std::unique_ptr histogram(column_view const& input, */ std::unique_ptr merge_histogram(column_view const& input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Computes product of elements in input column @@ -177,7 +178,7 @@ std::unique_ptr product(column_view const& col, data_type const output_dtype, std::optional> init, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Computes sum of squares of elements in input column @@ -196,7 +197,7 @@ std::unique_ptr product(column_view const& col, std::unique_ptr sum_of_squares(column_view const& col, data_type const output_dtype, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Computes mean of elements in input column @@ -215,7 +216,7 @@ std::unique_ptr sum_of_squares(column_view const& col, std::unique_ptr mean(column_view const& col, data_type const output_dtype, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Computes variance of elements in input column @@ -237,7 +238,7 @@ std::unique_ptr variance(column_view const& col, data_type const output_dtype, size_type ddof, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Computes standard deviation of elements in input column @@ -259,7 +260,7 @@ std::unique_ptr standard_deviation(column_view const& col, data_type const output_dtype, size_type ddof, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Returns nth element in input column @@ -289,7 +290,7 @@ std::unique_ptr nth_element(column_view const& col, size_type n, null_policy null_handling, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + 
rmm::device_async_resource_ref mr); /** * @brief Collect input column into a (list) scalar @@ -303,7 +304,7 @@ std::unique_ptr nth_element(column_view const& col, std::unique_ptr collect_list(column_view const& col, null_policy null_handling, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Merge a bunch of list scalars into single list scalar @@ -315,7 +316,7 @@ std::unique_ptr collect_list(column_view const& col, */ std::unique_ptr merge_lists(lists_column_view const& col, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Collect input column into a (list) scalar without duplicated elements @@ -333,7 +334,7 @@ std::unique_ptr collect_set(column_view const& col, null_equality nulls_equal, nan_equality nans_equal, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Merge a bunch of list scalars into single list scalar then drop duplicated elements @@ -349,7 +350,7 @@ std::unique_ptr merge_sets(lists_column_view const& col, null_equality nulls_equal, nan_equality nans_equal, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace reduction diff --git a/cpp/include/cudf/reduction/detail/segmented_reduction_functions.hpp b/cpp/include/cudf/reduction/detail/segmented_reduction_functions.hpp index 3902a7200a9..770ac6580ef 100644 --- a/cpp/include/cudf/reduction/detail/segmented_reduction_functions.hpp +++ b/cpp/include/cudf/reduction/detail/segmented_reduction_functions.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -22,6 +22,7 @@ #include #include +#include #include @@ -57,7 +58,7 @@ std::unique_ptr segmented_sum(column_view const& col, null_policy null_handling, std::optional> init, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Computes product of each segment in the input column @@ -87,7 +88,7 @@ std::unique_ptr segmented_product(column_view const& col, null_policy null_handling, std::optional> init, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Compute minimum of each segment in the input column @@ -116,7 +117,7 @@ std::unique_ptr segmented_min(column_view const& col, null_policy null_handling, std::optional> init, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Compute maximum of each segment in the input column @@ -145,7 +146,7 @@ std::unique_ptr segmented_max(column_view const& col, null_policy null_handling, std::optional> init, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Compute if any of the values in the segment are true when typecasted to bool @@ -175,7 +176,7 @@ std::unique_ptr segmented_any(column_view const& col, null_policy null_handling, std::optional> init, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Compute if all of the values in the segment are true when typecasted to bool @@ -205,7 +206,7 @@ std::unique_ptr segmented_all(column_view const& col, null_policy null_handling, std::optional> init, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Computes mean of elements of segments in the input column @@ -233,7 +234,7 @@ std::unique_ptr segmented_mean(column_view const& col, data_type const output_dtype, null_policy 
null_handling, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Computes sum of squares of elements of segments in the input column @@ -261,7 +262,7 @@ std::unique_ptr segmented_sum_of_squares(column_view const& col, data_type const output_dtype, null_policy null_handling, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Computes the standard deviation of elements of segments in the input column @@ -292,7 +293,7 @@ std::unique_ptr segmented_standard_deviation(column_view const& col, null_policy null_handling, size_type ddof, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Computes the variance of elements of segments in the input column @@ -323,7 +324,7 @@ std::unique_ptr segmented_variance(column_view const& col, null_policy null_handling, size_type ddof, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Counts the number of unique values within each segment of a column @@ -351,7 +352,7 @@ std::unique_ptr segmented_nunique(column_view const& col, device_span offsets, null_policy null_handling, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace reduction diff --git a/cpp/include/cudf/replace.hpp b/cpp/include/cudf/replace.hpp index 3405dc8b796..ae20e72f023 100644 --- a/cpp/include/cudf/replace.hpp +++ b/cpp/include/cudf/replace.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2023, NVIDIA CORPORATION. + * Copyright (c) 2018-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -20,6 +20,7 @@ #include #include +#include #include @@ -55,8 +56,8 @@ enum class replace_policy : bool { PRECEDING, FOLLOWING }; std::unique_ptr replace_nulls( column_view const& input, column_view const& replacement, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Replaces all null values in a column with a scalar. @@ -74,8 +75,8 @@ std::unique_ptr replace_nulls( std::unique_ptr replace_nulls( column_view const& input, scalar const& replacement, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Replaces all null values in a column with the first non-null value that precedes/follows. 
@@ -93,8 +94,8 @@ std::unique_ptr replace_nulls( std::unique_ptr replace_nulls( column_view const& input, replace_policy const& replace_policy, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Replaces all NaN values in a column with corresponding values from another column @@ -121,8 +122,8 @@ std::unique_ptr replace_nulls( std::unique_ptr replace_nans( column_view const& input, column_view const& replacement, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Replaces all NaN values in a column with a scalar @@ -148,8 +149,8 @@ std::unique_ptr replace_nans( std::unique_ptr replace_nans( column_view const& input, scalar const& replacement, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Return a copy of `input_col` replacing any `values_to_replace[i]` @@ -167,8 +168,8 @@ std::unique_ptr find_and_replace_all( column_view const& input_col, column_view const& values_to_replace, column_view const& replacement_values, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Replaces values less than `lo` in 
`input` with `lo_replace`, @@ -222,8 +223,8 @@ std::unique_ptr clamp( scalar const& lo_replace, scalar const& hi, scalar const& hi_replace, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Replaces values less than `lo` in `input` with `lo`, @@ -268,8 +269,8 @@ std::unique_ptr clamp( column_view const& input, scalar const& lo, scalar const& hi, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Copies from a column of floating-point elements and replaces `-NaN` and `-0.0` with `+NaN` @@ -288,8 +289,8 @@ std::unique_ptr clamp( */ std::unique_ptr normalize_nans_and_zeros( column_view const& input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Modifies a column of floating-point elements to replace all `-NaN` and `-0.0` with `+NaN` diff --git a/cpp/include/cudf/reshape.hpp b/cpp/include/cudf/reshape.hpp index 42cfb890a31..26316be7fd4 100644 --- a/cpp/include/cudf/reshape.hpp +++ b/cpp/include/cudf/reshape.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -21,6 +21,7 @@ #include #include +#include #include @@ -52,7 +53,7 @@ namespace cudf { */ std::unique_ptr interleave_columns( table_view const& input, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Repeats the rows from `input` table `count` times to form a new table. @@ -75,7 +76,7 @@ std::unique_ptr interleave_columns( std::unique_ptr
tile( table_view const& input, size_type count, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Configures whether byte casting flips endianness @@ -100,7 +101,7 @@ enum class flip_endianness : bool { NO, YES }; std::unique_ptr byte_cast( column_view const& input_column, flip_endianness endian_configuration, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group diff --git a/cpp/include/cudf/rolling.hpp b/cpp/include/cudf/rolling.hpp index ec93c709163..2cd34f48265 100644 --- a/cpp/include/cudf/rolling.hpp +++ b/cpp/include/cudf/rolling.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #include #include +#include #include @@ -66,7 +67,7 @@ std::unique_ptr rolling_window( size_type following_window, size_type min_periods, rolling_aggregation const& agg, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief @copybrief rolling_window @@ -76,7 +77,7 @@ std::unique_ptr rolling_window( * size_type following_window, * size_type min_periods, * rolling_aggregation const& agg, - * rmm::mr::device_memory_resource* mr) + * rmm::device_async_resource_ref mr) * * @param default_outputs A column of per-row default values to be returned instead * of nulls. 
Used for LEAD()/LAG(), if the row offset crosses @@ -89,7 +90,7 @@ std::unique_ptr rolling_window( size_type following_window, size_type min_periods, rolling_aggregation const& agg, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Abstraction for window boundary sizes @@ -237,7 +238,7 @@ std::unique_ptr grouped_rolling_window( size_type following_window, size_type min_periods, rolling_aggregation const& aggr, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief @copybrief grouped_rolling_window @@ -248,7 +249,7 @@ std::unique_ptr grouped_rolling_window( * size_type following_window, * size_type min_periods, * rolling_aggregation const& aggr, - * rmm::mr::device_memory_resource* mr) + * rmm::device_async_resource_ref mr) */ std::unique_ptr grouped_rolling_window( table_view const& group_keys, @@ -257,7 +258,7 @@ std::unique_ptr grouped_rolling_window( window_bounds following_window, size_type min_periods, rolling_aggregation const& aggr, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief @copybrief grouped_rolling_window @@ -268,7 +269,7 @@ std::unique_ptr grouped_rolling_window( * size_type following_window, * size_type min_periods, * rolling_aggregation const& aggr, - * rmm::mr::device_memory_resource* mr) + * rmm::device_async_resource_ref mr) * * @param default_outputs A column of per-row default values to be returned instead * of nulls. 
Used for LEAD()/LAG(), if the row offset crosses @@ -282,7 +283,7 @@ std::unique_ptr grouped_rolling_window( size_type following_window, size_type min_periods, rolling_aggregation const& aggr, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief @copybrief grouped_rolling_window @@ -294,7 +295,7 @@ std::unique_ptr grouped_rolling_window( * size_type following_window, * size_type min_periods, * rolling_aggregation const& aggr, - * rmm::mr::device_memory_resource* mr) + * rmm::device_async_resource_ref mr) */ std::unique_ptr grouped_rolling_window( table_view const& group_keys, @@ -304,7 +305,7 @@ std::unique_ptr grouped_rolling_window( window_bounds following_window, size_type min_periods, rolling_aggregation const& aggr, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Applies a grouping-aware, timestamp-based rolling window function to the values in a @@ -399,7 +400,7 @@ std::unique_ptr grouped_time_range_rolling_window( size_type following_window_in_days, size_type min_periods, rolling_aggregation const& aggr, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Applies a grouping-aware, timestamp-based rolling window function to the values in a @@ -414,7 +415,7 @@ std::unique_ptr grouped_time_range_rolling_window( * size_type following_window_in_days, * size_type min_periods, * rolling_aggregation const& aggr, - * rmm::mr::device_memory_resource* mr) + * rmm::device_async_resource_ref mr) * * The `preceding_window_in_days` and `following_window_in_days` are specified as a `window_bounds` * and supports "unbounded" windows, if set to `window_bounds::unbounded()`. 
@@ -428,7 +429,7 @@ std::unique_ptr grouped_time_range_rolling_window( window_bounds following_window_in_days, size_type min_periods, rolling_aggregation const& aggr, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Applies a grouping-aware, value range-based rolling window function to the values in a @@ -548,7 +549,7 @@ std::unique_ptr grouped_range_rolling_window( range_window_bounds const& following, size_type min_periods, rolling_aggregation const& aggr, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Applies a variable-size rolling window function to the values in a column. @@ -591,7 +592,7 @@ std::unique_ptr rolling_window( column_view const& following_window, size_type min_periods, rolling_aggregation const& agg, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace cudf diff --git a/cpp/include/cudf/round.hpp b/cpp/include/cudf/round.hpp index ee088628b94..85935f8f05c 100644 --- a/cpp/include/cudf/round.hpp +++ b/cpp/include/cudf/round.hpp @@ -19,6 +19,7 @@ #include #include +#include namespace cudf { @@ -72,9 +73,9 @@ enum class rounding_method : int32_t { HALF_UP, HALF_EVEN }; */ std::unique_ptr round( column_view const& input, - int32_t decimal_places = 0, - rounding_method method = rounding_method::HALF_UP, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + int32_t decimal_places = 0, + rounding_method method = rounding_method::HALF_UP, + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace cudf diff --git a/cpp/include/cudf/scalar/scalar.hpp 
b/cpp/include/cudf/scalar/scalar.hpp index 08bffab5067..da1d0d743a7 100644 --- a/cpp/include/cudf/scalar/scalar.hpp +++ b/cpp/include/cudf/scalar/scalar.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,6 +25,7 @@ #include #include #include +#include /** * @file @@ -112,8 +113,8 @@ class scalar { * @param mr Device memory resource to use for device memory allocation. */ scalar(scalar const& other, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct a new scalar object. @@ -127,9 +128,9 @@ class scalar { * @param mr Device memory resource to use for device memory allocation. */ scalar(data_type type, - bool is_valid = false, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + bool is_valid = false, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); }; namespace detail { @@ -164,8 +165,8 @@ class fixed_width_scalar : public scalar { * @param mr Device memory resource to use for device memory allocation. */ fixed_width_scalar(fixed_width_scalar const& other, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Set the value of the scalar. 
@@ -214,9 +215,9 @@ class fixed_width_scalar : public scalar { * @param mr Device memory resource to use for device memory allocation. */ fixed_width_scalar(T value, - bool is_valid = true, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + bool is_valid = true, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct a new fixed width scalar object from existing device memory. @@ -227,9 +228,9 @@ class fixed_width_scalar : public scalar { * @param mr Device memory resource to use for device memory allocation. */ fixed_width_scalar(rmm::device_scalar&& data, - bool is_valid = true, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + bool is_valid = true, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); }; } // namespace detail @@ -264,8 +265,8 @@ class numeric_scalar : public detail::fixed_width_scalar { * @param mr Device memory resource to use for device memory allocation. */ numeric_scalar(numeric_scalar const& other, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct a new numeric scalar object. @@ -276,9 +277,9 @@ class numeric_scalar : public detail::fixed_width_scalar { * @param mr Device memory resource to use for device memory allocation. 
*/ numeric_scalar(T value, - bool is_valid = true, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + bool is_valid = true, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct a new numeric scalar object from existing device memory. @@ -289,9 +290,9 @@ class numeric_scalar : public detail::fixed_width_scalar { * @param mr Device memory resource to use for device memory allocation. */ numeric_scalar(rmm::device_scalar&& data, - bool is_valid = true, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + bool is_valid = true, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); }; /** @@ -327,8 +328,8 @@ class fixed_point_scalar : public scalar { * @param mr Device memory resource to use for device memory allocation. */ fixed_point_scalar(fixed_point_scalar const& other, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct a new fixed_point scalar object from already shifted value and scale. 
@@ -341,9 +342,9 @@ class fixed_point_scalar : public scalar { */ fixed_point_scalar(rep_type value, numeric::scale_type scale, - bool is_valid = true, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + bool is_valid = true, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct a new fixed_point scalar object from a value and default 0-scale. @@ -354,9 +355,9 @@ class fixed_point_scalar : public scalar { * @param mr Device memory resource to use for device memory allocation. */ fixed_point_scalar(rep_type value, - bool is_valid = true, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + bool is_valid = true, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct a new fixed_point scalar object from a fixed_point number. @@ -367,9 +368,9 @@ class fixed_point_scalar : public scalar { * @param mr Device memory resource to use for device memory allocation. */ fixed_point_scalar(T value, - bool is_valid = true, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + bool is_valid = true, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct a new fixed_point scalar object from existing device memory. 
@@ -382,9 +383,9 @@ class fixed_point_scalar : public scalar { */ fixed_point_scalar(rmm::device_scalar&& data, numeric::scale_type scale, - bool is_valid = true, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + bool is_valid = true, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Get the value of the scalar. @@ -451,8 +452,8 @@ class string_scalar : public scalar { * @param mr Device memory resource to use for device memory allocation. */ string_scalar(string_scalar const& other, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct a new string scalar object. @@ -465,9 +466,9 @@ class string_scalar : public scalar { * @param mr Device memory resource to use for device memory allocation. */ string_scalar(std::string const& string, - bool is_valid = true, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + bool is_valid = true, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct a new string scalar object from string_view. @@ -480,9 +481,9 @@ class string_scalar : public scalar { * @param mr Device memory resource to use for device memory allocation. 
*/ string_scalar(value_type const& source, - bool is_valid = true, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + bool is_valid = true, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct a new string scalar object from string_view in device memory. @@ -495,9 +496,9 @@ class string_scalar : public scalar { * @param mr Device memory resource to use for device memory allocation. */ string_scalar(rmm::device_scalar& data, - bool is_valid = true, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + bool is_valid = true, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct a new string scalar object by moving an existing string data buffer. @@ -511,9 +512,9 @@ class string_scalar : public scalar { * @param mr Device memory resource to use for device memory allocation. */ string_scalar(rmm::device_buffer&& data, - bool is_valid = true, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + bool is_valid = true, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Explicit conversion operator to get the value of the scalar in a host std::string. @@ -584,8 +585,8 @@ class chrono_scalar : public detail::fixed_width_scalar { * @param mr Device memory resource to use for device memory allocation. 
*/ chrono_scalar(chrono_scalar const& other, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct a new chrono scalar object. @@ -596,9 +597,9 @@ class chrono_scalar : public detail::fixed_width_scalar { * @param mr Device memory resource to use for device memory allocation. */ chrono_scalar(T value, - bool is_valid = true, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + bool is_valid = true, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct a new chrono scalar object from existing device memory. @@ -609,9 +610,9 @@ class chrono_scalar : public detail::fixed_width_scalar { * @param mr Device memory resource to use for device memory allocation. */ chrono_scalar(rmm::device_scalar&& data, - bool is_valid = true, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + bool is_valid = true, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); }; /** @@ -643,8 +644,8 @@ class timestamp_scalar : public chrono_scalar { * @param mr Device memory resource to use for device memory allocation. 
*/ timestamp_scalar(timestamp_scalar const& other, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct a new timestamp scalar object from a duration that is @@ -659,8 +660,8 @@ class timestamp_scalar : public chrono_scalar { template timestamp_scalar(Duration2 const& value, bool is_valid, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns the duration in number of ticks since the UNIX epoch. @@ -699,8 +700,8 @@ class duration_scalar : public chrono_scalar { * @param mr Device memory resource to use for device memory allocation. */ duration_scalar(duration_scalar const& other, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct a new duration scalar object from tick counts. @@ -712,8 +713,8 @@ class duration_scalar : public chrono_scalar { */ duration_scalar(rep_type value, bool is_valid, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns the duration in number of ticks. 
@@ -748,8 +749,8 @@ class list_scalar : public scalar { * @param mr Device memory resource to use for device memory allocation. */ list_scalar(list_scalar const& other, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct a new list scalar object from column_view. @@ -762,9 +763,9 @@ class list_scalar : public scalar { * @param mr Device memory resource to use for device memory allocation. */ list_scalar(cudf::column_view const& data, - bool is_valid = true, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + bool is_valid = true, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct a new list scalar object from existing column. @@ -775,9 +776,9 @@ class list_scalar : public scalar { * @param mr Device memory resource to use for device memory allocation. */ list_scalar(cudf::column&& data, - bool is_valid = true, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + bool is_valid = true, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a non-owning, immutable view to underlying device data. @@ -813,8 +814,8 @@ class struct_scalar : public scalar { * @param mr Device memory resource to use for device memory allocation. 
*/ struct_scalar(struct_scalar const& other, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct a new struct scalar object from table_view. @@ -827,9 +828,9 @@ class struct_scalar : public scalar { * @param mr Device memory resource to use for device memory allocation. */ struct_scalar(table_view const& data, - bool is_valid = true, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + bool is_valid = true, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct a new struct scalar object from a host_span of column_views. @@ -842,9 +843,9 @@ class struct_scalar : public scalar { * @param mr Device memory resource to use for device memory allocation. */ struct_scalar(host_span data, - bool is_valid = true, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + bool is_valid = true, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct a new struct scalar object from an existing table in device memory. @@ -858,9 +859,9 @@ class struct_scalar : public scalar { * @param mr Device memory resource to use for device memory allocation. 
*/ struct_scalar(table&& data, - bool is_valid = true, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + bool is_valid = true, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a non-owning, immutable view to underlying device data. @@ -888,7 +889,7 @@ class struct_scalar : public scalar { static table init_data(table&& data, bool is_valid, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); }; /** @} */ // end of group diff --git a/cpp/include/cudf/scalar/scalar_factories.hpp b/cpp/include/cudf/scalar/scalar_factories.hpp index 78b6c4fd0e9..7dd4674a2fd 100644 --- a/cpp/include/cudf/scalar/scalar_factories.hpp +++ b/cpp/include/cudf/scalar/scalar_factories.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -20,6 +20,7 @@ #include #include +#include namespace cudf { /** @@ -43,8 +44,8 @@ namespace cudf { */ std::unique_ptr make_numeric_scalar( data_type type, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct scalar with uninitialized storage to hold a value of the @@ -60,8 +61,8 @@ std::unique_ptr make_numeric_scalar( */ std::unique_ptr make_timestamp_scalar( data_type type, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct scalar with uninitialized storage to hold a value of the @@ -77,8 +78,8 @@ std::unique_ptr make_timestamp_scalar( */ std::unique_ptr make_duration_scalar( data_type type, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct scalar with uninitialized storage to hold a value of the @@ -94,8 +95,8 @@ std::unique_ptr make_duration_scalar( */ std::unique_ptr make_fixed_width_scalar( data_type type, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct STRING type scalar given a `std::string`. 
@@ -111,8 +112,8 @@ std::unique_ptr make_fixed_width_scalar( */ std::unique_ptr make_string_scalar( std::string const& string, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Constructs default constructed scalar of type `type` @@ -126,8 +127,8 @@ std::unique_ptr make_string_scalar( */ std::unique_ptr make_default_constructed_scalar( data_type type, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Creates an empty (invalid) scalar of the same type as the `input` column_view. @@ -141,8 +142,8 @@ std::unique_ptr make_default_constructed_scalar( */ std::unique_ptr make_empty_scalar_like( column_view const& input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct scalar using the given value of fixed width type @@ -156,8 +157,8 @@ std::unique_ptr make_empty_scalar_like( template std::unique_ptr make_fixed_width_scalar( T value, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) { return std::make_unique>(value, true, stream, mr); } @@ -176,8 +177,8 @@ template std::unique_ptr make_fixed_point_scalar( typename 
T::rep value, numeric::scale_type scale, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) { return std::make_unique>(value, scale, true, stream, mr); } @@ -192,8 +193,8 @@ std::unique_ptr make_fixed_point_scalar( */ std::unique_ptr make_list_scalar( column_view elements, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct a struct scalar using the given table_view. @@ -207,8 +208,8 @@ std::unique_ptr make_list_scalar( */ std::unique_ptr make_struct_scalar( table_view const& data, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct a struct scalar using the given span of column views. 
@@ -222,8 +223,8 @@ std::unique_ptr make_struct_scalar( */ std::unique_ptr make_struct_scalar( host_span data, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace cudf diff --git a/cpp/include/cudf/search.hpp b/cpp/include/cudf/search.hpp index 49acce6a63b..2e50ba2d687 100644 --- a/cpp/include/cudf/search.hpp +++ b/cpp/include/cudf/search.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,6 +22,7 @@ #include #include +#include #include @@ -72,8 +73,8 @@ std::unique_ptr lower_bound( table_view const& needles, std::vector const& column_order, std::vector const& null_precedence, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Find largest indices in a sorted table where values should be inserted to maintain order. @@ -114,8 +115,8 @@ std::unique_ptr upper_bound( table_view const& needles, std::vector const& column_order, std::vector const& null_precedence, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Check if the given `needle` value exists in the `haystack` column. 
@@ -163,8 +164,8 @@ bool contains(column_view const& haystack, std::unique_ptr contains( column_view const& haystack, column_view const& needles, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace cudf diff --git a/cpp/include/cudf/sorting.hpp b/cpp/include/cudf/sorting.hpp index 42bcb5da8e3..79a00cbce42 100644 --- a/cpp/include/cudf/sorting.hpp +++ b/cpp/include/cudf/sorting.hpp @@ -21,6 +21,7 @@ #include #include +#include #include #include @@ -54,7 +55,7 @@ std::unique_ptr sorted_order( std::vector const& column_order = {}, std::vector const& null_precedence = {}, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Computes the row indices that would produce `input` in a stable @@ -69,7 +70,7 @@ std::unique_ptr stable_sorted_order( std::vector const& column_order = {}, std::vector const& null_precedence = {}, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Checks whether the rows of a `table` are sorted in a lexicographical @@ -113,7 +114,7 @@ std::unique_ptr
sort( std::vector const& column_order = {}, std::vector const& null_precedence = {}, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Performs a stable lexicographic sort of the rows of a table @@ -125,7 +126,7 @@ std::unique_ptr
stable_sort( std::vector const& column_order = {}, std::vector const& null_precedence = {}, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Performs a key-value sort. @@ -155,7 +156,7 @@ std::unique_ptr
sort_by_key( std::vector const& column_order = {}, std::vector const& null_precedence = {}, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Performs a key-value stable sort. @@ -168,7 +169,7 @@ std::unique_ptr
stable_sort_by_key( std::vector const& column_order = {}, std::vector const& null_precedence = {}, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Computes the ranks of input column in sorted order. @@ -207,8 +208,8 @@ std::unique_ptr rank( null_policy null_handling, null_order null_precedence, bool percentage, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns sorted order after sorting each segment in the table. @@ -259,7 +260,7 @@ std::unique_ptr segmented_sorted_order( std::vector const& column_order = {}, std::vector const& null_precedence = {}, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns sorted order after stably sorting each segment in the table. @@ -272,7 +273,7 @@ std::unique_ptr stable_segmented_sorted_order( std::vector const& column_order = {}, std::vector const& null_precedence = {}, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Performs a lexicographic segmented sort of a table @@ -328,7 +329,7 @@ std::unique_ptr
segmented_sort_by_key( std::vector const& column_order = {}, std::vector const& null_precedence = {}, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Performs a stably lexicographic segmented sort of a table @@ -342,7 +343,7 @@ std::unique_ptr
stable_segmented_sort_by_key( std::vector const& column_order = {}, std::vector const& null_precedence = {}, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace cudf diff --git a/cpp/include/cudf/stream_compaction.hpp b/cpp/include/cudf/stream_compaction.hpp index 3e7bdf13707..c386b3a22b4 100644 --- a/cpp/include/cudf/stream_compaction.hpp +++ b/cpp/include/cudf/stream_compaction.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #include #include +#include #include #include @@ -73,7 +74,7 @@ std::unique_ptr
drop_nulls( table_view const& input, std::vector const& keys, cudf::size_type keep_threshold, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Filters a table to remove null elements. @@ -104,7 +105,7 @@ std::unique_ptr
drop_nulls( std::unique_ptr
drop_nulls( table_view const& input, std::vector const& keys, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Filters a table to remove NANs with threshold count. @@ -147,7 +148,7 @@ std::unique_ptr
drop_nans( table_view const& input, std::vector const& keys, cudf::size_type keep_threshold, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Filters a table to remove NANs. @@ -179,7 +180,7 @@ std::unique_ptr
drop_nans( std::unique_ptr
drop_nans( table_view const& input, std::vector const& keys, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Filters `input` using `boolean_mask` of boolean values as a mask. @@ -205,7 +206,7 @@ std::unique_ptr
drop_nans( std::unique_ptr
apply_boolean_mask( table_view const& input, column_view const& boolean_mask, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Choices for drop_duplicates API for retainment of duplicate rows @@ -248,8 +249,8 @@ std::unique_ptr
unique( table_view const& input, std::vector const& keys, duplicate_keep_option keep, - null_equality nulls_equal = null_equality::EQUAL, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + null_equality nulls_equal = null_equality::EQUAL, + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Create a new table without duplicate rows. @@ -273,10 +274,10 @@ std::unique_ptr
unique( std::unique_ptr
distinct( table_view const& input, std::vector const& keys, - duplicate_keep_option keep = duplicate_keep_option::KEEP_ANY, - null_equality nulls_equal = null_equality::EQUAL, - nan_equality nans_equal = nan_equality::ALL_EQUAL, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + duplicate_keep_option keep = duplicate_keep_option::KEEP_ANY, + null_equality nulls_equal = null_equality::EQUAL, + nan_equality nans_equal = nan_equality::ALL_EQUAL, + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Create a column of indices of all distinct rows in the input table. @@ -294,11 +295,11 @@ std::unique_ptr
distinct( */ std::unique_ptr distinct_indices( table_view const& input, - duplicate_keep_option keep = duplicate_keep_option::KEEP_ANY, - null_equality nulls_equal = null_equality::EQUAL, - nan_equality nans_equal = nan_equality::ALL_EQUAL, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + duplicate_keep_option keep = duplicate_keep_option::KEEP_ANY, + null_equality nulls_equal = null_equality::EQUAL, + nan_equality nans_equal = nan_equality::ALL_EQUAL, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Create a new table without duplicate rows, preserving input order. @@ -325,10 +326,10 @@ std::unique_ptr distinct_indices( std::unique_ptr
stable_distinct( table_view const& input, std::vector const& keys, - duplicate_keep_option keep = duplicate_keep_option::KEEP_ANY, - null_equality nulls_equal = null_equality::EQUAL, - nan_equality nans_equal = nan_equality::ALL_EQUAL, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + duplicate_keep_option keep = duplicate_keep_option::KEEP_ANY, + null_equality nulls_equal = null_equality::EQUAL, + nan_equality nans_equal = nan_equality::ALL_EQUAL, + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Count the number of consecutive groups of equivalent rows in a column. diff --git a/cpp/include/cudf/strings/attributes.hpp b/cpp/include/cudf/strings/attributes.hpp index 85086e44a26..26f906b3102 100644 --- a/cpp/include/cudf/strings/attributes.hpp +++ b/cpp/include/cudf/strings/attributes.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -19,6 +19,7 @@ #include #include +#include namespace cudf { @@ -47,7 +48,7 @@ namespace strings { */ std::unique_ptr count_characters( strings_column_view const& input, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a column containing byte lengths @@ -65,7 +66,7 @@ std::unique_ptr count_characters( */ std::unique_ptr count_bytes( strings_column_view const& input, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Creates a numeric column with code point values (integers) for each @@ -85,7 +86,7 @@ std::unique_ptr count_bytes( */ std::unique_ptr code_points( strings_column_view const& input, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of strings_apis group diff --git a/cpp/include/cudf/strings/capitalize.hpp b/cpp/include/cudf/strings/capitalize.hpp index 57375e9ac6a..f8cbdc09748 100644 --- a/cpp/include/cudf/strings/capitalize.hpp +++ b/cpp/include/cudf/strings/capitalize.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -21,6 +21,7 @@ #include #include +#include namespace cudf { namespace strings { @@ -60,9 +61,9 @@ namespace strings { */ std::unique_ptr capitalize( strings_column_view const& input, - string_scalar const& delimiters = string_scalar("", true, cudf::get_default_stream()), - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + string_scalar const& delimiters = string_scalar("", true, cudf::get_default_stream()), + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Modifies first character of each word to upper-case and lower-cases the rest. @@ -95,7 +96,7 @@ std::unique_ptr title( strings_column_view const& input, string_character_types sequence_type = string_character_types::ALPHA, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Checks if the strings in the input column are title formatted. @@ -123,8 +124,8 @@ std::unique_ptr title( */ std::unique_ptr is_title( strings_column_view const& input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/case.hpp b/cpp/include/cudf/strings/case.hpp index 94191686a92..5403fa8db7e 100644 --- a/cpp/include/cudf/strings/case.hpp +++ b/cpp/include/cudf/strings/case.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ #include #include +#include namespace cudf { namespace strings { @@ -44,8 +45,8 @@ namespace strings { */ std::unique_ptr to_lower( strings_column_view const& strings, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Converts a column of strings to upper case. @@ -63,8 +64,8 @@ std::unique_ptr to_lower( */ std::unique_ptr to_upper( strings_column_view const& strings, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a column of strings converting lower case characters to @@ -83,8 +84,8 @@ std::unique_ptr to_upper( */ std::unique_ptr swapcase( strings_column_view const& strings, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/char_types/char_types.hpp b/cpp/include/cudf/strings/char_types/char_types.hpp index c6db5dab08a..da7a238a400 100644 --- a/cpp/include/cudf/strings/char_types/char_types.hpp +++ b/cpp/include/cudf/strings/char_types/char_types.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ #include #include +#include namespace cudf { namespace strings { @@ -67,7 +68,7 @@ std::unique_ptr all_characters_of_type( string_character_types types, string_character_types verify_types = string_character_types::ALL_TYPES, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Filter specific character types from a column of strings. @@ -114,7 +115,7 @@ std::unique_ptr filter_characters_of_type( string_scalar const& replacement = string_scalar(""), string_character_types types_to_keep = string_character_types::ALL_TYPES, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/combine.hpp b/cpp/include/cudf/strings/combine.hpp index 568e8ac50ec..8cc735831b8 100644 --- a/cpp/include/cudf/strings/combine.hpp +++ b/cpp/include/cudf/strings/combine.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -22,6 +22,7 @@ #include #include +#include namespace cudf { namespace strings { @@ -77,10 +78,10 @@ enum class output_if_empty_list { */ std::unique_ptr join_strings( strings_column_view const& input, - string_scalar const& separator = string_scalar(""), - string_scalar const& narep = string_scalar("", false), - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + string_scalar const& separator = string_scalar(""), + string_scalar const& narep = string_scalar("", false), + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Concatenates a list of strings columns using separators for each row @@ -148,7 +149,7 @@ std::unique_ptr concatenate( string_scalar const& col_narep = string_scalar("", false), separator_on_nulls separate_nulls = separator_on_nulls::YES, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Row-wise concatenates the given list of strings columns and @@ -199,11 +200,11 @@ std::unique_ptr concatenate( */ std::unique_ptr concatenate( table_view const& strings_columns, - string_scalar const& separator = string_scalar(""), - string_scalar const& narep = string_scalar("", false), - separator_on_nulls separate_nulls = separator_on_nulls::YES, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + string_scalar const& separator = string_scalar(""), + string_scalar const& narep = string_scalar("", false), + separator_on_nulls separate_nulls = separator_on_nulls::YES, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); 
/** * @brief Given a lists column of strings (each row is a list of strings), concatenates the strings @@ -270,7 +271,7 @@ std::unique_ptr join_list_elements( separator_on_nulls separate_nulls = separator_on_nulls::YES, output_if_empty_list empty_list_policy = output_if_empty_list::EMPTY_STRING, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Given a lists column of strings (each row is a list of strings), concatenates the strings @@ -329,7 +330,7 @@ std::unique_ptr join_list_elements( separator_on_nulls separate_nulls = separator_on_nulls::YES, output_if_empty_list empty_list_policy = output_if_empty_list::EMPTY_STRING, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/contains.hpp b/cpp/include/cudf/strings/contains.hpp index 341c146df92..f79a0f19e9c 100644 --- a/cpp/include/cudf/strings/contains.hpp +++ b/cpp/include/cudf/strings/contains.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -21,6 +21,7 @@ #include #include +#include namespace cudf { namespace strings { @@ -59,8 +60,8 @@ struct regex_program; std::unique_ptr contains_re( strings_column_view const& input, regex_program const& prog, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a boolean column identifying rows which @@ -87,8 +88,8 @@ std::unique_ptr contains_re( std::unique_ptr matches_re( strings_column_view const& input, regex_program const& prog, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns the number of times the given regex_program's pattern @@ -115,8 +116,8 @@ std::unique_ptr matches_re( std::unique_ptr count_re( strings_column_view const& input, regex_program const& prog, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a boolean column identifying rows which @@ -163,7 +164,7 @@ std::unique_ptr like( string_scalar const& pattern, string_scalar const& escape_character = string_scalar(""), rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a boolean column identifying rows which @@ -204,7 +205,7 @@ std::unique_ptr like( strings_column_view 
const& patterns, string_scalar const& escape_character = string_scalar(""), rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/convert/convert_booleans.hpp b/cpp/include/cudf/strings/convert/convert_booleans.hpp index 9e9f25e800a..9c922361914 100644 --- a/cpp/include/cudf/strings/convert/convert_booleans.hpp +++ b/cpp/include/cudf/strings/convert/convert_booleans.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #include #include +#include namespace cudf { namespace strings { @@ -44,8 +45,8 @@ namespace strings { std::unique_ptr to_booleans( strings_column_view const& input, string_scalar const& true_string, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a new strings column converting the boolean values from the @@ -66,8 +67,8 @@ std::unique_ptr from_booleans( column_view const& booleans, string_scalar const& true_string, string_scalar const& false_string, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group } // namespace strings diff --git 
a/cpp/include/cudf/strings/convert/convert_datetime.hpp b/cpp/include/cudf/strings/convert/convert_datetime.hpp index 81cce14b53b..b89384d718b 100644 --- a/cpp/include/cudf/strings/convert/convert_datetime.hpp +++ b/cpp/include/cudf/strings/convert/convert_datetime.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ #include #include +#include #include #include @@ -88,8 +89,8 @@ std::unique_ptr to_timestamps( strings_column_view const& input, data_type timestamp_type, std::string_view format, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Verifies the given strings column can be parsed to timestamps using the provided format @@ -135,8 +136,8 @@ std::unique_ptr to_timestamps( std::unique_ptr is_timestamp( strings_column_view const& input, std::string_view format, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a new strings column converting a timestamp column into @@ -246,11 +247,11 @@ std::unique_ptr is_timestamp( */ std::unique_ptr from_timestamps( column_view const& timestamps, - std::string_view format = "%Y-%m-%dT%H:%M:%SZ", - strings_column_view const& names = strings_column_view(column_view{ + std::string_view format = "%Y-%m-%dT%H:%M:%SZ", + strings_column_view const& names = strings_column_view(column_view{ data_type{type_id::STRING}, 0, 
nullptr, nullptr, 0}), - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/convert/convert_durations.hpp b/cpp/include/cudf/strings/convert/convert_durations.hpp index a1f4e4ead1d..2db719a4f1f 100644 --- a/cpp/include/cudf/strings/convert/convert_durations.hpp +++ b/cpp/include/cudf/strings/convert/convert_durations.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ #include #include +#include namespace cudf { namespace strings { @@ -76,8 +77,8 @@ std::unique_ptr to_durations( strings_column_view const& input, data_type duration_type, std::string_view format, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a new strings column converting a duration column into @@ -126,9 +127,9 @@ std::unique_ptr to_durations( */ std::unique_ptr from_durations( column_view const& durations, - std::string_view format = "%D days %H:%M:%S", - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + std::string_view format = "%D days %H:%M:%S", + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group 
} // namespace strings diff --git a/cpp/include/cudf/strings/convert/convert_fixed_point.hpp b/cpp/include/cudf/strings/convert/convert_fixed_point.hpp index 8f37715967a..9911bea1948 100644 --- a/cpp/include/cudf/strings/convert/convert_fixed_point.hpp +++ b/cpp/include/cudf/strings/convert/convert_fixed_point.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ #include #include +#include namespace cudf { namespace strings { @@ -62,8 +63,8 @@ namespace strings { std::unique_ptr to_fixed_point( strings_column_view const& input, data_type output_type, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a new strings column converting the fixed-point values @@ -92,8 +93,8 @@ std::unique_ptr to_fixed_point( */ std::unique_ptr from_fixed_point( column_view const& input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a boolean column identifying strings in which all @@ -123,9 +124,9 @@ std::unique_ptr from_fixed_point( */ std::unique_ptr is_fixed_point( strings_column_view const& input, - data_type decimal_type = data_type{type_id::DECIMAL64}, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + data_type decimal_type = data_type{type_id::DECIMAL64}, + 
rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/convert/convert_floats.hpp b/cpp/include/cudf/strings/convert/convert_floats.hpp index a35cb68ef4e..feb5b528686 100644 --- a/cpp/include/cudf/strings/convert/convert_floats.hpp +++ b/cpp/include/cudf/strings/convert/convert_floats.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ #include #include +#include namespace cudf { namespace strings { @@ -48,8 +49,8 @@ namespace strings { std::unique_ptr to_floats( strings_column_view const& strings, data_type output_type, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a new strings column converting the float values from the @@ -71,8 +72,8 @@ std::unique_ptr to_floats( */ std::unique_ptr from_floats( column_view const& floats, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a boolean column identifying strings in which all @@ -97,8 +98,8 @@ std::unique_ptr from_floats( */ std::unique_ptr is_float( strings_column_view const& input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + 
rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/convert/convert_integers.hpp b/cpp/include/cudf/strings/convert/convert_integers.hpp index 74ec5d315a2..82696811fdc 100644 --- a/cpp/include/cudf/strings/convert/convert_integers.hpp +++ b/cpp/include/cudf/strings/convert/convert_integers.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ #include #include +#include namespace cudf { namespace strings { @@ -55,8 +56,8 @@ namespace strings { std::unique_ptr to_integers( strings_column_view const& input, data_type output_type, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a new strings column converting the integer values from the @@ -76,8 +77,8 @@ std::unique_ptr to_integers( */ std::unique_ptr from_integers( column_view const& integers, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a boolean column identifying strings in which all @@ -105,8 +106,8 @@ std::unique_ptr from_integers( */ std::unique_ptr is_integer( strings_column_view const& input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = 
rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a boolean column identifying strings in which all @@ -139,8 +140,8 @@ std::unique_ptr is_integer( std::unique_ptr is_integer( strings_column_view const& input, data_type int_type, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a new integer numeric column parsing hexadecimal values from the @@ -169,8 +170,8 @@ std::unique_ptr is_integer( std::unique_ptr hex_to_integers( strings_column_view const& input, data_type output_type, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a boolean column identifying strings in which all @@ -196,8 +197,8 @@ std::unique_ptr hex_to_integers( */ std::unique_ptr is_hex( strings_column_view const& input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a new strings column converting integer columns to hexadecimal @@ -229,8 +230,8 @@ std::unique_ptr is_hex( */ std::unique_ptr integers_to_hex( column_view const& input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream 
= cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/convert/convert_ipv4.hpp b/cpp/include/cudf/strings/convert/convert_ipv4.hpp index 25ad7b86748..64f8a412ce9 100644 --- a/cpp/include/cudf/strings/convert/convert_ipv4.hpp +++ b/cpp/include/cudf/strings/convert/convert_ipv4.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ #include #include +#include namespace cudf { namespace strings { @@ -55,8 +56,8 @@ namespace strings { */ std::unique_ptr ipv4_to_integers( strings_column_view const& input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Converts integers into IPv4 addresses as strings. 
@@ -80,8 +81,8 @@ std::unique_ptr ipv4_to_integers( */ std::unique_ptr integers_to_ipv4( column_view const& integers, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a boolean column identifying strings in which all @@ -107,8 +108,8 @@ std::unique_ptr integers_to_ipv4( */ std::unique_ptr is_ipv4( strings_column_view const& input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/convert/convert_lists.hpp b/cpp/include/cudf/strings/convert/convert_lists.hpp index dedf4e95138..a88bbe99492 100644 --- a/cpp/include/cudf/strings/convert/convert_lists.hpp +++ b/cpp/include/cudf/strings/convert/convert_lists.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -21,6 +21,7 @@ #include #include +#include namespace cudf { namespace strings { @@ -63,7 +64,7 @@ std::unique_ptr format_list_column( strings_column_view const& separators = strings_column_view(column_view{ data_type{type_id::STRING}, 0, nullptr, nullptr, 0}), rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/convert/convert_urls.hpp b/cpp/include/cudf/strings/convert/convert_urls.hpp index 902835081af..30988d2ff0a 100644 --- a/cpp/include/cudf/strings/convert/convert_urls.hpp +++ b/cpp/include/cudf/strings/convert/convert_urls.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ #include #include +#include namespace cudf { namespace strings { @@ -46,8 +47,8 @@ namespace strings { */ std::unique_ptr url_encode( strings_column_view const& input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Encodes each string using URL encoding. 
@@ -69,8 +70,8 @@ std::unique_ptr url_encode( */ std::unique_ptr url_decode( strings_column_view const& input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/detail/combine.hpp b/cpp/include/cudf/strings/detail/combine.hpp index 3b8ed0f4e0d..25214055787 100644 --- a/cpp/include/cudf/strings/detail/combine.hpp +++ b/cpp/include/cudf/strings/detail/combine.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,6 +23,7 @@ #include #include +#include namespace cudf { namespace strings { @@ -30,7 +31,7 @@ namespace detail { /** * @copydoc concatenate(table_view const&,string_scalar const&,string_scalar - * const&,rmm::mr::device_memory_resource*) + * const&,rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ @@ -39,11 +40,11 @@ std::unique_ptr concatenate(table_view const& strings_columns, string_scalar const& narep, separator_on_nulls separate_nulls, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc join_strings(table_view const&,string_scalar const&,string_scalar - * const&,rmm::mr::device_memory_resource*) + * const&,rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ @@ -51,11 +52,11 @@ std::unique_ptr join_strings(strings_column_view const& strings, string_scalar const& separator, string_scalar const& narep, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc join_list_elements(table_view const&,string_scalar const&,string_scalar - * const&,separator_on_nulls,output_if_empty_list,rmm::mr::device_memory_resource*) + * const&,separator_on_nulls,output_if_empty_list,rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ @@ -65,7 +66,7 @@ std::unique_ptr join_list_elements(lists_column_view const& lists_string separator_on_nulls separate_nulls, output_if_empty_list empty_list_policy, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace strings diff --git a/cpp/include/cudf/strings/detail/concatenate.hpp b/cpp/include/cudf/strings/detail/concatenate.hpp index 511e240886a..b5dd5b9516a 100644 --- a/cpp/include/cudf/strings/detail/concatenate.hpp +++ b/cpp/include/cudf/strings/detail/concatenate.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -22,6 +22,7 @@ #include #include +#include namespace cudf { namespace strings { @@ -44,7 +45,7 @@ namespace detail { */ std::unique_ptr concatenate(host_span columns, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace strings diff --git a/cpp/include/cudf/strings/detail/convert/string_to_float.cuh b/cpp/include/cudf/strings/detail/convert/string_to_float.cuh index ab934750f9e..bbf56cf1446 100644 --- a/cpp/include/cudf/strings/detail/convert/string_to_float.cuh +++ b/cpp/include/cudf/strings/detail/convert/string_to_float.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -102,6 +102,9 @@ __device__ inline double stod(string_view const& d_str) ch = *in_ptr++; if (ch < '0' || ch > '9') break; exp_ten = (exp_ten * 10) + (int)(ch - '0'); + // Prevent integer overflow in exp_ten. 100,000,000 is the largest + // power of ten that can be multiplied by 10 without overflow. + if (exp_ten >= 100'000'000) { break; } } } } diff --git a/cpp/include/cudf/strings/detail/converters.hpp b/cpp/include/cudf/strings/detail/converters.hpp index 3337815342c..d212239264b 100644 --- a/cpp/include/cudf/strings/detail/converters.hpp +++ b/cpp/include/cudf/strings/detail/converters.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -20,63 +20,64 @@ #include #include +#include namespace cudf { namespace strings { namespace detail { /** - * @copydoc to_integers(strings_column_view const&,data_type,rmm::mr::device_memory_resource*) + * @copydoc to_integers(strings_column_view const&,data_type,rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr to_integers(strings_column_view const& strings, data_type output_type, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** - * @copydoc from_integers(strings_column_view const&,rmm::mr::device_memory_resource*) + * @copydoc from_integers(strings_column_view const&,rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr from_integers(column_view const& integers, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** - * @copydoc to_floats(strings_column_view const&,data_type,rmm::mr::device_memory_resource*) + * @copydoc to_floats(strings_column_view const&,data_type,rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr to_floats(strings_column_view const& strings, data_type output_type, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** - * @copydoc from_floats(strings_column_view const&,rmm::mr::device_memory_resource*) + * @copydoc from_floats(strings_column_view const&,rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ std::unique_ptr from_floats(column_view const& floats, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc to_booleans(strings_column_view const&,string_scalar - * const&,rmm::mr::device_memory_resource*) + * const&,rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr to_booleans(strings_column_view const& strings, string_scalar const& true_string, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc from_booleans(strings_column_view const&,string_scalar const&,string_scalar - * const&,rmm::mr::device_memory_resource*) + * const&,rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ @@ -84,11 +85,11 @@ std::unique_ptr from_booleans(column_view const& booleans, string_scalar const& true_string, string_scalar const& false_string, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc to_timestamps(strings_column_view const&,data_type,std::string_view, - * rmm::mr::device_memory_resource*) + * rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ @@ -96,11 +97,11 @@ std::unique_ptr to_timestamps(strings_column_view const& strings, data_type timestamp_type, std::string_view format, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc from_timestamps(strings_column_view const&,std::string_view, - * strings_column_view const&,rmm::mr::device_memory_resource*) + * strings_column_view const&,rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ @@ -108,11 +109,11 @@ std::unique_ptr from_timestamps(column_view const& timestamps, std::string_view format, strings_column_view const& names, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc to_durations(strings_column_view const&,data_type,std::string_view, - * rmm::mr::device_memory_resource*) + * rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ @@ -120,37 +121,37 @@ std::unique_ptr to_durations(strings_column_view const& strings, data_type duration_type, std::string_view format, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc from_durations(strings_column_view const&,std::string_view. - * rmm::mr::device_memory_resource*) + * rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr from_durations(column_view const& durations, std::string_view format, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** - * @copydoc to_fixed_point(strings_column_view const&,data_type,rmm::mr::device_memory_resource*) + * @copydoc to_fixed_point(strings_column_view const&,data_type,rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr to_fixed_point(strings_column_view const& strings, data_type output_type, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** - * @copydoc from_fixed_point(strings_column_view const&,rmm::mr::device_memory_resource*) + * @copydoc from_fixed_point(strings_column_view const&,rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ std::unique_ptr from_fixed_point(column_view const& integers, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace strings diff --git a/cpp/include/cudf/strings/detail/copy_if_else.cuh b/cpp/include/cudf/strings/detail/copy_if_else.cuh index 08ba99e90d8..4db7651330b 100644 --- a/cpp/include/cudf/strings/detail/copy_if_else.cuh +++ b/cpp/include/cudf/strings/detail/copy_if_else.cuh @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -59,7 +60,7 @@ std::unique_ptr copy_if_else(StringIterLeft lhs_begin, StringIterRight rhs_begin, Filter filter_fn, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto strings_count = std::distance(lhs_begin, lhs_end); if (strings_count == 0) { return make_empty_column(type_id::STRING); } diff --git a/cpp/include/cudf/strings/detail/copy_range.hpp b/cpp/include/cudf/strings/detail/copy_range.hpp index e18f1fdc5ad..192c5b833c6 100644 --- a/cpp/include/cudf/strings/detail/copy_range.hpp +++ b/cpp/include/cudf/strings/detail/copy_range.hpp @@ -19,6 +19,7 @@ #include #include +#include namespace cudf { namespace strings { @@ -53,7 +54,7 @@ std::unique_ptr copy_range(strings_column_view const& source, size_type source_end, size_type target_begin, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace strings diff --git a/cpp/include/cudf/strings/detail/copying.hpp b/cpp/include/cudf/strings/detail/copying.hpp index 7e82ad4c679..240cac17188 100644 --- a/cpp/include/cudf/strings/detail/copying.hpp +++ b/cpp/include/cudf/strings/detail/copying.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ #include #include +#include namespace cudf { namespace strings { @@ -53,7 +54,7 @@ std::unique_ptr copy_slice(strings_column_view const& strings, size_type start, size_type end, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Returns a new strings column created by shifting the rows by a specified offset. @@ -80,7 +81,7 @@ std::unique_ptr shift(strings_column_view const& input, size_type offset, scalar const& fill_value, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace strings diff --git a/cpp/include/cudf/strings/detail/fill.hpp b/cpp/include/cudf/strings/detail/fill.hpp index 43e3f6198f3..c5d005fbf75 100644 --- a/cpp/include/cudf/strings/detail/fill.hpp +++ b/cpp/include/cudf/strings/detail/fill.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -21,6 +21,7 @@ #include #include +#include namespace cudf { namespace strings { @@ -47,7 +48,7 @@ std::unique_ptr fill(strings_column_view const& strings, size_type end, string_scalar const& value, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace strings diff --git a/cpp/include/cudf/strings/detail/gather.cuh b/cpp/include/cudf/strings/detail/gather.cuh index 06d959acffb..fcd74bebfe8 100644 --- a/cpp/include/cudf/strings/detail/gather.cuh +++ b/cpp/include/cudf/strings/detail/gather.cuh @@ -19,22 +19,19 @@ #include #include #include -#include #include +#include #include #include -#include #include #include +#include #include -#include #include #include #include -#include -#include #include namespace cudf { @@ -225,9 +222,9 @@ rmm::device_uvector gather_chars(StringIterator strings_begin, MapIterator map_begin, MapIterator map_end, cudf::detail::input_offsetalator const offsets, - size_type chars_bytes, + int64_t chars_bytes, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto const output_count = std::distance(map_begin, map_end); if (output_count == 0) return rmm::device_uvector(0, stream, mr); @@ -238,9 +235,9 @@ rmm::device_uvector gather_chars(StringIterator strings_begin, constexpr int warps_per_threadblock = 4; // String parallel strategy will be used if average string length is above this threshold. // Otherwise, char parallel strategy will be used. 
- constexpr size_type string_parallel_threshold = 32; + constexpr int64_t string_parallel_threshold = 32; - size_type average_string_length = chars_bytes / output_count; + int64_t const average_string_length = chars_bytes / output_count; if (average_string_length > string_parallel_threshold) { constexpr int max_threadblocks = 65536; @@ -290,7 +287,7 @@ std::unique_ptr gather(strings_column_view const& strings, MapIterator begin, MapIterator end, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto const output_count = std::distance(begin, end); if (output_count == 0) return make_empty_column(type_id::STRING); @@ -301,7 +298,7 @@ std::unique_ptr gather(strings_column_view const& strings, strings.is_empty() ? make_empty_column(type_id::INT32)->view() : strings.offsets(), strings.offset()); - auto offsets_itr = thrust::make_transform_iterator( + auto sizes_itr = thrust::make_transform_iterator( begin, cuda::proclaim_return_type( [d_strings = *d_strings, d_in_offsets] __device__(size_type idx) { @@ -309,8 +306,8 @@ std::unique_ptr gather(strings_column_view const& strings, if (not d_strings.is_valid(idx)) { return 0; } return static_cast(d_in_offsets[idx + 1] - d_in_offsets[idx]); })); - auto [out_offsets_column, total_bytes] = - cudf::detail::make_offsets_child_column(offsets_itr, offsets_itr + output_count, stream, mr); + auto [out_offsets_column, total_bytes] = cudf::strings::detail::make_offsets_child_column( + sizes_itr, sizes_itr + output_count, stream, mr); // build chars column auto const offsets_view = @@ -354,7 +351,7 @@ std::unique_ptr gather(strings_column_view const& strings, MapIterator end, bool nullify_out_of_bounds, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { if (nullify_out_of_bounds) return gather(strings, begin, end, stream, mr); return gather(strings, begin, end, stream, mr); diff --git 
a/cpp/include/cudf/strings/detail/merge.cuh b/cpp/include/cudf/strings/detail/merge.cuh deleted file mode 100644 index f05e957783f..00000000000 --- a/cpp/include/cudf/strings/detail/merge.cuh +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include - -namespace cudf { -namespace strings { -namespace detail { -/** - * @brief Merges two strings columns. - * - * Caller must set the validity mask in the output column. - * - * @tparam row_order_iterator This must be an iterator for type thrust::tuple. - * - * @param lhs First column. - * @param rhs Second column. - * @param row_order Indexes for each column. - * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New strings column. 
- */ -template -std::unique_ptr merge(strings_column_view const& lhs, - strings_column_view const& rhs, - row_order_iterator begin, - row_order_iterator end, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - using cudf::detail::side; - size_type strings_count = static_cast(std::distance(begin, end)); - if (strings_count == 0) return make_empty_column(type_id::STRING); - - auto lhs_column = column_device_view::create(lhs.parent(), stream); - auto d_lhs = *lhs_column; - auto rhs_column = column_device_view::create(rhs.parent(), stream); - auto d_rhs = *rhs_column; - - // caller will set the null mask - rmm::device_buffer null_mask{0, stream, mr}; - size_type null_count = lhs.null_count() + rhs.null_count(); - if (null_count > 0) - null_mask = cudf::detail::create_null_mask(strings_count, mask_state::ALL_VALID, stream, mr); - - // build offsets column - auto offsets_transformer = - cuda::proclaim_return_type([d_lhs, d_rhs] __device__(auto index_pair) { - auto const [side, index] = index_pair; - if (side == side::LEFT ? d_lhs.is_null(index) : d_rhs.is_null(index)) return 0; - auto d_str = - side == side::LEFT ? d_lhs.element(index) : d_rhs.element(index); - return d_str.size_bytes(); - }); - auto offsets_transformer_itr = thrust::make_transform_iterator(begin, offsets_transformer); - auto [offsets_column, bytes] = cudf::detail::make_offsets_child_column( - offsets_transformer_itr, offsets_transformer_itr + strings_count, stream, mr); - auto d_offsets = offsets_column->view().template data(); - - // create the chars column - rmm::device_uvector chars(bytes, stream, mr); - auto d_chars = chars.data(); - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - strings_count, - [d_lhs, d_rhs, begin, d_offsets, d_chars] __device__(size_type idx) { - auto const [side, index] = begin[idx]; - if (side == side::LEFT ? d_lhs.is_null(index) : d_rhs.is_null(index)) return; - auto d_str = side == side::LEFT ? 
d_lhs.element(index) - : d_rhs.element(index); - memcpy(d_chars + d_offsets[idx], d_str.data(), d_str.size_bytes()); - }); - - return make_strings_column( - strings_count, std::move(offsets_column), chars.release(), null_count, std::move(null_mask)); -} - -} // namespace detail -} // namespace strings -} // namespace cudf diff --git a/cpp/include/cudf/strings/detail/merge.hpp b/cpp/include/cudf/strings/detail/merge.hpp new file mode 100644 index 00000000000..35fd9c0593d --- /dev/null +++ b/cpp/include/cudf/strings/detail/merge.hpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include +#include +#include + +#include + +namespace cudf ::strings ::detail { +/** + * @brief Merges two strings columns + * + * @param lhs First column + * @param rhs Second column + * @param row_order Indices for each column + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column + */ +std::unique_ptr merge(strings_column_view const& lhs, + strings_column_view const& rhs, + cudf::detail::index_vector const& row_order, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + +} // namespace cudf::strings::detail diff --git a/cpp/include/cudf/strings/detail/replace.hpp b/cpp/include/cudf/strings/detail/replace.hpp index 28027291b28..aad89beb47e 100644 --- a/cpp/include/cudf/strings/detail/replace.hpp +++ b/cpp/include/cudf/strings/detail/replace.hpp @@ -21,6 +21,7 @@ #include #include +#include namespace cudf { namespace strings { @@ -28,24 +29,24 @@ namespace detail { /** * @copydoc cudf::strings::replace(strings_column_view const&, string_scalar const&, - * string_scalar const&, int32_t, rmm::cuda_stream_view, rmm::mr::device_memory_resource*) + * string_scalar const&, int32_t, rmm::cuda_stream_view, rmm::device_async_resource_ref) */ std::unique_ptr replace(strings_column_view const& strings, string_scalar const& target, string_scalar const& repl, int32_t maxrepl, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::strings::replace(strings_column_view const&, strings_column_view const&, - * strings_column_view const&, rmm::cuda_stream_view, rmm::mr::device_memory_resource*) + * strings_column_view const&, rmm::cuda_stream_view, rmm::device_async_resource_ref) */ std::unique_ptr replace(strings_column_view const& strings, strings_column_view const& targets, strings_column_view const& repls, 
rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Replaces any null string entries with the given string. @@ -68,18 +69,36 @@ std::unique_ptr replace(strings_column_view const& strings, std::unique_ptr replace_nulls(strings_column_view const& strings, string_scalar const& repl, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::strings::replace_slice(strings_column_view const&, string_scalar const&, - * size_type, size_type, rmm::cuda_stream_view, rmm::mr::device_memory_resource*) + * size_type, size_type, rmm::cuda_stream_view, rmm::device_async_resource_ref) */ std::unique_ptr replace_slice(strings_column_view const& strings, string_scalar const& repl, size_type start, size_type stop, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); + +/** + * @brief Return a copy of `input` replacing any `values_to_replace[i]` + * found with `replacement_values[i]` + * + * @param input The column to find and replace values + * @param values_to_replace The values to find + * @param replacement_values The corresponding replacement values + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return Copy of `input` with specified values replaced + */ +std::unique_ptr find_and_replace_all( + cudf::strings_column_view const& input, + cudf::strings_column_view const& values_to_replace, + cudf::strings_column_view const& replacement_values, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); } // namespace detail } // namespace strings diff --git a/cpp/include/cudf/strings/detail/scan.hpp b/cpp/include/cudf/strings/detail/scan.hpp index 611e32e28cd..f32afa64a72 100644 --- a/cpp/include/cudf/strings/detail/scan.hpp +++ 
b/cpp/include/cudf/strings/detail/scan.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ #include #include +#include namespace cudf { namespace strings { @@ -40,7 +41,7 @@ template std::unique_ptr scan_inclusive(column_view const& input, bitmask_type const* mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace strings diff --git a/cpp/include/cudf/strings/detail/scatter.cuh b/cpp/include/cudf/strings/detail/scatter.cuh index 8b8c11dcd5c..87f0e7ae47c 100644 --- a/cpp/include/cudf/strings/detail/scatter.cuh +++ b/cpp/include/cudf/strings/detail/scatter.cuh @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -63,7 +64,7 @@ std::unique_ptr scatter(SourceIterator begin, MapIterator scatter_map, strings_column_view const& target, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { if (target.is_empty()) return make_empty_column(type_id::STRING); diff --git a/cpp/include/cudf/strings/detail/strings_children.cuh b/cpp/include/cudf/strings/detail/strings_children.cuh index 49c4be88ca5..f105a6dc546 100644 --- a/cpp/include/cudf/strings/detail/strings_children.cuh +++ b/cpp/include/cudf/strings/detail/strings_children.cuh @@ -17,12 +17,15 @@ #include #include +#include #include +#include #include #include #include #include +#include #include #include @@ -33,94 +36,6 @@ namespace cudf { namespace strings { namespace detail { -/** - * @brief Creates child offsets and chars data by applying the template function that - * can be used for computing the output size of each string as well as create the output - * - * @throws std::overflow_error if the output strings column exceeds the column 
size limit - * - * @tparam SizeAndExecuteFunction Function must accept an index and return a size. - * It must also have members d_offsets and d_chars which are set to - * memory containing the offsets and chars columns during write. - * - * @param size_and_exec_fn This is called twice. Once for the output size of each string - * and once again to fill in the memory pointed to by d_chars. - * @param exec_size Number of rows for executing the `size_and_exec_fn` function. - * @param strings_count Number of strings. - * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource used to allocate the returned columns' device memory. - * @return Offsets child column and chars data for a strings column - */ -template -auto make_strings_children(SizeAndExecuteFunction size_and_exec_fn, - size_type exec_size, - size_type strings_count, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - auto offsets_column = make_numeric_column( - data_type{type_to_id()}, strings_count + 1, mask_state::UNALLOCATED, stream, mr); - auto offsets_view = offsets_column->mutable_view(); - auto d_offsets = offsets_view.template data(); - size_and_exec_fn.d_offsets = d_offsets; - - // This is called twice -- once for offsets and once for chars. - // Reducing the number of places size_and_exec_fn is inlined speeds up compile time. 
- auto for_each_fn = [exec_size, stream](SizeAndExecuteFunction& size_and_exec_fn) { - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - exec_size, - size_and_exec_fn); - }; - - // Compute the output sizes - for_each_fn(size_and_exec_fn); - - // Convert the sizes to offsets - auto const bytes = - cudf::detail::sizes_to_offsets(d_offsets, d_offsets + strings_count + 1, d_offsets, stream); - CUDF_EXPECTS(bytes <= std::numeric_limits::max(), - "Size of output exceeds the column size limit", - std::overflow_error); - - // Now build the chars column - rmm::device_uvector chars(bytes, stream, mr); - - // Execute the function fn again to fill the chars column. - // Note that if the output chars column has zero size, the function fn should not be called to - // avoid accidentally overwriting the offsets. - if (bytes > 0) { - size_and_exec_fn.d_chars = chars.data(); - for_each_fn(size_and_exec_fn); - } - - return std::pair(std::move(offsets_column), std::move(chars)); -} - -/** - * @brief Creates child offsets and chars columns by applying the template function that - * can be used for computing the output size of each string as well as create the output. - * - * @tparam SizeAndExecuteFunction Function must accept an index and return a size. - * It must also have members d_offsets and d_chars which are set to - * memory containing the offsets and chars columns during write. - * - * @param size_and_exec_fn This is called twice. Once for the output size of each string - * and once again to fill in the memory pointed to by d_chars. - * @param strings_count Number of strings. - * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource used to allocate the returned columns' device memory. 
- * @return offsets child column and chars child column for a strings column - */ -template -auto make_strings_children(SizeAndExecuteFunction size_and_exec_fn, - size_type strings_count, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - return make_strings_children(size_and_exec_fn, strings_count, strings_count, stream, mr); -} - /** * @brief Create an offsets column to be a child of a compound column * @@ -142,7 +57,7 @@ std::pair, int64_t> make_offsets_child_column( InputIterator begin, InputIterator end, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto constexpr size_type_max = static_cast(std::numeric_limits::max()); auto const lcount = static_cast(std::distance(begin, end)); @@ -163,22 +78,170 @@ std::pair, int64_t> make_offsets_child_column( }); auto input_itr = cudf::detail::make_counting_transform_iterator(0, map_fn); // Use the sizes-to-offsets iterator to compute the total number of elements - auto const total_elements = + auto const total_bytes = cudf::detail::sizes_to_offsets(input_itr, input_itr + strings_count + 1, d_offsets, stream); - // TODO: replace exception with if-statement when enabling creating INT64 offsets - CUDF_EXPECTS(total_elements <= size_type_max, - "Size of output exceeds the character size limit", + auto const threshold = get_offset64_threshold(); + CUDF_EXPECTS(is_large_strings_enabled() || (total_bytes < threshold), + "Size of output exceeds the column size limit", std::overflow_error); - // if (total_elements >= get_offset64_threshold()) { - // // recompute as int64 offsets when above the threshold - // offsets_column = make_numeric_column( - // data_type{type_id::INT64}, strings_count + 1, mask_state::UNALLOCATED, stream, mr); - // auto d_offsets64 = offsets_column->mutable_view().template data(); - // sizes_to_offsets(input_itr, input_itr + strings_count + 1, d_offsets64, stream); - // } - - return std::pair(std::move(offsets_column), 
total_elements); + if (total_bytes >= get_offset64_threshold()) { + // recompute as int64 offsets when above the threshold + offsets_column = make_numeric_column( + data_type{type_id::INT64}, strings_count + 1, mask_state::UNALLOCATED, stream, mr); + auto d_offsets64 = offsets_column->mutable_view().template data(); + cudf::detail::sizes_to_offsets(input_itr, input_itr + strings_count + 1, d_offsets64, stream); + } + + return std::pair(std::move(offsets_column), total_bytes); +} + +/** + * @brief Kernel used by make_strings_children for calling the given functor + * + * @tparam SizeAndExecuteFunction Functor type to call in each thread + * + * @param fn Functor to call in each thread + * @param exec_size Total number of threads to be processed by this kernel + */ +template +CUDF_KERNEL void strings_children_kernel(SizeAndExecuteFunction fn, size_type exec_size) +{ + auto tid = cudf::detail::grid_1d::global_thread_id(); + if (tid < exec_size) { fn(tid); } +} + +/** + * @brief Creates child offsets and chars data by applying the template function that + * can be used for computing the output size of each string as well as create the output + * + * The `size_and_exec_fn` is expected declare an operator() function with a size_type parameter + * and 3 member variables: + * - `d_sizes`: output size in bytes of each output row for the 1st pass call + * - `d_chars`: output buffer for new string data for the 2nd pass call + * - `d_offsets`: used for addressing the specific output row data in `d_chars` + * + * The 1st pass call computes the output sizes and is identified by `d_chars==nullptr`. + * Null rows should be set with an output size of 0. 
+ * + * @code{.cpp} + * struct size_and_exec_fn { + * size_type* d_sizes; + * char* d_chars; + * input_offsetalator d_offsets; + * + * __device__ void operator()(size_type thread_idx) + * { + * // functor-specific logic to resolve out_idx from thread_idx + * if( !d_chars ) { + * d_sizes[out_idx] = output_size; + * } else { + * auto d_output = d_chars + d_offsets[out_idx]; + * // write characters to d_output + * } + * } + * }; + * @endcode + * + * @tparam SizeAndExecuteFunction Functor type with an operator() function accepting + * an index parameter and three member variables: `size_type* d_sizes` + * `char* d_chars`, and `input_offsetalator d_offsets`. + * + * @param size_and_exec_fn This is called twice. Once for the output size of each string + * and once again to fill in the memory pointed to by d_chars. + * @param exec_size Number of threads for executing the `size_and_exec_fn` function + * @param strings_count Number of strings + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned columns' device memory + * @return Offsets child column and chars vector for creating a strings column + */ +template +auto make_strings_children(SizeAndExecuteFunction size_and_exec_fn, + size_type exec_size, + size_type strings_count, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + // This is called twice -- once for computing sizes and once for writing chars. + // Reducing the number of places size_and_exec_fn is inlined speeds up compile time. 
+ auto for_each_fn = [exec_size, stream](SizeAndExecuteFunction& size_and_exec_fn) { + auto constexpr block_size = 256; + auto grid = cudf::detail::grid_1d{exec_size, block_size}; + strings_children_kernel<<>>(size_and_exec_fn, + exec_size); + }; + + // Compute the output sizes + auto output_sizes = rmm::device_uvector(strings_count, stream); + size_and_exec_fn.d_sizes = output_sizes.data(); + size_and_exec_fn.d_chars = nullptr; + for_each_fn(size_and_exec_fn); + + // Convert the sizes to offsets + auto [offsets_column, bytes] = cudf::strings::detail::make_offsets_child_column( + output_sizes.begin(), output_sizes.end(), stream, mr); + size_and_exec_fn.d_offsets = + cudf::detail::offsetalator_factory::make_input_iterator(offsets_column->view()); + + // Now build the chars column + rmm::device_uvector chars(bytes, stream, mr); + size_and_exec_fn.d_chars = chars.data(); + + // Execute the function fn again to fill in the chars data. + if (bytes > 0) { for_each_fn(size_and_exec_fn); } + + return std::pair(std::move(offsets_column), std::move(chars)); +} + +/** + * @brief Creates child offsets and chars columns by applying the template function that + * can be used for computing the output size of each string as well as create the output + * + * The `size_and_exec_fn` is expected declare an operator() function with a size_type parameter + * and 3 member variables: + * - `d_sizes`: output size in bytes of each output row for the 1st pass call + * - `d_chars`: output buffer for new string data for the 2nd pass call + * - `d_offsets`: used for addressing the specific output row data in `d_chars` + * + * The 1st pass call computes the output sizes and is identified by `d_chars==nullptr`. + * Null rows should be set with an output size of 0. 
+ * + * @code{.cpp} + * struct size_and_exec_fn { + * size_type* d_sizes; + * char* d_chars; + * input_offsetalator d_offsets; + * + * __device__ void operator()(size_type idx) + * { + * if( !d_chars ) { + * d_sizes[idx] = output_size; + * } else { + * auto d_output = d_chars + d_offsets[idx]; + * // write characters to d_output + * } + * } + * }; + * @endcode + * + * @tparam SizeAndExecuteFunction Functor type with an operator() function accepting + * an index parameter and three member variables: `size_type* d_sizes` + * `char* d_chars`, and `input_offsetalator d_offsets`. + * + * @param size_and_exec_fn This is called twice. Once for the output size of each string + * and once again to fill in the memory pointed to by `d_chars`. + * @param strings_count Number of strings + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned columns' device memory + * @return Offsets child column and chars vector for creating a strings column + */ +template +auto make_strings_children(SizeAndExecuteFunction size_and_exec_fn, + size_type strings_count, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + return make_strings_children(size_and_exec_fn, strings_count, strings_count, stream, mr); } } // namespace detail diff --git a/cpp/include/cudf/strings/detail/strings_column_factories.cuh b/cpp/include/cudf/strings/detail/strings_column_factories.cuh index 8e19f08a5cc..a3221038eed 100644 --- a/cpp/include/cudf/strings/detail/strings_column_factories.cuh +++ b/cpp/include/cudf/strings/detail/strings_column_factories.cuh @@ -25,6 +25,7 @@ #include #include +#include #include #include @@ -73,7 +74,7 @@ template std::unique_ptr make_strings_column(IndexPairIterator begin, IndexPairIterator end, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); size_type strings_count = thrust::distance(begin, 
end); @@ -85,9 +86,10 @@ std::unique_ptr make_strings_column(IndexPairIterator begin, return (item.first != nullptr ? static_cast(item.second) : size_type{0}); }); auto offsets_transformer_itr = thrust::make_transform_iterator(begin, offsets_transformer); - auto [offsets_column, bytes] = cudf::detail::make_offsets_child_column( + auto [offsets_column, bytes] = cudf::strings::detail::make_offsets_child_column( offsets_transformer_itr, offsets_transformer_itr + strings_count, stream, mr); - auto offsets_view = offsets_column->view(); + auto const d_offsets = + cudf::detail::offsetalator_factory::make_input_iterator(offsets_column->view()); // create null mask auto validator = [] __device__(string_index_pair const item) { return item.first != nullptr; }; @@ -97,11 +99,10 @@ std::unique_ptr make_strings_column(IndexPairIterator begin, (null_count > 0) ? std::move(new_nulls.first) : rmm::device_buffer{0, stream, mr}; // build chars column - auto chars_data = [offsets_view, bytes = bytes, begin, strings_count, null_count, stream, mr] { + auto chars_data = [d_offsets, bytes = bytes, begin, strings_count, null_count, stream, mr] { auto const avg_bytes_per_row = bytes / std::max(strings_count - null_count, 1); // use a character-parallel kernel for long string lengths if (avg_bytes_per_row > FACTORY_BYTES_PER_ROW_THRESHOLD) { - auto const d_offsets = cudf::detail::offsetalator_factory::make_input_iterator(offsets_view); auto const str_begin = thrust::make_transform_iterator( begin, cuda::proclaim_return_type([] __device__(auto ip) { return string_view{ip.first, ip.second}; @@ -120,12 +121,11 @@ std::unique_ptr make_strings_column(IndexPairIterator begin, auto d_chars = chars_data.data(); auto copy_chars = [d_chars] __device__(auto item) { string_index_pair const str = thrust::get<0>(item); - size_type const offset = thrust::get<1>(item); + int64_t const offset = thrust::get<1>(item); if (str.first != nullptr) memcpy(d_chars + offset, str.first, str.second); }; 
thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_zip_iterator( - thrust::make_tuple(begin, offsets_view.template begin())), + thrust::make_zip_iterator(thrust::make_tuple(begin, d_offsets)), strings_count, copy_chars); return chars_data; @@ -163,25 +163,19 @@ std::unique_ptr make_strings_column(CharIterator chars_begin, size_type null_count, rmm::device_buffer&& null_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); size_type strings_count = thrust::distance(offsets_begin, offsets_end) - 1; - size_type bytes = std::distance(chars_begin, chars_end) * sizeof(char); - if (strings_count == 0) return make_empty_column(type_id::STRING); + if (strings_count == 0) { return make_empty_column(type_id::STRING); } + int64_t const bytes = std::distance(chars_begin, chars_end) * sizeof(char); CUDF_EXPECTS(bytes >= 0, "invalid offsets data"); // build offsets column -- this is the number of strings + 1 - auto offsets_column = make_numeric_column( - data_type{type_to_id()}, strings_count + 1, mask_state::UNALLOCATED, stream, mr); - auto offsets_view = offsets_column->mutable_view(); - thrust::transform(rmm::exec_policy(stream), - offsets_begin, - offsets_end, - offsets_view.data(), - cuda::proclaim_return_type( - [] __device__(auto offset) { return static_cast(offset); })); + auto [offsets_column, computed_bytes] = + cudf::strings::detail::make_offsets_child_column(offsets_begin, offsets_end, stream, mr); + CUDF_EXPECTS(bytes == computed_bytes, "unexpected byte count"); // build chars column rmm::device_uvector chars_data(bytes, stream, mr); diff --git a/cpp/include/cudf/strings/detail/utilities.hpp b/cpp/include/cudf/strings/detail/utilities.hpp index 8d8065dbcaf..4467a9d0023 100644 --- a/cpp/include/cudf/strings/detail/utilities.hpp +++ b/cpp/include/cudf/strings/detail/utilities.hpp @@ -22,11 +22,30 @@ #include #include +#include namespace cudf { namespace strings { namespace detail { 
+/** + * @brief Create an offsets column to be a child of a strings column + * + * This will return the properly typed column to be filled in by the caller + * given the number of bytes to address. + * + * @param chars_bytes Number of bytes for the chars in the strings column + * @param count Number of elements for the offsets column. + * This is the number of rows in the parent strings column +1. + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return The offsets child column for a strings column + */ +std::unique_ptr create_offsets_child_column(int64_t chars_bytes, + size_type count, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + /** * @brief Creates a string_view vector from a strings column. * @@ -38,7 +57,7 @@ namespace detail { rmm::device_uvector create_string_vector_from_column( cudf::strings_column_view const strings, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Return the threshold size for a strings column to use int64 offsets @@ -52,6 +71,15 @@ rmm::device_uvector create_string_vector_from_column( */ int64_t get_offset64_threshold(); +/** + * @brief Checks if large strings is enabled + * + * This checks the setting in the environment variable LIBCUDF_LARGE_STRINGS_ENABLED. + * + * @return true if large strings are supported + */ +bool is_large_strings_enabled(); + /** * @brief Return a normalized offset value from a strings offsets column * diff --git a/cpp/include/cudf/strings/extract.hpp b/cpp/include/cudf/strings/extract.hpp index a4db1ac46da..4138e1e59d5 100644 --- a/cpp/include/cudf/strings/extract.hpp +++ b/cpp/include/cudf/strings/extract.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #include #include +#include namespace cudf { namespace strings { @@ -62,8 +63,8 @@ struct regex_program; std::unique_ptr
extract( strings_column_view const& input, regex_program const& prog, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a lists column of strings where each string column row corresponds to the @@ -98,8 +99,8 @@ std::unique_ptr
extract( std::unique_ptr extract_all_record( strings_column_view const& input, regex_program const& prog, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/find.hpp b/cpp/include/cudf/strings/find.hpp index c1aa8b294b3..c116dbc2fe1 100644 --- a/cpp/include/cudf/strings/find.hpp +++ b/cpp/include/cudf/strings/find.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #include #include +#include namespace cudf { namespace strings { @@ -55,10 +56,10 @@ namespace strings { std::unique_ptr find( strings_column_view const& input, string_scalar const& target, - size_type start = 0, - size_type stop = -1, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + size_type start = 0, + size_type stop = -1, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a column of character position values where the target @@ -86,10 +87,10 @@ std::unique_ptr find( std::unique_ptr rfind( strings_column_view const& input, string_scalar const& target, - size_type start = 0, - size_type stop = -1, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + size_type start = 0, + size_type stop = -1, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + 
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a column of character position values where the target @@ -114,9 +115,9 @@ std::unique_ptr rfind( std::unique_ptr find( strings_column_view const& input, strings_column_view const& target, - size_type start = 0, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + size_type start = 0, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a column of boolean values for each string where true indicates @@ -136,8 +137,8 @@ std::unique_ptr find( std::unique_ptr contains( strings_column_view const& input, string_scalar const& target, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a column of boolean values for each string where true indicates @@ -161,8 +162,8 @@ std::unique_ptr contains( std::unique_ptr contains( strings_column_view const& input, strings_column_view const& targets, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a column of boolean values for each string where true indicates @@ -183,8 +184,8 @@ std::unique_ptr contains( std::unique_ptr starts_with( strings_column_view const& input, string_scalar const& target, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + 
rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a column of boolean values for each string where true indicates @@ -209,8 +210,8 @@ std::unique_ptr starts_with( std::unique_ptr starts_with( strings_column_view const& input, strings_column_view const& targets, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a column of boolean values for each string where true indicates @@ -231,8 +232,8 @@ std::unique_ptr starts_with( std::unique_ptr ends_with( strings_column_view const& input, string_scalar const& target, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a column of boolean values for each string where true indicates @@ -257,8 +258,8 @@ std::unique_ptr ends_with( std::unique_ptr ends_with( strings_column_view const& input, strings_column_view const& targets, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group } // namespace strings } // namespace cudf diff --git a/cpp/include/cudf/strings/find_multiple.hpp b/cpp/include/cudf/strings/find_multiple.hpp index 06b851c5012..c2e82aa6f1a 100644 --- a/cpp/include/cudf/strings/find_multiple.hpp +++ b/cpp/include/cudf/strings/find_multiple.hpp @@ -1,5 +1,5 @@ 
/* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ #include #include +#include namespace cudf { namespace strings { @@ -57,8 +58,8 @@ namespace strings { std::unique_ptr find_multiple( strings_column_view const& input, strings_column_view const& targets, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/findall.hpp b/cpp/include/cudf/strings/findall.hpp index 379b9624dc6..abc1d28ee4c 100644 --- a/cpp/include/cudf/strings/findall.hpp +++ b/cpp/include/cudf/strings/findall.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -20,6 +20,7 @@ #include #include +#include namespace cudf { namespace strings { @@ -64,8 +65,8 @@ struct regex_program; std::unique_ptr findall( strings_column_view const& input, regex_program const& prog, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/padding.hpp b/cpp/include/cudf/strings/padding.hpp index f0cb351eeda..f1382d6ea29 100644 --- a/cpp/include/cudf/strings/padding.hpp +++ b/cpp/include/cudf/strings/padding.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ #include #include +#include namespace cudf { namespace strings { @@ -58,10 +59,10 @@ namespace strings { std::unique_ptr pad( strings_column_view const& input, size_type width, - side_type side = side_type::RIGHT, - std::string_view fill_char = " ", - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + side_type side = side_type::RIGHT, + std::string_view fill_char = " ", + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Add '0' as padding to the left of each string. 
@@ -90,8 +91,8 @@ std::unique_ptr pad( std::unique_ptr zfill( strings_column_view const& input, size_type width, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/repeat_strings.hpp b/cpp/include/cudf/strings/repeat_strings.hpp index 7dc9c33f579..cbf1edc8331 100644 --- a/cpp/include/cudf/strings/repeat_strings.hpp +++ b/cpp/include/cudf/strings/repeat_strings.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ #include #include +#include namespace cudf { namespace strings { @@ -59,8 +60,8 @@ namespace strings { std::unique_ptr repeat_string( string_scalar const& input, size_type repeat_times, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Repeat each string in the given strings column a given number of times @@ -90,8 +91,8 @@ std::unique_ptr repeat_string( std::unique_ptr repeat_strings( strings_column_view const& input, size_type repeat_times, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Repeat each string in the given strings column by 
the numbers of times given in another @@ -127,8 +128,8 @@ std::unique_ptr repeat_strings( std::unique_ptr repeat_strings( strings_column_view const& input, column_view const& repeat_times, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/replace.hpp b/cpp/include/cudf/strings/replace.hpp index 2476a41e886..9525db44b69 100644 --- a/cpp/include/cudf/strings/replace.hpp +++ b/cpp/include/cudf/strings/replace.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #include #include +#include namespace cudf { namespace strings { @@ -67,9 +68,9 @@ std::unique_ptr replace( strings_column_view const& input, string_scalar const& target, string_scalar const& repl, - cudf::size_type maxrepl = -1, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + cudf::size_type maxrepl = -1, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief This function replaces each string in the column with the provided @@ -107,11 +108,11 @@ std::unique_ptr replace( */ std::unique_ptr replace_slice( strings_column_view const& input, - string_scalar const& repl = string_scalar(""), - size_type start = 0, - size_type stop = -1, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + 
string_scalar const& repl = string_scalar(""), + size_type start = 0, + size_type stop = -1, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Replaces substrings matching a list of targets with the corresponding @@ -156,8 +157,8 @@ std::unique_ptr replace( strings_column_view const& input, strings_column_view const& targets, strings_column_view const& repls, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/replace_re.hpp b/cpp/include/cudf/strings/replace_re.hpp index 77db2882253..f61f9585144 100644 --- a/cpp/include/cudf/strings/replace_re.hpp +++ b/cpp/include/cudf/strings/replace_re.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -21,6 +21,7 @@ #include #include +#include #include @@ -59,7 +60,7 @@ std::unique_ptr replace_re( string_scalar const& replacement = string_scalar(""), std::optional max_replace_count = std::nullopt, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief For each string, replaces any character sequence matching the given patterns @@ -81,9 +82,9 @@ std::unique_ptr replace_re( strings_column_view const& input, std::vector const& patterns, strings_column_view const& replacements, - regex_flags const flags = regex_flags::DEFAULT, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + regex_flags const flags = regex_flags::DEFAULT, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief For each string, replaces any character sequence matching the given regex @@ -107,8 +108,8 @@ std::unique_ptr replace_with_backrefs( strings_column_view const& input, regex_program const& prog, std::string_view replacement, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); } // namespace strings } // namespace cudf diff --git a/cpp/include/cudf/strings/reverse.hpp b/cpp/include/cudf/strings/reverse.hpp index 4fc8fbf67c2..86656693c8b 100644 --- a/cpp/include/cudf/strings/reverse.hpp +++ b/cpp/include/cudf/strings/reverse.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ #include #include +#include namespace cudf { namespace strings { @@ -47,8 +48,8 @@ namespace strings { */ std::unique_ptr reverse( strings_column_view const& input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/slice.hpp b/cpp/include/cudf/strings/slice.hpp index f106663be9b..e2be6abd344 100644 --- a/cpp/include/cudf/strings/slice.hpp +++ b/cpp/include/cudf/strings/slice.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -20,6 +20,7 @@ #include #include +#include namespace cudf { namespace strings { @@ -64,7 +65,7 @@ std::unique_ptr slice_strings( numeric_scalar const& stop = numeric_scalar(0, false), numeric_scalar const& step = numeric_scalar(1), rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a new strings column that contains substrings of the @@ -108,8 +109,8 @@ std::unique_ptr slice_strings( strings_column_view const& input, column_view const& starts, column_view const& stops, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/split/partition.hpp b/cpp/include/cudf/strings/split/partition.hpp index 25eedf1e86b..0a837034ba1 100644 --- a/cpp/include/cudf/strings/split/partition.hpp +++ b/cpp/include/cudf/strings/split/partition.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #include #include +#include namespace cudf { namespace strings { @@ -60,9 +61,9 @@ namespace strings { */ std::unique_ptr
partition( strings_column_view const& input, - string_scalar const& delimiter = string_scalar(""), - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + string_scalar const& delimiter = string_scalar(""), + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a set of 3 columns by splitting each string using the @@ -94,9 +95,9 @@ std::unique_ptr
partition( */ std::unique_ptr
rpartition( strings_column_view const& input, - string_scalar const& delimiter = string_scalar(""), - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + string_scalar const& delimiter = string_scalar(""), + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/split/split.hpp b/cpp/include/cudf/strings/split/split.hpp index a34a59577a0..d5c44406ca7 100644 --- a/cpp/include/cudf/strings/split/split.hpp +++ b/cpp/include/cudf/strings/split/split.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #include #include +#include namespace cudf { namespace strings { @@ -54,10 +55,10 @@ namespace strings { */ std::unique_ptr
split( strings_column_view const& strings_column, - string_scalar const& delimiter = string_scalar(""), - size_type maxsplit = -1, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + string_scalar const& delimiter = string_scalar(""), + size_type maxsplit = -1, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a list of columns by splitting each string using the @@ -84,10 +85,10 @@ std::unique_ptr
split( */ std::unique_ptr
rsplit( strings_column_view const& strings_column, - string_scalar const& delimiter = string_scalar(""), - size_type maxsplit = -1, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + string_scalar const& delimiter = string_scalar(""), + size_type maxsplit = -1, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Splits individual strings elements into a list of strings. @@ -158,10 +159,10 @@ std::unique_ptr
rsplit( */ std::unique_ptr split_record( strings_column_view const& strings, - string_scalar const& delimiter = string_scalar(""), - size_type maxsplit = -1, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + string_scalar const& delimiter = string_scalar(""), + size_type maxsplit = -1, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Splits individual strings elements into a list of strings starting @@ -237,10 +238,10 @@ std::unique_ptr split_record( */ std::unique_ptr rsplit_record( strings_column_view const& strings, - string_scalar const& delimiter = string_scalar(""), - size_type maxsplit = -1, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + string_scalar const& delimiter = string_scalar(""), + size_type maxsplit = -1, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/split/split_re.hpp b/cpp/include/cudf/strings/split/split_re.hpp index f1736cb7e0c..81595fa7ed4 100644 --- a/cpp/include/cudf/strings/split/split_re.hpp +++ b/cpp/include/cudf/strings/split/split_re.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #include #include +#include namespace cudf { namespace strings { @@ -82,9 +83,9 @@ struct regex_program; std::unique_ptr
split_re( strings_column_view const& input, regex_program const& prog, - size_type maxsplit = -1, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + size_type maxsplit = -1, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Splits strings elements into a table of strings columns using a @@ -138,9 +139,9 @@ std::unique_ptr
split_re( std::unique_ptr
rsplit_re( strings_column_view const& input, regex_program const& prog, - size_type maxsplit = -1, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + size_type maxsplit = -1, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Splits strings elements into a list column of strings @@ -196,9 +197,9 @@ std::unique_ptr
rsplit_re( std::unique_ptr split_record_re( strings_column_view const& input, regex_program const& prog, - size_type maxsplit = -1, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + size_type maxsplit = -1, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Splits strings elements into a list column of strings using the given @@ -256,9 +257,9 @@ std::unique_ptr split_record_re( std::unique_ptr rsplit_record_re( strings_column_view const& input, regex_program const& prog, - size_type maxsplit = -1, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + size_type maxsplit = -1, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/strings_column_view.hpp b/cpp/include/cudf/strings/strings_column_view.hpp index 1156f0a5b73..1e9e73cef4c 100644 --- a/cpp/include/cudf/strings/strings_column_view.hpp +++ b/cpp/include/cudf/strings/strings_column_view.hpp @@ -85,28 +85,6 @@ class strings_column_view : private column_view { */ [[nodiscard]] column_view offsets() const; - /** - * @brief Return an iterator for the offsets child column. - * - * @deprecated Since 24.04 - * - * This automatically applies the offset of the parent. - * - * @return Iterator pointing to the first offset value. - */ - [[deprecated]] offset_iterator offsets_begin() const; - - /** - * @brief Return an end iterator for the offsets child column. - * - * @deprecated Since 24.04 - * - * This automatically applies the offset of the parent. - * - * @return Iterator pointing 1 past the last offset value. 
- */ - [[deprecated]] offset_iterator offsets_end() const; - /** * @brief Returns the number of bytes in the chars child column. * diff --git a/cpp/include/cudf/strings/strip.hpp b/cpp/include/cudf/strings/strip.hpp index 556d6805ac3..6fb9bbc45e6 100644 --- a/cpp/include/cudf/strings/strip.hpp +++ b/cpp/include/cudf/strings/strip.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ #include #include +#include namespace cudf { namespace strings { @@ -63,10 +64,10 @@ namespace strings { */ std::unique_ptr strip( strings_column_view const& input, - side_type side = side_type::BOTH, - string_scalar const& to_strip = string_scalar(""), - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + side_type side = side_type::BOTH, + string_scalar const& to_strip = string_scalar(""), + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/translate.hpp b/cpp/include/cudf/strings/translate.hpp index 4bd09352b09..9cd6b7d5974 100644 --- a/cpp/include/cudf/strings/translate.hpp +++ b/cpp/include/cudf/strings/translate.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -21,6 +21,7 @@ #include #include +#include #include @@ -56,8 +57,8 @@ namespace strings { std::unique_ptr translate( strings_column_view const& input, std::vector> const& chars_table, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Removes or keeps the specified character ranges in cudf::strings::filter_characters @@ -101,10 +102,10 @@ enum class filter_type : bool { std::unique_ptr filter_characters( strings_column_view const& input, std::vector> characters_to_filter, - filter_type keep_characters = filter_type::KEEP, - string_scalar const& replacement = string_scalar(""), - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + filter_type keep_characters = filter_type::KEEP, + string_scalar const& replacement = string_scalar(""), + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/wrap.hpp b/cpp/include/cudf/strings/wrap.hpp index efdc3e62aff..c05c33fbac8 100644 --- a/cpp/include/cudf/strings/wrap.hpp +++ b/cpp/include/cudf/strings/wrap.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -19,6 +19,7 @@ #include #include +#include namespace cudf { namespace strings { @@ -66,8 +67,8 @@ namespace strings { std::unique_ptr wrap( strings_column_view const& input, size_type width, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/structs/detail/concatenate.hpp b/cpp/include/cudf/structs/detail/concatenate.hpp index 82ccca188e2..5dc3169c0c4 100644 --- a/cpp/include/cudf/structs/detail/concatenate.hpp +++ b/cpp/include/cudf/structs/detail/concatenate.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,8 @@ #include #include +#include + namespace cudf { namespace structs { namespace detail { @@ -50,7 +52,7 @@ namespace detail { */ std::unique_ptr concatenate(host_span columns, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace structs diff --git a/cpp/include/cudf/structs/detail/scan.hpp b/cpp/include/cudf/structs/detail/scan.hpp index 531e0a6c65f..c97a8452ecd 100644 --- a/cpp/include/cudf/structs/detail/scan.hpp +++ b/cpp/include/cudf/structs/detail/scan.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -19,6 +19,7 @@ #include #include +#include namespace cudf { namespace structs { @@ -38,7 +39,7 @@ namespace detail { template std::unique_ptr scan_inclusive(column_view const& input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace structs diff --git a/cpp/include/cudf/table/table.hpp b/cpp/include/cudf/table/table.hpp index 439b02c2d53..8efe6eb8c72 100644 --- a/cpp/include/cudf/table/table.hpp +++ b/cpp/include/cudf/table/table.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ #include #include +#include #include #include @@ -56,8 +57,8 @@ class table { * @param mr Device memory resource to use for all device memory allocations */ explicit table(table const& other, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Moves the contents from a vector of `unique_ptr`s to columns to * construct a new table. 
@@ -75,8 +76,8 @@ class table { * @param mr Device memory resource used for allocating the device memory for the new columns */ table(table_view view, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns the number of columns in the table diff --git a/cpp/include/cudf/table/table_view.hpp b/cpp/include/cudf/table/table_view.hpp index 4f3b23747e6..ad12b1eef4e 100644 --- a/cpp/include/cudf/table/table_view.hpp +++ b/cpp/include/cudf/table/table_view.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -339,15 +339,6 @@ bool has_nested_nullable_columns(table_view const& input); */ std::vector get_nullable_columns(table_view const& table); -/** - * @brief Checks if two `table_view`s have columns of same types - * - * @param lhs left-side table_view operand - * @param rhs right-side table_view operand - * @return boolean comparison result - */ -bool have_same_types(table_view const& lhs, table_view const& rhs); - /** * @brief Copy column_views from a table_view into another table_view according to * a column indices map. diff --git a/cpp/include/cudf/timezone.hpp b/cpp/include/cudf/timezone.hpp index 56678c73811..7f65128526e 100644 --- a/cpp/include/cudf/timezone.hpp +++ b/cpp/include/cudf/timezone.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,6 +16,7 @@ #pragma once #include +#include #include #include @@ -49,6 +50,6 @@ static constexpr uint32_t solar_cycle_entry_count = 2 * solar_cycle_years; std::unique_ptr
make_timezone_transition_table( std::optional tzif_dir, std::string_view timezone_name, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); } // namespace cudf diff --git a/cpp/include/cudf/transform.hpp b/cpp/include/cudf/transform.hpp index 49ec3d7c0d5..7bb9fb7a42e 100644 --- a/cpp/include/cudf/transform.hpp +++ b/cpp/include/cudf/transform.hpp @@ -20,6 +20,7 @@ #include #include +#include #include @@ -54,7 +55,7 @@ std::unique_ptr transform( std::string const& unary_udf, data_type output_type, bool is_ptx, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Creates a null_mask from `input` by converting `NaN` to null and @@ -69,7 +70,7 @@ std::unique_ptr transform( */ std::pair, size_type> nans_to_nulls( column_view const& input, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Compute a new column by evaluating an expression tree on a table. @@ -87,7 +88,7 @@ std::pair, size_type> nans_to_nulls( std::unique_ptr compute_column( table_view const& table, ast::expression const& expr, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Creates a bitmask from a column of boolean elements. 
@@ -106,7 +107,7 @@ std::unique_ptr compute_column( */ std::pair, cudf::size_type> bools_to_mask( column_view const& input, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Encode the rows of the given table as integers @@ -134,7 +135,7 @@ std::pair, cudf::size_type> bools_to_mask( */ std::pair, std::unique_ptr> encode( cudf::table_view const& input, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Encodes `input` by generating a new column for each value in `categories` indicating the @@ -166,7 +167,7 @@ std::pair, std::unique_ptr> encode( std::pair, table_view> one_hot_encode( column_view const& input, column_view const& categories, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Creates a boolean column from given bitmask. 
@@ -193,7 +194,7 @@ std::unique_ptr mask_to_bools( bitmask_type const* bitmask, size_type begin_bit, size_type end_bit, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns an approximate cumulative size in bits of all columns in the `table_view` for @@ -221,8 +222,7 @@ std::unique_ptr mask_to_bools( * @return A 32-bit integer column containing the per-row bit counts */ std::unique_ptr row_bit_count( - table_view const& t, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + table_view const& t, rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns an approximate cumulative size in bits of all columns in the `table_view` for @@ -245,7 +245,7 @@ std::unique_ptr row_bit_count( std::unique_ptr segmented_row_bit_count( table_view const& t, size_type segment_length, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace cudf diff --git a/cpp/include/cudf/transpose.hpp b/cpp/include/cudf/transpose.hpp index e5d083ae7b3..c01a04afe87 100644 --- a/cpp/include/cudf/transpose.hpp +++ b/cpp/include/cudf/transpose.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -19,6 +19,7 @@ #include #include +#include namespace cudf { /** @@ -44,7 +45,7 @@ namespace cudf { */ std::pair, table_view> transpose( table_view const& input, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace cudf diff --git a/cpp/include/cudf/unary.hpp b/cpp/include/cudf/unary.hpp index 64e802d88dd..74c8bc67d3a 100644 --- a/cpp/include/cudf/unary.hpp +++ b/cpp/include/cudf/unary.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2023, NVIDIA CORPORATION. + * Copyright (c) 2018-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,10 +16,13 @@ #pragma once +#include #include #include +#include #include +#include #include @@ -31,6 +34,77 @@ namespace cudf { * @brief Column APIs for unary ops */ +/** + * @brief Convert a floating-point value to fixed point + * + * @note This conversion was moved from fixed-point member functions to free functions. + * This is so that the complex conversion code is not included into many parts of the + * code base that don't need it, and so that it's more obvious to pinpoint where these + * conversions are occurring. 
+ * + * @tparam Fixed The fixed-point type to convert to + * @tparam Floating The floating-point type to convert from + * @param floating The floating-point value to convert + * @param scale The desired scale of the fixed-point value + * @return The converted fixed-point value + */ +template () && + cuda::std::is_floating_point_v>* = nullptr> +CUDF_HOST_DEVICE Fixed convert_floating_to_fixed(Floating floating, numeric::scale_type scale) +{ + using Rep = typename Fixed::rep; + auto const shifted = numeric::detail::shift(floating, scale); + numeric::scaled_integer scaled{static_cast(shifted), scale}; + return Fixed(scaled); +} + +/** + * @brief Convert a fixed-point value to floating point + * + * @note This conversion was moved from fixed-point member functions to free functions. + * This is so that the complex conversion code is not included into many parts of the + * code base that don't need it, and so that it's more obvious to pinpoint where these + * conversions are occurring. + * + * @tparam Floating The floating-point type to convert to + * @tparam Fixed The fixed-point type to convert from + * @param fixed The fixed-point value to convert + * @return The converted floating-point value + */ +template && + is_fixed_point()>* = nullptr> +CUDF_HOST_DEVICE Floating convert_fixed_to_floating(Fixed fixed) +{ + using Rep = typename Fixed::rep; + auto const casted = static_cast(fixed.value()); + auto const scale = numeric::scale_type{-fixed.scale()}; + return numeric::detail::shift(casted, scale); +} + +/** + * @brief Convert a value to floating point + * + * @tparam Floating The floating-point type to convert to + * @tparam Input The input type to convert from + * @param input The input value to convert + * @return The converted floating-point value + */ +template >* = nullptr> +CUDF_HOST_DEVICE Floating convert_to_floating(Input input) +{ + if constexpr (is_fixed_point()) { + return convert_fixed_to_floating(input); + } else { + return static_cast(input); + } +} + 
/** * @brief Types of unary operations that can be performed on data. */ @@ -74,8 +148,8 @@ enum class unary_operator : int32_t { std::unique_ptr unary_operation( cudf::column_view const& input, cudf::unary_operator op, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Creates a column of `type_id::BOOL8` elements where for every element in `input` `true` @@ -90,8 +164,8 @@ std::unique_ptr unary_operation( */ std::unique_ptr is_null( cudf::column_view const& input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Creates a column of `type_id::BOOL8` elements where for every element in `input` `true` @@ -106,8 +180,8 @@ std::unique_ptr is_null( */ std::unique_ptr is_valid( cudf::column_view const& input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Casts data from dtype specified in input to dtype specified in output. 
@@ -125,8 +199,8 @@ std::unique_ptr is_valid( std::unique_ptr cast( column_view const& input, data_type out_type, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Creates a column of `type_id::BOOL8` elements indicating the presence of `NaN` values @@ -143,8 +217,8 @@ std::unique_ptr cast( */ std::unique_ptr is_nan( cudf::column_view const& input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Creates a column of `type_id::BOOL8` elements indicating the absence of `NaN` values @@ -162,8 +236,8 @@ std::unique_ptr is_nan( */ std::unique_ptr is_not_nan( cudf::column_view const& input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace cudf diff --git a/cpp/src/io/utilities/thread_pool.hpp b/cpp/include/cudf/utilities/thread_pool.hpp similarity index 100% rename from cpp/src/io/utilities/thread_pool.hpp rename to cpp/include/cudf/utilities/thread_pool.hpp diff --git a/cpp/include/cudf/utilities/traits.hpp b/cpp/include/cudf/utilities/traits.hpp index 2dda0740b96..d191e44228a 100644 --- a/cpp/include/cudf/utilities/traits.hpp +++ b/cpp/include/cudf/utilities/traits.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -397,7 +397,10 @@ template constexpr inline bool is_fixed_point() { return std::is_same_v || std::is_same_v || - std::is_same_v; + std::is_same_v || + std::is_same_v, T> || + std::is_same_v, T> || + std::is_same_v, T>; } /** diff --git a/cpp/include/cudf/utilities/type_checks.hpp b/cpp/include/cudf/utilities/type_checks.hpp index b925fc8ae92..fd3b0581c11 100644 --- a/cpp/include/cudf/utilities/type_checks.hpp +++ b/cpp/include/cudf/utilities/type_checks.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,11 +16,16 @@ #pragma once #include +#include + +#include namespace cudf { /** - * @brief Compares the type of two `column_view`s + * @brief Compare the types of two `column_view`s + * + * @deprecated Since 24.06. Use cudf::have_same_types instead. * * This function returns true if the type of `lhs` equals that of `rhs`. * - For fixed point types, the scale is compared. @@ -34,10 +39,11 @@ namespace cudf { * @param rhs The second `column_view` to compare * @return true if column types match */ -bool column_types_equal(column_view const& lhs, column_view const& rhs); +[[deprecated]] bool column_types_equal(column_view const& lhs, column_view const& rhs); /** * @brief Compare the type IDs of two `column_view`s + * * This function returns true if the type of `lhs` equals that of `rhs`. * - For fixed point types, the scale is ignored. 
* @@ -47,4 +53,98 @@ bool column_types_equal(column_view const& lhs, column_view const& rhs); */ bool column_types_equivalent(column_view const& lhs, column_view const& rhs); +/** + * @brief Compares the type of two `column_view`s + * + * This function returns true if the type of `lhs` equals that of `rhs`. + * - For fixed point types, the scale is compared. + * - For dictionary types, the type of the keys are compared if both are + * non-empty columns. + * - For lists types, the type of child columns are compared recursively. + * - For struct types, the type of each field are compared in order. + * - For all other types, the `id` of `data_type` is compared. + * + * @param lhs The first `column_view` to compare + * @param rhs The second `column_view` to compare + * @return true if types match + */ +bool have_same_types(column_view const& lhs, column_view const& rhs); + +/** + * @brief Compare the types of a `column_view` and a `scalar` + * + * This function returns true if the type of `lhs` equals that of `rhs`. + * - For fixed point types, the scale is compared. + * - For dictionary column types, the type of the keys is compared to the + * scalar type. + * - For lists types, the types of child columns are compared recursively. + * - For struct types, the types of each field are compared in order. + * - For all other types, the `id` of `data_type` is compared. + * + * @param lhs The `column_view` to compare + * @param rhs The `scalar` to compare + * @return true if types match + */ +bool have_same_types(column_view const& lhs, scalar const& rhs); + +/** + * @brief Compare the types of a `scalar` and a `column_view` + * + * This function returns true if the type of `lhs` equals that of `rhs`. + * - For fixed point types, the scale is compared. + * - For dictionary column types, the type of the keys is compared to the + * scalar type. + * - For lists types, the types of child columns are compared recursively. 
+ * - For struct types, the types of each field are compared in order. + * - For all other types, the `id` of `data_type` is compared. + * + * @param lhs The `scalar` to compare + * @param rhs The `column_view` to compare + * @return true if types match + */ +bool have_same_types(scalar const& lhs, column_view const& rhs); + +/** + * @brief Compare the types of two `scalar`s + * + * This function returns true if the type of `lhs` equals that of `rhs`. + * - For fixed point types, the scale is compared. + * - For lists types, the types of child columns are compared recursively. + * - For struct types, the types of each field are compared in order. + * - For all other types, the `id` of `data_type` is compared. + * + * @param lhs The first `scalar` to compare + * @param rhs The second `scalar` to compare + * @return true if types match + */ +bool have_same_types(scalar const& lhs, scalar const& rhs); + +/** + * @brief Checks if two `table_view`s have columns of same types + * + * @param lhs left-side table_view operand + * @param rhs right-side table_view operand + * @return boolean comparison result + */ +bool have_same_types(table_view const& lhs, table_view const& rhs); + +/** + * @brief Compare the types of a range of `column_view` or `scalar` objects + * + * This function returns true if all objects in the range have the same type, in the sense of + * cudf::have_same_types. 
+ * + * @tparam ForwardIt Forward iterator + * @param first The first iterator + * @param last The last iterator + * @return true if all types match + */ +template +inline bool all_have_same_types(ForwardIt first, ForwardIt last) +{ + return first == last || std::all_of(std::next(first), last, [want = *first](auto const& c) { + return cudf::have_same_types(want, c); + }); +} + } // namespace cudf diff --git a/cpp/include/cudf_test/base_fixture.hpp b/cpp/include/cudf_test/base_fixture.hpp index 14b94e061ae..18f75bbc842 100644 --- a/cpp/include/cudf_test/base_fixture.hpp +++ b/cpp/include/cudf_test/base_fixture.hpp @@ -23,6 +23,7 @@ #include #include +#include namespace cudf { namespace test { @@ -36,7 +37,7 @@ namespace test { * ``` */ class BaseFixture : public ::testing::Test { - rmm::mr::device_memory_resource* _mr{rmm::mr::get_current_device_resource()}; + rmm::device_async_resource_ref _mr{rmm::mr::get_current_device_resource()}; public: /** @@ -44,7 +45,7 @@ class BaseFixture : public ::testing::Test { * all tests inheriting from this fixture * @return pointer to memory resource */ - rmm::mr::device_memory_resource* mr() { return _mr; } + rmm::device_async_resource_ref mr() { return _mr; } }; /** @@ -57,7 +58,7 @@ class BaseFixture : public ::testing::Test { */ template class BaseFixtureWithParam : public ::testing::TestWithParam { - rmm::mr::device_memory_resource* _mr{rmm::mr::get_current_device_resource()}; + rmm::device_async_resource_ref _mr{rmm::mr::get_current_device_resource()}; public: /** @@ -65,7 +66,7 @@ class BaseFixtureWithParam : public ::testing::TestWithParam { * all tests inheriting from this fixture * @return pointer to memory resource */ - rmm::mr::device_memory_resource* mr() const { return _mr; } + rmm::device_async_resource_ref mr() const { return _mr; } }; /** diff --git a/cpp/include/cudf_test/column_utilities.hpp b/cpp/include/cudf_test/column_utilities.hpp index a8957473175..c83599a8072 100644 --- 
a/cpp/include/cudf_test/column_utilities.hpp +++ b/cpp/include/cudf_test/column_utilities.hpp @@ -210,6 +210,29 @@ template <> std::pair, std::vector> to_host(column_view c); //! @endcond +/** + * @brief For enabling large strings testing in specific tests + */ +struct large_strings_enabler { + /** + * @brief Create large strings enable object + * + * @param default_enable Default enables large strings support + */ + large_strings_enabler(bool default_enable = true); + ~large_strings_enabler(); + + /** + * @brief Enable large strings support + */ + void enable(); + + /** + * @brief Disable large strings support + */ + void disable(); +}; + } // namespace cudf::test // Macros for showing line of failure. @@ -242,3 +265,5 @@ std::pair, std::vector> to_host(c SCOPED_TRACE(" <-- line of failure\n"); \ cudf::test::detail::expect_equal_buffers(lhs, rhs, size_bytes); \ } while (0) + +#define CUDF_TEST_ENABLE_LARGE_STRINGS() cudf::test::large_strings_enabler ls___ diff --git a/cpp/include/cudf_test/column_wrapper.hpp b/cpp/include/cudf_test/column_wrapper.hpp index 151fe50be4f..dc873658abf 100644 --- a/cpp/include/cudf_test/column_wrapper.hpp +++ b/cpp/include/cudf_test/column_wrapper.hpp @@ -17,7 +17,6 @@ #pragma once #include -#include #include #include diff --git a/cpp/include/cudf_test/cudf_gtest.hpp b/cpp/include/cudf_test/cudf_gtest.hpp index fa76204d622..89394fbd1c3 100644 --- a/cpp/include/cudf_test/cudf_gtest.hpp +++ b/cpp/include/cudf_test/cudf_gtest.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,86 +16,6 @@ #pragma once -#ifdef GTEST_INCLUDE_GTEST_GTEST_H_ -#error "Don't include gtest/gtest.h directly, include cudf_gtest.hpp instead" -#endif - -/** - * @file cudf_gtest.hpp - * @brief Work around for GTests( <=v1.10 ) emulation of variadic templates in - * @verbatim ::Testing::Types @endverbatim - * - * @note Instead of including `gtest/gtest.h`, all libcudf test files should - * include `cudf_gtest.hpp` instead. - * - * Removes the 50 type limit in a type-parameterized test list. - * - * Uses macros to rename GTests's emulated variadic template types and then - * redefines them properly. - */ - -// @cond -#if __has_include() -// gtest doesn't provide a version header so we need to -// use a file existence trick. -// gtest-type-util.h.pump only exists in versions < 1.11 -#define Types Types_NOT_USED -#define Types0 Types0_NOT_USED -#define TypeList TypeList_NOT_USED -#define Templates Templates_NOT_USED -#define Templates0 Templates0_NOT_USED -#include -#undef Types -#undef Types0 -#undef TypeList -#undef Templates -#undef Templates0 - -namespace testing { -template -struct Types { - using type = Types; -}; - -template -struct Types { - using Head = T; - using Tail = Types; - - using type = Types; -}; - -namespace internal { -using Types0 = Types<>; - -template -struct Templates {}; - -template -struct Templates { - using Head = internal::TemplateSel; - using Tail = Templates; - - using type = Templates; -}; - -using Templates0 = Templates<>; - -template -struct TypeList { - using type = Types; -}; - -template -struct TypeList> { - using type = Types; -}; - -} // namespace internal -} // namespace testing -#endif // gtest < 1.11 -// @endcond - #include #include diff --git a/cpp/include/cudf_test/testing_main.hpp b/cpp/include/cudf_test/testing_main.hpp index 88e3088d794..66b831b917f 100644 --- a/cpp/include/cudf_test/testing_main.hpp +++ b/cpp/include/cudf_test/testing_main.hpp @@ -145,6 +145,51 @@ inline auto parse_cudf_test_opts(int argc, char** 
argv) } } +/** + * @brief Sets up stream mode memory resource adaptor + * + * The resource adaptor is only set as the current device resource if the + * stream mode is enabled. + * + * The caller must keep the return object alive for the life of the test runs. + * + * @param cmd_opts Command line options returned by parse_cudf_test_opts + * @return Memory resource adaptor + */ +inline auto make_memory_resource_adaptor(cxxopts::ParseResult const& cmd_opts) +{ + auto const rmm_mode = cmd_opts["rmm_mode"].as(); + auto resource = cudf::test::create_memory_resource(rmm_mode); + rmm::mr::set_current_device_resource(resource.get()); + return resource; +} + +/** + * @brief Sets up stream mode memory resource adaptor + * + * The resource adaptor is only set as the current device resource if the + * stream mode is enabled. + * + * The caller must keep the return object alive for the life of the test runs. + * + * @param cmd_opts Command line options returned by parse_cudf_test_opts + * @return Memory resource adaptor + */ +inline auto make_stream_mode_adaptor(cxxopts::ParseResult const& cmd_opts) +{ + auto resource = rmm::mr::get_current_device_resource(); + auto const stream_mode = cmd_opts["stream_mode"].as(); + auto const stream_error_mode = cmd_opts["stream_error_mode"].as(); + auto const error_on_invalid_stream = (stream_error_mode == "error"); + auto const check_default_stream = (stream_mode == "new_cudf_default"); + auto adaptor = + make_stream_checking_resource_adaptor(resource, error_on_invalid_stream, check_default_stream); + if ((stream_mode == "new_cudf_default") || (stream_mode == "new_testing_default")) { + rmm::mr::set_current_device_resource(&adaptor); + } + return adaptor; +} + /** * @brief Macro that defines main function for gtest programs that use rmm * @@ -155,25 +200,12 @@ inline auto parse_cudf_test_opts(int argc, char** argv) * function parses the command line to customize test behavior, like the * allocation mode used for creating the default memory 
resource. */ -#define CUDF_TEST_PROGRAM_MAIN() \ - int main(int argc, char** argv) \ - { \ - ::testing::InitGoogleTest(&argc, argv); \ - auto const cmd_opts = parse_cudf_test_opts(argc, argv); \ - auto const rmm_mode = cmd_opts["rmm_mode"].as(); \ - auto resource = cudf::test::create_memory_resource(rmm_mode); \ - rmm::mr::set_current_device_resource(resource.get()); \ - \ - auto const stream_mode = cmd_opts["stream_mode"].as(); \ - if ((stream_mode == "new_cudf_default") || (stream_mode == "new_testing_default")) { \ - auto const stream_error_mode = cmd_opts["stream_error_mode"].as(); \ - auto const error_on_invalid_stream = (stream_error_mode == "error"); \ - auto const check_default_stream = (stream_mode == "new_cudf_default"); \ - auto adaptor = make_stream_checking_resource_adaptor( \ - resource.get(), error_on_invalid_stream, check_default_stream); \ - rmm::mr::set_current_device_resource(&adaptor); \ - return RUN_ALL_TESTS(); \ - } \ - \ - return RUN_ALL_TESTS(); \ +#define CUDF_TEST_PROGRAM_MAIN() \ + int main(int argc, char** argv) \ + { \ + ::testing::InitGoogleTest(&argc, argv); \ + auto const cmd_opts = parse_cudf_test_opts(argc, argv); \ + [[maybe_unused]] auto mr = make_memory_resource_adaptor(cmd_opts); \ + [[maybe_unused]] auto adaptor = make_stream_mode_adaptor(cmd_opts); \ + return RUN_ALL_TESTS(); \ } diff --git a/cpp/include/nvtext/byte_pair_encoding.hpp b/cpp/include/nvtext/byte_pair_encoding.hpp index 4d6d8335eac..375d44e367a 100644 --- a/cpp/include/nvtext/byte_pair_encoding.hpp +++ b/cpp/include/nvtext/byte_pair_encoding.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -21,6 +21,8 @@ #include #include +#include + namespace nvtext { /** @@ -45,8 +47,8 @@ struct bpe_merge_pairs { * @param mr Device memory resource used to allocate the device memory */ bpe_merge_pairs(std::unique_ptr&& input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct a new bpe merge pairs object @@ -56,8 +58,8 @@ struct bpe_merge_pairs { * @param mr Device memory resource used to allocate the device memory */ bpe_merge_pairs(cudf::strings_column_view const& input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); ~bpe_merge_pairs(); bpe_merge_pairs(); @@ -94,8 +96,8 @@ struct bpe_merge_pairs { */ std::unique_ptr load_merge_pairs( cudf::strings_column_view const& merge_pairs, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Byte pair encode the input strings. 
@@ -127,7 +129,7 @@ std::unique_ptr byte_pair_encoding( cudf::strings_column_view const& input, bpe_merge_pairs const& merges_pairs, cudf::string_scalar const& separator = cudf::string_scalar(" "), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace nvtext diff --git a/cpp/include/nvtext/detail/generate_ngrams.hpp b/cpp/include/nvtext/detail/generate_ngrams.hpp index 835124141d4..c4b89b6d495 100644 --- a/cpp/include/nvtext/detail/generate_ngrams.hpp +++ b/cpp/include/nvtext/detail/generate_ngrams.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,20 +18,21 @@ #include #include +#include namespace nvtext { namespace detail { /** * @copydoc hash_character_ngrams(cudf::strings_column_view const&, - * cudf::size_type, rmm::mr::device_memory_resource*) + * cudf::size_type, rmm::device_async_resource_ref) * * @param stream CUDA stream used for allocating/copying device memory and launching kernels */ std::unique_ptr hash_character_ngrams(cudf::strings_column_view const& strings, cudf::size_type ngrams, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace nvtext diff --git a/cpp/include/nvtext/detail/load_hash_file.hpp b/cpp/include/nvtext/detail/load_hash_file.hpp index f4107adb07e..0c27981f80b 100644 --- a/cpp/include/nvtext/detail/load_hash_file.hpp +++ b/cpp/include/nvtext/detail/load_hash_file.hpp @@ -20,6 +20,7 @@ #include #include +#include #include #include @@ -43,7 +44,7 @@ namespace detail { std::unique_ptr load_vocabulary_file( std::string const& filename_hashed_vocabulary, rmm::cuda_stream_view stream, - 
rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace nvtext diff --git a/cpp/include/nvtext/detail/tokenize.hpp b/cpp/include/nvtext/detail/tokenize.hpp index 80a6edc496b..d48027e4631 100644 --- a/cpp/include/nvtext/detail/tokenize.hpp +++ b/cpp/include/nvtext/detail/tokenize.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,52 +21,53 @@ #include #include +#include namespace nvtext { namespace detail { /** * @copydoc nvtext::tokenize(strings_column_view const&,string_scalar - * const&,rmm::mr::device_memory_resource*) + * const&,rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches */ std::unique_ptr tokenize(cudf::strings_column_view const& strings, cudf::string_scalar const& delimiter, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc nvtext::tokenize(strings_column_view const&,strings_column_view - * const&,rmm::mr::device_memory_resource*) + * const&,rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches */ std::unique_ptr tokenize(cudf::strings_column_view const& strings, cudf::strings_column_view const& delimiters, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc nvtext::count_tokens(strings_column_view const&, string_scalar - * const&,rmm::mr::device_memory_resource*) + * const&,rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches */ std::unique_ptr count_tokens(cudf::strings_column_view const& strings, cudf::string_scalar const& delimiter, rmm::cuda_stream_view 
stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc nvtext::count_tokens(strings_column_view const&,strings_column_view - * const&,rmm::mr::device_memory_resource*) + * const&,rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches */ std::unique_ptr count_tokens(cudf::strings_column_view const& strings, cudf::strings_column_view const& delimiters, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace nvtext diff --git a/cpp/include/nvtext/edit_distance.hpp b/cpp/include/nvtext/edit_distance.hpp index 9a24662455b..bfdfb4d1a1c 100644 --- a/cpp/include/nvtext/edit_distance.hpp +++ b/cpp/include/nvtext/edit_distance.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,8 @@ #include #include +#include + //! NVText APIs namespace nvtext { /** @@ -60,8 +62,8 @@ namespace nvtext { std::unique_ptr edit_distance( cudf::strings_column_view const& input, cudf::strings_column_view const& targets, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Compute the edit distance between all the strings in the input column. 
@@ -98,8 +100,8 @@ std::unique_ptr edit_distance( */ std::unique_ptr edit_distance_matrix( cudf::strings_column_view const& input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace nvtext diff --git a/cpp/include/nvtext/generate_ngrams.hpp b/cpp/include/nvtext/generate_ngrams.hpp index 46f2c0e7bc9..bebe2e46023 100644 --- a/cpp/include/nvtext/generate_ngrams.hpp +++ b/cpp/include/nvtext/generate_ngrams.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,8 @@ #include #include +#include + namespace nvtext { /** * @addtogroup nvtext_ngrams @@ -58,25 +60,23 @@ std::unique_ptr generate_ngrams( cudf::strings_column_view const& input, cudf::size_type ngrams, cudf::string_scalar const& separator, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** - * @brief Generates ngrams of characters within each string. + * @brief Generates ngrams of characters within each string * - * Each character of a string used to build ngrams. + * Each character of a string is used to build ngrams for the output row. * Ngrams are not created across strings. 
* * ``` - * ["ab", "cde", "fgh"] would generate bigrams as ["ab", "cd", "de", "fg", "gh"] + * ["ab", "cde", "fgh"] would generate bigrams as + * [["ab"], ["cd", "de"], ["fg", "gh"]] * ``` * - * The size of the output column will be the total number of ngrams generated from - * the input strings column. + * All null row entries are ignored and the corresponding output row will be empty. * - * All null row entries are ignored and the output contains all valid rows. - * - * @throw cudf::logic_error if `ngrams < 2` + * @throw std::invalid_argument if `ngrams < 2` * @throw cudf::logic_error if there are not enough characters to generate any ngrams * * @param input Strings column to produce ngrams from @@ -84,13 +84,13 @@ std::unique_ptr generate_ngrams( * Default is 2 = bigram. * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory - * @return New strings columns of tokens + * @return Lists column of strings */ std::unique_ptr generate_character_ngrams( cudf::strings_column_view const& input, - cudf::size_type ngrams = 2, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + cudf::size_type ngrams = 2, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Hashes ngrams of characters within each string @@ -123,9 +123,9 @@ std::unique_ptr generate_character_ngrams( */ std::unique_ptr hash_character_ngrams( cudf::strings_column_view const& input, - cudf::size_type ngrams = 5, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + cudf::size_type ngrams = 5, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = 
rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace nvtext diff --git a/cpp/include/nvtext/jaccard.hpp b/cpp/include/nvtext/jaccard.hpp index 19d6c111200..649c17f0b1c 100644 --- a/cpp/include/nvtext/jaccard.hpp +++ b/cpp/include/nvtext/jaccard.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +18,8 @@ #include #include +#include + namespace nvtext { /** * @addtogroup nvtext_jaccard @@ -72,8 +74,8 @@ std::unique_ptr jaccard_index( cudf::strings_column_view const& input1, cudf::strings_column_view const& input2, cudf::size_type width, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace nvtext diff --git a/cpp/include/nvtext/minhash.hpp b/cpp/include/nvtext/minhash.hpp index 47c625b5079..7d3f6059454 100644 --- a/cpp/include/nvtext/minhash.hpp +++ b/cpp/include/nvtext/minhash.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -21,6 +21,8 @@ #include #include +#include + namespace nvtext { /** * @addtogroup nvtext_minhash @@ -53,7 +55,7 @@ std::unique_ptr minhash( cudf::numeric_scalar seed = 0, cudf::size_type width = 4, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns the minhash values for each string per seed @@ -83,9 +85,9 @@ std::unique_ptr minhash( std::unique_ptr minhash( cudf::strings_column_view const& input, cudf::device_span seeds, - cudf::size_type width = 4, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + cudf::size_type width = 4, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns the minhash value for each string @@ -114,7 +116,7 @@ std::unique_ptr minhash64( cudf::numeric_scalar seed = 0, cudf::size_type width = 4, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns the minhash values for each string per seed @@ -144,9 +146,9 @@ std::unique_ptr minhash64( std::unique_ptr minhash64( cudf::strings_column_view const& input, cudf::device_span seeds, - cudf::size_type width = 4, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + cudf::size_type width = 4, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace nvtext diff --git a/cpp/include/nvtext/ngrams_tokenize.hpp 
b/cpp/include/nvtext/ngrams_tokenize.hpp index 9d76ef8689f..09ce323a7ae 100644 --- a/cpp/include/nvtext/ngrams_tokenize.hpp +++ b/cpp/include/nvtext/ngrams_tokenize.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,8 @@ #include #include +#include + namespace nvtext { /** * @addtogroup nvtext_ngrams @@ -80,8 +82,8 @@ std::unique_ptr ngrams_tokenize( cudf::size_type ngrams, cudf::string_scalar const& delimiter, cudf::string_scalar const& separator, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace nvtext diff --git a/cpp/include/nvtext/normalize.hpp b/cpp/include/nvtext/normalize.hpp index 3cbff5c744b..e5967e78318 100644 --- a/cpp/include/nvtext/normalize.hpp +++ b/cpp/include/nvtext/normalize.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +18,8 @@ #include #include +#include + //! NVText APIs namespace nvtext { /** @@ -51,8 +53,8 @@ namespace nvtext { */ std::unique_ptr normalize_spaces( cudf::strings_column_view const& input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Normalizes strings characters for tokenizing. 
@@ -102,8 +104,8 @@ std::unique_ptr normalize_spaces( std::unique_ptr normalize_characters( cudf::strings_column_view const& input, bool do_lower_case, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace nvtext diff --git a/cpp/include/nvtext/replace.hpp b/cpp/include/nvtext/replace.hpp index 88cf7d41901..aac21346c72 100644 --- a/cpp/include/nvtext/replace.hpp +++ b/cpp/include/nvtext/replace.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,8 @@ #include #include +#include + //! NVText APIs namespace nvtext { /** @@ -88,7 +90,7 @@ std::unique_ptr replace_tokens( cudf::strings_column_view const& replacements, cudf::string_scalar const& delimiter = cudf::string_scalar{""}, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Removes tokens whose lengths are less than a specified number of characters. 
@@ -137,7 +139,7 @@ std::unique_ptr filter_tokens( cudf::string_scalar const& replacement = cudf::string_scalar{""}, cudf::string_scalar const& delimiter = cudf::string_scalar{""}, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace nvtext diff --git a/cpp/include/nvtext/stemmer.hpp b/cpp/include/nvtext/stemmer.hpp index 0e1759fdc5a..20b81aba661 100644 --- a/cpp/include/nvtext/stemmer.hpp +++ b/cpp/include/nvtext/stemmer.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,8 @@ #include #include +#include + namespace nvtext { /** * @addtogroup nvtext_stemmer @@ -79,8 +81,8 @@ std::unique_ptr is_letter( cudf::strings_column_view const& input, letter_type ltype, cudf::size_type character_index, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns boolean column indicating if character at `indices[i]` of `input[i]` @@ -132,8 +134,8 @@ std::unique_ptr is_letter( cudf::strings_column_view const& input, letter_type ltype, cudf::column_view const& indices, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns the Porter Stemmer measurements of a strings column. 
@@ -166,8 +168,8 @@ std::unique_ptr is_letter( */ std::unique_ptr porter_stemmer_measure( cudf::strings_column_view const& input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace nvtext diff --git a/cpp/include/nvtext/subword_tokenize.hpp b/cpp/include/nvtext/subword_tokenize.hpp index 72a899d70b4..a4e06495a1d 100644 --- a/cpp/include/nvtext/subword_tokenize.hpp +++ b/cpp/include/nvtext/subword_tokenize.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,8 @@ #include #include +#include + namespace nvtext { /** @@ -65,7 +67,7 @@ struct hashed_vocabulary { */ std::unique_ptr load_vocabulary_file( std::string const& filename_hashed_vocabulary, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Result object for the subword_tokenize functions. @@ -155,7 +157,7 @@ tokenizer_result subword_tokenize( uint32_t stride, bool do_lower_case, bool do_truncate, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace nvtext diff --git a/cpp/include/nvtext/tokenize.hpp b/cpp/include/nvtext/tokenize.hpp index 107fefcc3bf..ea1b9c716f0 100644 --- a/cpp/include/nvtext/tokenize.hpp +++ b/cpp/include/nvtext/tokenize.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,8 @@ #include #include +#include + namespace nvtext { /** * @addtogroup nvtext_tokenize @@ -60,7 +62,7 @@ std::unique_ptr tokenize( cudf::strings_column_view const& input, cudf::string_scalar const& delimiter = cudf::string_scalar{""}, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a single column of strings by tokenizing the input strings @@ -95,8 +97,8 @@ std::unique_ptr tokenize( std::unique_ptr tokenize( cudf::strings_column_view const& input, cudf::strings_column_view const& delimiters, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns the number of tokens in each string of a strings column. 
@@ -127,7 +129,7 @@ std::unique_ptr count_tokens( cudf::strings_column_view const& input, cudf::string_scalar const& delimiter = cudf::string_scalar{""}, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns the number of tokens in each string of a strings column @@ -158,8 +160,8 @@ std::unique_ptr count_tokens( std::unique_ptr count_tokens( cudf::strings_column_view const& input, cudf::strings_column_view const& delimiters, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns a single column of strings by converting each character to a string. @@ -183,8 +185,8 @@ std::unique_ptr count_tokens( */ std::unique_ptr character_tokenize( cudf::strings_column_view const& input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Creates a strings column from a strings column of tokens and an @@ -225,7 +227,7 @@ std::unique_ptr detokenize( cudf::column_view const& row_indices, cudf::string_scalar const& separator = cudf::string_scalar(" "), rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Vocabulary object to be used with nvtext::tokenize_with_vocabulary @@ -246,8 +248,8 @@ struct tokenize_vocabulary { * @param mr Device memory 
resource used to allocate the returned column's device memory */ tokenize_vocabulary(cudf::strings_column_view const& input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); ~tokenize_vocabulary(); struct tokenize_vocabulary_impl; @@ -269,8 +271,8 @@ struct tokenize_vocabulary { */ std::unique_ptr load_vocabulary( cudf::strings_column_view const& input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns the token ids for the input string by looking up each delimited @@ -301,9 +303,9 @@ std::unique_ptr tokenize_with_vocabulary( cudf::strings_column_view const& input, tokenize_vocabulary const& vocabulary, cudf::string_scalar const& delimiter, - cudf::size_type default_id = -1, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + cudf::size_type default_id = -1, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of tokenize group } // namespace nvtext diff --git a/cpp/scripts/run-cmake-format.sh b/cpp/scripts/run-cmake-format.sh index f3e21779aa5..603880954a6 100755 --- a/cpp/scripts/run-cmake-format.sh +++ b/cpp/scripts/run-cmake-format.sh @@ -1,6 +1,5 @@ #!/bin/bash - -# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # This script is a wrapper for cmakelang that may be used with pre-commit. 
The # wrapping is necessary because RAPIDS libraries split configuration for @@ -45,6 +44,7 @@ fi DEFAULT_FORMAT_FILE_LOCATIONS=( "${CUDF_BUILD_DIR:-${HOME}}/_deps/rapids-cmake-src/cmake-format-rapids-cmake.json" + "${CUDF_BUILD_DIR:-cpp/build}/latest/_deps/rapids-cmake-src/cmake-format-rapids-cmake.json" "cpp/libcudf_kafka/build/_deps/rapids-cmake-src/cmake-format-rapids-cmake.json" ) diff --git a/cpp/src/binaryop/binaryop.cpp b/cpp/src/binaryop/binaryop.cpp index be91c3b4d08..ac31f9045fe 100644 --- a/cpp/src/binaryop/binaryop.cpp +++ b/cpp/src/binaryop/binaryop.cpp @@ -39,6 +39,7 @@ #include #include +#include #include @@ -56,7 +57,7 @@ std::pair scalar_col_valid_mask_and( column_view const& col, scalar const& s, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { if (col.is_empty()) return std::pair(rmm::device_buffer{0, stream, mr}, 0); @@ -76,9 +77,9 @@ std::pair scalar_col_valid_mask_and( */ inline bool is_null_dependent(binary_operator op) { - return op == binary_operator::NULL_EQUALS || op == binary_operator::NULL_MIN || - op == binary_operator::NULL_MAX || op == binary_operator::NULL_LOGICAL_AND || - op == binary_operator::NULL_LOGICAL_OR; + return op == binary_operator::NULL_EQUALS || op == binary_operator::NULL_NOT_EQUALS || + op == binary_operator::NULL_MIN || op == binary_operator::NULL_MAX || + op == binary_operator::NULL_LOGICAL_AND || op == binary_operator::NULL_LOGICAL_OR; } /** @@ -108,7 +109,8 @@ bool is_comparison_binop(binary_operator op) op == binary_operator::GREATER or // operator > op == binary_operator::LESS_EQUAL or // operator <= op == binary_operator::GREATER_EQUAL or // operator >= - op == binary_operator::NULL_EQUALS; // 2 null = true; 1 null = false; else == + op == binary_operator::NULL_EQUALS or // 2 null = true; 1 null = false; else == + op == binary_operator::NULL_NOT_EQUALS; // 2 null = false; 1 null = true; else != } /** @@ -179,7 +181,7 @@ void 
fixed_point_binary_operation_validation(binary_operator op, /** * @copydoc cudf::binary_operation(column_view const&, column_view const&, - * binary_operator, data_type, rmm::mr::device_memory_resource*) + * binary_operator, data_type, rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ @@ -189,7 +191,7 @@ std::unique_ptr binary_operation(LhsType const& lhs, binary_operator op, data_type output_type, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { if constexpr (std::is_same_v and std::is_same_v) CUDF_EXPECTS(lhs.size() == rhs.size(), "Column sizes don't match"); @@ -250,7 +252,7 @@ std::unique_ptr make_fixed_width_column_for_output(scalar const& lhs, binary_operator op, data_type output_type, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { if (binops::is_null_dependent(op)) { return make_fixed_width_column(output_type, rhs.size(), mask_state::ALL_VALID, stream, mr); @@ -277,7 +279,7 @@ std::unique_ptr make_fixed_width_column_for_output(column_view const& lh binary_operator op, data_type output_type, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { if (binops::is_null_dependent(op)) { return make_fixed_width_column(output_type, lhs.size(), mask_state::ALL_VALID, stream, mr); @@ -304,7 +306,7 @@ std::unique_ptr make_fixed_width_column_for_output(column_view const& lh binary_operator op, data_type output_type, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { if (binops::is_null_dependent(op)) { return make_fixed_width_column(output_type, rhs.size(), mask_state::ALL_VALID, stream, mr); @@ -320,7 +322,7 @@ std::unique_ptr binary_operation(scalar const& lhs, binary_operator op, data_type output_type, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + 
rmm::device_async_resource_ref mr) { return binops::compiled::binary_operation( lhs, rhs, op, output_type, stream, mr); @@ -330,7 +332,7 @@ std::unique_ptr binary_operation(column_view const& lhs, binary_operator op, data_type output_type, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return binops::compiled::binary_operation( lhs, rhs, op, output_type, stream, mr); @@ -340,7 +342,7 @@ std::unique_ptr binary_operation(column_view const& lhs, binary_operator op, data_type output_type, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return binops::compiled::binary_operation( lhs, rhs, op, output_type, stream, mr); @@ -351,7 +353,7 @@ std::unique_ptr binary_operation(column_view const& lhs, std::string const& ptx, data_type output_type, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { // Check for datatype auto is_type_supported_ptx = [](data_type type) -> bool { @@ -405,7 +407,7 @@ std::unique_ptr binary_operation(scalar const& lhs, binary_operator op, data_type output_type, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::binary_operation(lhs, rhs, op, output_type, stream, mr); @@ -415,7 +417,7 @@ std::unique_ptr binary_operation(column_view const& lhs, binary_operator op, data_type output_type, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::binary_operation(lhs, rhs, op, output_type, stream, mr); @@ -425,7 +427,7 @@ std::unique_ptr binary_operation(column_view const& lhs, binary_operator op, data_type output_type, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::binary_operation(lhs, rhs, op, output_type, 
stream, mr); @@ -436,7 +438,7 @@ std::unique_ptr binary_operation(column_view const& lhs, std::string const& ptx, data_type output_type, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::binary_operation(lhs, rhs, ptx, output_type, stream, mr); diff --git a/cpp/src/binaryop/compiled/NullNotEquals.cu b/cpp/src/binaryop/compiled/NullNotEquals.cu new file mode 100644 index 00000000000..34f73cca48a --- /dev/null +++ b/cpp/src/binaryop/compiled/NullNotEquals.cu @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "binary_ops.cuh" + +namespace cudf::binops::compiled { +template void apply_binary_op(mutable_column_view&, + column_view const&, + column_view const&, + bool is_lhs_scalar, + bool is_rhs_scalar, + rmm::cuda_stream_view); +} // namespace cudf::binops::compiled diff --git a/cpp/src/binaryop/compiled/binary_ops.cu b/cpp/src/binaryop/compiled/binary_ops.cu index 1429635b803..ba0253ec853 100644 --- a/cpp/src/binaryop/compiled/binary_ops.cu +++ b/cpp/src/binaryop/compiled/binary_ops.cu @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -50,7 +51,7 @@ struct scalar_as_column_view { template ())> return_type operator()(scalar const& s, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource*) + rmm::device_async_resource_ref) { auto& h_scalar_type_view = static_cast&>(const_cast(s)); auto col_v = column_view(s.type(), @@ -61,7 +62,7 @@ struct scalar_as_column_view { return std::pair{col_v, std::unique_ptr(nullptr)}; } template ())> - return_type operator()(scalar const&, rmm::cuda_stream_view, rmm::mr::device_memory_resource*) + return_type operator()(scalar const&, rmm::cuda_stream_view, rmm::device_async_resource_ref) { CUDF_FAIL("Unsupported type"); } @@ -69,7 +70,7 @@ struct scalar_as_column_view { // specialization for cudf::string_view template <> scalar_as_column_view::return_type scalar_as_column_view::operator()( - scalar const& s, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) + scalar const& s, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { using T = cudf::string_view; auto& h_scalar_type_view = static_cast&>(const_cast(s)); @@ -96,7 +97,7 @@ scalar_as_column_view::return_type scalar_as_column_view::operator() scalar_as_column_view::return_type scalar_as_column_view::operator()( - scalar const& s, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) + scalar const& s, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { auto col = 
make_column_from_scalar(s, 1, stream, mr); return std::pair{col->view(), std::move(col)}; @@ -114,7 +115,7 @@ scalar_as_column_view::return_type scalar_as_column_view::operator() string_null_min_max(scalar const& lhs, binary_operator op, data_type output_type, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { // hard-coded to only work with cudf::string_view so we don't explode compile times CUDF_EXPECTS(lhs.type().id() == cudf::type_id::STRING, "Invalid/Unsupported lhs datatype"); @@ -280,7 +281,7 @@ std::unique_ptr string_null_min_max(column_view const& lhs, binary_operator op, data_type output_type, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { // hard-coded to only work with cudf::string_view so we don't explode compile times CUDF_EXPECTS(lhs.type().id() == cudf::type_id::STRING, "Invalid/Unsupported lhs datatype"); @@ -297,7 +298,7 @@ std::unique_ptr string_null_min_max(column_view const& lhs, binary_operator op, data_type output_type, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { // hard-coded to only work with cudf::string_view so we don't explode compile times CUDF_EXPECTS(lhs.type().id() == cudf::type_id::STRING, "Invalid/Unsupported lhs datatype"); @@ -355,6 +356,7 @@ case binary_operator::LOG_BASE: apply_binary_op(out, l case binary_operator::ATAN2: apply_binary_op(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break; case binary_operator::PMOD: apply_binary_op(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break; case binary_operator::NULL_EQUALS: apply_binary_op(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break; +case binary_operator::NULL_NOT_EQUALS: apply_binary_op(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break; case binary_operator::NULL_MAX: apply_binary_op(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break; case 
binary_operator::NULL_MIN: apply_binary_op(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break; case binary_operator::NULL_LOGICAL_AND: apply_binary_op(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break; @@ -411,8 +413,9 @@ void apply_sorting_struct_binary_op(mutable_column_view& out, // Struct child column type and structure mismatches are caught within the two_table_comparator switch (op) { case binary_operator::EQUAL: [[fallthrough]]; + case binary_operator::NOT_EQUAL: [[fallthrough]]; case binary_operator::NULL_EQUALS: [[fallthrough]]; - case binary_operator::NOT_EQUAL: + case binary_operator::NULL_NOT_EQUALS: detail::apply_struct_equality_op( out, lhs, diff --git a/cpp/src/binaryop/compiled/binary_ops.cuh b/cpp/src/binaryop/compiled/binary_ops.cuh index d605c877d3f..5177e7d4bda 100644 --- a/cpp/src/binaryop/compiled/binary_ops.cuh +++ b/cpp/src/binaryop/compiled/binary_ops.cuh @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -69,13 +70,17 @@ struct typed_casted_writer { if constexpr (mutable_column_device_view::has_element_accessor() and std::is_constructible_v) { col.element(i) = static_cast(val); - } else if constexpr (is_fixed_point() and - (is_fixed_point() or - std::is_constructible_v)) { - if constexpr (is_fixed_point()) - col.data()[i] = val.rescaled(numeric::scale_type{col.type().scale()}).value(); - else - col.data()[i] = Element{val, numeric::scale_type{col.type().scale()}}.value(); + } else if constexpr (is_fixed_point()) { + auto const scale = numeric::scale_type{col.type().scale()}; + if constexpr (is_fixed_point()) { + col.data()[i] = val.rescaled(scale).value(); + } else if constexpr (cuda::std::is_constructible_v) { + col.data()[i] = Element{val, scale}.value(); + } else if constexpr (cuda::std::is_floating_point_v) { + col.data()[i] = convert_floating_to_fixed(val, scale).value(); + } + } else if constexpr (cuda::std::is_floating_point_v and is_fixed_point()) { + col.data()[i] = 
convert_fixed_to_floating(val); } } }; @@ -104,6 +109,7 @@ struct ops_wrapper { type_dispatcher(rhs.type(), type_casted_accessor{}, i, rhs, is_rhs_scalar); auto result = [&]() { if constexpr (std::is_same_v or + std::is_same_v or std::is_same_v or std::is_same_v or std::is_same_v or diff --git a/cpp/src/binaryop/compiled/binary_ops.hpp b/cpp/src/binaryop/compiled/binary_ops.hpp index 47fd50c5d97..ceeba9cf817 100644 --- a/cpp/src/binaryop/compiled/binary_ops.hpp +++ b/cpp/src/binaryop/compiled/binary_ops.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2023, NVIDIA CORPORATION. + * Copyright (c) 2018-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ #include #include +#include #include @@ -37,21 +38,21 @@ std::unique_ptr string_null_min_max(scalar const& lhs, binary_operator op, data_type output_type, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); std::unique_ptr string_null_min_max(column_view const& lhs, scalar const& rhs, binary_operator op, data_type output_type, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); std::unique_ptr string_null_min_max(column_view const& lhs, column_view const& rhs, binary_operator op, data_type output_type, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Performs a binary operation between a string scalar and a string @@ -77,7 +78,7 @@ std::unique_ptr binary_operation(scalar const& lhs, binary_operator op, data_type output_type, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Performs a binary operation between a string column and a string @@ -103,7 +104,7 @@ std::unique_ptr binary_operation(column_view const& lhs, binary_operator op, data_type 
output_type, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Performs a binary operation between two string columns. @@ -128,7 +129,7 @@ std::unique_ptr binary_operation(column_view const& lhs, binary_operator op, data_type output_type, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); void binary_operation(mutable_column_view& out, scalar const& lhs, @@ -193,7 +194,7 @@ void apply_binary_op(mutable_column_view& out, * @brief Deploys single type or double type dispatcher that runs equality operation on each element * of @p lhs and @p rhs columns. * - * Comparison operators are EQUAL, NOT_EQUAL, NULL_EQUALS. + * Comparison operators are EQUAL, NOT_EQUAL, NULL_EQUALS, NULL_NOT_EQUALS. * @p out type is boolean. * * This template is instantiated for each binary operator. diff --git a/cpp/src/binaryop/compiled/operation.cuh b/cpp/src/binaryop/compiled/operation.cuh index 214803dc415..43b4bd232c4 100644 --- a/cpp/src/binaryop/compiled/operation.cuh +++ b/cpp/src/binaryop/compiled/operation.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -422,15 +422,26 @@ struct NullEquals { TypeLhs x, TypeRhs y, bool lhs_valid, bool rhs_valid, bool& output_valid) -> decltype(x == y) { output_valid = true; - if (!lhs_valid && !rhs_valid) return true; if (lhs_valid && rhs_valid) return x == y; - return false; + return !lhs_valid && !rhs_valid; } // To allow std::is_invocable_v = true template __device__ inline auto operator()(TypeLhs x, TypeRhs y) -> decltype(x == y); }; +struct NullNotEquals { + template + __device__ inline auto operator()( + TypeLhs x, TypeRhs y, bool lhs_valid, bool rhs_valid, bool& output_valid) -> decltype(x != y) + { + return !NullEquals{}(x, y, lhs_valid, rhs_valid, output_valid); + } + // To allow std::is_invocable_v = true + template + __device__ inline auto operator()(TypeLhs x, TypeRhs y) -> decltype(x != y); +}; + struct NullMax { template (out); case binary_operator::GREATER_EQUAL: return bool_op(out); case binary_operator::NULL_EQUALS: return bool_op(out); + case binary_operator::NULL_NOT_EQUALS: + return bool_op(out); case binary_operator::NULL_LOGICAL_AND: return bool_op(out); case binary_operator::NULL_LOGICAL_OR: diff --git a/cpp/src/bitmask/null_mask.cu b/cpp/src/bitmask/null_mask.cu index 806beeb4efe..d0faeea8336 100644 --- a/cpp/src/bitmask/null_mask.cu +++ b/cpp/src/bitmask/null_mask.cu @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -79,7 +80,7 @@ namespace detail { rmm::device_buffer create_null_mask(size_type size, mask_state state, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { size_type mask_size{0}; @@ -157,7 +158,7 @@ void set_null_mask(bitmask_type* bitmask, rmm::device_buffer create_null_mask(size_type size, mask_state state, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return detail::create_null_mask(size, state, stream, mr); } @@ -211,7 +212,7 @@ rmm::device_buffer copy_bitmask(bitmask_type const* mask, size_type 
begin_bit, size_type end_bit, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); CUDF_EXPECTS(begin_bit >= 0, "Invalid range."); @@ -235,7 +236,7 @@ rmm::device_buffer copy_bitmask(bitmask_type const* mask, // Create a bitmask from a column view rmm::device_buffer copy_bitmask(column_view const& view, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); rmm::device_buffer null_mask{0, stream, mr}; @@ -268,8 +269,8 @@ CUDF_KERNEL void count_set_bits_kernel(bitmask_type const* bitmask, auto const first_word_index{word_index(first_bit_index)}; auto const last_word_index{word_index(last_bit_index)}; - thread_index_type const tid = grid_1d::global_thread_id(); - thread_index_type const stride = grid_1d::grid_stride(); + thread_index_type const tid = grid_1d::global_thread_id(); + thread_index_type const stride = grid_1d::grid_stride(); thread_index_type thread_word_index = tid + first_word_index; size_type thread_count{0}; @@ -432,7 +433,7 @@ std::pair bitmask_and(host_span begin_bits, size_type mask_size, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return bitmask_binop( [] __device__(bitmask_type left, bitmask_type right) { return left & right; }, @@ -446,7 +447,7 @@ std::pair bitmask_and(host_span bitmask_and(table_view const& view, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); rmm::device_buffer null_mask{0, stream, mr}; @@ -479,7 +480,7 @@ std::pair bitmask_and(table_view const& view, // Returns the bitwise OR of the null masks of all columns in the table view std::pair bitmask_or(table_view const& view, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); rmm::device_buffer null_mask{0, stream, mr}; @@ 
-512,7 +513,7 @@ std::pair bitmask_or(table_view const& view, void set_all_valid_null_masks(column_view const& input, column& output, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { if (input.nullable()) { auto mask = detail::create_null_mask(output.size(), mask_state::ALL_VALID, stream, mr); @@ -531,7 +532,7 @@ rmm::device_buffer copy_bitmask(bitmask_type const* mask, size_type begin_bit, size_type end_bit, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::copy_bitmask(mask, begin_bit, end_bit, stream, mr); @@ -540,7 +541,7 @@ rmm::device_buffer copy_bitmask(bitmask_type const* mask, // Create a bitmask from a column view rmm::device_buffer copy_bitmask(column_view const& view, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::copy_bitmask(view, stream, mr); @@ -548,7 +549,7 @@ rmm::device_buffer copy_bitmask(column_view const& view, std::pair bitmask_and(table_view const& view, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::bitmask_and(view, stream, mr); @@ -556,7 +557,7 @@ std::pair bitmask_and(table_view const& view, std::pair bitmask_or(table_view const& view, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::bitmask_or(view, stream, mr); diff --git a/cpp/src/column/column.cu b/cpp/src/column/column.cu index d4a8fff69e2..90f719b9516 100644 --- a/cpp/src/column/column.cu +++ b/cpp/src/column/column.cu @@ -35,6 +35,7 @@ #include #include +#include #include @@ -46,9 +47,7 @@ namespace cudf { // Copy ctor w/ optional stream/mr -column::column(column const& other, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) 
+column::column(column const& other, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) : _type{other._type}, _size{other._size}, _data{other._data, stream, mr}, @@ -160,7 +159,7 @@ namespace { struct create_column_from_view { cudf::column_view view; rmm::cuda_stream_view stream{cudf::get_default_stream()}; - rmm::mr::device_memory_resource* mr; + rmm::device_async_resource_ref mr; template >* = nullptr> @@ -254,7 +253,7 @@ struct create_column_from_view { } // anonymous namespace // Copy from a view -column::column(column_view view, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) +column::column(column_view view, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) : // Move is needed here because the dereference operator of unique_ptr returns // an lvalue reference, which would otherwise dispatch to the copy constructor column{std::move(*type_dispatcher(view.type(), create_column_from_view{view, stream, mr}))} diff --git a/cpp/src/column/column_factories.cpp b/cpp/src/column/column_factories.cpp index d8da6a95aa4..e40056fc8a1 100644 --- a/cpp/src/column/column_factories.cpp +++ b/cpp/src/column/column_factories.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -25,6 +25,8 @@ #include #include +#include + #include namespace cudf { @@ -75,7 +77,7 @@ std::unique_ptr make_numeric_column(data_type type, size_type size, mask_state state, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); CUDF_EXPECTS(is_numeric(type), "Invalid, non-numeric type."); @@ -95,7 +97,7 @@ std::unique_ptr make_fixed_point_column(data_type type, size_type size, mask_state state, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); CUDF_EXPECTS(is_fixed_point(type), "Invalid, non-fixed_point type."); @@ -115,7 +117,7 @@ std::unique_ptr make_timestamp_column(data_type type, size_type size, mask_state state, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); CUDF_EXPECTS(is_timestamp(type), "Invalid, non-timestamp type."); @@ -135,7 +137,7 @@ std::unique_ptr make_duration_column(data_type type, size_type size, mask_state state, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); CUDF_EXPECTS(is_duration(type), "Invalid, non-duration type."); @@ -155,7 +157,7 @@ std::unique_ptr make_fixed_width_column(data_type type, size_type size, mask_state state, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); CUDF_EXPECTS(is_fixed_width(type), "Invalid, non-fixed-width type."); @@ -171,7 +173,7 @@ std::unique_ptr make_fixed_width_column(data_type type, std::unique_ptr make_dictionary_from_scalar(scalar const& s, size_type size, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { if (size == 0) return make_empty_column(type_id::DICTIONARY32); CUDF_EXPECTS(size >= 0, "Column size cannot be negative."); diff --git 
a/cpp/src/column/column_factories.cu b/cpp/src/column/column_factories.cu index 0e65a131e67..bad20d6817c 100644 --- a/cpp/src/column/column_factories.cu +++ b/cpp/src/column/column_factories.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,6 +22,8 @@ #include #include +#include + #include namespace cudf { @@ -33,7 +35,7 @@ struct column_from_scalar_dispatch { std::unique_ptr operator()(scalar const& value, size_type size, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const + rmm::device_async_resource_ref mr) const { if (size == 0) return make_empty_column(value.type()); if (!value.is_valid(stream)) @@ -51,7 +53,7 @@ std::unique_ptr column_from_scalar_dispatch::operator() column_from_scalar_dispatch::operator() std::unique_ptr column_from_scalar_dispatch::operator()( - scalar const&, size_type, rmm::cuda_stream_view, rmm::mr::device_memory_resource*) const + scalar const&, size_type, rmm::cuda_stream_view, rmm::device_async_resource_ref) const { CUDF_FAIL("dictionary not supported when creating from scalar"); } @@ -78,7 +80,7 @@ std::unique_ptr column_from_scalar_dispatch::operator()(&value); return lists::detail::make_lists_column_from_scalar(*lv, size, stream, mr); @@ -89,7 +91,7 @@ std::unique_ptr column_from_scalar_dispatch::operator() const&>(value); @@ -113,7 +115,7 @@ std::unique_ptr column_from_scalar_dispatch::operator() make_column_from_scalar(scalar const& s, size_type size, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return type_dispatcher(s.type(), column_from_scalar_dispatch{}, s, size, stream, mr); } diff --git a/cpp/src/copying/concatenate.cu b/cpp/src/copying/concatenate.cu index b1d850e0b27..47e74a5cb48 100644 --- a/cpp/src/copying/concatenate.cu +++ 
b/cpp/src/copying/concatenate.cu @@ -30,9 +30,12 @@ #include #include #include +#include +#include #include #include +#include #include #include @@ -118,8 +121,8 @@ CUDF_KERNEL void concatenate_masks_kernel(column_device_view const* views, size_type number_of_mask_bits, size_type* out_valid_count) { - auto tidx = cudf::detail::grid_1d::global_thread_id(); - auto const stride = cudf::detail::grid_1d::grid_stride(); + auto tidx = cudf::detail::grid_1d::global_thread_id(); + auto const stride = cudf::detail::grid_1d::grid_stride(); auto active_mask = __ballot_sync(0xFFFF'FFFFu, tidx < number_of_mask_bits); size_type warp_valid_count = 0; @@ -241,7 +244,7 @@ template std::unique_ptr fused_concatenate(host_span views, bool const has_nulls, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { using mask_policy = cudf::mask_allocation_policy; @@ -288,7 +291,7 @@ template std::unique_ptr for_each_concatenate(host_span views, bool const has_nulls, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { size_type const total_element_count = std::accumulate(views.begin(), views.end(), 0, [](auto accumulator, auto const& v) { @@ -321,7 +324,7 @@ std::unique_ptr for_each_concatenate(host_span views, struct concatenate_dispatch { host_span views; rmm::cuda_stream_view stream; - rmm::mr::device_memory_resource* mr; + rmm::device_async_resource_ref mr; // fixed width template @@ -460,12 +463,9 @@ void traverse_children::operator()(host_span */ void bounds_and_type_check(host_span cols, rmm::cuda_stream_view stream) { - CUDF_EXPECTS(std::all_of(cols.begin(), - cols.end(), - [expected_type = cols.front().type()](auto const& c) { - return c.type() == expected_type; - }), - "Type mismatch in columns to concatenate."); + CUDF_EXPECTS(cudf::all_have_same_types(cols.begin(), cols.end()), + "Type mismatch in columns to concatenate.", + cudf::data_type_error); // total size of all 
concatenated rows size_t const total_row_count = @@ -485,7 +485,7 @@ void bounds_and_type_check(host_span cols, rmm::cuda_stream_v // Concatenates the elements from a vector of column_views std::unique_ptr concatenate(host_span columns_to_concat, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(not columns_to_concat.empty(), "Unexpected empty list of columns to concatenate."); @@ -504,7 +504,7 @@ std::unique_ptr concatenate(host_span columns_to_conc std::unique_ptr
concatenate(host_span tables_to_concat, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { if (tables_to_concat.empty()) { return std::make_unique
(); } @@ -533,7 +533,7 @@ std::unique_ptr
concatenate(host_span tables_to_concat, rmm::device_buffer concatenate_masks(host_span views, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { bool const has_nulls = std::any_of(views.begin(), views.end(), [](column_view const col) { return col.has_nulls(); }); @@ -558,7 +558,7 @@ rmm::device_buffer concatenate_masks(host_span views, rmm::device_buffer concatenate_masks(host_span views, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::concatenate_masks(views, stream, mr); @@ -567,7 +567,7 @@ rmm::device_buffer concatenate_masks(host_span views, // Concatenates the elements from a vector of column_views std::unique_ptr concatenate(host_span columns_to_concat, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::concatenate(columns_to_concat, stream, mr); @@ -575,7 +575,7 @@ std::unique_ptr concatenate(host_span columns_to_conc std::unique_ptr
concatenate(host_span tables_to_concat, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::concatenate(tables_to_concat, stream, mr); diff --git a/cpp/src/copying/contiguous_split.cu b/cpp/src/copying/contiguous_split.cu index 23224d3225d..37db2c74790 100644 --- a/cpp/src/copying/contiguous_split.cu +++ b/cpp/src/copying/contiguous_split.cu @@ -31,6 +31,7 @@ #include #include +#include #include #include @@ -48,6 +49,7 @@ #include #include +#include #include namespace cudf { @@ -988,7 +990,7 @@ struct packed_split_indices_and_src_buf_info { std::size_t num_partitions, cudf::size_type num_src_bufs, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* temp_mr) + rmm::device_async_resource_ref temp_mr) : indices_size( cudf::util::round_up_safe((num_partitions + 1) * sizeof(size_type), split_align)), src_buf_info_size( @@ -1046,7 +1048,7 @@ struct packed_partition_buf_size_and_dst_buf_info { packed_partition_buf_size_and_dst_buf_info(std::size_t num_partitions, std::size_t num_bufs, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* temp_mr) + rmm::device_async_resource_ref temp_mr) : stream(stream), buf_sizes_size{cudf::util::round_up_safe(num_partitions * sizeof(std::size_t), split_align)}, dst_buf_info_size{cudf::util::round_up_safe(num_bufs * sizeof(dst_buf_info), split_align)}, @@ -1097,7 +1099,7 @@ struct packed_src_and_dst_pointers { std::size_t num_partitions, cudf::size_type num_src_bufs, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* temp_mr) + rmm::device_async_resource_ref temp_mr) : stream(stream), src_bufs_size{cudf::util::round_up_safe(num_src_bufs * sizeof(uint8_t*), split_align)}, dst_bufs_size{cudf::util::round_up_safe(num_partitions * sizeof(uint8_t*), split_align)}, @@ -1139,7 +1141,7 @@ struct packed_src_and_dst_pointers { /** * @brief Create an instance of `packed_src_and_dst_pointers` populating destination - * 
partitition buffers (if any) from `out_buffers`. In the chunked_pack case + * partition buffers (if any) from `out_buffers`. In the chunked_pack case * `out_buffers` is empty, and the destination pointer is provided separately * to the `copy_partitions` kernel. * @@ -1158,7 +1160,7 @@ std::unique_ptr setup_src_and_dst_pointers( cudf::size_type num_src_bufs, std::vector& out_buffers, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* temp_mr) + rmm::device_async_resource_ref temp_mr) { auto src_and_dst_pointers = std::make_unique( input, num_partitions, num_src_bufs, stream, temp_mr); @@ -1195,7 +1197,7 @@ std::unique_ptr compute_splits( cudf::size_type num_src_bufs, std::size_t num_bufs, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* temp_mr) + rmm::device_async_resource_ref temp_mr) { auto partition_buf_size_and_dst_buf_info = std::make_unique( @@ -1366,7 +1368,7 @@ struct chunk_iteration_state { std::size_t num_partitions, std::size_t user_buffer_size, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* temp_mr); + rmm::device_async_resource_ref temp_mr); /** * @brief As of the time of the call, return the starting 1MB batch index, and the @@ -1426,7 +1428,7 @@ std::unique_ptr chunk_iteration_state::create( std::size_t num_partitions, std::size_t user_buffer_size, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* temp_mr) + rmm::device_async_resource_ref temp_mr) { rmm::device_uvector d_batch_offsets(num_bufs + 1, stream, temp_mr); @@ -1646,7 +1648,7 @@ std::unique_ptr compute_batches(int num_bufs, std::size_t num_partitions, std::size_t user_buffer_size, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* temp_mr) + rmm::device_async_resource_ref temp_mr) { // Since we parallelize at one block per copy, performance is vulnerable to situations where we // have small numbers of copies to do (a combination of small numbers of splits and/or columns), @@ -1769,8 +1771,8 @@ struct 
contiguous_split_state { contiguous_split_state(cudf::table_view const& input, std::size_t user_buffer_size, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr, - rmm::mr::device_memory_resource* temp_mr) + std::optional mr, + rmm::device_async_resource_ref temp_mr) : contiguous_split_state(input, {}, user_buffer_size, stream, mr, temp_mr) { } @@ -1778,8 +1780,8 @@ struct contiguous_split_state { contiguous_split_state(cudf::table_view const& input, std::vector const& splits, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr, - rmm::mr::device_memory_resource* temp_mr) + std::optional mr, + rmm::device_async_resource_ref temp_mr) : contiguous_split_state(input, splits, 0, stream, mr, temp_mr) { } @@ -1897,8 +1899,8 @@ struct contiguous_split_state { std::vector const& splits, std::size_t user_buffer_size, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr, - rmm::mr::device_memory_resource* temp_mr) + std::optional mr, + rmm::device_async_resource_ref temp_mr) : input(input), user_buffer_size(user_buffer_size), stream(stream), @@ -1936,7 +1938,8 @@ struct contiguous_split_state { std::transform(h_buf_sizes, h_buf_sizes + num_partitions, std::back_inserter(out_buffers), - [stream = stream, mr = mr](std::size_t bytes) { + [stream = stream, + mr = mr.value_or(rmm::mr::get_current_device_resource())](std::size_t bytes) { return rmm::device_buffer{bytes, stream, mr}; }); } @@ -2014,11 +2017,11 @@ struct contiguous_split_state { cudf::table_view const input; ///< The input table_view to operate on std::size_t const user_buffer_size; ///< The size of the user buffer for the chunked_pack case rmm::cuda_stream_view const stream; - rmm::mr::device_memory_resource* const mr; ///< The memory resource for any data returned + std::optional mr; ///< The resource for any data returned // this resource defaults to `mr` for the contiguous_split case, but it can be useful for the // `chunked_pack` case to allocate scratch/temp 
memory in a pool - rmm::mr::device_memory_resource* const temp_mr; ///< The memory resource for scratch/temp space + rmm::device_async_resource_ref const temp_mr; ///< The memory resource for scratch/temp space // whether the table was empty to begin with (0 rows or 0 columns) and should be metadata-only bool const is_empty; ///< True if the source table has 0 rows or 0 columns @@ -2062,7 +2065,7 @@ struct contiguous_split_state { std::vector contiguous_split(cudf::table_view const& input, std::vector const& splits, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { // `temp_mr` is the same as `mr` for contiguous_split as it allocates all // of its memory from the default memory resource in cuDF @@ -2075,7 +2078,7 @@ std::vector contiguous_split(cudf::table_view const& input, std::vector contiguous_split(cudf::table_view const& input, std::vector const& splits, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::contiguous_split(input, splits, cudf::get_default_stream(), mr); @@ -2083,14 +2086,14 @@ std::vector contiguous_split(cudf::table_view const& input, chunked_pack::chunked_pack(cudf::table_view const& input, std::size_t user_buffer_size, - rmm::mr::device_memory_resource* temp_mr) + rmm::device_async_resource_ref temp_mr) { CUDF_EXPECTS(user_buffer_size >= desired_batch_size, "The output buffer size must be at least 1MB in size"); - // We pass `nullptr` for the first `mr` in `contiguous_split_state` to indicate + // We pass `std::nullopt` for the first `mr` in `contiguous_split_state` to indicate // that it does not allocate any user-bound data for the `chunked_pack` case. 
state = std::make_unique( - input, user_buffer_size, cudf::get_default_stream(), nullptr, temp_mr); + input, user_buffer_size, cudf::get_default_stream(), std::nullopt, temp_mr); } // required for the unique_ptr to work with a incomplete type (contiguous_split_state) @@ -2115,7 +2118,7 @@ std::unique_ptr> chunked_pack::build_metadata() const std::unique_ptr chunked_pack::create(cudf::table_view const& input, std::size_t user_buffer_size, - rmm::mr::device_memory_resource* temp_mr) + rmm::device_async_resource_ref temp_mr) { return std::make_unique(input, user_buffer_size, temp_mr); } diff --git a/cpp/src/copying/copy.cpp b/cpp/src/copying/copy.cpp index 490a1ccb254..98ee6aa8f68 100644 --- a/cpp/src/copying/copy.cpp +++ b/cpp/src/copying/copy.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,6 +26,7 @@ #include #include +#include #include @@ -119,10 +120,11 @@ std::unique_ptr allocate_like(column_view const& input, size_type size, mask_allocation_policy mask_alloc, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - CUDF_EXPECTS(is_fixed_width(input.type()), "Expects only fixed-width type column"); + CUDF_EXPECTS( + is_fixed_width(input.type()), "Expects only fixed-width type column", cudf::data_type_error); mask_state allocate_mask = should_allocate_mask(mask_alloc, input.nullable()); return std::make_unique(input.type(), @@ -176,7 +178,7 @@ std::unique_ptr
empty_like(table_view const& input_table) std::unique_ptr allocate_like(column_view const& input, mask_allocation_policy mask_alloc, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::allocate_like(input, input.size(), mask_alloc, stream, mr); @@ -186,7 +188,7 @@ std::unique_ptr allocate_like(column_view const& input, size_type size, mask_allocation_policy mask_alloc, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::allocate_like(input, size, mask_alloc, stream, mr); diff --git a/cpp/src/copying/copy.cu b/cpp/src/copying/copy.cu index 8299c211fad..e86a1f8d6f1 100644 --- a/cpp/src/copying/copy.cu +++ b/cpp/src/copying/copy.cu @@ -26,10 +26,12 @@ #include #include #include +#include #include #include #include +#include #include #include @@ -77,7 +79,7 @@ struct copy_if_else_functor_impl bool right_nullable, Filter filter, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto p_lhs = get_iterable_device_view{}(lhs_h, stream); auto p_rhs = get_iterable_device_view{}(rhs_h, stream); @@ -110,7 +112,7 @@ struct copy_if_else_functor_impl { bool right_nullable, Filter filter, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { using T = string_view; @@ -162,7 +164,7 @@ std::unique_ptr scatter_gather_based_if_else(cudf::column_view const& lh size_type size, Filter is_left, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto gather_map = rmm::device_uvector{static_cast(size), stream}; auto const gather_map_end = thrust::copy_if(rmm::exec_policy(stream), @@ -196,7 +198,7 @@ std::unique_ptr scatter_gather_based_if_else(cudf::scalar const& lhs, size_type size, Filter is_left, rmm::cuda_stream_view stream, - 
rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto scatter_map = rmm::device_uvector{static_cast(size), stream}; auto const scatter_map_end = thrust::copy_if(rmm::exec_policy(stream), @@ -225,7 +227,7 @@ std::unique_ptr scatter_gather_based_if_else(cudf::column_view const& lh size_type size, Filter is_left, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return scatter_gather_based_if_else(rhs, lhs, size, logical_not{is_left}, stream, mr); } @@ -236,7 +238,7 @@ std::unique_ptr scatter_gather_based_if_else(cudf::scalar const& lhs, size_type size, Filter is_left, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto rhs_col = cudf::make_column_from_scalar(rhs, size, stream, mr); return scatter_gather_based_if_else(lhs, rhs_col->view(), size, is_left, stream, mr); @@ -252,7 +254,7 @@ struct copy_if_else_functor_impl { bool, Filter filter, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return scatter_gather_based_if_else(lhs, rhs, size, filter, stream, mr); } @@ -268,7 +270,7 @@ struct copy_if_else_functor_impl { bool, Filter filter, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return scatter_gather_based_if_else(lhs, rhs, size, filter, stream, mr); } @@ -284,7 +286,7 @@ struct copy_if_else_functor_impl { bool, Filter filter, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return scatter_gather_based_if_else(lhs, rhs, size, filter, stream, mr); } @@ -303,7 +305,7 @@ struct copy_if_else_functor { bool right_nullable, Filter filter, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { copy_if_else_functor_impl copier{}; return copier(lhs, rhs, size, left_nullable, right_nullable, filter, 
stream, mr); @@ -318,7 +320,7 @@ std::unique_ptr copy_if_else(Left const& lhs, bool right_nullable, column_view const& boolean_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(boolean_mask.type() == data_type(type_id::BOOL8), "Boolean mask column must be of type type_id::BOOL8", @@ -356,14 +358,15 @@ std::unique_ptr copy_if_else(column_view const& lhs, column_view const& rhs, column_view const& boolean_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(boolean_mask.size() == lhs.size(), "Boolean mask column must be the same size as lhs and rhs columns", std::invalid_argument); - CUDF_EXPECTS(lhs.size() == rhs.size(), "Both columns must be of the size", std::invalid_argument); CUDF_EXPECTS( - lhs.type() == rhs.type(), "Both inputs must be of the same type", cudf::data_type_error); + lhs.size() == rhs.size(), "Both columns must be of the same size", std::invalid_argument); + CUDF_EXPECTS( + cudf::have_same_types(lhs, rhs), "Both inputs must be of the same type", cudf::data_type_error); return copy_if_else(lhs, rhs, lhs.has_nulls(), rhs.has_nulls(), boolean_mask, stream, mr); } @@ -372,16 +375,13 @@ std::unique_ptr copy_if_else(scalar const& lhs, column_view const& rhs, column_view const& boolean_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(boolean_mask.size() == rhs.size(), "Boolean mask column must be the same size as rhs column", std::invalid_argument); - - auto rhs_type = - cudf::is_dictionary(rhs.type()) ? 
cudf::dictionary_column_view(rhs).keys_type() : rhs.type(); CUDF_EXPECTS( - lhs.type() == rhs_type, "Both inputs must be of the same type", cudf::data_type_error); + cudf::have_same_types(rhs, lhs), "Both inputs must be of the same type", cudf::data_type_error); return copy_if_else(lhs, rhs, !lhs.is_valid(stream), rhs.has_nulls(), boolean_mask, stream, mr); } @@ -390,16 +390,13 @@ std::unique_ptr copy_if_else(column_view const& lhs, scalar const& rhs, column_view const& boolean_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(boolean_mask.size() == lhs.size(), "Boolean mask column must be the same size as lhs column", std::invalid_argument); - - auto lhs_type = - cudf::is_dictionary(lhs.type()) ? cudf::dictionary_column_view(lhs).keys_type() : lhs.type(); CUDF_EXPECTS( - lhs_type == rhs.type(), "Both inputs must be of the same type", cudf::data_type_error); + cudf::have_same_types(lhs, rhs), "Both inputs must be of the same type", cudf::data_type_error); return copy_if_else(lhs, rhs, lhs.has_nulls(), !rhs.is_valid(stream), boolean_mask, stream, mr); } @@ -408,10 +405,10 @@ std::unique_ptr copy_if_else(scalar const& lhs, scalar const& rhs, column_view const& boolean_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS( - lhs.type() == rhs.type(), "Both inputs must be of the same type", cudf::data_type_error); + cudf::have_same_types(lhs, rhs), "Both inputs must be of the same type", cudf::data_type_error); return copy_if_else( lhs, rhs, !lhs.is_valid(stream), !rhs.is_valid(stream), boolean_mask, stream, mr); } @@ -422,7 +419,7 @@ std::unique_ptr copy_if_else(column_view const& lhs, column_view const& rhs, column_view const& boolean_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::copy_if_else(lhs, rhs, boolean_mask, 
stream, mr); @@ -432,7 +429,7 @@ std::unique_ptr copy_if_else(scalar const& lhs, column_view const& rhs, column_view const& boolean_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::copy_if_else(lhs, rhs, boolean_mask, stream, mr); @@ -442,7 +439,7 @@ std::unique_ptr copy_if_else(column_view const& lhs, scalar const& rhs, column_view const& boolean_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::copy_if_else(lhs, rhs, boolean_mask, stream, mr); @@ -452,7 +449,7 @@ std::unique_ptr copy_if_else(scalar const& lhs, scalar const& rhs, column_view const& boolean_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::copy_if_else(lhs, rhs, boolean_mask, stream, mr); diff --git a/cpp/src/copying/copy_range.cu b/cpp/src/copying/copy_range.cu index 038646d8cf4..dd18f99a3c8 100644 --- a/cpp/src/copying/copy_range.cu +++ b/cpp/src/copying/copy_range.cu @@ -32,8 +32,10 @@ #include #include #include +#include #include +#include #include @@ -98,7 +100,7 @@ struct out_of_place_copy_range_dispatch { cudf::size_type source_end, cudf::size_type target_begin, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) { auto p_ret = std::make_unique(target, stream, mr); if ((!p_ret->nullable()) && source.has_nulls(source_begin, source_end)) { @@ -119,7 +121,7 @@ struct out_of_place_copy_range_dispatch { std::enable_if_t(), std::unique_ptr> operator()(Args...) 
{ - CUDF_FAIL("Unsupported type for out of place copy."); + CUDF_FAIL("Unsupported type for out of place copy.", cudf::data_type_error); } }; @@ -129,7 +131,7 @@ std::unique_ptr out_of_place_copy_range_dispatch::operator() out_of_place_copy_range_dispatch::operator() copy_range(column_view const& source, size_type source_end, size_type target_begin, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS((source_begin >= 0) && (source_end <= source.size()) && (source_begin <= source_end) && (target_begin >= 0) && (target_begin <= target.size() - (source_end - source_begin)), "Range is out of bounds.", std::out_of_range); - CUDF_EXPECTS(target.type() == source.type(), "Data type mismatch.", cudf::data_type_error); + CUDF_EXPECTS(cudf::have_same_types(target, source), "Data type mismatch.", cudf::data_type_error); return cudf::type_dispatcher( target.type(), @@ -270,7 +273,7 @@ std::unique_ptr copy_range(column_view const& source, size_type source_end, size_type target_begin, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::copy_range(source, target, source_begin, source_end, target_begin, stream, mr); diff --git a/cpp/src/copying/gather.cu b/cpp/src/copying/gather.cu index 78748e5a00b..5eb039419df 100644 --- a/cpp/src/copying/gather.cu +++ b/cpp/src/copying/gather.cu @@ -25,6 +25,7 @@ #include #include +#include #include #include @@ -39,7 +40,7 @@ std::unique_ptr
gather(table_view const& source_table, out_of_bounds_policy bounds_policy, negative_index_policy neg_indices, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(not gather_map.has_nulls(), "gather_map contains nulls", std::invalid_argument); @@ -66,7 +67,7 @@ std::unique_ptr
gather(table_view const& source_table, out_of_bounds_policy bounds_policy, negative_index_policy neg_indices, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(gather_map.size() <= static_cast(std::numeric_limits::max()), "gather map size exceeds the column size limit", @@ -85,7 +86,7 @@ std::unique_ptr
gather(table_view const& source_table, column_view const& gather_map, out_of_bounds_policy bounds_policy, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); diff --git a/cpp/src/copying/get_element.cu b/cpp/src/copying/get_element.cu index 2e804415439..b8860da479c 100644 --- a/cpp/src/copying/get_element.cu +++ b/cpp/src/copying/get_element.cu @@ -29,6 +29,7 @@ #include #include +#include #include @@ -42,7 +43,7 @@ struct get_element_functor { std::unique_ptr operator()(column_view const& input, size_type index, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto s = make_fixed_width_scalar(data_type(type_to_id()), stream, mr); @@ -65,7 +66,7 @@ struct get_element_functor { std::unique_ptr operator()(column_view const& input, size_type index, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto device_col = column_device_view::create(input, stream); @@ -89,7 +90,7 @@ struct get_element_functor { std::unique_ptr operator()(column_view const& input, size_type index, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto dict_view = dictionary_column_view(input); auto indices_iter = detail::indexalator_factory::make_input_iterator(dict_view.indices()); @@ -124,7 +125,7 @@ struct get_element_functor { std::unique_ptr operator()(column_view const& input, size_type index, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { bool valid = is_element_valid_sync(input, index, stream); auto const child_col_idx = lists_column_view::child_column_index; @@ -148,7 +149,7 @@ struct get_element_functor { std::unique_ptr operator()(column_view const& input, size_type index, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) 
{ using Type = typename T::rep; @@ -178,7 +179,7 @@ struct get_element_functor { std::unique_ptr operator()(column_view const& input, size_type index, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { bool valid = is_element_valid_sync(input, index, stream); auto row_contents = @@ -193,7 +194,7 @@ struct get_element_functor { std::unique_ptr get_element(column_view const& input, size_type index, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(index >= 0 and index < input.size(), "Index out of bounds", std::out_of_range); return type_dispatcher(input.type(), get_element_functor{}, input, index, stream, mr); @@ -204,7 +205,7 @@ std::unique_ptr get_element(column_view const& input, std::unique_ptr get_element(column_view const& input, size_type index, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::get_element(input, index, stream, mr); diff --git a/cpp/src/copying/pack.cpp b/cpp/src/copying/pack.cpp index e4de4a43b68..b0208a58896 100644 --- a/cpp/src/copying/pack.cpp +++ b/cpp/src/copying/pack.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -20,6 +20,7 @@ #include #include +#include namespace cudf { namespace detail { @@ -144,7 +145,7 @@ void build_column_metadata(metadata_builder& mb, */ packed_columns pack(cudf::table_view const& input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { // do a contiguous_split with no splits to get the memory for the table // arranged as we want it @@ -260,7 +261,7 @@ void metadata_builder::clear() { return impl->clear(); } /** * @copydoc cudf::pack */ -packed_columns pack(cudf::table_view const& input, rmm::mr::device_memory_resource* mr) +packed_columns pack(cudf::table_view const& input, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::pack(input, cudf::get_default_stream(), mr); diff --git a/cpp/src/copying/purge_nonempty_nulls.cu b/cpp/src/copying/purge_nonempty_nulls.cu index 620a03d8be5..d69d214a881 100644 --- a/cpp/src/copying/purge_nonempty_nulls.cu +++ b/cpp/src/copying/purge_nonempty_nulls.cu @@ -18,6 +18,8 @@ #include #include +#include + #include #include @@ -87,7 +89,7 @@ bool has_nonempty_nulls(cudf::column_view const& input, rmm::cuda_stream_view st std::unique_ptr purge_nonempty_nulls(column_view const& input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { // If not compound types (LIST/STRING/STRUCT/DICTIONARY) then just copy the input into output. 
if (!cudf::is_compound(input.type())) { return std::make_unique(input, stream, mr); } @@ -132,11 +134,11 @@ bool has_nonempty_nulls(column_view const& input, rmm::cuda_stream_view stream) } /** - * @copydoc cudf::purge_nonempty_nulls(column_view const&, rmm::mr::device_memory_resource*) + * @copydoc cudf::purge_nonempty_nulls(column_view const&, rmm::device_async_resource_ref) */ std::unique_ptr purge_nonempty_nulls(column_view const& input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return detail::purge_nonempty_nulls(input, stream, mr); } diff --git a/cpp/src/copying/reverse.cu b/cpp/src/copying/reverse.cu index 78d1b54882c..d3d42e35e26 100644 --- a/cpp/src/copying/reverse.cu +++ b/cpp/src/copying/reverse.cu @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -37,7 +38,7 @@ namespace cudf { namespace detail { std::unique_ptr
reverse(table_view const& source_table, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { size_type num_rows = source_table.num_rows(); auto elements = make_counting_transform_iterator( @@ -51,7 +52,7 @@ std::unique_ptr
reverse(table_view const& source_table, std::unique_ptr reverse(column_view const& source_column, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return std::move( cudf::detail::reverse(table_view({source_column}), stream, mr)->release().front()); @@ -60,7 +61,7 @@ std::unique_ptr reverse(column_view const& source_column, std::unique_ptr
reverse(table_view const& source_table, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::reverse(source_table, stream, mr); @@ -68,7 +69,7 @@ std::unique_ptr
reverse(table_view const& source_table, std::unique_ptr reverse(column_view const& source_column, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::reverse(source_column, stream, mr); diff --git a/cpp/src/copying/sample.cu b/cpp/src/copying/sample.cu index 0211f97deb3..f8e3a9a83e3 100644 --- a/cpp/src/copying/sample.cu +++ b/cpp/src/copying/sample.cu @@ -25,6 +25,7 @@ #include #include +#include #include #include @@ -40,7 +41,7 @@ std::unique_ptr
sample(table_view const& input, sample_with_replacement replacement, int64_t const seed, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(n >= 0, "expected number of samples should be non-negative"); auto const num_rows = input.num_rows(); @@ -92,7 +93,7 @@ std::unique_ptr
sample(table_view const& input, sample_with_replacement replacement, int64_t const seed, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::sample(input, n, replacement, seed, stream, mr); diff --git a/cpp/src/copying/scatter.cu b/cpp/src/copying/scatter.cu index 7931df4c9f0..993ee074f14 100644 --- a/cpp/src/copying/scatter.cu +++ b/cpp/src/copying/scatter.cu @@ -32,8 +32,11 @@ #include #include #include +#include +#include #include +#include #include #include @@ -77,7 +80,7 @@ void scatter_scalar_bitmask_inplace(std::reference_wrapper const& size_type num_scatter_rows, column& target, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { constexpr size_type block_size = 256; size_type const grid_size = grid_1d(num_scatter_rows, block_size).num_blocks; @@ -109,9 +112,9 @@ struct column_scalar_scatterer_impl { size_type scatter_rows, column_view const& target, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const + rmm::device_async_resource_ref mr) const { - CUDF_EXPECTS(source.get().type() == target.type(), + CUDF_EXPECTS(cudf::have_same_types(target, source.get()), "scalar and column types must match", cudf::data_type_error); @@ -142,9 +145,11 @@ struct column_scalar_scatterer_impl { size_type scatter_rows, column_view const& target, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const + rmm::device_async_resource_ref mr) const { - CUDF_EXPECTS(source.get().type() == target.type(), "scalar and column types must match"); + CUDF_EXPECTS(cudf::have_same_types(target, source.get()), + "scalar and column types must match", + cudf::data_type_error); auto const scalar_impl = static_cast(&source.get()); auto const source_view = string_view(scalar_impl->data(), scalar_impl->size()); @@ -164,8 +169,11 @@ struct column_scalar_scatterer_impl { size_type scatter_rows, column_view const& 
target, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const + rmm::device_async_resource_ref mr) const { + CUDF_EXPECTS(source.get().type() == target.type(), + "scalar and column types must match", + cudf::data_type_error); auto result = lists::detail::scatter(source, scatter_iter, scatter_iter + scatter_rows, target, stream, mr); @@ -181,7 +189,7 @@ struct column_scalar_scatterer_impl { size_type scatter_rows, column_view const& target, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const + rmm::device_async_resource_ref mr) const { auto dict_target = dictionary::detail::add_keys(dictionary_column_view(target), @@ -233,7 +241,7 @@ struct column_scalar_scatterer { size_type scatter_rows, column_view const& target, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const + rmm::device_async_resource_ref mr) const { column_scalar_scatterer_impl scatterer{}; return scatterer(source, scatter_iter, scatter_rows, target, stream, mr); @@ -247,8 +255,12 @@ struct column_scalar_scatterer_impl { size_type scatter_rows, column_view const& target, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const + rmm::device_async_resource_ref mr) const { + CUDF_EXPECTS(source.get().type() == target.type(), + "scalar and column types must match", + cudf::data_type_error); + // For each field of `source`, copy construct a scalar from the field // and dispatch to the corresponding scalar scatterer @@ -297,7 +309,7 @@ std::unique_ptr
scatter(table_view const& source, column_view const& scatter_map, table_view const& target, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(source.num_columns() == target.num_columns(), "Number of columns in source and target not equal", @@ -305,12 +317,7 @@ std::unique_ptr
scatter(table_view const& source, CUDF_EXPECTS(scatter_map.size() <= source.num_rows(), "Size of scatter map must be equal to or less than source rows", std::invalid_argument); - CUDF_EXPECTS(std::equal(source.begin(), - source.end(), - target.begin(), - [](auto const& col1, auto const& col2) { - return col1.type().id() == col2.type().id(); - }), + CUDF_EXPECTS(cudf::have_same_types(source, target), "Column types do not match between source and target", cudf::data_type_error); CUDF_EXPECTS(not scatter_map.has_nulls(), "Scatter map contains nulls", std::invalid_argument); @@ -327,7 +334,7 @@ std::unique_ptr
scatter(table_view const& source, device_span const scatter_map, table_view const& target, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(scatter_map.size() <= static_cast(std::numeric_limits::max()), "scatter map size exceeds the column size limit", @@ -344,7 +351,7 @@ std::unique_ptr
scatter(std::vector> column_view const& indices, table_view const& target, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(source.size() == static_cast(target.num_columns()), "Number of scalars in source and number of columns in target not equal", @@ -396,7 +403,7 @@ std::unique_ptr boolean_mask_scatter(column_view const& input, column_view const& target, column_view const& boolean_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto indices = cudf::make_numeric_column( data_type{type_id::INT32}, target.size(), mask_state::UNALLOCATED, stream); @@ -421,7 +428,7 @@ std::unique_ptr boolean_mask_scatter(scalar const& input, column_view const& target, column_view const& boolean_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return detail::copy_if_else(input, target, boolean_mask, stream, mr); } @@ -430,7 +437,7 @@ std::unique_ptr
boolean_mask_scatter(table_view const& input, table_view const& target, column_view const& boolean_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(input.num_columns() == target.num_columns(), "Mismatch in number of input columns and target columns", @@ -442,14 +449,9 @@ std::unique_ptr
boolean_mask_scatter(table_view const& input, "Mask must be of Boolean type", cudf::data_type_error); // Count valid pair of input and columns as per type at each column index i - CUDF_EXPECTS( - std::all_of(thrust::counting_iterator(0), - thrust::counting_iterator(target.num_columns()), - [&input, &target](auto index) { - return ((input.column(index).type().id()) == (target.column(index).type().id())); - }), - "Type mismatch in input column and target column", - cudf::data_type_error); + CUDF_EXPECTS(cudf::have_same_types(input, target), + "Type mismatch in input column and target column", + cudf::data_type_error); if (target.num_rows() != 0) { std::vector> out_columns(target.num_columns()); @@ -473,7 +475,7 @@ std::unique_ptr
boolean_mask_scatter( table_view const& target, column_view const& boolean_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(static_cast(input.size()) == target.num_columns(), "Mismatch in number of scalars and target columns", @@ -486,14 +488,13 @@ std::unique_ptr
boolean_mask_scatter( cudf::data_type_error); // Count valid pair of input and columns as per type at each column/scalar index i - CUDF_EXPECTS( - std::all_of(thrust::counting_iterator(0), - thrust::counting_iterator(target.num_columns()), - [&input, &target](auto index) { - return (input[index].get().type().id() == target.column(index).type().id()); - }), - "Type mismatch in input scalar and target column", - cudf::data_type_error); + CUDF_EXPECTS(std::all_of(thrust::counting_iterator(0), + thrust::counting_iterator(target.num_columns()), + [&input, &target](auto index) { + return cudf::have_same_types(target.column(index), input[index].get()); + }), + "Type mismatch in input scalar and target column", + cudf::data_type_error); if (target.num_rows() != 0) { std::vector> out_columns(target.num_columns()); @@ -518,7 +519,7 @@ std::unique_ptr
scatter(table_view const& source, column_view const& scatter_map, table_view const& target, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::scatter(source, scatter_map, target, stream, mr); @@ -528,7 +529,7 @@ std::unique_ptr
scatter(std::vector> column_view const& indices, table_view const& target, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::scatter(source, indices, target, stream, mr); @@ -538,7 +539,7 @@ std::unique_ptr
boolean_mask_scatter(table_view const& input, table_view const& target, column_view const& boolean_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::boolean_mask_scatter(input, target, boolean_mask, stream, mr); @@ -549,7 +550,7 @@ std::unique_ptr
boolean_mask_scatter( table_view const& target, column_view const& boolean_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::boolean_mask_scatter(input, target, boolean_mask, stream, mr); diff --git a/cpp/src/copying/segmented_shift.cu b/cpp/src/copying/segmented_shift.cu index dd2733cf7e9..b7abc60f240 100644 --- a/cpp/src/copying/segmented_shift.cu +++ b/cpp/src/copying/segmented_shift.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,6 +24,7 @@ #include #include +#include #include #include @@ -73,7 +74,7 @@ struct segmented_shift_functor() size_type offset, scalar const& fill_value, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto values_device_view = column_device_view::create(segmented_values, stream); bool nullable = not fill_value.is_valid(stream) or segmented_values.nullable(); @@ -102,7 +103,7 @@ struct segmented_shift_functor { size_type offset, scalar const& fill_value, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto values_device_view = column_device_view::create(segmented_values, stream); auto input_iterator = make_optional_iterator( @@ -129,7 +130,7 @@ struct segmented_shift_functor_forwarder { size_type offset, scalar const& fill_value, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { segmented_shift_functor shifter; return shifter(segmented_values, segment_offsets, offset, fill_value, stream, mr); @@ -143,7 +144,7 @@ std::unique_ptr segmented_shift(column_view const& segmented_values, size_type offset, scalar const& fill_value, rmm::cuda_stream_view stream, - 
rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { if (segmented_values.is_empty()) { return empty_like(segmented_values); } if (offset == 0) { return std::make_unique(segmented_values, stream, mr); }; diff --git a/cpp/src/copying/shift.cu b/cpp/src/copying/shift.cu index 8e013bb1212..91254f21170 100644 --- a/cpp/src/copying/shift.cu +++ b/cpp/src/copying/shift.cu @@ -26,10 +26,12 @@ #include #include #include +#include #include #include #include +#include #include #include @@ -51,7 +53,7 @@ std::pair create_null_mask(column_device_view con size_type offset, scalar const& fill_value, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto const size = input.size(); auto func_validity = @@ -81,7 +83,7 @@ struct shift_functor { size_type offset, scalar const& fill_value, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto output = cudf::strings::detail::shift( cudf::strings_column_view(input), offset, fill_value, stream, mr); @@ -101,7 +103,7 @@ struct shift_functor { size_type offset, scalar const& fill_value, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { using ScalarType = cudf::scalar_type_t; auto& scalar = static_cast(fill_value); @@ -155,9 +157,9 @@ std::unique_ptr shift(column_view const& input, size_type offset, scalar const& fill_value, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { - CUDF_EXPECTS(input.type() == fill_value.type(), + CUDF_EXPECTS(cudf::have_same_types(input, fill_value), "shift requires each fill value type to match the corresponding column type.", cudf::data_type_error); @@ -173,7 +175,7 @@ std::unique_ptr shift(column_view const& input, size_type offset, scalar const& fill_value, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref 
mr) { CUDF_FUNC_RANGE(); return detail::shift(input, offset, fill_value, stream, mr); diff --git a/cpp/src/datetime/datetime_ops.cu b/cpp/src/datetime/datetime_ops.cu index 371663c41ee..7629cad79a9 100644 --- a/cpp/src/datetime/datetime_ops.cu +++ b/cpp/src/datetime/datetime_ops.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,6 +35,7 @@ #include #include +#include #include #include @@ -254,7 +255,7 @@ struct dispatch_round { rounding_frequency component, cudf::column_view const& column, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const + rmm::device_async_resource_ref mr) const { auto size = column.size(); auto output_col_type = data_type{cudf::type_to_id()}; @@ -319,7 +320,7 @@ struct launch_functor { template std::unique_ptr apply_datetime_op(column_view const& column, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(is_timestamp(column.type()), "Column type should be timestamp"); auto size = column.size(); @@ -355,7 +356,7 @@ struct add_calendrical_months_functor { column_view timestamp_column, MonthIterator months_begin, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const + rmm::device_async_resource_ref mr) const { auto size = timestamp_column.size(); auto output_col_type = timestamp_column.type(); @@ -386,7 +387,7 @@ struct add_calendrical_months_functor { std::unique_ptr add_calendrical_months(column_view const& timestamp_column, column_view const& months_column, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(is_timestamp(timestamp_column.type()), "Column type should be timestamp"); CUDF_EXPECTS( @@ -413,7 +414,7 @@ std::unique_ptr 
add_calendrical_months(column_view const& timestamp_colu std::unique_ptr add_calendrical_months(column_view const& timestamp_column, scalar const& months, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(is_timestamp(timestamp_column.type()), "Column type should be timestamp"); CUDF_EXPECTS(months.type().id() == type_id::INT16 or months.type().id() == type_id::INT32, @@ -442,7 +443,7 @@ std::unique_ptr round_general(rounding_function round_kind, rounding_frequency component, column_view const& column, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return cudf::type_dispatcher( column.type(), dispatch_round{}, round_kind, component, column, stream, mr); @@ -450,7 +451,7 @@ std::unique_ptr round_general(rounding_function round_kind, std::unique_ptr extract_year(column_view const& column, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return detail::apply_datetime_op< detail::extract_component_operator, @@ -459,7 +460,7 @@ std::unique_ptr extract_year(column_view const& column, std::unique_ptr extract_month(column_view const& column, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return detail::apply_datetime_op< detail::extract_component_operator, @@ -468,7 +469,7 @@ std::unique_ptr extract_month(column_view const& column, std::unique_ptr extract_day(column_view const& column, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return detail::apply_datetime_op< detail::extract_component_operator, @@ -477,7 +478,7 @@ std::unique_ptr extract_day(column_view const& column, std::unique_ptr extract_weekday(column_view const& column, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return 
detail::apply_datetime_op< detail::extract_component_operator, @@ -486,7 +487,7 @@ std::unique_ptr extract_weekday(column_view const& column, std::unique_ptr extract_hour(column_view const& column, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return detail::apply_datetime_op< detail::extract_component_operator, @@ -495,7 +496,7 @@ std::unique_ptr extract_hour(column_view const& column, std::unique_ptr extract_minute(column_view const& column, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return detail::apply_datetime_op< detail::extract_component_operator, @@ -504,7 +505,7 @@ std::unique_ptr extract_minute(column_view const& column, std::unique_ptr extract_second(column_view const& column, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return detail::apply_datetime_op< detail::extract_component_operator, @@ -513,7 +514,7 @@ std::unique_ptr extract_second(column_view const& column, std::unique_ptr extract_millisecond_fraction(column_view const& column, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return detail::apply_datetime_op< detail::extract_component_operator, @@ -522,7 +523,7 @@ std::unique_ptr extract_millisecond_fraction(column_view const& column, std::unique_ptr extract_microsecond_fraction(column_view const& column, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return detail::apply_datetime_op< detail::extract_component_operator, @@ -531,7 +532,7 @@ std::unique_ptr extract_microsecond_fraction(column_view const& column, std::unique_ptr extract_nanosecond_fraction(column_view const& column, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return detail::apply_datetime_op< 
detail::extract_component_operator, @@ -540,7 +541,7 @@ std::unique_ptr extract_nanosecond_fraction(column_view const& column, std::unique_ptr last_day_of_month(column_view const& column, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return detail::apply_datetime_op(column, stream, mr); @@ -548,7 +549,7 @@ std::unique_ptr last_day_of_month(column_view const& column, std::unique_ptr day_of_year(column_view const& column, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return detail::apply_datetime_op( column, stream, mr); @@ -556,21 +557,21 @@ std::unique_ptr day_of_year(column_view const& column, std::unique_ptr is_leap_year(column_view const& column, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return apply_datetime_op(column, stream, mr); } std::unique_ptr days_in_month(column_view const& column, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return apply_datetime_op(column, stream, mr); } std::unique_ptr extract_quarter(column_view const& column, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return apply_datetime_op(column, stream, mr); } @@ -579,7 +580,7 @@ std::unique_ptr extract_quarter(column_view const& column, std::unique_ptr ceil_datetimes(column_view const& column, rounding_frequency freq, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::round_general( @@ -588,7 +589,7 @@ std::unique_ptr ceil_datetimes(column_view const& column, std::unique_ptr floor_datetimes(column_view const& column, rounding_frequency freq, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::round_general( @@ -597,88 +598,85 @@ std::unique_ptr 
floor_datetimes(column_view const& column, std::unique_ptr round_datetimes(column_view const& column, rounding_frequency freq, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::round_general( detail::rounding_function::ROUND, freq, column, cudf::get_default_stream(), mr); } -std::unique_ptr extract_year(column_view const& column, rmm::mr::device_memory_resource* mr) +std::unique_ptr extract_year(column_view const& column, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::extract_year(column, cudf::get_default_stream(), mr); } -std::unique_ptr extract_month(column_view const& column, - rmm::mr::device_memory_resource* mr) +std::unique_ptr extract_month(column_view const& column, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::extract_month(column, cudf::get_default_stream(), mr); } -std::unique_ptr extract_day(column_view const& column, rmm::mr::device_memory_resource* mr) +std::unique_ptr extract_day(column_view const& column, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::extract_day(column, cudf::get_default_stream(), mr); } std::unique_ptr extract_weekday(column_view const& column, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::extract_weekday(column, cudf::get_default_stream(), mr); } -std::unique_ptr extract_hour(column_view const& column, rmm::mr::device_memory_resource* mr) +std::unique_ptr extract_hour(column_view const& column, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::extract_hour(column, cudf::get_default_stream(), mr); } -std::unique_ptr extract_minute(column_view const& column, - rmm::mr::device_memory_resource* mr) +std::unique_ptr extract_minute(column_view const& column, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::extract_minute(column, cudf::get_default_stream(), mr); } -std::unique_ptr 
extract_second(column_view const& column, - rmm::mr::device_memory_resource* mr) +std::unique_ptr extract_second(column_view const& column, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::extract_second(column, cudf::get_default_stream(), mr); } std::unique_ptr extract_millisecond_fraction(column_view const& column, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::extract_millisecond_fraction(column, cudf::get_default_stream(), mr); } std::unique_ptr extract_microsecond_fraction(column_view const& column, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::extract_microsecond_fraction(column, cudf::get_default_stream(), mr); } std::unique_ptr extract_nanosecond_fraction(column_view const& column, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::extract_nanosecond_fraction(column, cudf::get_default_stream(), mr); } std::unique_ptr last_day_of_month(column_view const& column, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::last_day_of_month(column, cudf::get_default_stream(), mr); } -std::unique_ptr day_of_year(column_view const& column, rmm::mr::device_memory_resource* mr) +std::unique_ptr day_of_year(column_view const& column, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::day_of_year(column, cudf::get_default_stream(), mr); @@ -686,7 +684,7 @@ std::unique_ptr day_of_year(column_view const& column, rmm::mr::device_m std::unique_ptr add_calendrical_months(cudf::column_view const& timestamp_column, cudf::column_view const& months_column, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::add_calendrical_months( @@ -695,27 +693,26 @@ std::unique_ptr add_calendrical_months(cudf::column_view const& ti std::unique_ptr 
add_calendrical_months(cudf::column_view const& timestamp_column, cudf::scalar const& months, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::add_calendrical_months(timestamp_column, months, cudf::get_default_stream(), mr); } -std::unique_ptr is_leap_year(column_view const& column, rmm::mr::device_memory_resource* mr) +std::unique_ptr is_leap_year(column_view const& column, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::is_leap_year(column, cudf::get_default_stream(), mr); } -std::unique_ptr days_in_month(column_view const& column, - rmm::mr::device_memory_resource* mr) +std::unique_ptr days_in_month(column_view const& column, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::days_in_month(column, cudf::get_default_stream(), mr); } std::unique_ptr extract_quarter(column_view const& column, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::extract_quarter(column, cudf::get_default_stream(), mr); diff --git a/cpp/src/datetime/timezone.cpp b/cpp/src/datetime/timezone.cpp index a75eea7172f..a3471485293 100644 --- a/cpp/src/datetime/timezone.cpp +++ b/cpp/src/datetime/timezone.cpp @@ -19,6 +19,8 @@ #include #include +#include + #include #include #include @@ -379,7 +381,7 @@ static int64_t get_transition_time(dst_transition_s const& trans, int year) std::unique_ptr
make_timezone_transition_table(std::optional tzif_dir, std::string_view timezone_name, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::make_timezone_transition_table( @@ -391,7 +393,7 @@ namespace detail { std::unique_ptr
make_timezone_transition_table(std::optional tzif_dir, std::string_view timezone_name, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { if (timezone_name == "UTC" || timezone_name.empty()) { // Return an empty table for UTC diff --git a/cpp/src/dictionary/add_keys.cu b/cpp/src/dictionary/add_keys.cu index 3973100aced..0ed9006f88b 100644 --- a/cpp/src/dictionary/add_keys.cu +++ b/cpp/src/dictionary/add_keys.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,8 +29,11 @@ #include #include #include +#include +#include #include +#include namespace cudf { namespace dictionary { @@ -49,11 +52,12 @@ namespace detail { std::unique_ptr add_keys(dictionary_column_view const& dictionary_column, column_view const& new_keys, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(!new_keys.has_nulls(), "Keys must not have nulls"); auto old_keys = dictionary_column.keys(); // [a,b,c,d,f] - CUDF_EXPECTS(new_keys.type() == old_keys.type(), "Keys must be the same type"); + CUDF_EXPECTS( + cudf::have_same_types(new_keys, old_keys), "Keys must be the same type", cudf::data_type_error); // first, concatenate the keys together // [a,b,c,d,f] + [d,b,e] = [a,b,c,d,f,d,b,e] auto combined_keys = cudf::detail::concatenate( @@ -131,7 +135,7 @@ std::unique_ptr add_keys(dictionary_column_view const& dictionary_column std::unique_ptr add_keys(dictionary_column_view const& dictionary_column, column_view const& keys, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::add_keys(dictionary_column, keys, stream, mr); diff --git a/cpp/src/dictionary/decode.cu 
b/cpp/src/dictionary/decode.cu index 8ce741c4a91..9f05593fc40 100644 --- a/cpp/src/dictionary/decode.cu +++ b/cpp/src/dictionary/decode.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,6 +25,7 @@ #include #include +#include namespace cudf { namespace dictionary { @@ -46,7 +47,7 @@ struct indices_handler_fn { */ std::unique_ptr decode(dictionary_column_view const& source, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { if (source.is_empty()) return make_empty_column(type_id::EMPTY); @@ -77,7 +78,7 @@ std::unique_ptr decode(dictionary_column_view const& source, std::unique_ptr decode(dictionary_column_view const& source, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::decode(source, stream, mr); diff --git a/cpp/src/dictionary/detail/concatenate.cu b/cpp/src/dictionary/detail/concatenate.cu index 17295fb0345..fdc3d9d0ecf 100644 --- a/cpp/src/dictionary/detail/concatenate.cu +++ b/cpp/src/dictionary/detail/concatenate.cu @@ -26,11 +26,14 @@ #include #include #include +#include +#include #include #include #include #include +#include #include #include @@ -81,13 +84,13 @@ struct compute_children_offsets_fn { } /** - * @brief Return the first keys().type of the dictionary columns. + * @brief Return the first keys() of the dictionary columns. 
*/ - data_type get_keys_type() + column_view get_keys() { auto const view(*std::find_if( columns_ptrs.begin(), columns_ptrs.end(), [](auto pcv) { return pcv->size() > 0; })); - return dictionary_column_view(*view).keys().type(); + return dictionary_column_view(*view).keys(); } /** @@ -140,7 +143,7 @@ struct dispatch_compute_indices { offsets_pair const* d_offsets, size_type const* d_map_to_keys, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto keys_view = column_device_view::create(all_keys, stream); auto indices_view = column_device_view::create(all_indices, stream); @@ -206,21 +209,23 @@ struct dispatch_compute_indices { std::unique_ptr concatenate(host_span columns, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { // exception here is the same behavior as in cudf::concatenate CUDF_EXPECTS(not columns.empty(), "Unexpected empty list of columns to concatenate."); // concatenate the keys (and check the keys match) compute_children_offsets_fn child_offsets_fn{columns}; - auto keys_type = child_offsets_fn.get_keys_type(); + auto expected_keys = child_offsets_fn.get_keys(); std::vector keys_views(columns.size()); - std::transform(columns.begin(), columns.end(), keys_views.begin(), [keys_type](auto cv) { + std::transform(columns.begin(), columns.end(), keys_views.begin(), [expected_keys](auto cv) { auto dict_view = dictionary_column_view(cv); // empty column may not have keys so we create an empty column_view place-holder - if (dict_view.is_empty()) return column_view{keys_type, 0, nullptr, nullptr, 0}; + if (dict_view.is_empty()) return column_view{expected_keys.type(), 0, nullptr, nullptr, 0}; auto keys = dict_view.keys(); - CUDF_EXPECTS(keys.type() == keys_type, "key types of all dictionary columns must match"); + CUDF_EXPECTS(cudf::have_same_types(keys, expected_keys), + "key types of all dictionary columns must match", + 
cudf::data_type_error); return keys; }); auto all_keys = @@ -274,7 +279,7 @@ std::unique_ptr concatenate(host_span columns, // now recompute the indices values for the new keys_column; // the keys offsets (pair.first) are for mapping to the input keys - auto indices_column = type_dispatcher(keys_type, + auto indices_column = type_dispatcher(expected_keys.type(), dispatch_compute_indices{}, all_keys->view(), // old keys all_indices->view(), // old indices diff --git a/cpp/src/dictionary/detail/merge.cu b/cpp/src/dictionary/detail/merge.cu index 2fe21680873..c65aa5d1101 100644 --- a/cpp/src/dictionary/detail/merge.cu +++ b/cpp/src/dictionary/detail/merge.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,6 +25,7 @@ #include #include +#include #include @@ -36,7 +37,7 @@ std::unique_ptr merge(dictionary_column_view const& lcol, dictionary_column_view const& rcol, cudf::detail::index_vector const& row_order, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto const lcol_iter = cudf::detail::indexalator_factory::make_input_iterator(lcol.indices()); auto const rcol_iter = cudf::detail::indexalator_factory::make_input_iterator(rcol.indices()); diff --git a/cpp/src/dictionary/dictionary_factories.cu b/cpp/src/dictionary/dictionary_factories.cu index f70423a13a9..37f8fa7a05b 100644 --- a/cpp/src/dictionary/dictionary_factories.cu +++ b/cpp/src/dictionary/dictionary_factories.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -23,6 +23,7 @@ #include #include +#include namespace cudf { namespace { @@ -30,7 +31,7 @@ struct dispatch_create_indices { template ()>* = nullptr> std::unique_ptr operator()(column_view const& indices, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(std::is_unsigned(), "indices must be an unsigned type"); column_view indices_view{ @@ -40,7 +41,7 @@ struct dispatch_create_indices { template ()>* = nullptr> std::unique_ptr operator()(column_view const&, rmm::cuda_stream_view, - rmm::mr::device_memory_resource*) + rmm::device_async_resource_ref) { CUDF_FAIL("indices must be an integer type."); } @@ -50,7 +51,7 @@ struct dispatch_create_indices { std::unique_ptr make_dictionary_column(column_view const& keys_column, column_view const& indices_column, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(!keys_column.has_nulls(), "keys column must not have nulls"); if (keys_column.is_empty()) return make_empty_column(type_id::DICTIONARY32); @@ -117,7 +118,7 @@ struct make_unsigned_fn { std::unique_ptr make_dictionary_column(std::unique_ptr keys, std::unique_ptr indices, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(!keys->has_nulls(), "keys column must not have nulls"); diff --git a/cpp/src/dictionary/encode.cu b/cpp/src/dictionary/encode.cu index c92b57f0cac..ff29d83b80a 100644 --- a/cpp/src/dictionary/encode.cu +++ b/cpp/src/dictionary/encode.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -29,6 +29,7 @@ #include #include +#include namespace cudf { namespace dictionary { @@ -41,7 +42,7 @@ namespace detail { std::unique_ptr encode(column_view const& input_column, data_type indices_type, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(is_unsigned(indices_type), "indices must be type unsigned integer"); CUDF_EXPECTS(input_column.type().id() != type_id::DICTIONARY32, @@ -90,7 +91,7 @@ data_type get_indices_type_for_size(size_type keys_size) std::unique_ptr encode(column_view const& input_column, data_type indices_type, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::encode(input_column, indices_type, stream, mr); diff --git a/cpp/src/dictionary/remove_keys.cu b/cpp/src/dictionary/remove_keys.cu index 86b70f1119b..35387efa56b 100644 --- a/cpp/src/dictionary/remove_keys.cu +++ b/cpp/src/dictionary/remove_keys.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -26,9 +26,12 @@ #include #include #include +#include +#include #include #include +#include #include #include @@ -59,7 +62,7 @@ template std::unique_ptr remove_keys_fn(dictionary_column_view const& dictionary_column, KeysKeeper keys_to_keep_fn, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto const keys_view = dictionary_column.keys(); auto const indices_type = dictionary_column.indices().type(); @@ -150,11 +153,13 @@ std::unique_ptr remove_keys_fn(dictionary_column_view const& dictionary_ std::unique_ptr remove_keys(dictionary_column_view const& dictionary_column, column_view const& keys_to_remove, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(!keys_to_remove.has_nulls(), "keys_to_remove must not have nulls"); auto const keys_view = dictionary_column.keys(); - CUDF_EXPECTS(keys_view.type() == keys_to_remove.type(), "keys types must match"); + CUDF_EXPECTS(cudf::have_same_types(keys_view, keys_to_remove), + "keys types must match", + cudf::data_type_error); // locate keys to remove by searching the keys column auto const matches = cudf::detail::contains(keys_to_remove, keys_view, stream, mr); @@ -166,7 +171,7 @@ std::unique_ptr remove_keys(dictionary_column_view const& dictionary_col std::unique_ptr remove_unused_keys(dictionary_column_view const& dictionary_column, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { // locate the keys to remove auto const keys_size = dictionary_column.keys_size(); @@ -196,7 +201,7 @@ std::unique_ptr remove_unused_keys(dictionary_column_view const& diction std::unique_ptr remove_keys(dictionary_column_view const& dictionary_column, column_view const& keys_to_remove, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::remove_keys(dictionary_column, 
keys_to_remove, stream, mr); @@ -204,7 +209,7 @@ std::unique_ptr remove_keys(dictionary_column_view const& dictionary_col std::unique_ptr remove_unused_keys(dictionary_column_view const& dictionary_column, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::remove_unused_keys(dictionary_column, stream, mr); diff --git a/cpp/src/dictionary/replace.cu b/cpp/src/dictionary/replace.cu index 7069993866c..bc17dfd4bab 100644 --- a/cpp/src/dictionary/replace.cu +++ b/cpp/src/dictionary/replace.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,8 +24,11 @@ #include #include #include +#include +#include #include +#include namespace cudf { namespace dictionary { @@ -52,7 +55,7 @@ template std::unique_ptr replace_indices(column_view const& input, ReplacementIter replacement_iter, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto const input_view = column_device_view::create(input, stream); auto const d_input = *input_view; @@ -74,16 +77,18 @@ std::unique_ptr replace_indices(column_view const& input, /** * @copydoc cudf::dictionary::detail::replace_nulls(cudf::column_view const&,cudf::column_view - * const& rmm::cuda_stream_view, rmm::mr::device_memory_resource*) + * const& rmm::cuda_stream_view, rmm::device_async_resource_ref) */ std::unique_ptr replace_nulls(dictionary_column_view const& input, dictionary_column_view const& replacement, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { if (input.is_empty()) { return cudf::empty_like(input.parent()); } if (!input.has_nulls()) { return std::make_unique(input.parent(), stream, mr); } - 
CUDF_EXPECTS(input.keys().type() == replacement.keys().type(), "keys must match"); + CUDF_EXPECTS(cudf::have_same_types(input.keys(), replacement.keys()), + "keys must match", + cudf::data_type_error); CUDF_EXPECTS(replacement.size() == input.size(), "column sizes must match"); // first combine the keys so both input dictionaries have the same set @@ -107,18 +112,20 @@ std::unique_ptr replace_nulls(dictionary_column_view const& input, /** * @copydoc cudf::dictionary::detail::replace_nulls(cudf::column_view const&,cudf::scalar - * const&, rmm::cuda_stream_view, rmm::mr::device_memory_resource*) + * const&, rmm::cuda_stream_view, rmm::device_async_resource_ref) */ std::unique_ptr replace_nulls(dictionary_column_view const& input, scalar const& replacement, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { if (input.is_empty()) { return cudf::empty_like(input.parent()); } if (!input.has_nulls() || !replacement.is_valid(stream)) { return std::make_unique(input.parent(), stream, mr); } - CUDF_EXPECTS(input.keys().type() == replacement.type(), "keys must match scalar type"); + CUDF_EXPECTS(cudf::have_same_types(input.parent(), replacement), + "keys must match scalar type", + cudf::data_type_error); // first add the replacement to the keys so only the indices need to be processed auto input_matched = dictionary::detail::add_keys( diff --git a/cpp/src/dictionary/search.cu b/cpp/src/dictionary/search.cu index e35aded1984..231619836f9 100644 --- a/cpp/src/dictionary/search.cu +++ b/cpp/src/dictionary/search.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -19,11 +19,14 @@ #include #include #include +#include #include +#include #include #include #include +#include #include #include @@ -40,7 +43,7 @@ struct dispatch_scalar_index { std::unique_ptr operator()(size_type index, bool is_valid, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return std::make_unique>(index, is_valid, stream, mr); } @@ -69,12 +72,14 @@ struct find_index_fn { std::unique_ptr operator()(dictionary_column_view const& input, scalar const& key, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const + rmm::device_async_resource_ref mr) const { - if (!key.is_valid(stream)) + if (!key.is_valid(stream)) { return type_dispatcher(input.indices().type(), dispatch_scalar_index{}, 0, false, stream, mr); - CUDF_EXPECTS(input.keys().type() == key.type(), - "search key type must match dictionary keys type"); + } + CUDF_EXPECTS(cudf::have_same_types(input.parent(), key), + "search key type must match dictionary keys type", + cudf::data_type_error); using ScalarType = cudf::scalar_type_t; auto find_key = static_cast(key).value(stream); @@ -96,7 +101,7 @@ struct find_index_fn { std::unique_ptr operator()(dictionary_column_view const&, scalar const&, rmm::cuda_stream_view, - rmm::mr::device_memory_resource*) const + rmm::device_async_resource_ref) const { CUDF_FAIL( "dictionary, list_view, and struct_view columns cannot be the keys column of a dictionary"); @@ -111,12 +116,14 @@ struct find_insert_index_fn { std::unique_ptr operator()(dictionary_column_view const& input, scalar const& key, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const + rmm::device_async_resource_ref mr) const { - if (!key.is_valid(stream)) + if (!key.is_valid(stream)) { return type_dispatcher(input.indices().type(), dispatch_scalar_index{}, 0, false, stream, mr); - CUDF_EXPECTS(input.keys().type() == key.type(), - "search key type must match dictionary keys type"); + } + 
CUDF_EXPECTS(cudf::have_same_types(input.parent(), key), + "search key type must match dictionary keys type", + cudf::data_type_error); using ScalarType = cudf::scalar_type_t; auto find_key = static_cast(key).value(stream); @@ -138,7 +145,7 @@ struct find_insert_index_fn { std::unique_ptr operator()(dictionary_column_view const&, scalar const&, rmm::cuda_stream_view, - rmm::mr::device_memory_resource*) const + rmm::device_async_resource_ref) const { CUDF_FAIL("dictionary, list_view, and struct_view columns cannot be the keys for a dictionary"); } @@ -149,7 +156,7 @@ struct find_insert_index_fn { std::unique_ptr get_index(dictionary_column_view const& dictionary, scalar const& key, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { if (dictionary.is_empty()) return std::make_unique>(0, false, stream, mr); @@ -160,7 +167,7 @@ std::unique_ptr get_index(dictionary_column_view const& dictionary, std::unique_ptr get_insert_index(dictionary_column_view const& dictionary, scalar const& key, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { if (dictionary.is_empty()) return std::make_unique>(0, false, stream, mr); @@ -175,7 +182,7 @@ std::unique_ptr get_insert_index(dictionary_column_view const& dictionar std::unique_ptr get_index(dictionary_column_view const& dictionary, scalar const& key, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::get_index(dictionary, key, stream, mr); diff --git a/cpp/src/dictionary/set_keys.cu b/cpp/src/dictionary/set_keys.cu index b49cf7850b1..08a33d40abe 100644 --- a/cpp/src/dictionary/set_keys.cu +++ b/cpp/src/dictionary/set_keys.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,9 +29,12 @@ #include #include #include +#include +#include #include #include +#include #include #include @@ -61,7 +64,7 @@ struct dispatch_compute_indices { operator()(dictionary_column_view const& input, column_view const& new_keys, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto dictionary_view = column_device_view::create(input.parent(), stream); auto dictionary_itr = make_dictionary_iterator(*dictionary_view); @@ -115,15 +118,15 @@ struct dispatch_compute_indices { } // namespace -// std::unique_ptr set_keys(dictionary_column_view const& dictionary_column, column_view const& new_keys, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(!new_keys.has_nulls(), "keys parameter must not have nulls"); auto keys = dictionary_column.keys(); - CUDF_EXPECTS(keys.type() == new_keys.type(), "keys types must match"); + CUDF_EXPECTS( + cudf::have_same_types(keys, new_keys), "keys types must match", cudf::data_type_error); // copy the keys -- use cudf::distinct to make sure there are no duplicates, // then sort the results. 
@@ -177,7 +180,7 @@ std::unique_ptr set_keys(dictionary_column_view const& dictionary_column std::vector> match_dictionaries( cudf::host_span input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { std::vector keys(input.size()); std::transform(input.begin(), input.end(), keys.begin(), [](auto& col) { return col.keys(); }); @@ -191,7 +194,7 @@ std::vector> match_dictionaries( } std::pair>, std::vector> match_dictionaries( - std::vector tables, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) + std::vector tables, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { // Make a copy of all the column views from each table_view std::vector> updated_columns; @@ -242,7 +245,7 @@ std::pair>, std::vector> match_d std::unique_ptr set_keys(dictionary_column_view const& dictionary_column, column_view const& keys, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::set_keys(dictionary_column, keys, stream, mr); @@ -251,7 +254,7 @@ std::unique_ptr set_keys(dictionary_column_view const& dictionary_column std::vector> match_dictionaries( cudf::host_span input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::match_dictionaries(input, stream, mr); diff --git a/cpp/src/filling/calendrical_month_sequence.cu b/cpp/src/filling/calendrical_month_sequence.cu index 80badb7d566..3e6d693dde5 100644 --- a/cpp/src/filling/calendrical_month_sequence.cu +++ b/cpp/src/filling/calendrical_month_sequence.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -23,6 +23,7 @@ #include #include +#include namespace cudf { namespace detail { @@ -30,7 +31,7 @@ std::unique_ptr calendrical_month_sequence(size_type size, scalar const& init, size_type months, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return type_dispatcher( init.type(), calendrical_month_sequence_functor{}, size, init, months, stream, mr); @@ -41,7 +42,7 @@ std::unique_ptr calendrical_month_sequence(size_type size, scalar const& init, size_type months, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::calendrical_month_sequence(size, init, months, stream, mr); diff --git a/cpp/src/filling/fill.cu b/cpp/src/filling/fill.cu index 42d1f7592ec..1fc9ed31c09 100644 --- a/cpp/src/filling/fill.cu +++ b/cpp/src/filling/fill.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -33,9 +33,11 @@ #include #include #include +#include #include #include +#include #include @@ -107,9 +109,9 @@ struct out_of_place_fill_range_dispatch { std::unique_ptr operator()(cudf::size_type begin, cudf::size_type end, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { - CUDF_EXPECTS(input.type() == value.type(), "Data type mismatch."); + CUDF_EXPECTS(cudf::have_same_types(input, value), "Data type mismatch.", cudf::data_type_error); auto p_ret = std::make_unique(input, stream, mr); if (end != begin) { // otherwise no fill @@ -134,9 +136,9 @@ std::unique_ptr out_of_place_fill_range_dispatch::operator(); auto p_scalar = static_cast(&value); return cudf::strings::detail::fill( @@ -148,11 +150,12 @@ std::unique_ptr out_of_place_fill_range_dispatch::operator()(input, stream, mr); cudf::dictionary_column_view const target(input); - CUDF_EXPECTS(target.keys().type() == value.type(), "Data type mismatch."); + CUDF_EXPECTS( + cudf::have_same_types(target.parent(), value), "Data type mismatch.", cudf::data_type_error); // if the scalar is invalid, then just copy the column and fill the null mask if (!value.is_valid(stream)) { @@ -218,7 +221,8 @@ void fill_in_place(mutable_column_view& destination, "Range is out of bounds."); CUDF_EXPECTS(destination.nullable() || value.is_valid(stream), "destination should be nullable or value should be non-null."); - CUDF_EXPECTS(destination.type() == value.type(), "Data type mismatch."); + CUDF_EXPECTS( + cudf::have_same_types(destination, value), "Data type mismatch.", cudf::data_type_error); if (end != begin) { // otherwise no-op cudf::type_dispatcher( @@ -233,7 +237,7 @@ std::unique_ptr fill(column_view const& input, size_type end, scalar const& value, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS((begin >= 0) && (end <= input.size()) && (begin <= end), "Range is out of bounds."); @@ -258,7 +262,7 
@@ std::unique_ptr fill(column_view const& input, size_type end, scalar const& value, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::fill(input, begin, end, value, stream, mr); diff --git a/cpp/src/filling/repeat.cu b/cpp/src/filling/repeat.cu index 87cc0f21d0e..ff4005d9366 100644 --- a/cpp/src/filling/repeat.cu +++ b/cpp/src/filling/repeat.cu @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -102,7 +103,7 @@ namespace detail { std::unique_ptr
repeat(table_view const& input_table, column_view const& count, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(input_table.num_rows() == count.size(), "in and count must have equal size"); CUDF_EXPECTS(not count.has_nulls(), "count cannot contain nulls"); @@ -131,7 +132,7 @@ std::unique_ptr
repeat(table_view const& input_table, std::unique_ptr
repeat(table_view const& input_table, size_type count, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { if ((input_table.num_rows() == 0) || (count == 0)) { return cudf::empty_like(input_table); } @@ -154,7 +155,7 @@ std::unique_ptr
repeat(table_view const& input_table, std::unique_ptr
repeat(table_view const& input_table, column_view const& count, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::repeat(input_table, count, stream, mr); @@ -163,7 +164,7 @@ std::unique_ptr
repeat(table_view const& input_table, std::unique_ptr
repeat(table_view const& input_table, size_type count, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::repeat(input_table, count, stream, mr); diff --git a/cpp/src/filling/sequence.cu b/cpp/src/filling/sequence.cu index 99a17f8b0e0..ee1745b8498 100644 --- a/cpp/src/filling/sequence.cu +++ b/cpp/src/filling/sequence.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,9 +24,11 @@ #include #include #include +#include #include #include +#include #include #include @@ -66,7 +68,7 @@ struct sequence_functor { scalar const& init, scalar const& step, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto result = make_fixed_width_column(init.type(), size, mask_state::UNALLOCATED, stream, mr); auto result_device_view = mutable_column_device_view::create(*result, stream); @@ -92,7 +94,7 @@ struct sequence_functor { std::unique_ptr operator()(size_type size, scalar const& init, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto result = make_fixed_width_column(init.type(), size, mask_state::UNALLOCATED, stream, mr); auto result_device_view = mutable_column_device_view::create(*result, stream); @@ -125,9 +127,11 @@ std::unique_ptr sequence(size_type size, scalar const& init, scalar const& step, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { - CUDF_EXPECTS(init.type() == step.type(), "init and step must be of the same type."); + CUDF_EXPECTS(cudf::have_same_types(init, step), + "init and step must be of the same type.", + cudf::data_type_error); CUDF_EXPECTS(size >= 0, "size must be >= 0"); 
CUDF_EXPECTS(is_numeric(init.type()), "Input scalar types must be numeric"); @@ -137,7 +141,7 @@ std::unique_ptr sequence(size_type size, std::unique_ptr sequence(size_type size, scalar const& init, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(size >= 0, "size must be >= 0"); CUDF_EXPECTS(is_numeric(init.type()), "init scalar type must be numeric"); @@ -151,7 +155,7 @@ std::unique_ptr sequence(size_type size, scalar const& init, scalar const& step, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::sequence(size, init, step, stream, mr); @@ -160,7 +164,7 @@ std::unique_ptr sequence(size_type size, std::unique_ptr sequence(size_type size, scalar const& init, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::sequence(size, init, stream, mr); diff --git a/cpp/src/groupby/common/utils.hpp b/cpp/src/groupby/common/utils.hpp index 09b85c74f08..82c3c08b501 100644 --- a/cpp/src/groupby/common/utils.hpp +++ b/cpp/src/groupby/common/utils.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -20,6 +20,8 @@ #include #include +#include + #include #include @@ -31,7 +33,7 @@ template inline std::vector extract_results(host_span requests, cudf::detail::result_cache& cache, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { std::vector results(requests.size()); std::unordered_map>, diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu index e3c021eb66a..e43dfcb4d98 100644 --- a/cpp/src/groupby/groupby.cu +++ b/cpp/src/groupby/groupby.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -36,9 +36,11 @@ #include #include #include +#include #include #include +#include #include @@ -65,7 +67,7 @@ groupby::groupby(table_view const& keys, std::pair, std::vector> groupby::dispatch_aggregation( host_span requests, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { // If sort groupby has been called once on this groupby object, then // always use sort groupby from now on. 
Because once keys are sorted, @@ -193,7 +195,7 @@ void verify_valid_requests(host_span requests) // Compute aggregation requests std::pair, std::vector> groupby::aggregate( - host_span requests, rmm::mr::device_memory_resource* mr) + host_span requests, rmm::device_async_resource_ref mr) { return aggregate(requests, cudf::get_default_stream(), mr); } @@ -202,7 +204,7 @@ std::pair, std::vector> groupby::aggr std::pair, std::vector> groupby::aggregate( host_span requests, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); CUDF_EXPECTS( @@ -220,7 +222,7 @@ std::pair, std::vector> groupby::aggr // Compute scan requests std::pair, std::vector> groupby::scan( - host_span requests, rmm::mr::device_memory_resource* mr) + host_span requests, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); CUDF_EXPECTS( @@ -236,7 +238,7 @@ std::pair, std::vector> groupby::scan return sort_scan(requests, cudf::get_default_stream(), mr); } -groupby::groups groupby::get_groups(table_view values, rmm::mr::device_memory_resource* mr) +groupby::groups groupby::get_groups(table_view values, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); auto const stream = cudf::get_default_stream(); @@ -262,7 +264,7 @@ groupby::groups groupby::get_groups(table_view values, rmm::mr::device_memory_re std::pair, std::unique_ptr
> groupby::replace_nulls( table_view const& values, host_span replace_policies, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); CUDF_EXPECTS(_keys.num_rows() == values.num_rows(), @@ -306,17 +308,20 @@ std::pair, std::unique_ptr
> groupby::shift( table_view const& values, host_span offsets, std::vector> const& fill_values, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); CUDF_EXPECTS(values.num_columns() == static_cast(fill_values.size()), "Mismatch number of fill_values and columns."); - CUDF_EXPECTS( - std::all_of(thrust::make_counting_iterator(0), - thrust::make_counting_iterator(values.num_columns()), - [&](auto i) { return values.column(i).type() == fill_values[i].get().type(); }), - "values and fill_value should have the same type."); - + CUDF_EXPECTS(std::equal(values.begin(), + values.end(), + fill_values.cbegin(), + fill_values.cend(), + [](auto const& col, auto const& scalar) { + return cudf::have_same_types(col, scalar.get()); + }), + "values and fill_value should have the same type.", + cudf::data_type_error); auto stream = cudf::get_default_stream(); std::vector> results; auto const& group_offsets = helper().group_offsets(stream); diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu index acc1b087510..4f75ab19c66 100644 --- a/cpp/src/groupby/hash/groupby.cu +++ b/cpp/src/groupby/hash/groupby.cu @@ -44,6 +44,7 @@ #include #include +#include #include #include @@ -190,7 +191,7 @@ class hash_compound_agg_finalizer final : public cudf::detail::aggregation_final SetType set; bitmask_type const* __restrict__ row_bitmask; rmm::cuda_stream_view stream; - rmm::mr::device_memory_resource* mr; + rmm::device_async_resource_ref mr; public: using cudf::detail::aggregation_finalizer::visit; @@ -202,7 +203,7 @@ class hash_compound_agg_finalizer final : public cudf::detail::aggregation_final SetType set, bitmask_type const* row_bitmask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) : col(col), sparse_results(sparse_results), dense_results(dense_results), @@ -398,7 +399,7 @@ void sparse_to_dense_results(table_view const& keys, bool keys_have_nulls, null_policy 
include_null_keys, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto row_bitmask = cudf::detail::bitmask_and(keys, stream, rmm::mr::get_current_device_resource()).first; @@ -551,7 +552,7 @@ std::unique_ptr
groupby(table_view const& keys, bool const keys_have_nulls, null_policy const include_null_keys, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto const num_keys = keys.num_rows(); auto const null_keys_are_equal = null_equality::EQUAL; @@ -654,7 +655,7 @@ std::pair, std::vector> groupby( host_span requests, null_policy include_null_keys, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { cudf::detail::result_cache cache(requests.size()); diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp index 2d6f99de25a..ba59616babe 100644 --- a/cpp/src/groupby/sort/aggregate.cpp +++ b/cpp/src/groupby/sort/aggregate.cpp @@ -37,6 +37,7 @@ #include #include +#include #include #include @@ -797,7 +798,7 @@ void aggregate_result_functor::operator()(aggregatio std::pair, std::vector> groupby::sort_aggregate( host_span requests, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { // We're going to start by creating a cache of results so that aggs that // depend on other aggs will not have to be recalculated. e.g. mean depends on diff --git a/cpp/src/groupby/sort/functors.hpp b/cpp/src/groupby/sort/functors.hpp index be36956b929..057085fe85d 100644 --- a/cpp/src/groupby/sort/functors.hpp +++ b/cpp/src/groupby/sort/functors.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -22,6 +22,7 @@ #include #include +#include #include @@ -42,7 +43,7 @@ struct store_result_functor { sort::sort_groupby_helper& helper, cudf::detail::result_cache& cache, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr, + rmm::device_async_resource_ref mr, sorted keys_are_sorted = sorted::NO) : helper(helper), cache(cache), @@ -98,8 +99,8 @@ struct store_result_functor { cudf::detail::result_cache& cache; ///< cache of results to store into column_view const& values; ///< Column of values to group and aggregate - rmm::cuda_stream_view stream; ///< CUDA stream on which to execute kernels - rmm::mr::device_memory_resource* mr; ///< Memory resource to allocate space for results + rmm::cuda_stream_view stream; ///< CUDA stream on which to execute kernels + rmm::device_async_resource_ref mr; ///< Memory resource to allocate space for results sorted keys_are_sorted; ///< Whether the keys are sorted std::unique_ptr sorted_values; ///< Memoised grouped and sorted values diff --git a/cpp/src/groupby/sort/group_argmax.cu b/cpp/src/groupby/sort/group_argmax.cu index a9c098bcf61..a1d197b1307 100644 --- a/cpp/src/groupby/sort/group_argmax.cu +++ b/cpp/src/groupby/sort/group_argmax.cu @@ -20,6 +20,7 @@ #include #include +#include #include @@ -31,7 +32,7 @@ std::unique_ptr group_argmax(column_view const& values, cudf::device_span group_labels, column_view const& key_sort_order, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto indices = type_dispatcher(values.type(), group_reduction_dispatcher{}, diff --git a/cpp/src/groupby/sort/group_argmin.cu b/cpp/src/groupby/sort/group_argmin.cu index 53a514ac8a7..03243bef836 100644 --- a/cpp/src/groupby/sort/group_argmin.cu +++ b/cpp/src/groupby/sort/group_argmin.cu @@ -20,6 +20,7 @@ #include #include +#include #include @@ -31,7 +32,7 @@ std::unique_ptr group_argmin(column_view const& values, cudf::device_span group_labels, column_view const& 
key_sort_order, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto indices = type_dispatcher(values.type(), group_reduction_dispatcher{}, diff --git a/cpp/src/groupby/sort/group_collect.cu b/cpp/src/groupby/sort/group_collect.cu index f95ad72f453..555c5d3ad41 100644 --- a/cpp/src/groupby/sort/group_collect.cu +++ b/cpp/src/groupby/sort/group_collect.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,6 +23,7 @@ #include #include +#include #include #include @@ -50,7 +51,7 @@ std::pair, std::unique_ptr> purge_null_entries( column_view const& offsets, size_type num_groups, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto values_device_view = column_device_view::create(values, stream); @@ -91,7 +92,7 @@ std::unique_ptr group_collect(column_view const& values, size_type num_groups, null_policy null_handling, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto [child_column, offsets_column] = [null_handling, num_groups, &values, &group_offsets, stream, mr] { diff --git a/cpp/src/groupby/sort/group_correlation.cu b/cpp/src/groupby/sort/group_correlation.cu index 4389b833c33..152aa98a8b9 100644 --- a/cpp/src/groupby/sort/group_correlation.cu +++ b/cpp/src/groupby/sort/group_correlation.cu @@ -26,6 +26,7 @@ #include #include +#include #include #include @@ -120,7 +121,7 @@ std::unique_ptr group_covariance(column_view const& values_0, size_type min_periods, size_type ddof, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { using result_type = id_to_type; static_assert( @@ -181,7 +182,7 @@ std::unique_ptr 
group_correlation(column_view const& covariance, column_view const& stddev_0, column_view const& stddev_1, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { using result_type = id_to_type; CUDF_EXPECTS(covariance.type().id() == type_id::FLOAT64, "Covariance result must be FLOAT64"); diff --git a/cpp/src/groupby/sort/group_count.cu b/cpp/src/groupby/sort/group_count.cu index 2f289c8c8a7..56a4943e272 100644 --- a/cpp/src/groupby/sort/group_count.cu +++ b/cpp/src/groupby/sort/group_count.cu @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -37,7 +38,7 @@ std::unique_ptr group_count_valid(column_view const& values, cudf::device_span group_labels, size_type num_groups, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(num_groups >= 0, "number of groups cannot be negative"); CUDF_EXPECTS(static_cast(values.size()) == group_labels.size(), @@ -80,7 +81,7 @@ std::unique_ptr group_count_valid(column_view const& values, std::unique_ptr group_count_all(cudf::device_span group_offsets, size_type num_groups, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(num_groups >= 0, "number of groups cannot be negative"); diff --git a/cpp/src/groupby/sort/group_count_scan.cu b/cpp/src/groupby/sort/group_count_scan.cu index 2e8fd41d984..c076f21e1f8 100644 --- a/cpp/src/groupby/sort/group_count_scan.cu +++ b/cpp/src/groupby/sort/group_count_scan.cu @@ -21,6 +21,7 @@ #include #include +#include #include #include @@ -30,7 +31,7 @@ namespace groupby { namespace detail { std::unique_ptr count_scan(cudf::device_span group_labels, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { std::unique_ptr result = make_fixed_width_column( data_type{type_id::INT32}, group_labels.size(), mask_state::UNALLOCATED, stream, mr); diff --git 
a/cpp/src/groupby/sort/group_histogram.cu b/cpp/src/groupby/sort/group_histogram.cu index 67c30adcd47..1000ec0d470 100644 --- a/cpp/src/groupby/sort/group_histogram.cu +++ b/cpp/src/groupby/sort/group_histogram.cu @@ -26,6 +26,7 @@ #include #include +#include #include @@ -38,7 +39,7 @@ std::unique_ptr build_histogram(column_view const& values, std::optional const& partial_counts, size_type num_groups, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(static_cast(values.size()) == group_labels.size(), "Size of values column should be the same as that of group labels.", @@ -89,7 +90,7 @@ std::unique_ptr group_histogram(column_view const& values, cudf::device_span group_labels, size_type num_groups, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { // Empty group should be handled before reaching here. CUDF_EXPECTS(num_groups > 0, "Group should not be empty.", std::invalid_argument); @@ -101,7 +102,7 @@ std::unique_ptr group_merge_histogram(column_view const& values, cudf::device_span group_offsets, size_type num_groups, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { // Empty group should be handled before reaching here. CUDF_EXPECTS(num_groups > 0, "Group should not be empty.", std::invalid_argument); diff --git a/cpp/src/groupby/sort/group_m2.cu b/cpp/src/groupby/sort/group_m2.cu index 70b05100fb0..77f33486284 100644 --- a/cpp/src/groupby/sort/group_m2.cu +++ b/cpp/src/groupby/sort/group_m2.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -88,7 +89,7 @@ struct m2_functor { column_view const& group_means, cudf::device_span group_labels, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { using result_type = cudf::detail::target_type_t; auto result = make_numeric_column(data_type(type_to_id()), @@ -133,7 +134,7 @@ std::unique_ptr group_m2(column_view const& values, column_view const& group_means, cudf::device_span group_labels, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto values_type = cudf::is_dictionary(values.type()) ? dictionary_column_view(values).keys().type() diff --git a/cpp/src/groupby/sort/group_max.cu b/cpp/src/groupby/sort/group_max.cu index 148188f5fdf..60b071c25ff 100644 --- a/cpp/src/groupby/sort/group_max.cu +++ b/cpp/src/groupby/sort/group_max.cu @@ -17,6 +17,7 @@ #include "groupby/sort/group_single_pass_reduction_util.cuh" #include +#include namespace cudf { namespace groupby { @@ -25,7 +26,7 @@ std::unique_ptr group_max(column_view const& values, size_type num_groups, cudf::device_span group_labels, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto values_type = cudf::is_dictionary(values.type()) ? 
dictionary_column_view(values).keys().type() diff --git a/cpp/src/groupby/sort/group_max_scan.cu b/cpp/src/groupby/sort/group_max_scan.cu index 8679ab09df6..270059cfcad 100644 --- a/cpp/src/groupby/sort/group_max_scan.cu +++ b/cpp/src/groupby/sort/group_max_scan.cu @@ -17,6 +17,7 @@ #include "groupby/sort/group_scan_util.cuh" #include +#include namespace cudf { namespace groupby { @@ -25,7 +26,7 @@ std::unique_ptr max_scan(column_view const& values, size_type num_groups, cudf::device_span group_labels, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return type_dispatcher(values.type(), group_scan_dispatcher{}, diff --git a/cpp/src/groupby/sort/group_merge_lists.cu b/cpp/src/groupby/sort/group_merge_lists.cu index 2c72128dbfb..92cce1aa00e 100644 --- a/cpp/src/groupby/sort/group_merge_lists.cu +++ b/cpp/src/groupby/sort/group_merge_lists.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #include #include +#include #include @@ -30,7 +31,7 @@ std::unique_ptr group_merge_lists(column_view const& values, cudf::device_span group_offsets, size_type num_groups, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(values.type().id() == type_id::LIST, "Input to `group_merge_lists` must be a lists column."); diff --git a/cpp/src/groupby/sort/group_merge_m2.cu b/cpp/src/groupby/sort/group_merge_m2.cu index a580c9dac9d..4ad8fa5ff07 100644 --- a/cpp/src/groupby/sort/group_merge_m2.cu +++ b/cpp/src/groupby/sort/group_merge_m2.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,6 +25,7 @@ #include #include +#include #include #include @@ -131,7 +132,7 @@ std::unique_ptr group_merge_m2(column_view const& values, cudf::device_span group_offsets, size_type num_groups, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(values.type().id() == type_id::STRUCT, "Input to `group_merge_m2` must be a structs column."); diff --git a/cpp/src/groupby/sort/group_min.cu b/cpp/src/groupby/sort/group_min.cu index 3939fc41b65..22aaf664168 100644 --- a/cpp/src/groupby/sort/group_min.cu +++ b/cpp/src/groupby/sort/group_min.cu @@ -17,6 +17,7 @@ #include "groupby/sort/group_single_pass_reduction_util.cuh" #include +#include namespace cudf { namespace groupby { @@ -25,7 +26,7 @@ std::unique_ptr group_min(column_view const& values, size_type num_groups, cudf::device_span group_labels, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto values_type = cudf::is_dictionary(values.type()) ? 
dictionary_column_view(values).keys().type() diff --git a/cpp/src/groupby/sort/group_min_scan.cu b/cpp/src/groupby/sort/group_min_scan.cu index 7d2a88fb038..4ddc10a2e5a 100644 --- a/cpp/src/groupby/sort/group_min_scan.cu +++ b/cpp/src/groupby/sort/group_min_scan.cu @@ -17,6 +17,7 @@ #include "groupby/sort/group_scan_util.cuh" #include +#include namespace cudf { namespace groupby { @@ -25,7 +26,7 @@ std::unique_ptr min_scan(column_view const& values, size_type num_groups, cudf::device_span group_labels, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return type_dispatcher(values.type(), group_scan_dispatcher{}, diff --git a/cpp/src/groupby/sort/group_nth_element.cu b/cpp/src/groupby/sort/group_nth_element.cu index 694c052e42d..1bc1eef908c 100644 --- a/cpp/src/groupby/sort/group_nth_element.cu +++ b/cpp/src/groupby/sort/group_nth_element.cu @@ -26,6 +26,7 @@ #include #include +#include #include #include @@ -49,7 +50,7 @@ std::unique_ptr group_nth_element(column_view const& values, size_type n, null_policy null_handling, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(static_cast(values.size()) == group_labels.size(), "Size of values column should be same as that of group labels"); diff --git a/cpp/src/groupby/sort/group_nunique.cu b/cpp/src/groupby/sort/group_nunique.cu index 1a5f1691d5b..de11e70719a 100644 --- a/cpp/src/groupby/sort/group_nunique.cu +++ b/cpp/src/groupby/sort/group_nunique.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -78,7 +79,7 @@ std::unique_ptr group_nunique(column_view const& values, cudf::device_span group_offsets, null_policy null_handling, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(num_groups >= 0, "number of groups cannot be negative"); CUDF_EXPECTS(static_cast(values.size()) == group_labels.size(), diff --git a/cpp/src/groupby/sort/group_product.cu b/cpp/src/groupby/sort/group_product.cu index c53362f2095..83ca1059325 100644 --- a/cpp/src/groupby/sort/group_product.cu +++ b/cpp/src/groupby/sort/group_product.cu @@ -20,6 +20,7 @@ #include #include +#include namespace cudf { namespace groupby { @@ -28,7 +29,7 @@ std::unique_ptr group_product(column_view const& values, size_type num_groups, cudf::device_span group_labels, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto values_type = cudf::is_dictionary(values.type()) ? 
dictionary_column_view(values).keys().type() diff --git a/cpp/src/groupby/sort/group_product_scan.cu b/cpp/src/groupby/sort/group_product_scan.cu index e1a615730dd..40c53ceeff1 100644 --- a/cpp/src/groupby/sort/group_product_scan.cu +++ b/cpp/src/groupby/sort/group_product_scan.cu @@ -17,6 +17,7 @@ #include "groupby/sort/group_scan_util.cuh" #include +#include namespace cudf { namespace groupby { @@ -25,7 +26,7 @@ std::unique_ptr product_scan(column_view const& values, size_type num_groups, cudf::device_span group_labels, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return type_dispatcher(values.type(), group_scan_dispatcher{}, diff --git a/cpp/src/groupby/sort/group_quantiles.cu b/cpp/src/groupby/sort/group_quantiles.cu index a6bc2d5b38d..3156dfaadd0 100644 --- a/cpp/src/groupby/sort/group_quantiles.cu +++ b/cpp/src/groupby/sort/group_quantiles.cu @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -90,7 +91,7 @@ struct quantiles_functor { device_span quantile, interpolation interpolation, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { using ResultType = cudf::detail::target_type_t; @@ -161,7 +162,7 @@ std::unique_ptr group_quantiles(column_view const& values, std::vector const& quantiles, interpolation interp, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto dv_quantiles = cudf::detail::make_device_uvector_async( quantiles, stream, rmm::mr::get_current_device_resource()); diff --git a/cpp/src/groupby/sort/group_rank_scan.cu b/cpp/src/groupby/sort/group_rank_scan.cu index 5cf7844410e..0b65889f127 100644 --- a/cpp/src/groupby/sort/group_rank_scan.cu +++ b/cpp/src/groupby/sort/group_rank_scan.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,6 +27,7 @@ #include #include +#include #include #include @@ -100,7 +101,7 @@ std::unique_ptr rank_generator(column_view const& grouped_values, scan_operator scan_op, bool has_nulls, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto const grouped_values_view = table_view{{grouped_values}}; auto const comparator = @@ -155,7 +156,7 @@ std::unique_ptr min_rank_scan(column_view const& grouped_values, device_span group_labels, device_span group_offsets, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return rank_generator( grouped_values, @@ -176,7 +177,7 @@ std::unique_ptr max_rank_scan(column_view const& grouped_values, device_span group_labels, device_span group_offsets, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return rank_generator( grouped_values, @@ -197,7 +198,7 @@ std::unique_ptr first_rank_scan(column_view const& grouped_values, device_span group_labels, device_span group_offsets, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto ranks = make_fixed_width_column( data_type{type_to_id()}, group_labels.size(), mask_state::UNALLOCATED, stream, mr); @@ -218,7 +219,7 @@ std::unique_ptr average_rank_scan(column_view const& grouped_values, device_span group_labels, device_span group_offsets, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto max_rank = max_rank_scan(grouped_values, value_order, @@ -251,7 +252,7 @@ std::unique_ptr dense_rank_scan(column_view const& grouped_values, device_span group_labels, device_span group_offsets, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + 
rmm::device_async_resource_ref mr) { return rank_generator( grouped_values, @@ -272,7 +273,7 @@ std::unique_ptr group_rank_to_percentage(rank_method const method, device_span group_labels, device_span group_offsets, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(percentage != rank_percentage::NONE, "Percentage cannot be NONE"); auto ranks = make_fixed_width_column( diff --git a/cpp/src/groupby/sort/group_reductions.hpp b/cpp/src/groupby/sort/group_reductions.hpp index 3aa79f226a3..5e76dc3135a 100644 --- a/cpp/src/groupby/sort/group_reductions.hpp +++ b/cpp/src/groupby/sort/group_reductions.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ #include #include +#include #include @@ -52,7 +53,7 @@ std::unique_ptr group_sum(column_view const& values, size_type num_groups, cudf::device_span group_labels, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Internal API to calculate groupwise product @@ -75,7 +76,7 @@ std::unique_ptr group_product(column_view const& values, size_type num_groups, cudf::device_span group_labels, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Internal API to calculate groupwise minimum value @@ -98,7 +99,7 @@ std::unique_ptr group_min(column_view const& values, size_type num_groups, cudf::device_span group_labels, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Internal API to calculate groupwise maximum value @@ -121,7 +122,7 @@ std::unique_ptr group_max(column_view const& values, size_type num_groups, cudf::device_span group_labels, 
rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Internal API to calculate group-wise indices of maximum values. @@ -146,7 +147,7 @@ std::unique_ptr group_argmax(column_view const& values, cudf::device_span group_labels, column_view const& key_sort_order, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Internal API to calculate group-wise indices of minimum values. @@ -171,7 +172,7 @@ std::unique_ptr group_argmin(column_view const& values, cudf::device_span group_labels, column_view const& key_sort_order, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Internal API to calculate number of non-null values in each group of @@ -195,7 +196,7 @@ std::unique_ptr group_count_valid(column_view const& values, cudf::device_span group_labels, size_type num_groups, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Internal API to calculate number of values in each group of @p values @@ -215,7 +216,7 @@ std::unique_ptr group_count_valid(column_view const& values, std::unique_ptr group_count_all(cudf::device_span group_offsets, size_type num_groups, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Internal API to compute histogram for each group in @p values. * @@ -242,7 +243,7 @@ std::unique_ptr group_histogram(column_view const& values, cudf::device_span group_labels, size_type num_groups, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Internal API to calculate sum of squares of differences from means. 
@@ -266,7 +267,7 @@ std::unique_ptr group_m2(column_view const& values, column_view const& group_means, cudf::device_span group_labels, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Internal API to calculate groupwise variance @@ -296,7 +297,7 @@ std::unique_ptr group_var(column_view const& values, cudf::device_span group_labels, size_type ddof, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Internal API to calculate groupwise quantiles @@ -326,7 +327,7 @@ std::unique_ptr group_quantiles(column_view const& values, std::vector const& quantiles, interpolation interp, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Internal API to calculate number of unique values in each group of @@ -358,7 +359,7 @@ std::unique_ptr group_nunique(column_view const& values, cudf::device_span group_offsets, null_policy null_handling, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Internal API to calculate nth values in each group of @p values @@ -393,7 +394,7 @@ std::unique_ptr group_nth_element(column_view const& values, size_type n, null_policy null_handling, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Internal API to collect grouped values into a lists column * @@ -418,7 +419,7 @@ std::unique_ptr group_collect(column_view const& values, size_type num_groups, null_policy null_handling, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Internal API to merge grouped lists into one list. 
@@ -441,7 +442,7 @@ std::unique_ptr group_merge_lists(column_view const& values, cudf::device_span group_offsets, size_type num_groups, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Internal API to merge grouped M2 values corresponding to the same key. @@ -467,7 +468,7 @@ std::unique_ptr group_merge_m2(column_view const& values, cudf::device_span group_offsets, size_type num_groups, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Internal API to merge multiple output of HISTOGRAM aggregation. @@ -494,7 +495,7 @@ std::unique_ptr group_merge_histogram(column_view const& values, cudf::device_span group_offsets, size_type num_groups, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Internal API to find covariance of child columns of a non-nullable struct column. @@ -521,7 +522,7 @@ std::unique_ptr group_covariance(column_view const& values_0, size_type min_periods, size_type ddof, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Internal API to find correlation from covariance and standard deviation. @@ -536,7 +537,7 @@ std::unique_ptr group_correlation(column_view const& covariance, column_view const& stddev_0, column_view const& stddev_1, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace groupby diff --git a/cpp/src/groupby/sort/group_replace_nulls.cu b/cpp/src/groupby/sort/group_replace_nulls.cu index 49557164230..566507da230 100644 --- a/cpp/src/groupby/sort/group_replace_nulls.cu +++ b/cpp/src/groupby/sort/group_replace_nulls.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ #include #include +#include #include #include @@ -40,7 +41,7 @@ std::unique_ptr group_replace_nulls(cudf::column_view const& grouped_val device_span group_labels, cudf::replace_policy replace_policy, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { cudf::size_type size = grouped_value.size(); diff --git a/cpp/src/groupby/sort/group_scan.hpp b/cpp/src/groupby/sort/group_scan.hpp index fd53046f7e2..6f2daae5f9d 100644 --- a/cpp/src/groupby/sort/group_scan.hpp +++ b/cpp/src/groupby/sort/group_scan.hpp @@ -21,6 +21,7 @@ #include #include +#include #include @@ -40,7 +41,7 @@ std::unique_ptr sum_scan(column_view const& values, size_type num_groups, device_span group_labels, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Internal API to calculate groupwise cumulative product @@ -57,7 +58,7 @@ std::unique_ptr product_scan(column_view const& values, size_type num_groups, device_span group_labels, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Internal API to calculate groupwise cumulative minimum value @@ -72,7 +73,7 @@ std::unique_ptr min_scan(column_view const& values, size_type num_groups, device_span group_labels, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Internal API to calculate groupwise cumulative maximum value @@ -87,7 +88,7 @@ std::unique_ptr max_scan(column_view const& values, size_type num_groups, device_span group_labels, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Internal API to calculate cumulative number of values in each group @@ -99,7 +100,7 @@ 
std::unique_ptr max_scan(column_view const& values, */ std::unique_ptr count_scan(device_span group_labels, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Internal API to calculate groupwise min rank value @@ -118,7 +119,7 @@ std::unique_ptr min_rank_scan(column_view const& grouped_values, device_span group_labels, device_span group_offsets, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Internal API to calculate groupwise max rank value @@ -128,14 +129,14 @@ std::unique_ptr min_rank_scan(column_view const& grouped_values, * device_span group_labels, * device_span group_offsets, * rmm::cuda_stream_view stream, - * rmm::mr::device_memory_resource* mr) + * rmm::device_async_resource_ref mr) */ std::unique_ptr max_rank_scan(column_view const& grouped_values, column_view const& value_order, device_span group_labels, device_span group_offsets, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Internal API to calculate groupwise first rank value @@ -145,14 +146,14 @@ std::unique_ptr max_rank_scan(column_view const& grouped_values, * device_span group_labels, * device_span group_offsets, * rmm::cuda_stream_view stream, - * rmm::mr::device_memory_resource* mr) + * rmm::device_async_resource_ref mr) */ std::unique_ptr first_rank_scan(column_view const& grouped_values, column_view const& value_order, device_span group_labels, device_span group_offsets, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Internal API to calculate groupwise average rank value @@ -162,14 +163,14 @@ std::unique_ptr first_rank_scan(column_view const& grouped_values, * device_span group_labels, * device_span group_offsets, * rmm::cuda_stream_view stream, - * rmm::mr::device_memory_resource* mr) + * 
rmm::device_async_resource_ref mr) */ std::unique_ptr average_rank_scan(column_view const& grouped_values, column_view const& value_order, device_span group_labels, device_span group_offsets, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Internal API to calculate groupwise dense rank value @@ -186,7 +187,7 @@ std::unique_ptr dense_rank_scan(column_view const& grouped_values, device_span group_labels, device_span group_offsets, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Convert groupwise rank to groupwise percentage rank @@ -209,7 +210,7 @@ std::unique_ptr group_rank_to_percentage(rank_method const method, device_span group_labels, device_span group_offsets, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace detail } // namespace groupby diff --git a/cpp/src/groupby/sort/group_scan_util.cuh b/cpp/src/groupby/sort/group_scan_util.cuh index 2ebc8ba7d5d..b360ba2c45d 100644 --- a/cpp/src/groupby/sort/group_scan_util.cuh +++ b/cpp/src/groupby/sort/group_scan_util.cuh @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -60,7 +61,7 @@ struct group_scan_dispatcher { size_type num_groups, cudf::device_span group_labels, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return group_scan_functor::invoke(values, num_groups, group_labels, stream, mr); } @@ -89,7 +90,7 @@ struct group_scan_functor() size_type num_groups, cudf::device_span group_labels, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { using DeviceType = device_storage_type_t; using OpType = cudf::detail::corresponding_operator_t; @@ -145,7 +146,7 @@ struct group_scan_functor group_labels, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* 
mr) + rmm::device_async_resource_ref mr) { using OpType = cudf::detail::corresponding_operator_t; @@ -191,7 +192,7 @@ struct group_scan_functor group_labels, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { if (values.is_empty()) { return cudf::empty_like(values); } diff --git a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh index 42d4b654346..5e892710d3b 100644 --- a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh +++ b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh @@ -30,6 +30,7 @@ #include #include +#include #include #include @@ -116,7 +117,7 @@ struct group_reduction_dispatcher { size_type num_groups, cudf::device_span group_labels, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return group_reduction_functor::invoke(values, num_groups, group_labels, stream, mr); } @@ -149,7 +150,7 @@ struct group_reduction_functor< size_type num_groups, cudf::device_span group_labels, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { using SourceDType = device_storage_type_t; @@ -218,7 +219,7 @@ struct group_reduction_functor< size_type num_groups, cudf::device_span group_labels, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { // This is be expected to be size_type. using ResultType = cudf::detail::target_type_t; diff --git a/cpp/src/groupby/sort/group_std.cu b/cpp/src/groupby/sort/group_std.cu index 30b6f67dffe..70f64186f21 100644 --- a/cpp/src/groupby/sort/group_std.cu +++ b/cpp/src/groupby/sort/group_std.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -104,7 +105,7 @@ struct var_functor { cudf::device_span group_labels, size_type ddof, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { using ResultType = cudf::detail::target_type_t; @@ -175,7 +176,7 @@ std::unique_ptr group_var(column_view const& values, cudf::device_span group_labels, size_type ddof, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto values_type = cudf::is_dictionary(values.type()) ? dictionary_column_view(values).keys().type() diff --git a/cpp/src/groupby/sort/group_sum.cu b/cpp/src/groupby/sort/group_sum.cu index 0af7cb22159..316b6f395bb 100644 --- a/cpp/src/groupby/sort/group_sum.cu +++ b/cpp/src/groupby/sort/group_sum.cu @@ -20,6 +20,7 @@ #include #include +#include namespace cudf { namespace groupby { @@ -28,7 +29,7 @@ std::unique_ptr group_sum(column_view const& values, size_type num_groups, cudf::device_span group_labels, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto values_type = cudf::is_dictionary(values.type()) ? 
dictionary_column_view(values).keys().type() diff --git a/cpp/src/groupby/sort/group_sum_scan.cu b/cpp/src/groupby/sort/group_sum_scan.cu index 2efa1185899..01c4d0c2c4a 100644 --- a/cpp/src/groupby/sort/group_sum_scan.cu +++ b/cpp/src/groupby/sort/group_sum_scan.cu @@ -17,6 +17,7 @@ #include "groupby/sort/group_scan_util.cuh" #include +#include namespace cudf { namespace groupby { @@ -25,7 +26,7 @@ std::unique_ptr sum_scan(column_view const& values, size_type num_groups, cudf::device_span group_labels, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return type_dispatcher(values.type(), group_scan_dispatcher{}, diff --git a/cpp/src/groupby/sort/scan.cpp b/cpp/src/groupby/sort/scan.cpp index 45c232aa3aa..f211c61b3b7 100644 --- a/cpp/src/groupby/sort/scan.cpp +++ b/cpp/src/groupby/sort/scan.cpp @@ -35,6 +35,7 @@ #include #include +#include #include @@ -207,7 +208,7 @@ void scan_result_functor::operator()(aggregation const& agg) std::pair, std::vector> groupby::sort_scan( host_span requests, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { // We're going to start by creating a cache of results so that aggs that // depend on other aggs will not have to be recalculated. e.g. 
mean depends on diff --git a/cpp/src/groupby/sort/sort_helper.cu b/cpp/src/groupby/sort/sort_helper.cu index 1e6c7a9393f..4da1da089cd 100644 --- a/cpp/src/groupby/sort/sort_helper.cu +++ b/cpp/src/groupby/sort/sort_helper.cu @@ -35,6 +35,7 @@ #include #include +#include #include #include @@ -248,7 +249,7 @@ column_view sort_groupby_helper::keys_bitmask_column(rmm::cuda_stream_view strea } sort_groupby_helper::column_ptr sort_groupby_helper::sorted_values( - column_view const& values, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) + column_view const& values, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { column_ptr values_sort_order = cudf::detail::stable_sorted_order(table_view({unsorted_keys_labels(stream), values}), @@ -272,7 +273,7 @@ sort_groupby_helper::column_ptr sort_groupby_helper::sorted_values( } sort_groupby_helper::column_ptr sort_groupby_helper::grouped_values( - column_view const& values, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) + column_view const& values, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { auto gather_map = key_sort_order(stream); @@ -287,7 +288,7 @@ sort_groupby_helper::column_ptr sort_groupby_helper::grouped_values( } std::unique_ptr
sort_groupby_helper::unique_keys(rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto idx_data = key_sort_order(stream).data(); @@ -305,7 +306,7 @@ std::unique_ptr
sort_groupby_helper::unique_keys(rmm::cuda_stream_view st } std::unique_ptr
sort_groupby_helper::sorted_keys(rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return cudf::detail::gather(_keys, key_sort_order(stream), diff --git a/cpp/src/hash/hashing.cu b/cpp/src/hash/hashing.cu deleted file mode 100644 index 68e02ef3cf4..00000000000 --- a/cpp/src/hash/hashing.cu +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include - -#include - -namespace cudf { -namespace hashing { -namespace detail { - -std::unique_ptr hash(table_view const& input, - hash_id hash_function, - uint32_t seed, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - switch (hash_function) { - case (hash_id::HASH_MURMUR3): return murmurhash3_x86_32(input, seed, stream, mr); - case (hash_id::HASH_SPARK_MURMUR3): return spark_murmurhash3_x86_32(input, seed, stream, mr); - case (hash_id::HASH_MD5): return md5(input, stream, mr); - default: CUDF_FAIL("Unsupported hash function."); - } -} - -} // namespace detail -} // namespace hashing - -std::unique_ptr hash(table_view const& input, - hash_id hash_function, - uint32_t seed, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return hashing::detail::hash(input, hash_function, seed, stream, mr); -} - -} // namespace cudf diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu 
index b34455905d9..0b559e8e86c 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -29,6 +29,7 @@ #include #include +#include #include #include @@ -284,7 +285,7 @@ inline bool md5_leaf_type_check(data_type dt) std::unique_ptr md5(table_view const& input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { if (input.num_columns() == 0 || input.num_rows() == 0) { // Return the MD5 hash of a zero-length input. @@ -308,7 +309,7 @@ std::unique_ptr md5(table_view const& input, // Result column allocation and creation auto begin = thrust::make_constant_iterator(digest_size); auto [offsets_column, bytes] = - cudf::detail::make_offsets_child_column(begin, begin + input.num_rows(), stream, mr); + cudf::strings::detail::make_offsets_child_column(begin, begin + input.num_rows(), stream, mr); rmm::device_uvector chars(bytes, stream, mr); auto d_chars = chars.data(); @@ -321,7 +322,7 @@ std::unique_ptr md5(table_view const& input, thrust::make_counting_iterator(0), thrust::make_counting_iterator(input.num_rows()), [d_chars, device_input = *device_input] __device__(auto row_index) { - MD5Hasher hasher(d_chars + (row_index * digest_size)); + MD5Hasher hasher(d_chars + (static_cast(row_index) * digest_size)); for (auto const& col : device_input) { if (col.is_valid(row_index)) { if (col.type().id() == type_id::LIST) { @@ -349,7 +350,7 @@ std::unique_ptr md5(table_view const& input, std::unique_ptr md5(table_view const& input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::md5(input, stream, mr); diff --git a/cpp/src/hash/murmurhash3_x64_128.cu b/cpp/src/hash/murmurhash3_x64_128.cu index 1fc469686e1..6c91532a193 100644 --- a/cpp/src/hash/murmurhash3_x64_128.cu +++ b/cpp/src/hash/murmurhash3_x64_128.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,6 +22,7 @@ #include #include +#include #include @@ -109,7 +110,7 @@ class murmur_device_row_hasher { std::unique_ptr
murmurhash3_x64_128(table_view const& input, uint64_t seed, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto output1 = make_numeric_column( data_type(type_id::UINT64), input.num_rows(), mask_state::UNALLOCATED, stream, mr); @@ -140,7 +141,7 @@ std::unique_ptr
murmurhash3_x64_128(table_view const& input, std::unique_ptr
murmurhash3_x64_128(table_view const& input, uint64_t seed, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::murmurhash3_x64_128(input, seed, stream, mr); diff --git a/cpp/src/hash/murmurhash3_x86_32.cu b/cpp/src/hash/murmurhash3_x86_32.cu index a6ab301a86e..eac72f5d995 100644 --- a/cpp/src/hash/murmurhash3_x86_32.cu +++ b/cpp/src/hash/murmurhash3_x86_32.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,6 +23,7 @@ #include #include +#include #include @@ -33,7 +34,7 @@ namespace detail { std::unique_ptr murmurhash3_x86_32(table_view const& input, uint32_t seed, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto output = make_numeric_column(data_type(type_to_id()), input.num_rows(), @@ -62,7 +63,7 @@ std::unique_ptr murmurhash3_x86_32(table_view const& input, std::unique_ptr murmurhash3_x86_32(table_view const& input, uint32_t seed, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::murmurhash3_x86_32(input, seed, stream, mr); diff --git a/cpp/src/hash/sha1_hash.cu b/cpp/src/hash/sha1_hash.cu index 71253d279b9..f7609eb26af 100644 --- a/cpp/src/hash/sha1_hash.cu +++ b/cpp/src/hash/sha1_hash.cu @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -62,7 +63,7 @@ struct SHA1Hash : HashBase { std::unique_ptr sha1(table_view const& input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return sha_hash(input, stream, mr); } @@ -71,7 +72,7 @@ std::unique_ptr sha1(table_view const& input, std::unique_ptr sha1(table_view const& input, 
rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::sha1(input, stream, mr); diff --git a/cpp/src/hash/sha224_hash.cu b/cpp/src/hash/sha224_hash.cu index 61480a78776..cf04504a489 100644 --- a/cpp/src/hash/sha224_hash.cu +++ b/cpp/src/hash/sha224_hash.cu @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -63,7 +64,7 @@ struct SHA224Hash : HashBase { std::unique_ptr sha224(table_view const& input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return sha_hash(input, stream, mr); } @@ -72,7 +73,7 @@ std::unique_ptr sha224(table_view const& input, std::unique_ptr sha224(table_view const& input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::sha224(input, stream, mr); diff --git a/cpp/src/hash/sha256_hash.cu b/cpp/src/hash/sha256_hash.cu index b15cfe09d52..664913c0f4c 100644 --- a/cpp/src/hash/sha256_hash.cu +++ b/cpp/src/hash/sha256_hash.cu @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -63,7 +64,7 @@ struct SHA256Hash : HashBase { std::unique_ptr sha256(table_view const& input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return sha_hash(input, stream, mr); } @@ -72,7 +73,7 @@ std::unique_ptr sha256(table_view const& input, std::unique_ptr sha256(table_view const& input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::sha256(input, stream, mr); diff --git a/cpp/src/hash/sha384_hash.cu b/cpp/src/hash/sha384_hash.cu index 3075d2c62f8..92192f501ec 100644 --- a/cpp/src/hash/sha384_hash.cu +++ b/cpp/src/hash/sha384_hash.cu @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -70,7 +71,7 @@ struct SHA384Hash : HashBase { 
std::unique_ptr sha384(table_view const& input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return sha_hash(input, stream, mr); } @@ -79,7 +80,7 @@ std::unique_ptr sha384(table_view const& input, std::unique_ptr sha384(table_view const& input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::sha384(input, stream, mr); diff --git a/cpp/src/hash/sha512_hash.cu b/cpp/src/hash/sha512_hash.cu index d073cf1edca..244206aeeb9 100644 --- a/cpp/src/hash/sha512_hash.cu +++ b/cpp/src/hash/sha512_hash.cu @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -70,7 +71,7 @@ struct SHA512Hash : HashBase { std::unique_ptr sha512(table_view const& input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return sha_hash(input, stream, mr); } @@ -79,7 +80,7 @@ std::unique_ptr sha512(table_view const& input, std::unique_ptr sha512(table_view const& input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::sha512(input, stream, mr); diff --git a/cpp/src/hash/sha_hash.cuh b/cpp/src/hash/sha_hash.cuh index 0a22ee34918..6976241057e 100644 --- a/cpp/src/hash/sha_hash.cuh +++ b/cpp/src/hash/sha_hash.cuh @@ -28,6 +28,7 @@ #include #include +#include #include #include @@ -503,7 +504,7 @@ bool inline sha_leaf_type_check(data_type dt) template std::unique_ptr sha_hash(table_view const& input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { if (input.num_rows() == 0) { return cudf::make_empty_column(cudf::type_id::STRING); } @@ -517,7 +518,7 @@ std::unique_ptr sha_hash(table_view const& input, // Result column allocation and creation auto begin = thrust::make_constant_iterator(Hasher::digest_size); auto [offsets_column, 
bytes] = - cudf::detail::make_offsets_child_column(begin, begin + input.num_rows(), stream, mr); + cudf::strings::detail::make_offsets_child_column(begin, begin + input.num_rows(), stream, mr); auto chars = rmm::device_uvector(bytes, stream, mr); auto d_chars = chars.data(); @@ -525,19 +526,20 @@ std::unique_ptr sha_hash(table_view const& input, auto const device_input = table_device_view::create(input, stream); // Hash each row, hashing each element sequentially left to right - thrust::for_each(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(input.num_rows()), - [d_chars, device_input = *device_input] __device__(auto row_index) { - Hasher hasher(d_chars + (row_index * Hasher::digest_size)); - for (auto const& col : device_input) { - if (col.is_valid(row_index)) { - cudf::type_dispatcher( - col.type(), HasherDispatcher(&hasher, col), row_index); - } - } - hasher.finalize(); - }); + thrust::for_each( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(input.num_rows()), + [d_chars, device_input = *device_input] __device__(auto row_index) { + Hasher hasher(d_chars + (static_cast(row_index) * Hasher::digest_size)); + for (auto const& col : device_input) { + if (col.is_valid(row_index)) { + cudf::type_dispatcher( + col.type(), HasherDispatcher(&hasher, col), row_index); + } + } + hasher.finalize(); + }); return make_strings_column(input.num_rows(), std::move(offsets_column), chars.release(), 0, {}); } diff --git a/cpp/src/hash/spark_murmurhash3_x86_32.cu b/cpp/src/hash/spark_murmurhash3_x86_32.cu deleted file mode 100644 index c7992b4afa0..00000000000 --- a/cpp/src/hash/spark_murmurhash3_x86_32.cu +++ /dev/null @@ -1,442 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include - -namespace cudf { -namespace hashing { -namespace detail { - -namespace { - -using spark_hash_value_type = int32_t; - -template ())> -struct Spark_MurmurHash3_x86_32 { - using result_type = spark_hash_value_type; - - constexpr Spark_MurmurHash3_x86_32() = default; - constexpr Spark_MurmurHash3_x86_32(uint32_t seed) : m_seed(seed) {} - - [[nodiscard]] __device__ inline uint32_t fmix32(uint32_t h) const - { - h ^= h >> 16; - h *= 0x85ebca6b; - h ^= h >> 13; - h *= 0xc2b2ae35; - h ^= h >> 16; - return h; - } - - [[nodiscard]] __device__ inline uint32_t getblock32(std::byte const* data, - cudf::size_type offset) const - { - // Read a 4-byte value from the data pointer as individual bytes for safe - // unaligned access (very likely for string types). - auto block = reinterpret_cast(data + offset); - return block[0] | (block[1] << 8) | (block[2] << 16) | (block[3] << 24); - } - - [[nodiscard]] result_type __device__ inline operator()(Key const& key) const - { - return compute(key); - } - - template - result_type __device__ inline compute(T const& key) const - { - return compute_bytes(reinterpret_cast(&key), sizeof(T)); - } - - result_type __device__ inline compute_remaining_bytes(std::byte const* data, - cudf::size_type len, - cudf::size_type tail_offset, - result_type h) const - { - // Process remaining bytes that do not fill a four-byte chunk using Spark's approach - // (does not conform to normal MurmurHash3). 
- for (auto i = tail_offset; i < len; i++) { - // We require a two-step cast to get the k1 value from the byte. First, - // we must cast to a signed int8_t. Then, the sign bit is preserved when - // casting to uint32_t under 2's complement. Java preserves the sign when - // casting byte-to-int, but C++ does not. - uint32_t k1 = static_cast(std::to_integer(data[i])); - k1 *= c1; - k1 = rotate_bits_left(k1, rot_c1); - k1 *= c2; - h ^= k1; - h = rotate_bits_left(static_cast(h), rot_c2); - h = h * 5 + c3; - } - return h; - } - - result_type __device__ compute_bytes(std::byte const* data, cudf::size_type const len) const - { - constexpr cudf::size_type BLOCK_SIZE = 4; - cudf::size_type const nblocks = len / BLOCK_SIZE; - cudf::size_type const tail_offset = nblocks * BLOCK_SIZE; - result_type h = m_seed; - - // Process all four-byte chunks. - for (cudf::size_type i = 0; i < nblocks; i++) { - uint32_t k1 = getblock32(data, i * BLOCK_SIZE); - k1 *= c1; - k1 = rotate_bits_left(k1, rot_c1); - k1 *= c2; - h ^= k1; - h = rotate_bits_left(static_cast(h), rot_c2); - h = h * 5 + c3; - } - - h = compute_remaining_bytes(data, len, tail_offset, h); - - // Finalize hash. 
- h ^= len; - h = fmix32(h); - return h; - } - - private: - uint32_t m_seed{cudf::DEFAULT_HASH_SEED}; - static constexpr uint32_t c1 = 0xcc9e2d51; - static constexpr uint32_t c2 = 0x1b873593; - static constexpr uint32_t c3 = 0xe6546b64; - static constexpr uint32_t rot_c1 = 15; - static constexpr uint32_t rot_c2 = 13; -}; - -template <> -spark_hash_value_type __device__ inline Spark_MurmurHash3_x86_32::operator()( - bool const& key) const -{ - return compute(key); -} - -template <> -spark_hash_value_type __device__ inline Spark_MurmurHash3_x86_32::operator()( - int8_t const& key) const -{ - return compute(key); -} - -template <> -spark_hash_value_type __device__ inline Spark_MurmurHash3_x86_32::operator()( - uint8_t const& key) const -{ - return compute(key); -} - -template <> -spark_hash_value_type __device__ inline Spark_MurmurHash3_x86_32::operator()( - int16_t const& key) const -{ - return compute(key); -} - -template <> -spark_hash_value_type __device__ inline Spark_MurmurHash3_x86_32::operator()( - uint16_t const& key) const -{ - return compute(key); -} - -template <> -spark_hash_value_type __device__ inline Spark_MurmurHash3_x86_32::operator()( - float const& key) const -{ - return compute(normalize_nans(key)); -} - -template <> -spark_hash_value_type __device__ inline Spark_MurmurHash3_x86_32::operator()( - double const& key) const -{ - return compute(normalize_nans(key)); -} - -template <> -spark_hash_value_type __device__ inline Spark_MurmurHash3_x86_32::operator()( - cudf::string_view const& key) const -{ - auto const data = reinterpret_cast(key.data()); - auto const len = key.size_bytes(); - return compute_bytes(data, len); -} - -template <> -spark_hash_value_type __device__ inline Spark_MurmurHash3_x86_32::operator()( - numeric::decimal32 const& key) const -{ - return compute(key.value()); -} - -template <> -spark_hash_value_type __device__ inline Spark_MurmurHash3_x86_32::operator()( - numeric::decimal64 const& key) const -{ - return 
compute(key.value()); -} - -template <> -spark_hash_value_type __device__ inline Spark_MurmurHash3_x86_32::operator()( - numeric::decimal128 const& key) const -{ - // Generates the Spark MurmurHash3 hash value, mimicking the conversion: - // java.math.BigDecimal.valueOf(unscaled_value, _scale).unscaledValue().toByteArray() - // https://github.com/apache/spark/blob/master/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala#L381 - __int128_t const val = key.value(); - constexpr cudf::size_type key_size = sizeof(__int128_t); - std::byte const* data = reinterpret_cast(&val); - - // Small negative values start with 0xff..., small positive values start with 0x00... - bool const is_negative = val < 0; - std::byte const zero_value = is_negative ? std::byte{0xff} : std::byte{0x00}; - - // If the value can be represented with a shorter than 16-byte integer, the - // leading bytes of the little-endian value are truncated and are not hashed. - auto const reverse_begin = thrust::reverse_iterator(data + key_size); - auto const reverse_end = thrust::reverse_iterator(data); - auto const first_nonzero_byte = - thrust::find_if_not(thrust::seq, reverse_begin, reverse_end, [zero_value](std::byte const& v) { - return v == zero_value; - }).base(); - // Max handles special case of 0 and -1 which would shorten to 0 length otherwise - cudf::size_type length = - std::max(1, static_cast(thrust::distance(data, first_nonzero_byte))); - - // Preserve the 2's complement sign bit by adding a byte back on if necessary. - // e.g. 0x0000ff would shorten to 0x00ff. The 0x00 byte is retained to - // preserve the sign bit, rather than leaving an "f" at the front which would - // change the sign bit. However, 0x00007f would shorten to 0x7f. No extra byte - // is needed because the leftmost bit matches the sign bit. Similarly for - // negative values: 0xffff00 --> 0xff00 and 0xffff80 --> 0x80. 
- if ((length < key_size) && (is_negative ^ bool(data[length - 1] & std::byte{0x80}))) { ++length; } - - // Convert to big endian by reversing the range of nonzero bytes. Only those bytes are hashed. - __int128_t big_endian_value = 0; - auto big_endian_data = reinterpret_cast(&big_endian_value); - thrust::reverse_copy(thrust::seq, data, data + length, big_endian_data); - return compute_bytes(big_endian_data, length); -} - -/** - * @brief Computes the hash value of a row in the given table. - * - * This functor uses Spark conventions for Murmur hashing, which differs from - * the Murmur implementation used in the rest of libcudf. These differences - * include: - * - Serially using the output hash as an input seed for the next item - * - Ignorance of null values - * - * The serial use of hashes as seeds means that data of different nested types - * can exhibit hash collisions. For example, a row of an integer column - * containing a 1 will have the same hash as a lists column of integers - * containing a list of [1] and a struct column of a single integer column - * containing a struct of {1}. - * - * As a consequence of ignoring null values, inputs like [1], [1, null], and - * [null, 1] have the same hash (an expected hash collision). This kind of - * collision can also occur across a table of nullable columns and with nulls - * in structs ({1, null} and {null, 1} have the same hash). The seed value (the - * previous element's hash value) is returned as the hash if an element is - * null. - * - * For additional differences such as special tail processing and decimal type - * handling, refer to the Spark_MurmurHash3_x86_32 functor. - * - * @tparam hash_function Hash functor to use for hashing elements. Must be Spark_MurmurHash3_x86_32. - * @tparam Nullate A cudf::nullate type describing whether to check for nulls. - */ -template