From d17376aaa964c5256aa0cdcbac5fa15e0be4aca7 Mon Sep 17 00:00:00 2001 From: Paul Taylor <178183+trxcllnt@users.noreply.github.com> Date: Wed, 27 Sep 2023 06:25:34 -0700 Subject: [PATCH 1/5] Add RAFT devcontainers (#1791) This PR adds some [devcontainers](https://containers.dev/) to help simplify building the RAFT C++ and Python libraries. It also adds an optional job to the `pr.yaml` to [build the RAFT libs in each devcontainer](https://github.com/trxcllnt/raft/blob/fea/devcontainers/.github/workflows/pr.yaml#L96-L101), so the build caches are populated for devs by CI. A devcontainer can be launched by clicking the "Reopen in Container" button that VSCode shows when opening the repo (or by using the "Rebuild and Reopen in Container" command from the command palette): ![image](https://user-images.githubusercontent.com/178183/221771999-97ab29d5-e718-4e5f-b32f-2cdd51bba25c.png) Clicking this button will cause VSCode to prompt the user to select one of these devcontainer variants: ![image](https://github.com/rapidsai/rmm/assets/178183/68d4b264-4fc2-4008-92b6-cb4bdd19b29f) On startup, the devcontainer creates or updates the conda/pip environment using `raft/dependencies.yaml`. The envs/package caches are cached on the host via volume mounts, which are described in more detail in [`.devcontainer/README.md`](https://github.com/trxcllnt/raft/blob/fea/devcontainers/.devcontainer/README.md). The container includes convenience functions to clean, configure, and build the various RAFT components: ```shell $ clean-raft-cpp # only cleans the C++ build dir $ clean-pylibraft-python # only cleans the Python build dir $ clean-raft # cleans both C++ and Python build dirs $ configure-raft-cpp # only configures raft C++ lib $ build-raft-cpp # only builds raft C++ lib $ build-pylibraft-python # only builds raft Python lib $ build-raft # builds both C++ and Python libs ``` * The C++ build script is a small wrapper around `cmake -S ~/raft/cpp -B ~/raft/cpp/build` and `cmake --build ~/raft/cpp/build` * The Python build script is a small wrapper around `pip install --editable ~/raft/cpp` Unlike `build.sh`, these convenience scripts *don't* install the libraries after building them. Instead, they automatically inject the correct arguments to build the C++ libraries from source and use their build dirs as package roots: ```shell $ cmake -S ~/raft/cpp -B ~/raft/cpp/build $ CMAKE_ARGS="-Draft_ROOT=~/raft/cpp/build" \ # <-- this argument is automatic pip install -e ~/raft/cpp ``` Authors: - Paul Taylor (https://github.com/trxcllnt) Approvers: - Corey J. Nolet (https://github.com/cjnolet) - Jake Awe (https://github.com/AyodeAwe) URL: https://github.com/rapidsai/raft/pull/1791 --- .devcontainer/Dockerfile | 30 ++++ .devcontainer/README.md | 64 ++++++++ .../cuda11.8-conda/devcontainer.json | 37 +++++ .devcontainer/cuda11.8-pip/devcontainer.json | 38 +++++ .../cuda12.0-conda/devcontainer.json | 37 +++++ .devcontainer/cuda12.0-pip/devcontainer.json | 38 +++++ .github/workflows/pr.yaml | 9 ++ .gitignore | 4 + ci/release/update-version.sh | 7 + .../all_cuda-118_arch-x86_64.yaml | 4 +- .../all_cuda-120_arch-x86_64.yaml | 4 +- .../bench_ann_cuda-118_arch-x86_64.yaml | 3 +- cpp/.clangd | 65 ++++++++ cpp/test/core/math_device.cu | 6 +- dependencies.yaml | 152 ++++++++++++++++-- python/raft-dask/CMakeLists.txt | 12 +- 16 files changed, 480 insertions(+), 30 deletions(-) create mode 100644 .devcontainer/Dockerfile create mode 100644 .devcontainer/README.md create mode 100644 .devcontainer/cuda11.8-conda/devcontainer.json create mode 100644 .devcontainer/cuda11.8-pip/devcontainer.json create mode 100644 .devcontainer/cuda12.0-conda/devcontainer.json create mode 100644 .devcontainer/cuda12.0-pip/devcontainer.json create mode 100644 cpp/.clangd diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile new file mode 100644 index 0000000000..9d35e3f97f --- /dev/null +++ b/.devcontainer/Dockerfile @@ -0,0 +1,30 @@ +# syntax=docker/dockerfile:1.5 + +ARG BASE +ARG PYTHON_PACKAGE_MANAGER=conda + +FROM ${BASE} as pip-base + +ENV DEFAULT_VIRTUAL_ENV=rapids + +FROM ${BASE} as conda-base + +ENV DEFAULT_CONDA_ENV=rapids + +FROM ${PYTHON_PACKAGE_MANAGER}-base + +ARG CUDA +ENV CUDAARCHS="RAPIDS" +ENV CUDA_VERSION="${CUDA_VERSION:-${CUDA}}" + +ARG PYTHON_PACKAGE_MANAGER +ENV PYTHON_PACKAGE_MANAGER="${PYTHON_PACKAGE_MANAGER}" + +ENV PYTHONSAFEPATH="1" +ENV PYTHONUNBUFFERED="1" +ENV PYTHONDONTWRITEBYTECODE="1" + +ENV SCCACHE_REGION="us-east-2" +ENV SCCACHE_BUCKET="rapids-sccache-devs" +ENV VAULT_HOST="https://vault.ops.k8s.rapids.ai" +ENV HISTFILE="/home/coder/.cache/._bash_history" diff --git a/.devcontainer/README.md b/.devcontainer/README.md new file mode 100644 index 0000000000..3c76b8963d --- /dev/null +++ b/.devcontainer/README.md @@ -0,0 +1,64 @@ +# RAFT Development Containers + +This directory contains [devcontainer configurations](https://containers.dev/implementors/json_reference/) for using VSCode to [develop in a container](https://code.visualstudio.com/docs/devcontainers/containers) via the `Remote Containers` [extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) or [GitHub Codespaces](https://github.com/codespaces). + +This container is a turnkey development environment for building and testing the RAFT C++ and Python libraries. + +## Table of Contents + +* [Prerequisites](#prerequisites) +* [Host bind mounts](#host-bind-mounts) +* [Launch a Dev Container](#launch-a-dev-container) + +## Prerequisites + +* [VSCode](https://code.visualstudio.com/download) +* [VSCode Remote Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) + +## Host bind mounts + +By default, the following directories are bind-mounted into the devcontainer: + +* `${repo}:/home/coder/raft` +* `${repo}/../.aws:/home/coder/.aws` +* `${repo}/../.local:/home/coder/.local` +* `${repo}/../.cache:/home/coder/.cache` +* `${repo}/../.conda:/home/coder/.conda` +* `${repo}/../.config:/home/coder/.config` + +This ensures caches, configurations, dependencies, and your commits are persisted on the host across container runs. + +## Launch a Dev Container + +To launch a devcontainer from VSCode, open the RAFT repo and select the "Reopen in Container" button in the bottom right:
+ +Alternatively, open the VSCode command palette (typically `cmd/ctrl + shift + P`) and run the "Rebuild and Reopen in Container" command. + +## Using the devcontainer + +On startup, the devcontainer creates or updates the conda/pip environment using `raft/dependencies.yaml`. + +The container includes convenience functions to clean, configure, and build the various RAFT components: + +```shell +$ clean-raft-cpp # only cleans the C++ build dir +$ clean-pylibraft-python # only cleans the Python build dir +$ clean-raft # cleans both C++ and Python build dirs + +$ configure-raft-cpp # only configures raft C++ lib + +$ build-raft-cpp # only builds raft C++ lib +$ build-pylibraft-python # only builds raft Python lib +$ build-raft # builds both C++ and Python libs +``` + +* The C++ build script is a small wrapper around `cmake -S ~/raft/cpp -B ~/raft/cpp/build` and `cmake --build ~/raft/cpp/build` +* The Python build script is a small wrapper around `pip install --editable ~/raft/cpp` + +Unlike `build.sh`, these convenience scripts *don't* install the libraries after building them. Instead, they automatically inject the correct arguments to build the C++ libraries from source and use their build dirs as package roots: + +```shell +$ cmake -S ~/raft/cpp -B ~/raft/cpp/build +$ CMAKE_ARGS="-Draft_ROOT=~/raft/cpp/build" \ # <-- this argument is automatic + pip install -e ~/raft/cpp +``` diff --git a/.devcontainer/cuda11.8-conda/devcontainer.json b/.devcontainer/cuda11.8-conda/devcontainer.json new file mode 100644 index 0000000000..8da9b5428a --- /dev/null +++ b/.devcontainer/cuda11.8-conda/devcontainer.json @@ -0,0 +1,37 @@ +{ + "build": { + "context": "${localWorkspaceFolder}/.devcontainer", + "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile", + "args": { + "CUDA": "11.8", + "PYTHON_PACKAGE_MANAGER": "conda", + "BASE": "rapidsai/devcontainers:23.10-cpp-llvm16-cuda11.8-mambaforge-ubuntu22.04" + } + }, + "hostRequirements": {"gpu": "optional"}, + "features": { + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:23.10": {} + }, + "overrideFeatureInstallOrder": [ + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" + ], + "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda11.8-envs}"], + "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"], + "workspaceFolder": "/home/coder", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/raft,type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda11.8-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "ms-python.flake8", + "nvidia.nsight-vscode-edition" + ] + } + } +} diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json new file mode 100644 index 0000000000..0b3ec79e37 --- /dev/null +++ b/.devcontainer/cuda11.8-pip/devcontainer.json @@ -0,0 +1,38 @@ +{ + "build": { + "context": "${localWorkspaceFolder}/.devcontainer", + "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile", + "args": { + "CUDA": "11.8", + "PYTHON_PACKAGE_MANAGER": "pip", + "BASE": "rapidsai/devcontainers:23.10-cpp-llvm16-cuda11.8-ubuntu22.04" + } + }, + "hostRequirements": {"gpu": "optional"}, + "features": { + "ghcr.io/rapidsai/devcontainers/features/ucx:23.10": {"version": "1.14.1"}, + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:23.10": {} + }, + "overrideFeatureInstallOrder": [ + "ghcr.io/rapidsai/devcontainers/features/ucx", + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" + ], + "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda11.8-venvs}"], + "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"], + "workspaceFolder": "/home/coder", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/raft,type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda11.8-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "ms-python.flake8", + "nvidia.nsight-vscode-edition" + ] + } + } +} diff --git a/.devcontainer/cuda12.0-conda/devcontainer.json b/.devcontainer/cuda12.0-conda/devcontainer.json new file mode 100644 index 0000000000..f5af166b46 --- /dev/null +++ b/.devcontainer/cuda12.0-conda/devcontainer.json @@ -0,0 +1,37 @@ +{ + "build": { + "context": "${localWorkspaceFolder}/.devcontainer", + "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile", + "args": { + "CUDA": "12.0", + "PYTHON_PACKAGE_MANAGER": "conda", + "BASE": "rapidsai/devcontainers:23.10-cpp-mambaforge-ubuntu22.04" + } + }, + "hostRequirements": {"gpu": "optional"}, + "features": { + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:23.10": {} + }, + "overrideFeatureInstallOrder": [ + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" + ], + "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.0-envs}"], + "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"], + "workspaceFolder": "/home/coder", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/raft,type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.0-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "ms-python.flake8", + "nvidia.nsight-vscode-edition" + ] + } + } +} diff --git a/.devcontainer/cuda12.0-pip/devcontainer.json b/.devcontainer/cuda12.0-pip/devcontainer.json new file mode 100644 index 0000000000..9f28002d38 --- /dev/null +++ b/.devcontainer/cuda12.0-pip/devcontainer.json @@ -0,0 +1,38 @@ +{ + "build": { + "context": "${localWorkspaceFolder}/.devcontainer", + "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile", + "args": { + "CUDA": "12.0", + "PYTHON_PACKAGE_MANAGER": "pip", + "BASE": "rapidsai/devcontainers:23.10-cpp-llvm16-cuda12.0-ubuntu22.04" + } + }, + "hostRequirements": {"gpu": "optional"}, + "features": { + "ghcr.io/rapidsai/devcontainers/features/ucx:23.10": {"version": "1.14.1"}, + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:23.10": {} + }, + "overrideFeatureInstallOrder": [ + "ghcr.io/rapidsai/devcontainers/features/ucx", + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" + ], + "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.0-venvs}"], + "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"], + "workspaceFolder": "/home/coder", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/raft,type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda12.0-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "ms-python.flake8", + "nvidia.nsight-vscode-edition" + ] + } + } +} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 4fa3c5df86..e539877851 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -22,6 +22,7 @@ jobs: - wheel-tests-pylibraft - wheel-build-raft-dask - wheel-tests-raft-dask + - devcontainer secrets: inherit uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.10 checks: @@ -92,3 +93,11 @@ jobs: with: build_type: pull-request script: ci/test_wheel_raft_dask.sh + devcontainer: + secrets: inherit + uses: rapidsai/shared-action-workflows/.github/workflows/build-in-devcontainer.yaml@branch-23.10 + with: + build_command: | + sccache -z; + build-all -DBUILD_PRIMS_BENCH=ON -DBUILD_ANN_BENCH=ON --verbose; + sccache -s; diff --git a/.gitignore b/.gitignore index 7939fc1622..11b7bc3eba 100644 --- a/.gitignore +++ b/.gitignore @@ -62,3 +62,7 @@ _xml # sphinx _html _text + +# clang tooling +compile_commands.json +.clangd/ diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 7a69b95da1..a867a71f68 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -91,3 +91,10 @@ sed_runner "/^PROJECT_NUMBER/ s|\".*\"|\"${NEXT_SHORT_TAG}\"|g" cpp/doxygen/Doxy sed_runner "/^set(RAFT_VERSION/ s|\".*\"|\"${NEXT_SHORT_TAG}\"|g" docs/source/build.md sed_runner "/GIT_TAG.*branch-/ s|branch-.*|branch-${NEXT_SHORT_TAG}|g" docs/source/build.md sed_runner "/rapidsai\/raft/ s|branch-[0-9][0-9].[0-9][0-9]|branch-${NEXT_SHORT_TAG}|g" docs/source/developer_guide.md + +# .devcontainer files +find .devcontainer/ -type f -name devcontainer.json -print0 | while IFS= read -r -d '' filename; do + sed_runner "s@rapidsai/devcontainers:[0-9.]*@rapidsai/devcontainers:${NEXT_SHORT_TAG}@g" "${filename}" + sed_runner "s@rapidsai/devcontainers/features/ucx:[0-9.]*@rapidsai/devcontainers/features/ucx:${NEXT_SHORT_TAG_PEP440}@" "${filename}" + sed_runner "s@rapidsai/devcontainers/features/rapids-build-utils:[0-9.]*@rapidsai/devcontainers/features/rapids-build-utils:${NEXT_SHORT_TAG_PEP440}@" "${filename}" +done diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 65b4232d83..93e8821575 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -10,7 +10,7 @@ dependencies: - breathe - c-compiler - clang-tools=16.0.6 -- clang=16.0.6 +- clang==16.0.6 - cmake>=3.26.4 - cuda-profiler-api=11.8.86 - cuda-python>=11.7.1,<12.0a0 @@ -43,6 +43,8 @@ dependencies: - numba>=0.57 - numpy>=1.21 - numpydoc +- nvcc_linux-64=11.8 +- pre-commit - pydata-sphinx-theme - pytest - pytest-cov diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml index 9db38ed1de..01d9bca5c2 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-120_arch-x86_64.yaml @@ -10,9 +10,10 @@ dependencies: - breathe - c-compiler - clang-tools=16.0.6 -- clang=16.0.6 +- clang==16.0.6 - cmake>=3.26.4 - cuda-cudart-dev +- cuda-nvcc - cuda-profiler-api - cuda-python>=12.0,<13.0a0 - cuda-version=12.0 @@ -39,6 +40,7 @@ dependencies: - numba>=0.57 - numpy>=1.21 - numpydoc +- pre-commit - pydata-sphinx-theme - pytest - pytest-cov diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index 5a9ef5bd32..4f1df12dfa 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -10,7 +10,7 @@ dependencies: - benchmark>=1.8.2 - c-compiler - clang-tools=16.0.6 -- clang=16.0.6 +- clang==16.0.6 - cmake>=3.26.4 - cuda-profiler-api=11.8.86 - cuda-version=11.8 @@ -34,6 +34,7 @@ dependencies: - nccl>=2.9.9 - ninja - nlohmann_json>=3.11.2 +- nvcc_linux-64=11.8 - scikit-build>=0.13.1 - sysroot_linux-64==2.17 name: bench_ann_cuda-118_arch-x86_64 diff --git a/cpp/.clangd b/cpp/.clangd new file mode 100644 index 0000000000..7c4fe036dd --- /dev/null +++ b/cpp/.clangd @@ -0,0 +1,65 @@ +# https://clangd.llvm.org/config + +# Apply a config conditionally to all C files +If: + PathMatch: .*\.(c|h)$ + +--- + +# Apply a config conditionally to all C++ files +If: + PathMatch: .*\.(c|h)pp + +--- + +# Apply a config conditionally to all CUDA files +If: + PathMatch: .*\.cuh? +CompileFlags: + Add: + - "-x" + - "cuda" + # No error on unknown CUDA versions + - "-Wno-unknown-cuda-version" + # Allow variadic CUDA functions + - "-Xclang=-fcuda-allow-variadic-functions" +Diagnostics: + Suppress: + - "variadic_device_fn" + - "attributes_not_allowed" + +--- + +# Tweak the clangd parse settings for all files +CompileFlags: + Add: + # report all errors + - "-ferror-limit=0" + - "-fmacro-backtrace-limit=0" + - "-ftemplate-backtrace-limit=0" + # Skip the CUDA version check + - "--no-cuda-version-check" + Remove: + # remove gcc's -fcoroutines + - -fcoroutines + # remove nvc++ flags unknown to clang + - "-gpu=*" + - "-stdpar*" + # remove nvcc flags unknown to clang + - "-arch*" + - "-gencode*" + - "--generate-code*" + - "-ccbin*" + - "-t=*" + - "--threads*" + - "-Xptxas*" + - "-Xcudafe*" + - "-Xfatbin*" + - "-Xcompiler*" + - "--diag-suppress*" + - "--diag_suppress*" + - "--compiler-options*" + - "--expt-extended-lambda" + - "--expt-relaxed-constexpr" + - "-forward-unknown-to-host-compiler" + - "-Werror=cross-execution-space-call" diff --git a/cpp/test/core/math_device.cu b/cpp/test/core/math_device.cu index 15c7b2b33a..8e3a9df01b 100644 --- a/cpp/test/core/math_device.cu +++ b/cpp/test/core/math_device.cu @@ -21,7 +21,9 @@ #include #include -#if _RAFT_HAS_CUDA +#include + +#ifdef _RAFT_HAS_CUDA #include #include #endif @@ -35,7 +37,7 @@ __global__ void math_eval_kernel(OutT* out, OpT op, Args... args) template auto math_eval(OpT op, Args&&... args) { - typedef decltype(op(args...)) OutT; + using OutT = cuda::std::invoke_result_t; auto stream = rmm::cuda_stream_default; rmm::device_scalar result(stream); math_eval_kernel<<<1, 1, 0, stream>>>(result.data(), op, std::forward(args)...); diff --git a/dependencies.yaml b/dependencies.yaml index 700a6db1bf..b827f11228 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -10,12 +10,15 @@ files: - build_pylibraft - cudatoolkit - develop + - checks + - build_wheels - test_libraft - docs - run_raft_dask - run_pylibraft - test_python_common - test_pylibraft + - cupy bench_ann: output: conda matrix: @@ -38,6 +41,7 @@ files: - py_version - test_python_common - test_pylibraft + - cupy checks: output: none includes: @@ -47,6 +51,7 @@ files: output: none includes: - test_pylibraft + - cupy - cudatoolkit - docs - py_version @@ -75,6 +80,7 @@ files: includes: - test_python_common - test_pylibraft + - cupy py_build_raft_dask: output: pyproject pyproject_dir: python/raft-dask @@ -145,11 +151,37 @@ dependencies: packages: - gcc_linux-aarch64=11.* - sysroot_linux-aarch64==2.17 + - output_types: conda + matrices: + - matrix: {cuda: "12.0"} + packages: [cuda-version=12.0, cuda-nvcc] + - matrix: {cuda: "11.8", arch: x86_64} + packages: [nvcc_linux-64=11.8] + - matrix: {cuda: "11.8", arch: aarch64} + packages: [nvcc_linux-aarch64=11.8] + - matrix: {cuda: "11.5", arch: x86_64} + packages: [nvcc_linux-64=11.5] + - matrix: {cuda: "11.5", arch: aarch64} + packages: [nvcc_linux-aarch64=11.5] + - matrix: {cuda: "11.4", arch: x86_64} + packages: [nvcc_linux-64=11.4] + - matrix: {cuda: "11.4", arch: aarch64} + packages: [nvcc_linux-aarch64=11.4] + - matrix: {cuda: "11.2", arch: x86_64} + packages: [nvcc_linux-64=11.2] + - matrix: {cuda: "11.2", arch: aarch64} + packages: [nvcc_linux-aarch64=11.2] + build_pylibraft: common: - - output_types: [conda, requirements, pyproject] + - output_types: [conda] packages: - - &rmm rmm==23.10.* + - &rmm_conda rmm==23.10.* + - output_types: requirements + packages: + # pip recognizes the index as a global option for the requirements.txt file + # This index is needed for rmm-cu{11,12}. + - --extra-index-url=https://pypi.nvidia.com specific: - output_types: [conda, requirements, pyproject] matrices: @@ -160,6 +192,20 @@ dependencies: - matrix: # All CUDA 11 versions packages: - &cuda_python11 cuda-python>=11.7.1,<12.0a0 + - output_types: [requirements, pyproject] + matrices: + - matrix: {cuda: "12.2"} + packages: &build_pylibraft_packages_cu12 + - &rmm_cu12 rmm-cu12==23.10.* + - {matrix: {cuda: "12.1"}, packages: *build_pylibraft_packages_cu12} + - {matrix: {cuda: "12.0"}, packages: *build_pylibraft_packages_cu12} + - matrix: {cuda: "11.8"} + packages: &build_pylibraft_packages_cu11 + - &rmm_cu11 rmm-cu11==23.10.* + - {matrix: {cuda: "11.5"}, packages: *build_pylibraft_packages_cu11} + - {matrix: {cuda: "11.4"}, packages: *build_pylibraft_packages_cu11} + - {matrix: {cuda: "11.2"}, packages: *build_pylibraft_packages_cu11} + - {matrix: null, packages: [*rmm_conda] } checks: common: - output_types: [conda, requirements] @@ -167,11 +213,9 @@ dependencies: - pre-commit develop: common: - - output_types: [conda, requirements] - packages: - - clang=16.0.6 - - output_types: [conda] + - output_types: conda packages: + - clang==16.0.6 - clang-tools=16.0.6 nn_bench: common: @@ -265,6 +309,45 @@ dependencies: - *libcusolver114 - *libcusparse_dev114 - *libcusparse114 + + cupy: + common: + - output_types: conda + packages: + - cupy>=12.0.0 + specific: + - output_types: [requirements, pyproject] + matrices: + # All CUDA 12 + x86_64 versions + - matrix: {cuda: "12.2", arch: x86_64} + packages: &cupy_packages_cu12_x86_64 + - &cupy_cu12_x86_64 cupy-cuda12x>=12.0.0 + - {matrix: {cuda: "12.1", arch: x86_64}, packages: *cupy_packages_cu12_x86_64} + - {matrix: {cuda: "12.0", arch: x86_64}, packages: *cupy_packages_cu12_x86_64} + # All CUDA 12 + aarch64 versions + - matrix: {cuda: "12.2", arch: aarch64} + packages: &cupy_packages_cu12_aarch64 + - &cupy_cu12_aarch64 cupy-cuda12x -f https://pip.cupy.dev/aarch64 # TODO: Verify that this works. + - {matrix: {cuda: "12.1", arch: aarch64}, packages: *cupy_packages_cu12_aarch64} + - {matrix: {cuda: "12.0", arch: aarch64}, packages: *cupy_packages_cu12_aarch64} + + # All CUDA 11 + x86_64 versions + - matrix: {cuda: "11.8", arch: x86_64} + packages: &cupy_packages_cu11_x86_64 + - cupy-cuda11x>=12.0.0 + - {matrix: {cuda: "11.5", arch: x86_64}, packages: *cupy_packages_cu11_x86_64} + - {matrix: {cuda: "11.4", arch: x86_64}, packages: *cupy_packages_cu11_x86_64} + - {matrix: {cuda: "11.2", arch: x86_64}, packages: *cupy_packages_cu11_x86_64} + + # All CUDA 11 + aarch64 versions + - matrix: {cuda: "11.8", arch: aarch64} + packages: &cupy_packages_cu11_aarch64 + - cupy-cuda11x -f https://pip.cupy.dev/aarch64 # TODO: Verify that this works. + - {matrix: {cuda: "11.5", arch: aarch64}, packages: *cupy_packages_cu11_aarch64} + - {matrix: {cuda: "11.4", arch: aarch64}, packages: *cupy_packages_cu11_aarch64} + - {matrix: {cuda: "11.2", arch: aarch64}, packages: *cupy_packages_cu11_aarch64} + - {matrix: null, packages: [cupy-cuda11x>=12.0.0]} + test_libraft: common: - output_types: [conda] @@ -287,7 +370,7 @@ dependencies: - sphinx-markdown-tables build_wheels: common: - - output_types: pyproject + - output_types: [requirements, pyproject] packages: - wheel - setuptools @@ -311,7 +394,14 @@ dependencies: - output_types: [conda, pyproject] packages: - &numpy numpy>=1.21 - - *rmm + - output_types: [conda] + packages: + - *rmm_conda + - output_types: requirements + packages: + # pip recognizes the index as a global option for the requirements.txt file + # This index is needed for cudf and rmm. + - --extra-index-url=https://pypi.nvidia.com specific: - output_types: [conda, requirements, pyproject] matrices: @@ -322,6 +412,20 @@ dependencies: - matrix: # All CUDA 11 versions packages: - *cuda_python11 + - output_types: [requirements, pyproject] + matrices: + - matrix: {cuda: "12.2"} + packages: &run_pylibraft_packages_cu12 + - *rmm_cu12 + - {matrix: {cuda: "12.1"}, packages: *run_pylibraft_packages_cu12} + - {matrix: {cuda: "12.0"}, packages: *run_pylibraft_packages_cu12} + - matrix: {cuda: "11.8"} + packages: &run_pylibraft_packages_cu11 + - *rmm_cu11 + - {matrix: {cuda: "11.5"}, packages: *run_pylibraft_packages_cu11} + - {matrix: {cuda: "11.4"}, packages: *run_pylibraft_packages_cu11} + - {matrix: {cuda: "11.2"}, packages: *run_pylibraft_packages_cu11} + - {matrix: null, packages: [*rmm_conda]} run_raft_dask: common: - output_types: [conda, pyproject] @@ -332,15 +436,37 @@ dependencies: - joblib>=0.11 - numba>=0.57 - *numpy - - ucx-py==0.34.* - output_types: conda packages: - dask-core>=2023.7.1 - ucx>=1.13.0 - ucx-proc=*=gpu + - &ucx_py_conda ucx-py==0.34.* - output_types: pyproject packages: - - pylibraft==23.10.* + - &pylibraft_conda pylibraft==23.10.* + - output_types: requirements + packages: + # pip recognizes the index as a global option for the requirements.txt file + # This index is needed for cudf and rmm. + - --extra-index-url=https://pypi.nvidia.com + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: {cuda: "12.2"} + packages: &run_raft_dask_packages_cu12 + - &pylibraft_cu12 pylibraft-cu12==23.10.* + - &ucx_py_cu12 ucx-py-cu12==0.34.* + - {matrix: {cuda: "12.1"}, packages: *run_raft_dask_packages_cu12} + - {matrix: {cuda: "12.0"}, packages: *run_raft_dask_packages_cu12} + - matrix: {cuda: "11.8"} + packages: &run_raft_dask_packages_cu11 + - &pylibraft_cu11 pylibraft-cu11==23.10.* + - &ucx_py_cu11 ucx-py-cu11==0.34.* + - {matrix: {cuda: "11.5"}, packages: *run_raft_dask_packages_cu11} + - {matrix: {cuda: "11.4"}, packages: *run_raft_dask_packages_cu11} + - {matrix: {cuda: "11.2"}, packages: *run_raft_dask_packages_cu11} + - {matrix: null, packages: [*pylibraft_conda, *ucx_py_conda]} test_python_common: common: - output_types: [conda, requirements, pyproject] @@ -353,9 +479,3 @@ dependencies: packages: - scikit-learn - scipy - - output_types: conda - packages: - - cupy>=12.0.0 - - output_types: pyproject - packages: - - cupy-cuda11x>=12.0.0 diff --git a/python/raft-dask/CMakeLists.txt b/python/raft-dask/CMakeLists.txt index f9a20b46bb..af98f31857 100644 --- a/python/raft-dask/CMakeLists.txt +++ b/python/raft-dask/CMakeLists.txt @@ -17,6 +17,8 @@ cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) set(raft_dask_version 23.10.00) include(../../fetch_rapids.cmake) +include(rapids-cuda) +rapids_cuda_init_architectures(raft-dask-python) project( raft-dask-python @@ -25,7 +27,7 @@ project( # language to be enabled here. The test project that is built in scikit-build to verify # various linking options for the python library is hardcoded to build with C, so until # that is fixed we need to keep C. - C CXX + C CXX CUDA ) option(FIND_RAFT_CPP "Search for existing RAFT C++ installations before defaulting to local files" @@ -42,14 +44,6 @@ else() endif() if(NOT raft_FOUND) - # TODO: This will not be necessary once we upgrade to CMake 3.22, which will pull in the required - # languages for the C++ project even if this project does not require those languages. - include(rapids-cuda) - rapids_cuda_init_architectures(raft-dask) - enable_language(CUDA) - # Since raft-dask only enables CUDA optionally we need to manually include the file that - # rapids_cuda_init_architectures relies on `project` including. - include("${CMAKE_PROJECT_raft-dask_INCLUDE}") find_package(ucx REQUIRED) # raft-dask doesn't actually use raft libraries, it just needs the headers, so we can turn off all From e618fb07b49bf750357d4fc3c619385ad86e0a87 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Fri, 29 Sep 2023 09:20:59 -0500 Subject: [PATCH 2/5] Pin dask and distributed for 23.10 release (#1864) This PR pins `dask` and `distributed` to `2023.9.2` for `23.10` release. xref: https://github.com/rapidsai/cudf/pull/14225 Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Ray Douglass (https://github.com/raydouglass) - Peter Andreas Entschev (https://github.com/pentschev) - Ben Frederickson (https://github.com/benfred) --- ci/test_wheel_raft_dask.sh | 2 +- conda/environments/all_cuda-118_arch-x86_64.yaml | 6 +++--- conda/environments/all_cuda-120_arch-x86_64.yaml | 6 +++--- conda/recipes/raft-dask/meta.yaml | 6 +++--- dependencies.yaml | 6 +++--- python/raft-dask/pyproject.toml | 4 ++-- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/ci/test_wheel_raft_dask.sh b/ci/test_wheel_raft_dask.sh index 676d642de9..fd9668e968 100755 --- a/ci/test_wheel_raft_dask.sh +++ b/ci/test_wheel_raft_dask.sh @@ -12,7 +12,7 @@ RAPIDS_PY_WHEEL_NAME="pylibraft_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels python -m pip install --no-deps ./local-pylibraft-dep/pylibraft*.whl # Always install latest dask for testing -python -m pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.10 +python -m pip install git+https://github.com/dask/dask.git@2023.9.2 git+https://github.com/dask/distributed.git@2023.9.2 git+https://github.com/rapidsai/dask-cuda.git@branch-23.10 # echo to expand wildcard before adding `[extra]` requires for pip python -m pip install $(echo ./dist/raft_dask*.whl)[test] diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 93e8821575..739e1e9785 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -19,10 +19,10 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dask-core>=2023.7.1 +- dask-core==2023.9.2 - dask-cuda==23.10.* -- dask>=2023.7.1 -- distributed>=2023.7.1 +- dask==2023.9.2 +- distributed==2023.9.2 - doxygen>=1.8.20 - gcc_linux-64=11.* - gmock>=1.13.0 diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml index 01d9bca5c2..321c17bf4f 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-120_arch-x86_64.yaml @@ -20,10 +20,10 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dask-core>=2023.7.1 +- dask-core==2023.9.2 - dask-cuda==23.10.* -- dask>=2023.7.1 -- distributed>=2023.7.1 +- dask==2023.9.2 +- distributed==2023.9.2 - doxygen>=1.8.20 - gcc_linux-64=11.* - gmock>=1.13.0 diff --git a/conda/recipes/raft-dask/meta.yaml b/conda/recipes/raft-dask/meta.yaml index c9caa4dd9b..04dfef5063 100644 --- a/conda/recipes/raft-dask/meta.yaml +++ b/conda/recipes/raft-dask/meta.yaml @@ -60,10 +60,10 @@ requirements: - cudatoolkit {% endif %} - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} - - dask >=2023.7.1 - - dask-core >=2023.7.1 + - dask ==2023.9.2 + - dask-core ==2023.9.2 - dask-cuda ={{ minor_version }} - - distributed >=2023.7.1 + - distributed ==2023.9.2 - joblib >=0.11 - nccl >=2.9.9 - pylibraft {{ version }} diff --git a/dependencies.yaml b/dependencies.yaml index b827f11228..3ad51a6377 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -430,15 +430,15 @@ dependencies: common: - output_types: [conda, pyproject] packages: - - dask>=2023.7.1 + - dask==2023.9.2 - dask-cuda==23.10.* - - distributed>=2023.7.1 + - distributed==2023.9.2 - joblib>=0.11 - numba>=0.57 - *numpy - output_types: conda packages: - - dask-core>=2023.7.1 + - dask-core==2023.9.2 - ucx>=1.13.0 - ucx-proc=*=gpu - &ucx_py_conda ucx-py==0.34.* diff --git a/python/raft-dask/pyproject.toml b/python/raft-dask/pyproject.toml index bdbcf61e0f..3e0ffc2848 100644 --- a/python/raft-dask/pyproject.toml +++ b/python/raft-dask/pyproject.toml @@ -35,8 +35,8 @@ license = { text = "Apache 2.0" } requires-python = ">=3.9" dependencies = [ "dask-cuda==23.10.*", - "dask>=2023.7.1", - "distributed>=2023.7.1", + "dask==2023.9.2", + "distributed==2023.9.2", "joblib>=0.11", "numba>=0.57", "numpy>=1.21", From 1ee423b451cee386302f6d0c893f1feb902a840a Mon Sep 17 00:00:00 2001 From: tsuki <12711693+enp1s0@users.noreply.github.com> Date: Tue, 3 Oct 2023 00:39:45 +0800 Subject: [PATCH 3/5] [BUG] Fix a bug in the filtering operation in CAGRA multi-kernel (#1862) This PR fixes a bug in the filtering operations in the CAGRA multi-kernel search implementation. This bug caused the test of https://github.com/rapidsai/raft/pull/1837 to fail. Authors: - tsuki (https://github.com/enp1s0) Approvers: - Micka (https://github.com/lowener) - Corey J. Nolet (https://github.com/cjnolet) --- .../detail/cagra/search_multi_kernel.cuh | 23 +++++++++++++------ .../neighbors/detail/cagra/search_plan.cuh | 8 +++++++ .../raft/neighbors/detail/nn_descent.cuh | 3 +-- 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh index 5dcfcb3929..9392bde440 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh @@ -478,13 +478,15 @@ __global__ void apply_filter_kernel(INDEX_T* const result_indices_ptr, const INDEX_T query_id_offset, SAMPLE_FILTER_T sample_filter) { - const auto tid = threadIdx.x + blockIdx.x * blockDim.x; + constexpr INDEX_T index_msb_1_mask = utils::gen_index_msb_1_mask::value; + const auto tid = threadIdx.x + blockIdx.x * blockDim.x; if (tid >= result_buffer_size * num_queries) { return; } const auto i = tid % result_buffer_size; const auto j = tid / result_buffer_size; const auto index = i + j * lds; - if (!sample_filter(query_id_offset + j, result_indices_ptr[index])) { + if (result_indices_ptr[index] != ~index_msb_1_mask && + !sample_filter(query_id_offset + j, result_indices_ptr[index])) { result_indices_ptr[index] = utils::get_max_value(); result_distances_ptr[index] = utils::get_max_value(); } @@ -788,12 +790,15 @@ struct search : search_plan_impl { auto result_indices_ptr = result_indices.data() + (iter & 0x1) * result_buffer_size; auto result_distances_ptr = result_distances.data() + (iter & 0x1) * result_buffer_size; - // Remove parent bit in search results - remove_parent_bit( - num_queries, itopk_size, result_indices_ptr, result_buffer_allocation_size, stream); + if constexpr (!std::is_same::value) { + // Remove parent bit in search results + remove_parent_bit(num_queries, + result_buffer_size, + result_indices.data() + (iter & 0x1) * itopk_size, + result_buffer_allocation_size, + stream); - if (!std::is_same::value) { apply_filter( result_indices.data() + (iter & 0x1) * itopk_size, result_distances.data() + (iter & 0x1) * itopk_size, @@ -821,6 +826,10 @@ struct search : search_plan_impl { true, topk_hint.data(), stream); + } else { + // Remove parent bit in search results + remove_parent_bit( + num_queries, itopk_size, result_indices_ptr, result_buffer_allocation_size, stream); } // Copy results from working buffer to final buffer diff --git a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh index a0f346ab51..147b8b753d 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh @@ -291,6 +291,14 @@ struct search_plan_impl : public search_plan_impl_base { "`hashmap_max_fill_rate` must be equal to or greater than 0.1 and smaller than 0.9. " + std::to_string(hashmap_max_fill_rate) + " has been given."; } + if constexpr (!std::is_same::value) { + if (hashmap_mode == hash_mode::SMALL) { + error_message += "`SMALL` hash is not available when filtering"; + } else { + hashmap_mode = hash_mode::HASH; + } + } if (algo == search_algo::MULTI_CTA) { if (hashmap_mode == hash_mode::SMALL) { error_message += "`small_hash` is not available when 'search_mode' is \"multi-cta\""; diff --git a/cpp/include/raft/neighbors/detail/nn_descent.cuh b/cpp/include/raft/neighbors/detail/nn_descent.cuh index 3e4d0409bd..009ffd4684 100644 --- a/cpp/include/raft/neighbors/detail/nn_descent.cuh +++ b/cpp/include/raft/neighbors/detail/nn_descent.cuh @@ -1278,8 +1278,7 @@ void GNND::build(Data_t* data, const Index_t nrow, Index_t* out std::thread update_and_sample_thread(update_and_sample, it); - std::cout << "# GNND iteraton: " << it + 1 << "/" << build_config_.max_iterations << "\r"; - std::fflush(stdout); + RAFT_LOG_DEBUG("# GNND iteraton: %lu / %lu", it + 1, build_config_.max_iterations); // Reuse dists_buffer_ to save GPU memory. graph_buffer_ cannot be reused, because it // contains some information for local_join. From 120cff4bee8be9adbb46d09dfae23c3ad5b46870 Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Mon, 2 Oct 2023 11:08:48 -0700 Subject: [PATCH 4/5] Add option to brute_force index to maintain reference to non-owning norms (#1865) This makes the faiss integration substantially easier, since we can just use the existing norms that have already been calculated in GpuDistanceParams::vectorNorms - rather than require an owned copy that lives in the brute force index. Authors: - Ben Frederickson (https://github.com/benfred) Approvers: - Corey J. Nolet (https://github.com/cjnolet) --- .../raft/neighbors/brute_force_types.hpp | 25 +++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/cpp/include/raft/neighbors/brute_force_types.hpp b/cpp/include/raft/neighbors/brute_force_types.hpp index cc934b7a98..19dd6b8350 100644 --- a/cpp/include/raft/neighbors/brute_force_types.hpp +++ b/cpp/include/raft/neighbors/brute_force_types.hpp @@ -66,11 +66,11 @@ struct index : ann::index { /** Dataset norms */ [[nodiscard]] inline auto norms() const -> device_vector_view { - return make_const_mdspan(norms_.value().view()); + return norms_view_.value(); } /** Whether ot not this index has dataset norms */ - [[nodiscard]] inline bool has_norms() const noexcept { return norms_.has_value(); } + [[nodiscard]] inline bool has_norms() const noexcept { return norms_view_.has_value(); } [[nodiscard]] inline T metric_arg() const noexcept { return metric_arg_; } @@ -102,10 +102,30 @@ struct index : ann::index { norms_(std::move(norms)), metric_arg_(metric_arg) { + if (norms_) { norms_view_ = make_const_mdspan(norms_.value().view()); } update_dataset(res, dataset); resource::sync_stream(res); } + /** Construct a brute force index from dataset + * + * This class stores a non-owning reference to the dataset and norms here. + * Having precomputed norms gives us a performance advantage at query time. + */ + index(raft::resources const& res, + raft::device_matrix_view dataset_view, + std::optional> norms_view, + raft::distance::DistanceType metric, + T metric_arg = 0.0) + : ann::index(), + metric_(metric), + dataset_(make_device_matrix(res, 0, 0)), + dataset_view_(dataset_view), + norms_view_(norms_view), + metric_arg_(metric_arg) + { + } + private: /** * Replace the dataset with a new dataset. @@ -135,6 +155,7 @@ struct index : ann::index { raft::distance::DistanceType metric_; raft::device_matrix dataset_; std::optional> norms_; + std::optional> norms_view_; raft::device_matrix_view dataset_view_; T metric_arg_; }; From bf3f85ad17d469a9476cff31a53f292effd11520 Mon Sep 17 00:00:00 2001 From: Micka Date: Tue, 3 Oct 2023 14:59:25 +0200 Subject: [PATCH 5/5] [FEA] Add `bitset_filter` for CAGRA indices removal (#1837) Closes #1600. To be merged after #1803 and #1811 This PR adds `bitset_filter` to filter an index with a bitset. Authors: - Micka (https://github.com/lowener) - tsuki (https://github.com/enp1s0) - Corey J. Nolet (https://github.com/cjnolet) Approvers: - Corey J. Nolet (https://github.com/cjnolet) --- cpp/bench/prims/neighbors/cagra_bench.cuh | 52 ++++++-- cpp/include/raft/neighbors/cagra.cuh | 20 ++- cpp/include/raft/neighbors/sample_filter.cuh | 48 ++++++++ cpp/test/neighbors/ann_cagra.cuh | 114 ++++++++++++++++++ .../ann_cagra/test_float_uint32_t.cu | 6 +- .../ann_cagra/test_int8_t_uint32_t.cu | 6 +- .../ann_cagra/test_uint8_t_uint32_t.cu | 6 +- 7 files changed, 241 insertions(+), 11 deletions(-) create mode 100644 cpp/include/raft/neighbors/sample_filter.cuh diff --git a/cpp/bench/prims/neighbors/cagra_bench.cuh b/cpp/bench/prims/neighbors/cagra_bench.cuh index bb405088bb..63f6c14686 100644 --- a/cpp/bench/prims/neighbors/cagra_bench.cuh +++ b/cpp/bench/prims/neighbors/cagra_bench.cuh @@ -18,8 +18,10 @@ #include #include +#include #include #include +#include #include @@ -40,6 +42,8 @@ struct params { int block_size; int search_width; int max_iterations; + /** Ratio of removed indices. */ + double removed_ratio; }; template @@ -49,7 +53,8 @@ struct CagraBench : public fixture { params_(ps), queries_(make_device_matrix(handle, ps.n_queries, ps.n_dims)), dataset_(make_device_matrix(handle, ps.n_samples, ps.n_dims)), - knn_graph_(make_device_matrix(handle, ps.n_samples, ps.degree)) + knn_graph_(make_device_matrix(handle, ps.n_samples, ps.degree)), + removed_indices_bitset_(handle, ps.n_samples) { // Generate random dataset and queriees raft::random::RngState state{42}; @@ -74,6 +79,13 @@ struct CagraBench : public fixture { auto metric = raft::distance::DistanceType::L2Expanded; + auto removed_indices = + raft::make_device_vector(handle, ps.removed_ratio * ps.n_samples); + thrust::sequence( + resource::get_thrust_policy(handle), + thrust::device_pointer_cast(removed_indices.data_handle()), + thrust::device_pointer_cast(removed_indices.data_handle() + removed_indices.extent(0))); + removed_indices_bitset_.set(handle, removed_indices.view()); index_.emplace(raft::neighbors::cagra::index( handle, metric, make_const_mdspan(dataset_.view()), make_const_mdspan(knn_graph_.view()))); } @@ -95,10 +107,18 @@ struct CagraBench : public fixture { distances.data_handle(), params_.n_queries, params_.k); auto queries_v = make_const_mdspan(queries_.view()); - loop_on_state(state, [&]() { - raft::neighbors::cagra::search( - this->handle, search_params, *this->index_, queries_v, ind_v, dist_v); - }); + if (params_.removed_ratio > 0) { + auto filter = raft::neighbors::filtering::bitset_filter(removed_indices_bitset_.view()); + loop_on_state(state, [&]() { + raft::neighbors::cagra::search_with_filtering( + this->handle, search_params, *this->index_, queries_v, ind_v, dist_v, filter); + }); + } else { + loop_on_state(state, [&]() { + raft::neighbors::cagra::search( + this->handle, search_params, *this->index_, queries_v, ind_v, dist_v); + }); + } double data_size = params_.n_samples * params_.n_dims * sizeof(T); double graph_size = params_.n_samples * params_.degree * sizeof(IdxT); @@ -120,6 +140,7 @@ struct CagraBench : public fixture { state.counters["block_size"] = params_.block_size; state.counters["search_width"] = params_.search_width; state.counters["iterations"] = iterations; + state.counters["removed_ratio"] = params_.removed_ratio; } private: @@ -128,6 +149,7 @@ struct CagraBench : public fixture { raft::device_matrix queries_; raft::device_matrix dataset_; raft::device_matrix knn_graph_; + raft::core::bitset removed_indices_bitset_; }; inline const std::vector generate_inputs() @@ -141,7 +163,8 @@ inline const std::vector generate_inputs() {64}, // itopk_size {0}, // block_size {1}, // search_width - {0} // max_iterations + {0}, // max_iterations + {0.0} // removed_ratio ); auto inputs2 = raft::util::itertools::product({2000000ull, 10000000ull}, // n_samples {128}, // dataset dim @@ -151,7 +174,22 @@ inline const std::vector generate_inputs() {64}, // itopk_size {64, 128, 256, 512, 1024}, // block_size {1}, // search_width - {0} // max_iterations + {0}, // max_iterations + {0.0} // removed_ratio + ); + inputs.insert(inputs.end(), inputs2.begin(), inputs2.end()); + + inputs2 = raft::util::itertools::product( + {2000000ull, 10000000ull}, // n_samples + {128}, // dataset dim + {1, 10, 10000}, // n_queries + {255}, // k + {64}, // knn graph degree + {300}, // itopk_size + {256}, // block_size + {2}, // search_width + {0}, // max_iterations + {0.0, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64} // removed_ratio ); inputs.insert(inputs.end(), inputs2.begin(), inputs2.end()); return inputs; diff --git a/cpp/include/raft/neighbors/cagra.cuh b/cpp/include/raft/neighbors/cagra.cuh index f96dd34e05..f9682a973f 100644 --- a/cpp/include/raft/neighbors/cagra.cuh +++ b/cpp/include/raft/neighbors/cagra.cuh @@ -391,7 +391,25 @@ void search(raft::resources const& res, /** * @brief Search ANN using the constructed index with the given sample filter. * - * See the [cagra::build](#cagra::build) documentation for a usage example. + * Usage example: + * @code{.cpp} + * using namespace raft::neighbors; + * // use default index parameters + * cagra::index_params index_params; + * // create and fill the index from a [N, D] dataset + * auto index = cagra::build(res, index_params, dataset); + * // use default search parameters + * cagra::search_params search_params; + * // create a bitset to filter the search + * auto removed_indices = raft::make_device_vector(res, n_removed_indices); + * raft::core::bitset removed_indices_bitset( + * res, removed_indices.view(), dataset.extent(0)); + * // search K nearest neighbours according to a bitset + * auto neighbors = raft::make_device_matrix(res, n_queries, k); + * auto distances = raft::make_device_matrix(res, n_queries, k); + * cagra::search_with_filtering(res, search_params, index, queries, neighbors, distances, + * filtering::bitset_filter(removed_indices_bitset.view())); + * @endcode * * @tparam T data element type * @tparam IdxT type of the indices diff --git a/cpp/include/raft/neighbors/sample_filter.cuh b/cpp/include/raft/neighbors/sample_filter.cuh new file mode 100644 index 0000000000..9182d72da9 --- /dev/null +++ b/cpp/include/raft/neighbors/sample_filter.cuh @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +#include + +namespace raft::neighbors::filtering { +/** + * @brief Filter an index with a bitset + * + * @tparam index_t Indexing type + */ +template +struct bitset_filter { + // View of the bitset to use as a filter + const raft::core::bitset_view bitset_view_; + + bitset_filter(const raft::core::bitset_view bitset_for_filtering) + : bitset_view_{bitset_for_filtering} + { + } + inline _RAFT_HOST_DEVICE bool operator()( + // query index + const uint32_t query_ix, + // the index of the current sample + const uint32_t sample_ix) const + { + return bitset_view_.test(sample_ix); + } +}; +} // namespace raft::neighbors::filtering diff --git a/cpp/test/neighbors/ann_cagra.cuh b/cpp/test/neighbors/ann_cagra.cuh index b750372244..e6c3873063 100644 --- a/cpp/test/neighbors/ann_cagra.cuh +++ b/cpp/test/neighbors/ann_cagra.cuh @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -525,6 +526,119 @@ class AnnCagraFilterTest : public ::testing::TestWithParam { } } + void testCagraRemoved() + { + size_t queries_size = ps.n_queries * ps.k; + std::vector indices_Cagra(queries_size); + std::vector indices_naive(queries_size); + std::vector distances_Cagra(queries_size); + std::vector distances_naive(queries_size); + + { + rmm::device_uvector distances_naive_dev(queries_size, stream_); + rmm::device_uvector indices_naive_dev(queries_size, stream_); + auto* database_filtered_ptr = database.data() + test_cagra_sample_filter::offset * ps.dim; + naive_knn(handle_, + distances_naive_dev.data(), + indices_naive_dev.data(), + search_queries.data(), + database_filtered_ptr, + ps.n_queries, + ps.n_rows - test_cagra_sample_filter::offset, + ps.dim, + ps.k, + ps.metric); + raft::linalg::addScalar(indices_naive_dev.data(), + indices_naive_dev.data(), + IdxT(test_cagra_sample_filter::offset), + queries_size, + stream_); + update_host(distances_naive.data(), distances_naive_dev.data(), queries_size, stream_); + update_host(indices_naive.data(), indices_naive_dev.data(), queries_size, stream_); + resource::sync_stream(handle_); + } + + { + rmm::device_uvector distances_dev(queries_size, stream_); + rmm::device_uvector indices_dev(queries_size, stream_); + + { + cagra::index_params index_params; + index_params.metric = ps.metric; // Note: currently ony the cagra::index_params metric is + // not used for knn_graph building. + cagra::search_params search_params; + search_params.algo = ps.algo; + search_params.max_queries = ps.max_queries; + search_params.team_size = ps.team_size; + search_params.hashmap_mode = cagra::hash_mode::HASH; + + auto database_view = raft::make_device_matrix_view( + (const DataT*)database.data(), ps.n_rows, ps.dim); + + cagra::index index(handle_); + if (ps.host_dataset) { + auto database_host = raft::make_host_matrix(ps.n_rows, ps.dim); + raft::copy(database_host.data_handle(), database.data(), database.size(), stream_); + auto database_host_view = raft::make_host_matrix_view( + (const DataT*)database_host.data_handle(), ps.n_rows, ps.dim); + index = cagra::build(handle_, index_params, database_host_view); + } else { + index = cagra::build(handle_, index_params, database_view); + } + + if (!ps.include_serialized_dataset) { index.update_dataset(handle_, database_view); } + + auto search_queries_view = raft::make_device_matrix_view( + search_queries.data(), ps.n_queries, ps.dim); + auto indices_out_view = + raft::make_device_matrix_view(indices_dev.data(), ps.n_queries, ps.k); + auto dists_out_view = raft::make_device_matrix_view( + distances_dev.data(), ps.n_queries, ps.k); + auto removed_indices = + raft::make_device_vector(handle_, test_cagra_sample_filter::offset); + thrust::sequence( + resource::get_thrust_policy(handle_), + thrust::device_pointer_cast(removed_indices.data_handle()), + thrust::device_pointer_cast(removed_indices.data_handle() + removed_indices.extent(0))); + resource::sync_stream(handle_); + raft::core::bitset removed_indices_bitset( + handle_, removed_indices.view(), ps.n_rows); + cagra::search_with_filtering( + handle_, + search_params, + index, + search_queries_view, + indices_out_view, + dists_out_view, + raft::neighbors::filtering::bitset_filter(removed_indices_bitset.view())); + update_host(distances_Cagra.data(), distances_dev.data(), queries_size, stream_); + update_host(indices_Cagra.data(), indices_dev.data(), queries_size, stream_); + resource::sync_stream(handle_); + } + + double min_recall = ps.min_recall; + EXPECT_TRUE(eval_neighbours(indices_naive, + indices_Cagra, + distances_naive, + distances_Cagra, + ps.n_queries, + ps.k, + 0.001, + min_recall)); + EXPECT_TRUE(eval_distances(handle_, + database.data(), + search_queries.data(), + indices_dev.data(), + distances_dev.data(), + ps.n_rows, + ps.dim, + ps.n_queries, + ps.k, + ps.metric, + 1.0e-4)); + } + } + void SetUp() override { database.resize(((size_t)ps.n_rows) * ps.dim, stream_); diff --git a/cpp/test/neighbors/ann_cagra/test_float_uint32_t.cu b/cpp/test/neighbors/ann_cagra/test_float_uint32_t.cu index 01d7e1e1ea..944c2cbc89 100644 --- a/cpp/test/neighbors/ann_cagra/test_float_uint32_t.cu +++ b/cpp/test/neighbors/ann_cagra/test_float_uint32_t.cu @@ -27,7 +27,11 @@ typedef AnnCagraSortTest AnnCagraSortTestF_U32; TEST_P(AnnCagraSortTestF_U32, AnnCagraSort) { this->testCagraSort(); } typedef AnnCagraFilterTest AnnCagraFilterTestF_U32; -TEST_P(AnnCagraFilterTestF_U32, AnnCagraFilter) { this->testCagraFilter(); } +TEST_P(AnnCagraFilterTestF_U32, AnnCagraFilter) +{ + this->testCagraFilter(); + this->testCagraRemoved(); +} INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestF_U32, ::testing::ValuesIn(inputs)); INSTANTIATE_TEST_CASE_P(AnnCagraSortTest, AnnCagraSortTestF_U32, ::testing::ValuesIn(inputs)); diff --git a/cpp/test/neighbors/ann_cagra/test_int8_t_uint32_t.cu b/cpp/test/neighbors/ann_cagra/test_int8_t_uint32_t.cu index ee06d369fa..3d9dc76953 100644 --- a/cpp/test/neighbors/ann_cagra/test_int8_t_uint32_t.cu +++ b/cpp/test/neighbors/ann_cagra/test_int8_t_uint32_t.cu @@ -25,7 +25,11 @@ TEST_P(AnnCagraTestI8_U32, AnnCagra) { this->testCagra(); } typedef AnnCagraSortTest AnnCagraSortTestI8_U32; TEST_P(AnnCagraSortTestI8_U32, AnnCagraSort) { this->testCagraSort(); } typedef AnnCagraFilterTest AnnCagraFilterTestI8_U32; -TEST_P(AnnCagraFilterTestI8_U32, AnnCagraFilter) { this->testCagraFilter(); } +TEST_P(AnnCagraFilterTestI8_U32, AnnCagraFilter) +{ + this->testCagraFilter(); + this->testCagraRemoved(); +} INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestI8_U32, ::testing::ValuesIn(inputs)); INSTANTIATE_TEST_CASE_P(AnnCagraSortTest, AnnCagraSortTestI8_U32, ::testing::ValuesIn(inputs)); diff --git a/cpp/test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu b/cpp/test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu index 3243e73ccd..c5b1b1704b 100644 --- a/cpp/test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu +++ b/cpp/test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu @@ -27,7 +27,11 @@ typedef AnnCagraSortTest AnnCagraSortTestU8_ TEST_P(AnnCagraSortTestU8_U32, AnnCagraSort) { this->testCagraSort(); } typedef AnnCagraFilterTest AnnCagraFilterTestU8_U32; -TEST_P(AnnCagraFilterTestU8_U32, AnnCagraSort) { this->testCagraFilter(); } +TEST_P(AnnCagraFilterTestU8_U32, AnnCagraSort) +{ + this->testCagraFilter(); + this->testCagraRemoved(); +} INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestU8_U32, ::testing::ValuesIn(inputs)); INSTANTIATE_TEST_CASE_P(AnnCagraSortTest, AnnCagraSortTestU8_U32, ::testing::ValuesIn(inputs));