From 00d1ececa78dc1399381c97729e2166af3d5d71e Mon Sep 17 00:00:00 2001 From: Rui Lan Date: Fri, 8 Dec 2023 11:47:07 -0800 Subject: [PATCH 01/14] Add subsample support for PQ codebook generation. More benchmark needed. --- .../raft/neighbors/detail/ivf_pq_build.cuh | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh index 6adc4d583c..0012f3a9d2 100644 --- a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh @@ -401,8 +401,11 @@ void train_per_subset(raft::resources const& handle, auto device_memory = resource::get_workspace_resource(handle); rmm::device_uvector pq_centers_tmp(index.pq_centers().size(), stream, device_memory); - rmm::device_uvector sub_trainset(n_rows * size_t(index.pq_len()), stream, device_memory); - rmm::device_uvector sub_labels(n_rows, stream, device_memory); + // Subsampling the train set for codebook generation. Using similar subsampling strategy as train_per_cluster + size_t big_enough = 256ul * std::max(index.pq_book_size(), index.pq_dim()); + auto pq_n_rows = uint32_t(std::min(big_enough, n_rows)); + rmm::device_uvector sub_trainset(pq_n_rows * size_t(index.pq_len()), stream, device_memory); + rmm::device_uvector sub_labels(pq_n_rows, stream, device_memory); rmm::device_uvector pq_cluster_sizes(index.pq_book_size(), stream, device_memory); @@ -413,7 +416,7 @@ void train_per_subset(raft::resources const& handle, // Get the rotated cluster centers for each training vector. // This will be subtracted from the input vectors afterwards. 
utils::copy_selected( - n_rows, + pq_n_rows, index.pq_len(), index.centers_rot().data_handle() + index.pq_len() * j, labels, @@ -429,7 +432,7 @@ void train_per_subset(raft::resources const& handle, true, false, index.pq_len(), - n_rows, + pq_n_rows, index.dim(), &alpha, index.rotation_matrix().data_handle() + index.dim() * index.pq_len() * j, @@ -443,12 +446,12 @@ void train_per_subset(raft::resources const& handle, // train PQ codebook for this subspace auto sub_trainset_view = - raft::make_device_matrix_view(sub_trainset.data(), n_rows, index.pq_len()); + raft::make_device_matrix_view(sub_trainset.data(), pq_n_rows, index.pq_len()); auto centers_tmp_view = raft::make_device_matrix_view( pq_centers_tmp.data() + index.pq_book_size() * index.pq_len() * j, index.pq_book_size(), index.pq_len()); - auto sub_labels_view = raft::make_device_vector_view(sub_labels.data(), n_rows); + auto sub_labels_view = raft::make_device_vector_view(sub_labels.data(), pq_n_rows); auto cluster_sizes_view = raft::make_device_vector_view(pq_cluster_sizes.data(), index.pq_book_size()); raft::cluster::kmeans_balanced_params kmeans_params; From 7332a274749f715357e726675e0b33650fdf6015 Mon Sep 17 00:00:00 2001 From: Rui Lan Date: Fri, 5 Jan 2024 17:18:02 -0800 Subject: [PATCH 02/14] Add knob to control the amount of PQ codebook training subsampling. 
--- .../ann/src/raft/raft_ann_bench_param_parser.h | 2 ++ .../raft/neighbors/detail/ivf_pq_build.cuh | 7 ++++--- cpp/include/raft/neighbors/ivf_pq_types.hpp | 6 ++++++ .../pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd | 3 ++- .../pylibraft/neighbors/ivf_pq/ivf_pq.pyx | 15 ++++++++++++++- 5 files changed, 28 insertions(+), 5 deletions(-) diff --git a/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h b/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h index 2a021a8a12..c88cee71fc 100644 --- a/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h +++ b/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h @@ -86,6 +86,8 @@ void parse_build_param(const nlohmann::json& conf, throw std::runtime_error("codebook_kind: '" + kind + "', should be either 'cluster' or 'subspace'"); } + if (conf.contains("pq_codebook_ratio")) { + param.pq_codebook_trainset_fraction = 1.0 / (double)conf.at("pq_codebook_ratio"); } } } diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh index 0012f3a9d2..afd5f73db5 100644 --- a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh @@ -395,15 +395,15 @@ void train_per_subset(raft::resources const& handle, const float* trainset, // [n_rows, dim] const uint32_t* labels, // [n_rows] uint32_t kmeans_n_iters, + double pq_codebook_trainset_fraction, rmm::mr::device_memory_resource* managed_memory) { auto stream = resource::get_cuda_stream(handle); auto device_memory = resource::get_workspace_resource(handle); rmm::device_uvector pq_centers_tmp(index.pq_centers().size(), stream, device_memory); - // Subsampling the train set for codebook generation. Using similar subsampling strategy as train_per_cluster - size_t big_enough = 256ul * std::max(index.pq_book_size(), index.pq_dim()); - auto pq_n_rows = uint32_t(std::min(big_enough, n_rows)); + // Subsampling the train set for codebook generation based on pq_codebook_trainset_fraction. 
+ auto pq_n_rows = uint32_t(n_rows * pq_codebook_trainset_fraction); rmm::device_uvector sub_trainset(pq_n_rows * size_t(index.pq_len()), stream, device_memory); rmm::device_uvector sub_labels(pq_n_rows, stream, device_memory); @@ -1861,6 +1861,7 @@ auto build(raft::resources const& handle, trainset.data(), labels.data(), params.kmeans_n_iters, + params.pq_codebook_trainset_fraction, &managed_memory_upstream); break; case codebook_gen::PER_CLUSTER: diff --git a/cpp/include/raft/neighbors/ivf_pq_types.hpp b/cpp/include/raft/neighbors/ivf_pq_types.hpp index 45ab18c84f..8340bfde67 100644 --- a/cpp/include/raft/neighbors/ivf_pq_types.hpp +++ b/cpp/include/raft/neighbors/ivf_pq_types.hpp @@ -105,6 +105,12 @@ struct index_params : ann::index_params { * flag to `true` if you prefer to use as little GPU memory for the database as possible. */ bool conservative_memory_allocation = false; + /** + * The fraction of data to use during PQ codebook generation on top of the subsampled data + * controlled by kmeans_trainset_fraction. The parameter is only used when PQ codebook generation + * kind is PER_SUBSPACE and ignored when PQ codebook generation kind is PER_CLUSTER. 
+ */ + double pq_codebook_trainset_fraction = 1; }; struct search_params : ann::search_params { diff --git a/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd b/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd index 531c2428e9..81f96df9cd 100644 --- a/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd +++ b/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd @@ -78,7 +78,8 @@ cdef extern from "raft/neighbors/ivf_pq_types.hpp" \ codebook_gen codebook_kind bool force_random_rotation bool conservative_memory_allocation - + double pq_codebook_trainset_fraction + cdef cppclass index[IdxT](ann_index): index(const device_resources& handle, DistanceType metric, diff --git a/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx b/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx index 0c1bbf6b9c..5b0675d069 100644 --- a/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx +++ b/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx @@ -156,6 +156,13 @@ cdef class IndexParams: repeated calls to `extend` (extending the database). To disable this behavior and use as little GPU memory for the database as possible, set this flat to `True`. + pq_codebook_trainset_fraction : int, default = 0.5 + If pq_codebook_trainset_fraction is less than 1, then the dataset is + subsampled for PQ codebook generation, and only n_samples * + pq_codebook_trainset_fraction rows are used for PQ codebook generation. + This subsampling is applied after kmeans_trainset subsampling, + controlled by kmeans_trainset_fraction and only used when codebook_kind + is PER_SUBSPACE. 
""" def __init__(self, *, n_lists=1024, @@ -167,7 +174,8 @@ cdef class IndexParams: codebook_kind="subspace", force_random_rotation=False, add_data_on_build=True, - conservative_memory_allocation=False): + conservative_memory_allocation=False, + pq_codebook_trainset_fraction=0.5): self.params.n_lists = n_lists self.params.metric = _get_metric(metric) self.params.metric_arg = 0 @@ -185,6 +193,8 @@ cdef class IndexParams: self.params.add_data_on_build = add_data_on_build self.params.conservative_memory_allocation = \ conservative_memory_allocation + self.params.pq_codebook_trainset_fraction = \ + pq_codebook_trainset_fraction @property def n_lists(self): @@ -226,6 +236,9 @@ cdef class IndexParams: def conservative_memory_allocation(self): return self.params.conservative_memory_allocation + @property + def pq_codebook_trainset_fraction(self): + return self.params.pq_codebook_trainset_fraction cdef class Index: # We store a pointer to the index because it dose not have a trivial From 3153f0e6607c703dc927e3a05167b2aca08405d9 Mon Sep 17 00:00:00 2001 From: Rui Lan Date: Fri, 12 Jan 2024 16:56:52 -0800 Subject: [PATCH 03/14] Fix if-statement dependency issue in parse build parameter. 
--- cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h b/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h index c88cee71fc..1b876ea147 100644 --- a/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h +++ b/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h @@ -86,8 +86,9 @@ void parse_build_param(const nlohmann::json& conf, throw std::runtime_error("codebook_kind: '" + kind + "', should be either 'cluster' or 'subspace'"); } - if (conf.contains("pq_codebook_ratio")) { - param.pq_codebook_trainset_fraction = 1.0 / (double)conf.at("pq_codebook_ratio"); } + } + if (conf.contains("pq_codebook_ratio")) { + param.pq_codebook_trainset_fraction = 1.0 / (double)conf.at("pq_codebook_ratio"); } } From a03f0af8adcafafe7427d4baf05c26fe6ae994eb Mon Sep 17 00:00:00 2001 From: Ray Douglass Date: Thu, 18 Jan 2024 14:51:02 -0500 Subject: [PATCH 04/14] DOC v24.04 Updates [skip ci] --- .../cuda11.8-conda/devcontainer.json | 4 +-- .devcontainer/cuda11.8-pip/devcontainer.json | 6 ++--- .../cuda12.0-conda/devcontainer.json | 4 +-- .devcontainer/cuda12.0-pip/devcontainer.json | 6 ++--- .github/workflows/build.yaml | 16 ++++++------ .github/workflows/pr.yaml | 26 +++++++++---------- .github/workflows/test.yaml | 10 +++---- README.md | 2 +- VERSION | 2 +- ci/build_docs.sh | 2 +- .../all_cuda-118_arch-aarch64.yaml | 8 +++--- .../all_cuda-118_arch-x86_64.yaml | 8 +++--- .../all_cuda-120_arch-aarch64.yaml | 8 +++--- .../all_cuda-120_arch-x86_64.yaml | 8 +++--- .../bench_ann_cuda-118_arch-aarch64.yaml | 2 +- .../bench_ann_cuda-118_arch-x86_64.yaml | 2 +- .../bench_ann_cuda-120_arch-aarch64.yaml | 2 +- .../bench_ann_cuda-120_arch-x86_64.yaml | 2 +- .../recipes/raft-dask/conda_build_config.yaml | 2 +- cpp/CMakeLists.txt | 4 +-- cpp/doxygen/Doxyfile | 2 +- .../cmake/thirdparty/fetch_rapids.cmake | 2 +- dependencies.yaml | 24 ++++++++--------- docs/source/build.md | 2 
+- docs/source/conf.py | 4 +-- docs/source/developer_guide.md | 8 +++--- docs/source/raft_ann_benchmarks.md | 12 ++++----- fetch_rapids.cmake | 2 +- python/pylibraft/CMakeLists.txt | 2 +- python/pylibraft/pyproject.toml | 4 +-- python/raft-ann-bench/pyproject.toml | 2 +- python/raft-dask/CMakeLists.txt | 2 +- python/raft-dask/pyproject.toml | 8 +++--- 33 files changed, 99 insertions(+), 99 deletions(-) diff --git a/.devcontainer/cuda11.8-conda/devcontainer.json b/.devcontainer/cuda11.8-conda/devcontainer.json index 4c2161cab7..2682510ed1 100644 --- a/.devcontainer/cuda11.8-conda/devcontainer.json +++ b/.devcontainer/cuda11.8-conda/devcontainer.json @@ -5,12 +5,12 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:24.02-cpp-llvm16-cuda11.8-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.04-cpp-llvm16-cuda11.8-mambaforge-ubuntu22.04" } }, "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.2": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.4": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json index 8bbc016620..f96fef205f 100644 --- a/.devcontainer/cuda11.8-pip/devcontainer.json +++ b/.devcontainer/cuda11.8-pip/devcontainer.json @@ -5,13 +5,13 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.02-cpp-llvm16-cuda11.8-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.04-cpp-llvm16-cuda11.8-ubuntu22.04" } }, "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/ucx:24.2": {"version": "1.14.1"}, - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.2": {} + "ghcr.io/rapidsai/devcontainers/features/ucx:24.4": {"version": "1.14.1"}, + 
"ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.4": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/ucx", diff --git a/.devcontainer/cuda12.0-conda/devcontainer.json b/.devcontainer/cuda12.0-conda/devcontainer.json index ebb101d9d5..7ce4ea20d3 100644 --- a/.devcontainer/cuda12.0-conda/devcontainer.json +++ b/.devcontainer/cuda12.0-conda/devcontainer.json @@ -5,12 +5,12 @@ "args": { "CUDA": "12.0", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:24.02-cpp-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.04-cpp-mambaforge-ubuntu22.04" } }, "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.2": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.4": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda12.0-pip/devcontainer.json b/.devcontainer/cuda12.0-pip/devcontainer.json index 3e2efb2f37..4f97af830a 100644 --- a/.devcontainer/cuda12.0-pip/devcontainer.json +++ b/.devcontainer/cuda12.0-pip/devcontainer.json @@ -5,13 +5,13 @@ "args": { "CUDA": "12.0", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.02-cpp-llvm16-cuda12.0-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.04-cpp-llvm16-cuda12.0-ubuntu22.04" } }, "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/ucx:24.2": {"version": "1.14.1"}, - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.2": {} + "ghcr.io/rapidsai/devcontainers/features/ucx:24.4": {"version": "1.14.1"}, + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.4": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/ucx", diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 4c6e3d0ed4..bd8b13d21e 100644 --- a/.github/workflows/build.yaml +++ 
b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -37,7 +37,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -46,7 +46,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -57,7 +57,7 @@ jobs: if: github.ref_type == 'branch' needs: python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04 with: arch: "amd64" branch: ${{ inputs.branch }} @@ -69,7 +69,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build-pylibraft: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -79,7 +79,7 @@ jobs: wheel-publish-pylibraft: needs: wheel-build-pylibraft secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.04 with: build_type: ${{ inputs.build_type || 
'branch' }} branch: ${{ inputs.branch }} @@ -89,7 +89,7 @@ jobs: wheel-build-raft-dask: needs: wheel-publish-pylibraft secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -99,7 +99,7 @@ jobs: wheel-publish-raft-dask: needs: wheel-build-raft-dask secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 19b990c8e9..889eef2a2c 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -25,29 +25,29 @@ jobs: - wheel-tests-raft-dask - devcontainer secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.04 checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.04 with: enable_check_generated_files: false conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.04 with: build_type: pull-request node_type: cpu16 conda-cpp-tests: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.04 with: build_type: pull-request conda-cpp-checks: needs: conda-cpp-build secrets: inherit - uses: 
rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.04 with: build_type: pull-request enable_check_symbols: true @@ -55,19 +55,19 @@ jobs: conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.04 with: build_type: pull-request conda-python-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.04 with: build_type: pull-request docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -77,34 +77,34 @@ jobs: wheel-build-pylibraft: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 with: build_type: pull-request script: ci/build_wheel_pylibraft.sh wheel-tests-pylibraft: needs: wheel-build-pylibraft secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 with: build_type: pull-request script: ci/test_wheel_pylibraft.sh wheel-build-raft-dask: needs: wheel-tests-pylibraft secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 with: build_type: pull-request script: 
"ci/build_wheel_raft_dask.sh" wheel-tests-raft-dask: needs: wheel-build-raft-dask secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 with: build_type: pull-request script: ci/test_wheel_raft_dask.sh devcontainer: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.04 with: build_command: | sccache -z; diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 138fd0e300..24a9954b96 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-cpp-checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.04 with: build_type: nightly branch: ${{ inputs.branch }} @@ -26,7 +26,7 @@ jobs: symbol_exclusions: (void (thrust::|cub::)|_ZN\d+raft_cutlass) conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.04 with: build_type: nightly branch: ${{ inputs.branch }} @@ -34,7 +34,7 @@ jobs: sha: ${{ inputs.sha }} conda-python-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.04 with: build_type: nightly branch: ${{ inputs.branch }} @@ -42,7 +42,7 @@ jobs: sha: ${{ inputs.sha }} wheel-tests-pylibraft: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 with: 
build_type: nightly branch: ${{ inputs.branch }} @@ -51,7 +51,7 @@ jobs: script: ci/test_wheel_pylibraft.sh wheel-tests-raft-dask: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 with: build_type: nightly branch: ${{ inputs.branch }} diff --git a/README.md b/README.md index 26ddc30ed4..55b8fca8f6 100755 --- a/README.md +++ b/README.md @@ -287,7 +287,7 @@ You can also install the conda packages individually using the `mamba` command a mamba install -c rapidsai -c conda-forge -c nvidia libraft libraft-headers cuda-version=12.0 ``` -If installing the C++ APIs please see [using libraft](https://docs.rapids.ai/api/raft/nightly/using_libraft/) for more information on using the pre-compiled shared library. You can also refer to the [example C++ template project](https://github.com/rapidsai/raft/tree/branch-24.02/cpp/template) for a ready-to-go CMake configuration that you can drop into your project and build against installed RAFT development artifacts above. +If installing the C++ APIs please see [using libraft](https://docs.rapids.ai/api/raft/nightly/using_libraft/) for more information on using the pre-compiled shared library. You can also refer to the [example C++ template project](https://github.com/rapidsai/raft/tree/branch-24.04/cpp/template) for a ready-to-go CMake configuration that you can drop into your project and build against installed RAFT development artifacts above. 
### Installing Python through Pip diff --git a/VERSION b/VERSION index 3c6c5e2b70..4a2fe8aa57 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -24.02.00 +24.04.00 diff --git a/ci/build_docs.sh b/ci/build_docs.sh index b7c8c7a3e0..4c07683642 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -28,7 +28,7 @@ rapids-mamba-retry install \ pylibraft \ raft-dask -export RAPIDS_VERSION_NUMBER="24.02" +export RAPIDS_VERSION_NUMBER="24.04" export RAPIDS_DOCS_DIR="$(mktemp -d)" rapids-logger "Build CPP docs" diff --git a/conda/environments/all_cuda-118_arch-aarch64.yaml b/conda/environments/all_cuda-118_arch-aarch64.yaml index ac076f5505..96c10a763a 100644 --- a/conda/environments/all_cuda-118_arch-aarch64.yaml +++ b/conda/environments/all_cuda-118_arch-aarch64.yaml @@ -20,7 +20,7 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dask-cuda==24.2.* +- dask-cuda==24.4.* - doxygen>=1.8.20 - gcc_linux-aarch64=11.* - gmock>=1.13.0 @@ -46,9 +46,9 @@ dependencies: - pydata-sphinx-theme - pytest - pytest-cov -- rapids-dask-dependency==24.2.* +- rapids-dask-dependency==24.4.* - recommonmark -- rmm==24.2.* +- rmm==24.4.* - scikit-build-core>=0.7.0 - scikit-learn - scipy @@ -56,6 +56,6 @@ dependencies: - sphinx-markdown-tables - sysroot_linux-aarch64==2.17 - ucx-proc=*=gpu -- ucx-py==0.36.* +- ucx-py==0.37.* - ucx>=1.13.0 name: all_cuda-118_arch-aarch64 diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index b3ded51bb5..aea5bd1961 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -20,7 +20,7 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dask-cuda==24.2.* +- dask-cuda==24.4.* - doxygen>=1.8.20 - gcc_linux-64=11.* - gmock>=1.13.0 @@ -46,9 +46,9 @@ dependencies: - pydata-sphinx-theme - pytest - pytest-cov -- rapids-dask-dependency==24.2.* +- rapids-dask-dependency==24.4.* - recommonmark -- rmm==24.2.* +- rmm==24.4.* - 
scikit-build-core>=0.7.0 - scikit-learn - scipy @@ -56,6 +56,6 @@ dependencies: - sphinx-markdown-tables - sysroot_linux-64==2.17 - ucx-proc=*=gpu -- ucx-py==0.36.* +- ucx-py==0.37.* - ucx>=1.13.0 name: all_cuda-118_arch-x86_64 diff --git a/conda/environments/all_cuda-120_arch-aarch64.yaml b/conda/environments/all_cuda-120_arch-aarch64.yaml index c0eede1389..0ea4a979d6 100644 --- a/conda/environments/all_cuda-120_arch-aarch64.yaml +++ b/conda/environments/all_cuda-120_arch-aarch64.yaml @@ -21,7 +21,7 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dask-cuda==24.2.* +- dask-cuda==24.4.* - doxygen>=1.8.20 - gcc_linux-aarch64=11.* - gmock>=1.13.0 @@ -42,9 +42,9 @@ dependencies: - pydata-sphinx-theme - pytest - pytest-cov -- rapids-dask-dependency==24.2.* +- rapids-dask-dependency==24.4.* - recommonmark -- rmm==24.2.* +- rmm==24.4.* - scikit-build-core>=0.7.0 - scikit-learn - scipy @@ -52,6 +52,6 @@ dependencies: - sphinx-markdown-tables - sysroot_linux-aarch64==2.17 - ucx-proc=*=gpu -- ucx-py==0.36.* +- ucx-py==0.37.* - ucx>=1.13.0 name: all_cuda-120_arch-aarch64 diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml index cebaf96493..48a463bc6f 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-120_arch-x86_64.yaml @@ -21,7 +21,7 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dask-cuda==24.2.* +- dask-cuda==24.4.* - doxygen>=1.8.20 - gcc_linux-64=11.* - gmock>=1.13.0 @@ -42,9 +42,9 @@ dependencies: - pydata-sphinx-theme - pytest - pytest-cov -- rapids-dask-dependency==24.2.* +- rapids-dask-dependency==24.4.* - recommonmark -- rmm==24.2.* +- rmm==24.4.* - scikit-build-core>=0.7.0 - scikit-learn - scipy @@ -52,6 +52,6 @@ dependencies: - sphinx-markdown-tables - sysroot_linux-64==2.17 - ucx-proc=*=gpu -- ucx-py==0.36.* +- ucx-py==0.37.* - ucx>=1.13.0 name: all_cuda-120_arch-x86_64 diff --git 
a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml index 87e3942e6a..0e0385ceeb 100644 --- a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml @@ -38,7 +38,7 @@ dependencies: - openblas - pandas - pyyaml -- rmm==24.2.* +- rmm==24.4.* - scikit-build-core>=0.7.0 - sysroot_linux-aarch64==2.17 name: bench_ann_cuda-118_arch-aarch64 diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index a4ac253a85..dfe76a2948 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -38,7 +38,7 @@ dependencies: - openblas - pandas - pyyaml -- rmm==24.2.* +- rmm==24.4.* - scikit-build-core>=0.7.0 - sysroot_linux-64==2.17 name: bench_ann_cuda-118_arch-x86_64 diff --git a/conda/environments/bench_ann_cuda-120_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-120_arch-aarch64.yaml index 9ef9799363..0a6567c646 100644 --- a/conda/environments/bench_ann_cuda-120_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-120_arch-aarch64.yaml @@ -34,7 +34,7 @@ dependencies: - openblas - pandas - pyyaml -- rmm==24.2.* +- rmm==24.4.* - scikit-build-core>=0.7.0 - sysroot_linux-aarch64==2.17 name: bench_ann_cuda-120_arch-aarch64 diff --git a/conda/environments/bench_ann_cuda-120_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-120_arch-x86_64.yaml index 5fa09096ba..a89d5317b6 100644 --- a/conda/environments/bench_ann_cuda-120_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-120_arch-x86_64.yaml @@ -34,7 +34,7 @@ dependencies: - openblas - pandas - pyyaml -- rmm==24.2.* +- rmm==24.4.* - scikit-build-core>=0.7.0 - sysroot_linux-64==2.17 name: bench_ann_cuda-120_arch-x86_64 diff --git a/conda/recipes/raft-dask/conda_build_config.yaml b/conda/recipes/raft-dask/conda_build_config.yaml index 
c12c35af3b..483e53026a 100644 --- a/conda/recipes/raft-dask/conda_build_config.yaml +++ b/conda/recipes/raft-dask/conda_build_config.yaml @@ -17,7 +17,7 @@ ucx_version: - ">=1.14.1,<1.16.0" ucx_py_version: - - "0.36.*" + - "0.37.*" cmake_version: - ">=3.26.4" diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index dbce46abfe..1d01d71735 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -10,8 +10,8 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express # or implied. See the License for the specific language governing permissions and limitations under # the License. -set(RAPIDS_VERSION "24.02") -set(RAFT_VERSION "24.02.00") +set(RAPIDS_VERSION "24.04") +set(RAFT_VERSION "24.04.00") cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) include(../fetch_rapids.cmake) diff --git a/cpp/doxygen/Doxyfile b/cpp/doxygen/Doxyfile index 3eb0763eaf..779472d880 100644 --- a/cpp/doxygen/Doxyfile +++ b/cpp/doxygen/Doxyfile @@ -38,7 +38,7 @@ PROJECT_NAME = "RAFT C++ API" # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = "24.02" +PROJECT_NUMBER = "24.04" # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/cpp/template/cmake/thirdparty/fetch_rapids.cmake b/cpp/template/cmake/thirdparty/fetch_rapids.cmake index 15b6c43a6f..aadfdb0028 100644 --- a/cpp/template/cmake/thirdparty/fetch_rapids.cmake +++ b/cpp/template/cmake/thirdparty/fetch_rapids.cmake @@ -12,7 +12,7 @@ # the License. 
# Use this variable to update RAPIDS and RAFT versions -set(RAPIDS_VERSION "24.02") +set(RAPIDS_VERSION "24.04") if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake) file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-${RAPIDS_VERSION}/RAPIDS.cmake diff --git a/dependencies.yaml b/dependencies.yaml index 0e4d6d4693..37404a1e37 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -180,7 +180,7 @@ dependencies: common: - output_types: [conda] packages: - - &rmm_conda rmm==24.2.* + - &rmm_conda rmm==24.4.* - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -201,12 +201,12 @@ dependencies: matrices: - matrix: {cuda: "12.2"} packages: &build_pylibraft_packages_cu12 - - &rmm_cu12 rmm-cu12==24.2.* + - &rmm_cu12 rmm-cu12==24.4.* - {matrix: {cuda: "12.1"}, packages: *build_pylibraft_packages_cu12} - {matrix: {cuda: "12.0"}, packages: *build_pylibraft_packages_cu12} - matrix: {cuda: "11.8"} packages: &build_pylibraft_packages_cu11 - - &rmm_cu11 rmm-cu11==24.2.* + - &rmm_cu11 rmm-cu11==24.4.* - {matrix: {cuda: "11.5"}, packages: *build_pylibraft_packages_cu11} - {matrix: {cuda: "11.4"}, packages: *build_pylibraft_packages_cu11} - {matrix: {cuda: "11.2"}, packages: *build_pylibraft_packages_cu11} @@ -459,20 +459,20 @@ dependencies: common: - output_types: [conda, pyproject] packages: - - dask-cuda==24.2.* + - dask-cuda==24.4.* - joblib>=0.11 - numba>=0.57 - *numpy - - rapids-dask-dependency==24.2.* - - ucx-py==0.36.* + - rapids-dask-dependency==24.4.* + - ucx-py==0.37.* - output_types: conda packages: - ucx>=1.13.0 - ucx-proc=*=gpu - - &ucx_py_conda ucx-py==0.36.* + - &ucx_py_conda ucx-py==0.37.* - output_types: pyproject packages: - - &pylibraft_conda pylibraft==24.2.* + - &pylibraft_conda pylibraft==24.4.* - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -484,14 +484,14 @@ dependencies: matrices: - 
matrix: {cuda: "12.2"} packages: &run_raft_dask_packages_cu12 - - &pylibraft_cu12 pylibraft-cu12==24.2.* - - &ucx_py_cu12 ucx-py-cu12==0.36.* + - &pylibraft_cu12 pylibraft-cu12==24.4.* + - &ucx_py_cu12 ucx-py-cu12==0.37.* - {matrix: {cuda: "12.1"}, packages: *run_raft_dask_packages_cu12} - {matrix: {cuda: "12.0"}, packages: *run_raft_dask_packages_cu12} - matrix: {cuda: "11.8"} packages: &run_raft_dask_packages_cu11 - - &pylibraft_cu11 pylibraft-cu11==24.2.* - - &ucx_py_cu11 ucx-py-cu11==0.36.* + - &pylibraft_cu11 pylibraft-cu11==24.4.* + - &ucx_py_cu11 ucx-py-cu11==0.37.* - {matrix: {cuda: "11.5"}, packages: *run_raft_dask_packages_cu11} - {matrix: {cuda: "11.4"}, packages: *run_raft_dask_packages_cu11} - {matrix: {cuda: "11.2"}, packages: *run_raft_dask_packages_cu11} diff --git a/docs/source/build.md b/docs/source/build.md index ae7734d0ed..e76512d16f 100644 --- a/docs/source/build.md +++ b/docs/source/build.md @@ -56,7 +56,7 @@ You can also install the conda packages individually using the `mamba` command a mamba install -c rapidsai -c conda-forge -c nvidia libraft libraft-headers cuda-version=12.0 ``` -If installing the C++ APIs Please see [using libraft](https://docs.rapids.ai/api/raft/nightly/using_libraft/) for more information on using the pre-compiled shared library. You can also refer to the [example C++ template project](https://github.com/rapidsai/raft/tree/branch-24.02/cpp/template) for a ready-to-go CMake configuration that you can drop into your project and build against installed RAFT development artifacts above. +If installing the C++ APIs Please see [using libraft](https://docs.rapids.ai/api/raft/nightly/using_libraft/) for more information on using the pre-compiled shared library. 
You can also refer to the [example C++ template project](https://github.com/rapidsai/raft/tree/branch-24.04/cpp/template) for a ready-to-go CMake configuration that you can drop into your project and build against installed RAFT development artifacts above. ## Installing Python through Pip diff --git a/docs/source/conf.py b/docs/source/conf.py index 2a2c700926..07dd4825fa 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -67,9 +67,9 @@ # built documents. # # The short X.Y version. -version = '24.02' +version = '24.04' # The full version, including alpha/beta/rc tags. -release = '24.02.00' +release = '24.04.00' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/source/developer_guide.md b/docs/source/developer_guide.md index c5bcd03f69..d29130add0 100644 --- a/docs/source/developer_guide.md +++ b/docs/source/developer_guide.md @@ -187,7 +187,7 @@ RAFT relies on `clang-format` to enforce code style across all C++ and CUDA sour 1. Do not split empty functions/records/namespaces. 2. Two-space indentation everywhere, including the line continuations. 3. Disable reflowing of comments. - The reasons behind these deviations from the Google style guide are given in comments [here](https://github.com/rapidsai/raft/blob/branch-24.02/cpp/.clang-format). + The reasons behind these deviations from the Google style guide are given in comments [here](https://github.com/rapidsai/raft/blob/branch-24.04/cpp/.clang-format). [`doxygen`](https://doxygen.nl/) is used as documentation generator and also as a documentation linter. In order to run doxygen as a linter on C++/CUDA code, run @@ -205,7 +205,7 @@ you can run `codespell -i 3 -w .` from the repository root directory. This will bring up an interactive prompt to select which spelling fixes to apply. 
### #include style -[include_checker.py](https://github.com/rapidsai/raft/blob/branch-24.02/cpp/scripts/include_checker.py) is used to enforce the include style as follows: +[include_checker.py](https://github.com/rapidsai/raft/blob/branch-24.04/cpp/scripts/include_checker.py) is used to enforce the include style as follows: 1. `#include "..."` should be used for referencing local files only. It is acceptable to be used for referencing files in a sub-folder/parent-folder of the same algorithm, but should never be used to include files in other algorithms or between algorithms and the primitives or other dependencies. 2. `#include <...>` should be used for referencing everything else @@ -215,7 +215,7 @@ python ./cpp/scripts/include_checker.py --inplace [cpp/include cpp/test ... list ``` ### Copyright header -[copyright.py](https://github.com/rapidsai/raft/blob/branch-24.02/ci/checks/copyright.py) checks the Copyright header for all git-modified files +[copyright.py](https://github.com/rapidsai/raft/blob/branch-24.04/ci/checks/copyright.py) checks the Copyright header for all git-modified files Manually, you can run the following to bulk-fix the header if only the years need to be updated: ```bash @@ -229,7 +229,7 @@ Call CUDA APIs via the provided helper macros `RAFT_CUDA_TRY`, `RAFT_CUBLAS_TRY` ## Logging ### Introduction -Anything and everything about logging is defined inside [logger.hpp](https://github.com/rapidsai/raft/blob/branch-24.02/cpp/include/raft/core/logger.hpp). It uses [spdlog](https://github.com/gabime/spdlog) underneath, but this information is transparent to all. +Anything and everything about logging is defined inside [logger.hpp](https://github.com/rapidsai/raft/blob/branch-24.04/cpp/include/raft/core/logger.hpp). It uses [spdlog](https://github.com/gabime/spdlog) underneath, but this information is transparent to all. 
### Usage ```cpp diff --git a/docs/source/raft_ann_benchmarks.md b/docs/source/raft_ann_benchmarks.md index 3789c3f01f..68fe80f9ce 100644 --- a/docs/source/raft_ann_benchmarks.md +++ b/docs/source/raft_ann_benchmarks.md @@ -62,7 +62,7 @@ Nightly images are located in [dockerhub](https://hub.docker.com/r/rapidsai/raft - The following command pulls the nightly container for python version 10, cuda version 12, and RAFT version 23.10: ```bash -docker pull rapidsai/raft-ann-bench:24.02a-cuda12.0-py3.10 #substitute raft-ann-bench for the exact desired container. +docker pull rapidsai/raft-ann-bench:24.04a-cuda12.0-py3.10 #substitute raft-ann-bench for the exact desired container. ``` The CUDA and python versions can be changed for the supported values: @@ -83,7 +83,7 @@ You can see the exact versions as well in the dockerhub site: [//]: # () [//]: # (```bash) -[//]: # (docker pull nvcr.io/nvidia/rapidsai/raft-ann-bench:24.02-cuda11.8-py3.10 #substitute raft-ann-bench for the exact desired container.) +[//]: # (docker pull nvcr.io/nvidia/rapidsai/raft-ann-bench:24.04-cuda11.8-py3.10 #substitute raft-ann-bench for the exact desired container.) [//]: # (```) @@ -344,7 +344,7 @@ For GPU-enabled systems, the `DATA_FOLDER` variable should be a local folder whe export DATA_FOLDER=path/to/store/datasets/and/results docker run --gpus all --rm -it -u $(id -u) \ -v $DATA_FOLDER:/data/benchmarks \ - rapidsai/raft-ann-bench:24.02a-cuda11.8-py3.10 \ + rapidsai/raft-ann-bench:24.04a-cuda11.8-py3.10 \ "--dataset deep-image-96-angular" \ "--normalize" \ "--algorithms raft_cagra,raft_ivf_pq --batch-size 10 -k 10" \ @@ -355,7 +355,7 @@ Usage of the above command is as follows: | Argument | Description | |-----------------------------------------------------------|----------------------------------------------------------------------------------------------------| -| `rapidsai/raft-ann-bench:24.02a-cuda11.8-py3.10` | Image to use. 
Can be either `raft-ann-bench` or `raft-ann-bench-datasets` | +| `rapidsai/raft-ann-bench:24.04a-cuda11.8-py3.10` | Image to use. Can be either `raft-ann-bench` or `raft-ann-bench-datasets` | | `"--dataset deep-image-96-angular"` | Dataset name | | `"--normalize"` | Whether to normalize the dataset | | `"--algorithms raft_cagra,hnswlib --batch-size 10 -k 10"` | Arguments passed to the `run` script, such as the algorithms to benchmark, the batch size, and `k` | @@ -372,7 +372,7 @@ The container arguments in the above section also be used for the CPU-only conta export DATA_FOLDER=path/to/store/datasets/and/results docker run --rm -it -u $(id -u) \ -v $DATA_FOLDER:/data/benchmarks \ - rapidsai/raft-ann-bench-cpu:24.02a-py3.10 \ + rapidsai/raft-ann-bench-cpu:24.04a-py3.10 \ "--dataset deep-image-96-angular" \ "--normalize" \ "--algorithms hnswlib --batch-size 10 -k 10" \ @@ -389,7 +389,7 @@ docker run --gpus all --rm -it -u $(id -u) \ --entrypoint /bin/bash \ --workdir /data/benchmarks \ -v $DATA_FOLDER:/data/benchmarks \ - rapidsai/raft-ann-bench:24.02a-cuda11.8-py3.10 + rapidsai/raft-ann-bench:24.04a-cuda11.8-py3.10 ``` This will drop you into a command line in the container, with the `raft-ann-bench` python package ready to use, as described in the [Running the benchmarks](#running-the-benchmarks) section above: diff --git a/fetch_rapids.cmake b/fetch_rapids.cmake index ca871c5759..1dca136c97 100644 --- a/fetch_rapids.cmake +++ b/fetch_rapids.cmake @@ -12,7 +12,7 @@ # the License. 
# ============================================================================= if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake) - file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-24.02/RAPIDS.cmake + file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-24.04/RAPIDS.cmake ${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake ) endif() diff --git a/python/pylibraft/CMakeLists.txt b/python/pylibraft/CMakeLists.txt index f7e114ae66..c17243728e 100644 --- a/python/pylibraft/CMakeLists.txt +++ b/python/pylibraft/CMakeLists.txt @@ -16,7 +16,7 @@ cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) include(../../fetch_rapids.cmake) -set(pylibraft_version 24.02.00) +set(pylibraft_version 24.04.00) # We always need CUDA for pylibraft because the raft dependency brings in a header-only cuco # dependency that enables CUDA unconditionally. diff --git a/python/pylibraft/pyproject.toml b/python/pylibraft/pyproject.toml index 5070d6cf6f..ba45a6d33d 100644 --- a/python/pylibraft/pyproject.toml +++ b/python/pylibraft/pyproject.toml @@ -19,7 +19,7 @@ requires = [ "cuda-python>=11.7.1,<12.0a0", "cython>=3.0.0", "ninja", - "rmm==24.2.*", + "rmm==24.4.*", "scikit-build-core[pyproject]>=0.7.0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. build-backend = "scikit_build_core.build" @@ -37,7 +37,7 @@ requires-python = ">=3.9" dependencies = [ "cuda-python>=11.7.1,<12.0a0", "numpy>=1.21", - "rmm==24.2.*", + "rmm==24.4.*", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
classifiers = [ "Intended Audience :: Developers", diff --git a/python/raft-ann-bench/pyproject.toml b/python/raft-ann-bench/pyproject.toml index e3ae36ef62..4a185b22ca 100644 --- a/python/raft-ann-bench/pyproject.toml +++ b/python/raft-ann-bench/pyproject.toml @@ -9,7 +9,7 @@ requires = [ [project] name = "raft-ann-bench" -version = "24.02.00" +version = "24.04.00" description = "RAFT ANN benchmarks" authors = [ { name = "NVIDIA Corporation" }, diff --git a/python/raft-dask/CMakeLists.txt b/python/raft-dask/CMakeLists.txt index 1d27e49583..ff441e343e 100644 --- a/python/raft-dask/CMakeLists.txt +++ b/python/raft-dask/CMakeLists.txt @@ -14,7 +14,7 @@ cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) -set(raft_dask_version 24.02.00) +set(raft_dask_version 24.04.00) include(../../fetch_rapids.cmake) include(rapids-cuda) diff --git a/python/raft-dask/pyproject.toml b/python/raft-dask/pyproject.toml index 37d94be7f9..33e0c15684 100644 --- a/python/raft-dask/pyproject.toml +++ b/python/raft-dask/pyproject.toml @@ -33,13 +33,13 @@ authors = [ license = { text = "Apache 2.0" } requires-python = ">=3.9" dependencies = [ - "dask-cuda==24.2.*", + "dask-cuda==24.4.*", "joblib>=0.11", "numba>=0.57", "numpy>=1.21", - "pylibraft==24.2.*", - "rapids-dask-dependency==24.2.*", - "ucx-py==0.36.*", + "pylibraft==24.4.*", + "rapids-dask-dependency==24.4.*", + "ucx-py==0.37.*", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ "Intended Audience :: Developers", From fdd4ad2c78458cd08b663dd4e7fdb511033c2d66 Mon Sep 17 00:00:00 2001 From: Rui Lan Date: Tue, 23 Jan 2024 14:31:05 -0800 Subject: [PATCH 05/14] Revert "Allow topk larger than 1024 in CAGRA (#2097)" This reverts commit 0586fc324598cc25e4d8c9f4f6dc3db2eb3ab521. 
--- .../neighbors/detail/cagra/cagra_search.cuh | 2 +- .../detail/cagra/search_multi_kernel.cuh | 167 ++++-------------- .../neighbors/detail/cagra/search_plan.cuh | 10 +- cpp/test/neighbors/ann_cagra.cuh | 20 --- 4 files changed, 36 insertions(+), 163 deletions(-) diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh index 40cc7c76fb..41a43c9bce 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh @@ -131,7 +131,7 @@ void search_main(raft::resources const& res, factory::create( res, params, index.dim(), index.graph_degree(), topk); - plan->check(topk); + plan->check(neighbors.extent(1)); RAFT_LOG_DEBUG("Cagra search"); const uint32_t max_queries = plan->max_queries; diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh index f9bf525503..7be3fedfa2 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * Copyright (c) 2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -37,7 +37,6 @@ #include "topk_for_cagra/topk_core.cuh" //todo replace with raft kernel #include "utils.hpp" #include -#include #include #include // RAFT_CUDA_TRY_NOT_THROW is used TODO(tfeher): consider moving this to cuda_rt_essentials.hpp @@ -654,12 +653,6 @@ struct search : search_plan_impl { rmm::device_scalar terminate_flag; // dev_terminate_flag, host_terminate_flag.; rmm::device_uvector topk_workspace; - // temporary storage for _find_topk - rmm::device_uvector input_keys_storage; - rmm::device_uvector output_keys_storage; - rmm::device_uvector input_values_storage; - rmm::device_uvector output_values_storage; - search(raft::resources const& res, search_params params, int64_t dim, @@ -672,11 +665,7 @@ struct search : search_plan_impl { parent_node_list(0, resource::get_cuda_stream(res)), topk_hint(0, resource::get_cuda_stream(res)), topk_workspace(0, resource::get_cuda_stream(res)), - terminate_flag(resource::get_cuda_stream(res)), - input_keys_storage(0, resource::get_cuda_stream(res)), - output_keys_storage(0, resource::get_cuda_stream(res)), - input_values_storage(0, resource::get_cuda_stream(res)), - output_values_storage(0, resource::get_cuda_stream(res)) + terminate_flag(resource::get_cuda_stream(res)) { set_params(res); } @@ -706,98 +695,6 @@ struct search : search_plan_impl { ~search() {} - inline void _find_topk(raft::resources const& handle, - uint32_t topK, - uint32_t sizeBatch, - uint32_t numElements, - const float* inputKeys, // [sizeBatch, ldIK,] - uint32_t ldIK, // (*) ldIK >= numElements - const INDEX_T* inputVals, // [sizeBatch, ldIV,] - uint32_t ldIV, // (*) ldIV >= numElements - float* outputKeys, // [sizeBatch, ldOK,] - uint32_t ldOK, // (*) ldOK >= topK - INDEX_T* outputVals, // [sizeBatch, ldOV,] - uint32_t ldOV, // (*) ldOV >= topK - void* workspace, - bool sort, - uint32_t* hints) - { - auto stream = resource::get_cuda_stream(handle); - - // _cuann_find_topk right now is limited to a max-k of 1024. 
- // RAFT has a matrix::select_k function - which handles arbitrary sized values of k, - // but doesn't accept strided inputs unlike _cuann_find_topk - // The multi-kernel search path requires strided access - since its cleverly allocating memory - // (layout described in the search_plan_impl function below), such that both the - // neighbors and the internal_topk are adjacent - in a double buffered format. - // Since this layout doesn't work with the matrix::select_k code - we have to copy - // over to a contiguous (non-strided) access to handle topk larger than 1024, and - // potentially also copy back to a strided layout afterwards - if (topK <= 1024) { - return _cuann_find_topk(topK, - sizeBatch, - numElements, - inputKeys, - ldIK, - inputVals, - ldIV, - outputKeys, - ldOK, - outputVals, - ldOV, - workspace, - sort, - hints, - stream); - } - - if (ldIK > numElements) { - if (input_keys_storage.size() != sizeBatch * numElements) { - input_keys_storage.resize(sizeBatch * numElements, stream); - } - batched_memcpy( - input_keys_storage.data(), numElements, inputKeys, ldIK, numElements, sizeBatch, stream); - inputKeys = input_keys_storage.data(); - } - - if (ldIV > numElements) { - if (input_values_storage.size() != sizeBatch * numElements) { - input_values_storage.resize(sizeBatch * numElements, stream); - } - - batched_memcpy( - input_values_storage.data(), numElements, inputVals, ldIV, numElements, sizeBatch, stream); - inputVals = input_values_storage.data(); - } - - if ((ldOK > topK) && (output_keys_storage.size() != sizeBatch * topK)) { - output_keys_storage.resize(sizeBatch * topK, stream); - } - - if ((ldOV > topK) && (output_values_storage.size() != sizeBatch * topK)) { - output_values_storage.resize(sizeBatch * topK, stream); - } - - raft::matrix::select_k( - handle, - raft::make_device_matrix_view(inputKeys, sizeBatch, numElements), - raft::make_device_matrix_view(inputVals, sizeBatch, numElements), - raft::make_device_matrix_view( - ldOK > topK ? 
output_keys_storage.data() : outputKeys, sizeBatch, topK), - raft::make_device_matrix_view( - ldOV > topK ? output_values_storage.data() : outputVals, sizeBatch, topK), - true, // select_min - sort); - - if (ldOK > topK) { - batched_memcpy(outputKeys, ldOK, output_keys_storage.data(), topK, topK, sizeBatch, stream); - } - - if (ldOV > topK) { - batched_memcpy(outputVals, ldOV, output_values_storage.data(), topK, topK, sizeBatch, stream); - } - } - void operator()(raft::resources const& res, raft::device_matrix_view dataset, raft::device_matrix_view graph, @@ -849,21 +746,21 @@ struct search : search_plan_impl { unsigned iter = 0; while (1) { // Make an index list of internal top-k nodes - _find_topk(res, - itopk_size, - num_queries, - result_buffer_size, - result_distances.data() + (iter & 0x1) * itopk_size, - result_buffer_allocation_size, - result_indices.data() + (iter & 0x1) * itopk_size, - result_buffer_allocation_size, - result_distances.data() + (1 - (iter & 0x1)) * result_buffer_size, - result_buffer_allocation_size, - result_indices.data() + (1 - (iter & 0x1)) * result_buffer_size, - result_buffer_allocation_size, - topk_workspace.data(), - true, - top_hint_ptr); + _cuann_find_topk(itopk_size, + num_queries, + result_buffer_size, + result_distances.data() + (iter & 0x1) * itopk_size, + result_buffer_allocation_size, + result_indices.data() + (iter & 0x1) * itopk_size, + result_buffer_allocation_size, + result_distances.data() + (1 - (iter & 0x1)) * result_buffer_size, + result_buffer_allocation_size, + result_indices.data() + (1 - (iter & 0x1)) * result_buffer_size, + result_buffer_allocation_size, + topk_workspace.data(), + true, + top_hint_ptr, + stream); // termination (1) if ((iter + 1 == max_iterations)) { @@ -944,21 +841,21 @@ struct search : search_plan_impl { result_indices_ptr = result_indices.data() + (1 - (iter & 0x1)) * result_buffer_size; result_distances_ptr = result_distances.data() + (1 - (iter & 0x1)) * result_buffer_size; - 
_find_topk(res, - itopk_size, - num_queries, - result_buffer_size, - result_distances.data() + (iter & 0x1) * itopk_size, - result_buffer_allocation_size, - result_indices.data() + (iter & 0x1) * itopk_size, - result_buffer_allocation_size, - result_distances_ptr, - result_buffer_allocation_size, - result_indices_ptr, - result_buffer_allocation_size, - topk_workspace.data(), - true, - top_hint_ptr); + _cuann_find_topk(itopk_size, + num_queries, + result_buffer_size, + result_distances.data() + (iter & 0x1) * itopk_size, + result_buffer_allocation_size, + result_indices.data() + (iter & 0x1) * itopk_size, + result_buffer_allocation_size, + result_distances_ptr, + result_buffer_allocation_size, + result_indices_ptr, + result_buffer_allocation_size, + topk_workspace.data(), + true, + top_hint_ptr, + stream); } else { // Remove parent bit in search results remove_parent_bit( diff --git a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh index 271a1f4955..20df2adf61 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh @@ -42,12 +42,9 @@ struct search_plan_impl_base : public search_params { if (itopk_size <= 512 && search_params::max_queries >= num_sm * 2lu) { algo = search_algo::SINGLE_CTA; RAFT_LOG_DEBUG("Auto strategy: selecting single-cta"); - } else if (topk <= 1024) { + } else { algo = search_algo::MULTI_CTA; RAFT_LOG_DEBUG("Auto strategy: selecting multi-cta"); - } else { - algo = search_algo::MULTI_KERNEL; - RAFT_LOG_DEBUG("Auto strategy: selecting multi kernel"); } } } @@ -258,8 +255,7 @@ struct search_plan_impl : public search_plan_impl_base { virtual void check(const uint32_t topk) { // For single-CTA and multi kernel - RAFT_EXPECTS( - topk <= itopk_size, "topk = %u must be smaller than itopk_size = %lu", topk, itopk_size); + RAFT_EXPECTS(topk <= itopk_size, "topk must be smaller than itopk_size = %lu", itopk_size); } 
inline void check_params() @@ -267,7 +263,7 @@ struct search_plan_impl : public search_plan_impl_base { std::string error_message = ""; if (itopk_size > 1024) { - if ((algo == search_algo::MULTI_CTA) || (algo == search_algo::MULTI_KERNEL)) { + if (algo == search_algo::MULTI_CTA) { } else { error_message += std::string("- `internal_topk` (" + std::to_string(itopk_size) + ") must be smaller or equal to 1024"); diff --git a/cpp/test/neighbors/ann_cagra.cuh b/cpp/test/neighbors/ann_cagra.cuh index ef4f27ae64..915ef8a394 100644 --- a/cpp/test/neighbors/ann_cagra.cuh +++ b/cpp/test/neighbors/ann_cagra.cuh @@ -259,7 +259,6 @@ class AnnCagraTest : public ::testing::TestWithParam { search_params.algo = ps.algo; search_params.max_queries = ps.max_queries; search_params.team_size = ps.team_size; - search_params.itopk_size = ps.itopk_size; auto database_view = raft::make_device_matrix_view( (const DataT*)database.data(), ps.n_rows, ps.dim); @@ -497,7 +496,6 @@ class AnnCagraFilterTest : public ::testing::TestWithParam { search_params.algo = ps.algo; search_params.max_queries = ps.max_queries; search_params.team_size = ps.team_size; - search_params.itopk_size = ps.itopk_size; search_params.hashmap_mode = cagra::hash_mode::HASH; auto database_view = raft::make_device_matrix_view( @@ -613,7 +611,6 @@ class AnnCagraFilterTest : public ::testing::TestWithParam { search_params.algo = ps.algo; search_params.max_queries = ps.max_queries; search_params.team_size = ps.team_size; - search_params.itopk_size = ps.itopk_size; search_params.hashmap_mode = cagra::hash_mode::HASH; auto database_view = raft::make_device_matrix_view( @@ -821,23 +818,6 @@ inline std::vector generate_inputs() {0.995}); inputs.insert(inputs.end(), inputs2.begin(), inputs2.end()); - inputs2 = - raft::util::itertools::product({100}, - {20000}, - {32}, - {2048}, // k - {graph_build_algo::NN_DESCENT}, - {search_algo::AUTO}, - {10}, - {0}, - {4096}, // itopk_size - {1}, - {raft::distance::DistanceType::L2Expanded}, - 
{false}, - {false}, - {0.995}); - inputs.insert(inputs.end(), inputs2.begin(), inputs2.end()); - return inputs; } From f8bc4ff4b26754059d1b4a631e1bd68e00eb1a35 Mon Sep 17 00:00:00 2001 From: Rui Lan Date: Tue, 23 Jan 2024 14:53:44 -0800 Subject: [PATCH 06/14] Revert "Revert "Allow topk larger than 1024 in CAGRA (#2097)"" This reverts commit fdd4ad2c78458cd08b663dd4e7fdb511033c2d66. --- .../neighbors/detail/cagra/cagra_search.cuh | 2 +- .../detail/cagra/search_multi_kernel.cuh | 167 ++++++++++++++---- .../neighbors/detail/cagra/search_plan.cuh | 10 +- cpp/test/neighbors/ann_cagra.cuh | 20 +++ 4 files changed, 163 insertions(+), 36 deletions(-) diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh index 41a43c9bce..40cc7c76fb 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh @@ -131,7 +131,7 @@ void search_main(raft::resources const& res, factory::create( res, params, index.dim(), index.graph_degree(), topk); - plan->check(neighbors.extent(1)); + plan->check(topk); RAFT_LOG_DEBUG("Cagra search"); const uint32_t max_queries = plan->max_queries; diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh index 7be3fedfa2..f9bf525503 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -37,6 +37,7 @@ #include "topk_for_cagra/topk_core.cuh" //todo replace with raft kernel #include "utils.hpp" #include +#include #include #include // RAFT_CUDA_TRY_NOT_THROW is used TODO(tfeher): consider moving this to cuda_rt_essentials.hpp @@ -653,6 +654,12 @@ struct search : search_plan_impl { rmm::device_scalar terminate_flag; // dev_terminate_flag, host_terminate_flag.; rmm::device_uvector topk_workspace; + // temporary storage for _find_topk + rmm::device_uvector input_keys_storage; + rmm::device_uvector output_keys_storage; + rmm::device_uvector input_values_storage; + rmm::device_uvector output_values_storage; + search(raft::resources const& res, search_params params, int64_t dim, @@ -665,7 +672,11 @@ struct search : search_plan_impl { parent_node_list(0, resource::get_cuda_stream(res)), topk_hint(0, resource::get_cuda_stream(res)), topk_workspace(0, resource::get_cuda_stream(res)), - terminate_flag(resource::get_cuda_stream(res)) + terminate_flag(resource::get_cuda_stream(res)), + input_keys_storage(0, resource::get_cuda_stream(res)), + output_keys_storage(0, resource::get_cuda_stream(res)), + input_values_storage(0, resource::get_cuda_stream(res)), + output_values_storage(0, resource::get_cuda_stream(res)) { set_params(res); } @@ -695,6 +706,98 @@ struct search : search_plan_impl { ~search() {} + inline void _find_topk(raft::resources const& handle, + uint32_t topK, + uint32_t sizeBatch, + uint32_t numElements, + const float* inputKeys, // [sizeBatch, ldIK,] + uint32_t ldIK, // (*) ldIK >= numElements + const INDEX_T* inputVals, // [sizeBatch, ldIV,] + uint32_t ldIV, // (*) ldIV >= numElements + float* outputKeys, // [sizeBatch, ldOK,] + uint32_t ldOK, // (*) ldOK >= topK + INDEX_T* outputVals, // [sizeBatch, ldOV,] + uint32_t ldOV, // (*) ldOV >= topK + void* workspace, + bool sort, + uint32_t* hints) + { + auto stream = resource::get_cuda_stream(handle); + + // _cuann_find_topk right now is limited to a max-k of 1024. 
+ // RAFT has a matrix::select_k function - which handles arbitrary sized values of k, + // but doesn't accept strided inputs unlike _cuann_find_topk + // The multi-kernel search path requires strided access - since its cleverly allocating memory + // (layout described in the search_plan_impl function below), such that both the + // neighbors and the internal_topk are adjacent - in a double buffered format. + // Since this layout doesn't work with the matrix::select_k code - we have to copy + // over to a contiguous (non-strided) access to handle topk larger than 1024, and + // potentially also copy back to a strided layout afterwards + if (topK <= 1024) { + return _cuann_find_topk(topK, + sizeBatch, + numElements, + inputKeys, + ldIK, + inputVals, + ldIV, + outputKeys, + ldOK, + outputVals, + ldOV, + workspace, + sort, + hints, + stream); + } + + if (ldIK > numElements) { + if (input_keys_storage.size() != sizeBatch * numElements) { + input_keys_storage.resize(sizeBatch * numElements, stream); + } + batched_memcpy( + input_keys_storage.data(), numElements, inputKeys, ldIK, numElements, sizeBatch, stream); + inputKeys = input_keys_storage.data(); + } + + if (ldIV > numElements) { + if (input_values_storage.size() != sizeBatch * numElements) { + input_values_storage.resize(sizeBatch * numElements, stream); + } + + batched_memcpy( + input_values_storage.data(), numElements, inputVals, ldIV, numElements, sizeBatch, stream); + inputVals = input_values_storage.data(); + } + + if ((ldOK > topK) && (output_keys_storage.size() != sizeBatch * topK)) { + output_keys_storage.resize(sizeBatch * topK, stream); + } + + if ((ldOV > topK) && (output_values_storage.size() != sizeBatch * topK)) { + output_values_storage.resize(sizeBatch * topK, stream); + } + + raft::matrix::select_k( + handle, + raft::make_device_matrix_view(inputKeys, sizeBatch, numElements), + raft::make_device_matrix_view(inputVals, sizeBatch, numElements), + raft::make_device_matrix_view( + ldOK > topK ? 
output_keys_storage.data() : outputKeys, sizeBatch, topK), + raft::make_device_matrix_view( + ldOV > topK ? output_values_storage.data() : outputVals, sizeBatch, topK), + true, // select_min + sort); + + if (ldOK > topK) { + batched_memcpy(outputKeys, ldOK, output_keys_storage.data(), topK, topK, sizeBatch, stream); + } + + if (ldOV > topK) { + batched_memcpy(outputVals, ldOV, output_values_storage.data(), topK, topK, sizeBatch, stream); + } + } + void operator()(raft::resources const& res, raft::device_matrix_view dataset, raft::device_matrix_view graph, @@ -746,21 +849,21 @@ struct search : search_plan_impl { unsigned iter = 0; while (1) { // Make an index list of internal top-k nodes - _cuann_find_topk(itopk_size, - num_queries, - result_buffer_size, - result_distances.data() + (iter & 0x1) * itopk_size, - result_buffer_allocation_size, - result_indices.data() + (iter & 0x1) * itopk_size, - result_buffer_allocation_size, - result_distances.data() + (1 - (iter & 0x1)) * result_buffer_size, - result_buffer_allocation_size, - result_indices.data() + (1 - (iter & 0x1)) * result_buffer_size, - result_buffer_allocation_size, - topk_workspace.data(), - true, - top_hint_ptr, - stream); + _find_topk(res, + itopk_size, + num_queries, + result_buffer_size, + result_distances.data() + (iter & 0x1) * itopk_size, + result_buffer_allocation_size, + result_indices.data() + (iter & 0x1) * itopk_size, + result_buffer_allocation_size, + result_distances.data() + (1 - (iter & 0x1)) * result_buffer_size, + result_buffer_allocation_size, + result_indices.data() + (1 - (iter & 0x1)) * result_buffer_size, + result_buffer_allocation_size, + topk_workspace.data(), + true, + top_hint_ptr); // termination (1) if ((iter + 1 == max_iterations)) { @@ -841,21 +944,21 @@ struct search : search_plan_impl { result_indices_ptr = result_indices.data() + (1 - (iter & 0x1)) * result_buffer_size; result_distances_ptr = result_distances.data() + (1 - (iter & 0x1)) * result_buffer_size; - 
_cuann_find_topk(itopk_size, - num_queries, - result_buffer_size, - result_distances.data() + (iter & 0x1) * itopk_size, - result_buffer_allocation_size, - result_indices.data() + (iter & 0x1) * itopk_size, - result_buffer_allocation_size, - result_distances_ptr, - result_buffer_allocation_size, - result_indices_ptr, - result_buffer_allocation_size, - topk_workspace.data(), - true, - top_hint_ptr, - stream); + _find_topk(res, + itopk_size, + num_queries, + result_buffer_size, + result_distances.data() + (iter & 0x1) * itopk_size, + result_buffer_allocation_size, + result_indices.data() + (iter & 0x1) * itopk_size, + result_buffer_allocation_size, + result_distances_ptr, + result_buffer_allocation_size, + result_indices_ptr, + result_buffer_allocation_size, + topk_workspace.data(), + true, + top_hint_ptr); } else { // Remove parent bit in search results remove_parent_bit( diff --git a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh index 20df2adf61..271a1f4955 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh @@ -42,9 +42,12 @@ struct search_plan_impl_base : public search_params { if (itopk_size <= 512 && search_params::max_queries >= num_sm * 2lu) { algo = search_algo::SINGLE_CTA; RAFT_LOG_DEBUG("Auto strategy: selecting single-cta"); - } else { + } else if (topk <= 1024) { algo = search_algo::MULTI_CTA; RAFT_LOG_DEBUG("Auto strategy: selecting multi-cta"); + } else { + algo = search_algo::MULTI_KERNEL; + RAFT_LOG_DEBUG("Auto strategy: selecting multi kernel"); } } } @@ -255,7 +258,8 @@ struct search_plan_impl : public search_plan_impl_base { virtual void check(const uint32_t topk) { // For single-CTA and multi kernel - RAFT_EXPECTS(topk <= itopk_size, "topk must be smaller than itopk_size = %lu", itopk_size); + RAFT_EXPECTS( + topk <= itopk_size, "topk = %u must be smaller than itopk_size = %lu", topk, itopk_size); } 
inline void check_params() @@ -263,7 +267,7 @@ struct search_plan_impl : public search_plan_impl_base { std::string error_message = ""; if (itopk_size > 1024) { - if (algo == search_algo::MULTI_CTA) { + if ((algo == search_algo::MULTI_CTA) || (algo == search_algo::MULTI_KERNEL)) { } else { error_message += std::string("- `internal_topk` (" + std::to_string(itopk_size) + ") must be smaller or equal to 1024"); diff --git a/cpp/test/neighbors/ann_cagra.cuh b/cpp/test/neighbors/ann_cagra.cuh index 915ef8a394..ef4f27ae64 100644 --- a/cpp/test/neighbors/ann_cagra.cuh +++ b/cpp/test/neighbors/ann_cagra.cuh @@ -259,6 +259,7 @@ class AnnCagraTest : public ::testing::TestWithParam { search_params.algo = ps.algo; search_params.max_queries = ps.max_queries; search_params.team_size = ps.team_size; + search_params.itopk_size = ps.itopk_size; auto database_view = raft::make_device_matrix_view( (const DataT*)database.data(), ps.n_rows, ps.dim); @@ -496,6 +497,7 @@ class AnnCagraFilterTest : public ::testing::TestWithParam { search_params.algo = ps.algo; search_params.max_queries = ps.max_queries; search_params.team_size = ps.team_size; + search_params.itopk_size = ps.itopk_size; search_params.hashmap_mode = cagra::hash_mode::HASH; auto database_view = raft::make_device_matrix_view( @@ -611,6 +613,7 @@ class AnnCagraFilterTest : public ::testing::TestWithParam { search_params.algo = ps.algo; search_params.max_queries = ps.max_queries; search_params.team_size = ps.team_size; + search_params.itopk_size = ps.itopk_size; search_params.hashmap_mode = cagra::hash_mode::HASH; auto database_view = raft::make_device_matrix_view( @@ -818,6 +821,23 @@ inline std::vector generate_inputs() {0.995}); inputs.insert(inputs.end(), inputs2.begin(), inputs2.end()); + inputs2 = + raft::util::itertools::product({100}, + {20000}, + {32}, + {2048}, // k + {graph_build_algo::NN_DESCENT}, + {search_algo::AUTO}, + {10}, + {0}, + {4096}, // itopk_size + {1}, + {raft::distance::DistanceType::L2Expanded}, + 
{false}, + {false}, + {0.995}); + inputs.insert(inputs.end(), inputs2.begin(), inputs2.end()); + return inputs; } From 2856bff39532e08e0a84a3dc3af78fc121d90774 Mon Sep 17 00:00:00 2001 From: Rui Lan Date: Tue, 23 Jan 2024 15:04:24 -0800 Subject: [PATCH 07/14] Revert "Merge remote-tracking branch 'upstream/branch-24.04' into subsampling-ivfpq-codebook" This reverts commit aea3d445a18d2a93cf8500ad50390f1976e79317, reversing changes made to c90cdfa3b45744e0e6d90666755355fb7e90da43. --- .../ann/src/raft/raft_ann_bench_param_parser.h | 3 --- .../raft/neighbors/detail/ivf_pq_build.cuh | 16 ++++++---------- cpp/include/raft/neighbors/ivf_pq_types.hpp | 6 ------ .../pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd | 3 +-- .../pylibraft/neighbors/ivf_pq/ivf_pq.pyx | 15 +-------------- 5 files changed, 8 insertions(+), 35 deletions(-) diff --git a/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h b/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h index 1b876ea147..2a021a8a12 100644 --- a/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h +++ b/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h @@ -87,9 +87,6 @@ void parse_build_param(const nlohmann::json& conf, "', should be either 'cluster' or 'subspace'"); } } - if (conf.contains("pq_codebook_ratio")) { - param.pq_codebook_trainset_fraction = 1.0 / (double)conf.at("pq_codebook_ratio"); - } } template diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh index edaea70bcc..cc94511fe7 100644 --- a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh @@ -353,17 +353,14 @@ void train_per_subset(raft::resources const& handle, const float* trainset, // [n_rows, dim] const uint32_t* labels, // [n_rows] uint32_t kmeans_n_iters, - double pq_codebook_trainset_fraction, rmm::mr::device_memory_resource* managed_memory) { auto stream = resource::get_cuda_stream(handle); auto device_memory = 
resource::get_workspace_resource(handle); rmm::device_uvector pq_centers_tmp(index.pq_centers().size(), stream, device_memory); - // Subsampling the train set for codebook generation based on pq_codebook_trainset_fraction. - auto pq_n_rows = uint32_t(n_rows * pq_codebook_trainset_fraction); - rmm::device_uvector sub_trainset(pq_n_rows * size_t(index.pq_len()), stream, device_memory); - rmm::device_uvector sub_labels(pq_n_rows, stream, device_memory); + rmm::device_uvector sub_trainset(n_rows * size_t(index.pq_len()), stream, device_memory); + rmm::device_uvector sub_labels(n_rows, stream, device_memory); rmm::device_uvector pq_cluster_sizes(index.pq_book_size(), stream, device_memory); @@ -374,7 +371,7 @@ void train_per_subset(raft::resources const& handle, // Get the rotated cluster centers for each training vector. // This will be subtracted from the input vectors afterwards. utils::copy_selected( - pq_n_rows, + n_rows, index.pq_len(), index.centers_rot().data_handle() + index.pq_len() * j, labels, @@ -390,7 +387,7 @@ void train_per_subset(raft::resources const& handle, true, false, index.pq_len(), - pq_n_rows, + n_rows, index.dim(), &alpha, index.rotation_matrix().data_handle() + index.dim() * index.pq_len() * j, @@ -404,12 +401,12 @@ void train_per_subset(raft::resources const& handle, // train PQ codebook for this subspace auto sub_trainset_view = - raft::make_device_matrix_view(sub_trainset.data(), pq_n_rows, index.pq_len()); + raft::make_device_matrix_view(sub_trainset.data(), n_rows, index.pq_len()); auto centers_tmp_view = raft::make_device_matrix_view( pq_centers_tmp.data() + index.pq_book_size() * index.pq_len() * j, index.pq_book_size(), index.pq_len()); - auto sub_labels_view = raft::make_device_vector_view(sub_labels.data(), pq_n_rows); + auto sub_labels_view = raft::make_device_vector_view(sub_labels.data(), n_rows); auto cluster_sizes_view = raft::make_device_vector_view(pq_cluster_sizes.data(), index.pq_book_size()); 
raft::cluster::kmeans_balanced_params kmeans_params; @@ -1791,7 +1788,6 @@ auto build(raft::resources const& handle, trainset.data_handle(), labels.data(), params.kmeans_n_iters, - params.pq_codebook_trainset_fraction, &managed_mr); break; case codebook_gen::PER_CLUSTER: diff --git a/cpp/include/raft/neighbors/ivf_pq_types.hpp b/cpp/include/raft/neighbors/ivf_pq_types.hpp index 8340bfde67..45ab18c84f 100644 --- a/cpp/include/raft/neighbors/ivf_pq_types.hpp +++ b/cpp/include/raft/neighbors/ivf_pq_types.hpp @@ -105,12 +105,6 @@ struct index_params : ann::index_params { * flag to `true` if you prefer to use as little GPU memory for the database as possible. */ bool conservative_memory_allocation = false; - /** - * The fraction of data to use during PQ codebook generation on top of the subsampled data - * controlled by kmeans_trainset_fraction. The parameter is only used when PQ codebook generation - * kind is PER_SUBSPACE and ignored when PQ codebook generation kind is PER_CLUSTER. - */ - double pq_codebook_trainset_fraction = 1; }; struct search_params : ann::search_params { diff --git a/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd b/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd index 81f96df9cd..531c2428e9 100644 --- a/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd +++ b/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd @@ -78,8 +78,7 @@ cdef extern from "raft/neighbors/ivf_pq_types.hpp" \ codebook_gen codebook_kind bool force_random_rotation bool conservative_memory_allocation - double pq_codebook_trainset_fraction - + cdef cppclass index[IdxT](ann_index): index(const device_resources& handle, DistanceType metric, diff --git a/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx b/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx index 5b0675d069..0c1bbf6b9c 100644 --- a/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx +++ b/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx @@ -156,13 +156,6 
@@ cdef class IndexParams: repeated calls to `extend` (extending the database). To disable this behavior and use as little GPU memory for the database as possible, set this flat to `True`. - pq_codebook_trainset_fraction : int, default = 0.5 - If pq_codebook_trainset_fraction is less than 1, then the dataset is - subsampled for PQ codebook generation, and only n_samples * - pq_codebook_trainset_fraction rows are used for PQ codebook generation. - This subsampling is applied after kmeans_trainset subsampling, - controlled by kmeans_trainset_fraction and only used when codebook_kind - is PER_SUBSPACE. """ def __init__(self, *, n_lists=1024, @@ -174,8 +167,7 @@ cdef class IndexParams: codebook_kind="subspace", force_random_rotation=False, add_data_on_build=True, - conservative_memory_allocation=False, - pq_codebook_trainset_fraction=0.5): + conservative_memory_allocation=False): self.params.n_lists = n_lists self.params.metric = _get_metric(metric) self.params.metric_arg = 0 @@ -193,8 +185,6 @@ cdef class IndexParams: self.params.add_data_on_build = add_data_on_build self.params.conservative_memory_allocation = \ conservative_memory_allocation - self.params.pq_codebook_trainset_fraction = \ - pq_codebook_trainset_fraction @property def n_lists(self): @@ -236,9 +226,6 @@ cdef class IndexParams: def conservative_memory_allocation(self): return self.params.conservative_memory_allocation - @property - def pq_codebook_trainset_fraction(self): - return self.params.pq_codebook_trainset_fraction cdef class Index: # We store a pointer to the index because it dose not have a trivial From 49112ddd1969aa1bd36caf4edadd4350f016b3b8 Mon Sep 17 00:00:00 2001 From: Rui Lan Date: Tue, 23 Jan 2024 15:04:49 -0800 Subject: [PATCH 08/14] Revert "Merge pull request #2116 from rapidsai/branch-24.02" This reverts commit c90cdfa3b45744e0e6d90666755355fb7e90da43, reversing changes made to 0586fc324598cc25e4d8c9f4f6dc3db2eb3ab521. 
--- .../cuda11.8-conda/devcontainer.json | 4 +-- .devcontainer/cuda11.8-pip/devcontainer.json | 6 ++--- .../cuda12.0-conda/devcontainer.json | 4 +-- .devcontainer/cuda12.0-pip/devcontainer.json | 6 ++--- .github/workflows/build.yaml | 16 ++++++------ .github/workflows/pr.yaml | 26 +++++++++---------- .github/workflows/test.yaml | 10 +++---- README.md | 2 +- VERSION | 2 +- ci/build_docs.sh | 2 +- .../all_cuda-118_arch-aarch64.yaml | 8 +++--- .../all_cuda-118_arch-x86_64.yaml | 8 +++--- .../all_cuda-120_arch-aarch64.yaml | 8 +++--- .../all_cuda-120_arch-x86_64.yaml | 8 +++--- .../bench_ann_cuda-118_arch-aarch64.yaml | 2 +- .../bench_ann_cuda-118_arch-x86_64.yaml | 2 +- .../bench_ann_cuda-120_arch-aarch64.yaml | 2 +- .../bench_ann_cuda-120_arch-x86_64.yaml | 2 +- .../recipes/raft-dask/conda_build_config.yaml | 2 +- cpp/CMakeLists.txt | 4 +-- cpp/doxygen/Doxyfile | 2 +- .../cmake/thirdparty/fetch_rapids.cmake | 2 +- dependencies.yaml | 24 ++++++++--------- docs/source/build.md | 2 +- docs/source/conf.py | 4 +-- docs/source/developer_guide.md | 8 +++--- docs/source/raft_ann_benchmarks.md | 12 ++++----- fetch_rapids.cmake | 2 +- python/pylibraft/CMakeLists.txt | 2 +- python/pylibraft/pyproject.toml | 4 +-- python/raft-ann-bench/pyproject.toml | 2 +- python/raft-dask/CMakeLists.txt | 2 +- python/raft-dask/pyproject.toml | 8 +++--- 33 files changed, 99 insertions(+), 99 deletions(-) diff --git a/.devcontainer/cuda11.8-conda/devcontainer.json b/.devcontainer/cuda11.8-conda/devcontainer.json index 2682510ed1..4c2161cab7 100644 --- a/.devcontainer/cuda11.8-conda/devcontainer.json +++ b/.devcontainer/cuda11.8-conda/devcontainer.json @@ -5,12 +5,12 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:24.04-cpp-llvm16-cuda11.8-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.02-cpp-llvm16-cuda11.8-mambaforge-ubuntu22.04" } }, "hostRequirements": {"gpu": "optional"}, "features": { - 
"ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.4": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.2": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json index f96fef205f..8bbc016620 100644 --- a/.devcontainer/cuda11.8-pip/devcontainer.json +++ b/.devcontainer/cuda11.8-pip/devcontainer.json @@ -5,13 +5,13 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.04-cpp-llvm16-cuda11.8-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.02-cpp-llvm16-cuda11.8-ubuntu22.04" } }, "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/ucx:24.4": {"version": "1.14.1"}, - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.4": {} + "ghcr.io/rapidsai/devcontainers/features/ucx:24.2": {"version": "1.14.1"}, + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.2": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/ucx", diff --git a/.devcontainer/cuda12.0-conda/devcontainer.json b/.devcontainer/cuda12.0-conda/devcontainer.json index 7ce4ea20d3..ebb101d9d5 100644 --- a/.devcontainer/cuda12.0-conda/devcontainer.json +++ b/.devcontainer/cuda12.0-conda/devcontainer.json @@ -5,12 +5,12 @@ "args": { "CUDA": "12.0", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:24.04-cpp-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.02-cpp-mambaforge-ubuntu22.04" } }, "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.4": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.2": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda12.0-pip/devcontainer.json 
b/.devcontainer/cuda12.0-pip/devcontainer.json index 4f97af830a..3e2efb2f37 100644 --- a/.devcontainer/cuda12.0-pip/devcontainer.json +++ b/.devcontainer/cuda12.0-pip/devcontainer.json @@ -5,13 +5,13 @@ "args": { "CUDA": "12.0", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.04-cpp-llvm16-cuda12.0-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.02-cpp-llvm16-cuda12.0-ubuntu22.04" } }, "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/ucx:24.4": {"version": "1.14.1"}, - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.4": {} + "ghcr.io/rapidsai/devcontainers/features/ucx:24.2": {"version": "1.14.1"}, + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.2": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/ucx", diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index bd8b13d21e..4c6e3d0ed4 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -37,7 +37,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -46,7 +46,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.02 with: build_type: ${{ inputs.build_type || 
'branch' }} branch: ${{ inputs.branch }} @@ -57,7 +57,7 @@ jobs: if: github.ref_type == 'branch' needs: python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.02 with: arch: "amd64" branch: ${{ inputs.branch }} @@ -69,7 +69,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build-pylibraft: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -79,7 +79,7 @@ jobs: wheel-publish-pylibraft: needs: wheel-build-pylibraft secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -89,7 +89,7 @@ jobs: wheel-build-raft-dask: needs: wheel-publish-pylibraft secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -99,7 +99,7 @@ jobs: wheel-publish-raft-dask: needs: wheel-build-raft-dask secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 82594909a9..fb12d7efae 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -25,29 +25,29 @@ jobs: - wheel-tests-raft-dask - devcontainer secrets: inherit - uses: 
rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.02 checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.02 with: enable_check_generated_files: false conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.02 with: build_type: pull-request node_type: cpu16 conda-cpp-tests: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.02 with: build_type: pull-request conda-cpp-checks: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.02 with: build_type: pull-request enable_check_symbols: true @@ -55,19 +55,19 @@ jobs: conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.02 with: build_type: pull-request conda-python-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.02 with: build_type: pull-request docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.02 
with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -77,34 +77,34 @@ jobs: wheel-build-pylibraft: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 with: build_type: pull-request script: ci/build_wheel_pylibraft.sh wheel-tests-pylibraft: needs: wheel-build-pylibraft secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 with: build_type: pull-request script: ci/test_wheel_pylibraft.sh wheel-build-raft-dask: needs: wheel-tests-pylibraft secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 with: build_type: pull-request script: "ci/build_wheel_raft_dask.sh" wheel-tests-raft-dask: needs: wheel-build-raft-dask secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 with: build_type: pull-request script: ci/test_wheel_raft_dask.sh devcontainer: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.02 with: build_command: | sccache -z; diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 2a557a8b84..2472021375 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-cpp-checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.02 with: build_type: nightly branch: ${{ 
inputs.branch }} @@ -26,7 +26,7 @@ jobs: symbol_exclusions: _ZN\d+raft_cutlass conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -34,7 +34,7 @@ jobs: sha: ${{ inputs.sha }} conda-python-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -42,7 +42,7 @@ jobs: sha: ${{ inputs.sha }} wheel-tests-pylibraft: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -51,7 +51,7 @@ jobs: script: ci/test_wheel_pylibraft.sh wheel-tests-raft-dask: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 with: build_type: nightly branch: ${{ inputs.branch }} diff --git a/README.md b/README.md index 55b8fca8f6..26ddc30ed4 100755 --- a/README.md +++ b/README.md @@ -287,7 +287,7 @@ You can also install the conda packages individually using the `mamba` command a mamba install -c rapidsai -c conda-forge -c nvidia libraft libraft-headers cuda-version=12.0 ``` -If installing the C++ APIs please see [using libraft](https://docs.rapids.ai/api/raft/nightly/using_libraft/) for more information on using the pre-compiled shared library. 
You can also refer to the [example C++ template project](https://github.com/rapidsai/raft/tree/branch-24.04/cpp/template) for a ready-to-go CMake configuration that you can drop into your project and build against installed RAFT development artifacts above. +If installing the C++ APIs please see [using libraft](https://docs.rapids.ai/api/raft/nightly/using_libraft/) for more information on using the pre-compiled shared library. You can also refer to the [example C++ template project](https://github.com/rapidsai/raft/tree/branch-24.02/cpp/template) for a ready-to-go CMake configuration that you can drop into your project and build against installed RAFT development artifacts above. ### Installing Python through Pip diff --git a/VERSION b/VERSION index 4a2fe8aa57..3c6c5e2b70 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -24.04.00 +24.02.00 diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 4c07683642..b7c8c7a3e0 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -28,7 +28,7 @@ rapids-mamba-retry install \ pylibraft \ raft-dask -export RAPIDS_VERSION_NUMBER="24.04" +export RAPIDS_VERSION_NUMBER="24.02" export RAPIDS_DOCS_DIR="$(mktemp -d)" rapids-logger "Build CPP docs" diff --git a/conda/environments/all_cuda-118_arch-aarch64.yaml b/conda/environments/all_cuda-118_arch-aarch64.yaml index 96c10a763a..ac076f5505 100644 --- a/conda/environments/all_cuda-118_arch-aarch64.yaml +++ b/conda/environments/all_cuda-118_arch-aarch64.yaml @@ -20,7 +20,7 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dask-cuda==24.4.* +- dask-cuda==24.2.* - doxygen>=1.8.20 - gcc_linux-aarch64=11.* - gmock>=1.13.0 @@ -46,9 +46,9 @@ dependencies: - pydata-sphinx-theme - pytest - pytest-cov -- rapids-dask-dependency==24.4.* +- rapids-dask-dependency==24.2.* - recommonmark -- rmm==24.4.* +- rmm==24.2.* - scikit-build-core>=0.7.0 - scikit-learn - scipy @@ -56,6 +56,6 @@ dependencies: - sphinx-markdown-tables - sysroot_linux-aarch64==2.17 - ucx-proc=*=gpu -- 
ucx-py==0.37.* +- ucx-py==0.36.* - ucx>=1.13.0 name: all_cuda-118_arch-aarch64 diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index aea5bd1961..b3ded51bb5 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -20,7 +20,7 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dask-cuda==24.4.* +- dask-cuda==24.2.* - doxygen>=1.8.20 - gcc_linux-64=11.* - gmock>=1.13.0 @@ -46,9 +46,9 @@ dependencies: - pydata-sphinx-theme - pytest - pytest-cov -- rapids-dask-dependency==24.4.* +- rapids-dask-dependency==24.2.* - recommonmark -- rmm==24.4.* +- rmm==24.2.* - scikit-build-core>=0.7.0 - scikit-learn - scipy @@ -56,6 +56,6 @@ dependencies: - sphinx-markdown-tables - sysroot_linux-64==2.17 - ucx-proc=*=gpu -- ucx-py==0.37.* +- ucx-py==0.36.* - ucx>=1.13.0 name: all_cuda-118_arch-x86_64 diff --git a/conda/environments/all_cuda-120_arch-aarch64.yaml b/conda/environments/all_cuda-120_arch-aarch64.yaml index 0ea4a979d6..c0eede1389 100644 --- a/conda/environments/all_cuda-120_arch-aarch64.yaml +++ b/conda/environments/all_cuda-120_arch-aarch64.yaml @@ -21,7 +21,7 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dask-cuda==24.4.* +- dask-cuda==24.2.* - doxygen>=1.8.20 - gcc_linux-aarch64=11.* - gmock>=1.13.0 @@ -42,9 +42,9 @@ dependencies: - pydata-sphinx-theme - pytest - pytest-cov -- rapids-dask-dependency==24.4.* +- rapids-dask-dependency==24.2.* - recommonmark -- rmm==24.4.* +- rmm==24.2.* - scikit-build-core>=0.7.0 - scikit-learn - scipy @@ -52,6 +52,6 @@ dependencies: - sphinx-markdown-tables - sysroot_linux-aarch64==2.17 - ucx-proc=*=gpu -- ucx-py==0.37.* +- ucx-py==0.36.* - ucx>=1.13.0 name: all_cuda-120_arch-aarch64 diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml index 48a463bc6f..cebaf96493 100644 --- 
a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-120_arch-x86_64.yaml @@ -21,7 +21,7 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dask-cuda==24.4.* +- dask-cuda==24.2.* - doxygen>=1.8.20 - gcc_linux-64=11.* - gmock>=1.13.0 @@ -42,9 +42,9 @@ dependencies: - pydata-sphinx-theme - pytest - pytest-cov -- rapids-dask-dependency==24.4.* +- rapids-dask-dependency==24.2.* - recommonmark -- rmm==24.4.* +- rmm==24.2.* - scikit-build-core>=0.7.0 - scikit-learn - scipy @@ -52,6 +52,6 @@ dependencies: - sphinx-markdown-tables - sysroot_linux-64==2.17 - ucx-proc=*=gpu -- ucx-py==0.37.* +- ucx-py==0.36.* - ucx>=1.13.0 name: all_cuda-120_arch-x86_64 diff --git a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml index 0e0385ceeb..87e3942e6a 100644 --- a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml @@ -38,7 +38,7 @@ dependencies: - openblas - pandas - pyyaml -- rmm==24.4.* +- rmm==24.2.* - scikit-build-core>=0.7.0 - sysroot_linux-aarch64==2.17 name: bench_ann_cuda-118_arch-aarch64 diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index dfe76a2948..a4ac253a85 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -38,7 +38,7 @@ dependencies: - openblas - pandas - pyyaml -- rmm==24.4.* +- rmm==24.2.* - scikit-build-core>=0.7.0 - sysroot_linux-64==2.17 name: bench_ann_cuda-118_arch-x86_64 diff --git a/conda/environments/bench_ann_cuda-120_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-120_arch-aarch64.yaml index 0a6567c646..9ef9799363 100644 --- a/conda/environments/bench_ann_cuda-120_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-120_arch-aarch64.yaml @@ -34,7 +34,7 @@ dependencies: - openblas - pandas - pyyaml -- rmm==24.4.* 
+- rmm==24.2.* - scikit-build-core>=0.7.0 - sysroot_linux-aarch64==2.17 name: bench_ann_cuda-120_arch-aarch64 diff --git a/conda/environments/bench_ann_cuda-120_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-120_arch-x86_64.yaml index a89d5317b6..5fa09096ba 100644 --- a/conda/environments/bench_ann_cuda-120_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-120_arch-x86_64.yaml @@ -34,7 +34,7 @@ dependencies: - openblas - pandas - pyyaml -- rmm==24.4.* +- rmm==24.2.* - scikit-build-core>=0.7.0 - sysroot_linux-64==2.17 name: bench_ann_cuda-120_arch-x86_64 diff --git a/conda/recipes/raft-dask/conda_build_config.yaml b/conda/recipes/raft-dask/conda_build_config.yaml index 483e53026a..c12c35af3b 100644 --- a/conda/recipes/raft-dask/conda_build_config.yaml +++ b/conda/recipes/raft-dask/conda_build_config.yaml @@ -17,7 +17,7 @@ ucx_version: - ">=1.14.1,<1.16.0" ucx_py_version: - - "0.37.*" + - "0.36.*" cmake_version: - ">=3.26.4" diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index ac75d7d83b..0d71026e17 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -10,8 +10,8 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express # or implied. See the License for the specific language governing permissions and limitations under # the License. -set(RAPIDS_VERSION "24.04") -set(RAFT_VERSION "24.04.00") +set(RAPIDS_VERSION "24.02") +set(RAFT_VERSION "24.02.00") cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) include(../fetch_rapids.cmake) diff --git a/cpp/doxygen/Doxyfile b/cpp/doxygen/Doxyfile index 779472d880..3eb0763eaf 100644 --- a/cpp/doxygen/Doxyfile +++ b/cpp/doxygen/Doxyfile @@ -38,7 +38,7 @@ PROJECT_NAME = "RAFT C++ API" # could be handy for archiving the generated documentation or if some version # control system is used. 
-PROJECT_NUMBER = "24.04" +PROJECT_NUMBER = "24.02" # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/cpp/template/cmake/thirdparty/fetch_rapids.cmake b/cpp/template/cmake/thirdparty/fetch_rapids.cmake index aadfdb0028..15b6c43a6f 100644 --- a/cpp/template/cmake/thirdparty/fetch_rapids.cmake +++ b/cpp/template/cmake/thirdparty/fetch_rapids.cmake @@ -12,7 +12,7 @@ # the License. # Use this variable to update RAPIDS and RAFT versions -set(RAPIDS_VERSION "24.04") +set(RAPIDS_VERSION "24.02") if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake) file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-${RAPIDS_VERSION}/RAPIDS.cmake diff --git a/dependencies.yaml b/dependencies.yaml index 37404a1e37..0e4d6d4693 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -180,7 +180,7 @@ dependencies: common: - output_types: [conda] packages: - - &rmm_conda rmm==24.4.* + - &rmm_conda rmm==24.2.* - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -201,12 +201,12 @@ dependencies: matrices: - matrix: {cuda: "12.2"} packages: &build_pylibraft_packages_cu12 - - &rmm_cu12 rmm-cu12==24.4.* + - &rmm_cu12 rmm-cu12==24.2.* - {matrix: {cuda: "12.1"}, packages: *build_pylibraft_packages_cu12} - {matrix: {cuda: "12.0"}, packages: *build_pylibraft_packages_cu12} - matrix: {cuda: "11.8"} packages: &build_pylibraft_packages_cu11 - - &rmm_cu11 rmm-cu11==24.4.* + - &rmm_cu11 rmm-cu11==24.2.* - {matrix: {cuda: "11.5"}, packages: *build_pylibraft_packages_cu11} - {matrix: {cuda: "11.4"}, packages: *build_pylibraft_packages_cu11} - {matrix: {cuda: "11.2"}, packages: *build_pylibraft_packages_cu11} @@ -459,20 +459,20 @@ dependencies: common: - output_types: [conda, pyproject] packages: - - dask-cuda==24.4.* + - dask-cuda==24.2.* - joblib>=0.11 - numba>=0.57 - *numpy - - 
rapids-dask-dependency==24.4.* - - ucx-py==0.37.* + - rapids-dask-dependency==24.2.* + - ucx-py==0.36.* - output_types: conda packages: - ucx>=1.13.0 - ucx-proc=*=gpu - - &ucx_py_conda ucx-py==0.37.* + - &ucx_py_conda ucx-py==0.36.* - output_types: pyproject packages: - - &pylibraft_conda pylibraft==24.4.* + - &pylibraft_conda pylibraft==24.2.* - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -484,14 +484,14 @@ dependencies: matrices: - matrix: {cuda: "12.2"} packages: &run_raft_dask_packages_cu12 - - &pylibraft_cu12 pylibraft-cu12==24.4.* - - &ucx_py_cu12 ucx-py-cu12==0.37.* + - &pylibraft_cu12 pylibraft-cu12==24.2.* + - &ucx_py_cu12 ucx-py-cu12==0.36.* - {matrix: {cuda: "12.1"}, packages: *run_raft_dask_packages_cu12} - {matrix: {cuda: "12.0"}, packages: *run_raft_dask_packages_cu12} - matrix: {cuda: "11.8"} packages: &run_raft_dask_packages_cu11 - - &pylibraft_cu11 pylibraft-cu11==24.4.* - - &ucx_py_cu11 ucx-py-cu11==0.37.* + - &pylibraft_cu11 pylibraft-cu11==24.2.* + - &ucx_py_cu11 ucx-py-cu11==0.36.* - {matrix: {cuda: "11.5"}, packages: *run_raft_dask_packages_cu11} - {matrix: {cuda: "11.4"}, packages: *run_raft_dask_packages_cu11} - {matrix: {cuda: "11.2"}, packages: *run_raft_dask_packages_cu11} diff --git a/docs/source/build.md b/docs/source/build.md index e76512d16f..ae7734d0ed 100644 --- a/docs/source/build.md +++ b/docs/source/build.md @@ -56,7 +56,7 @@ You can also install the conda packages individually using the `mamba` command a mamba install -c rapidsai -c conda-forge -c nvidia libraft libraft-headers cuda-version=12.0 ``` -If installing the C++ APIs Please see [using libraft](https://docs.rapids.ai/api/raft/nightly/using_libraft/) for more information on using the pre-compiled shared library. 
You can also refer to the [example C++ template project](https://github.com/rapidsai/raft/tree/branch-24.04/cpp/template) for a ready-to-go CMake configuration that you can drop into your project and build against installed RAFT development artifacts above. +If installing the C++ APIs Please see [using libraft](https://docs.rapids.ai/api/raft/nightly/using_libraft/) for more information on using the pre-compiled shared library. You can also refer to the [example C++ template project](https://github.com/rapidsai/raft/tree/branch-24.02/cpp/template) for a ready-to-go CMake configuration that you can drop into your project and build against installed RAFT development artifacts above. ## Installing Python through Pip diff --git a/docs/source/conf.py b/docs/source/conf.py index 07dd4825fa..2a2c700926 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -67,9 +67,9 @@ # built documents. # # The short X.Y version. -version = '24.04' +version = '24.02' # The full version, including alpha/beta/rc tags. -release = '24.04.00' +release = '24.02.00' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/source/developer_guide.md b/docs/source/developer_guide.md index d29130add0..c5bcd03f69 100644 --- a/docs/source/developer_guide.md +++ b/docs/source/developer_guide.md @@ -187,7 +187,7 @@ RAFT relies on `clang-format` to enforce code style across all C++ and CUDA sour 1. Do not split empty functions/records/namespaces. 2. Two-space indentation everywhere, including the line continuations. 3. Disable reflowing of comments. - The reasons behind these deviations from the Google style guide are given in comments [here](https://github.com/rapidsai/raft/blob/branch-24.04/cpp/.clang-format). + The reasons behind these deviations from the Google style guide are given in comments [here](https://github.com/rapidsai/raft/blob/branch-24.02/cpp/.clang-format). 
[`doxygen`](https://doxygen.nl/) is used as documentation generator and also as a documentation linter. In order to run doxygen as a linter on C++/CUDA code, run @@ -205,7 +205,7 @@ you can run `codespell -i 3 -w .` from the repository root directory. This will bring up an interactive prompt to select which spelling fixes to apply. ### #include style -[include_checker.py](https://github.com/rapidsai/raft/blob/branch-24.04/cpp/scripts/include_checker.py) is used to enforce the include style as follows: +[include_checker.py](https://github.com/rapidsai/raft/blob/branch-24.02/cpp/scripts/include_checker.py) is used to enforce the include style as follows: 1. `#include "..."` should be used for referencing local files only. It is acceptable to be used for referencing files in a sub-folder/parent-folder of the same algorithm, but should never be used to include files in other algorithms or between algorithms and the primitives or other dependencies. 2. `#include <...>` should be used for referencing everything else @@ -215,7 +215,7 @@ python ./cpp/scripts/include_checker.py --inplace [cpp/include cpp/test ... list ``` ### Copyright header -[copyright.py](https://github.com/rapidsai/raft/blob/branch-24.04/ci/checks/copyright.py) checks the Copyright header for all git-modified files +[copyright.py](https://github.com/rapidsai/raft/blob/branch-24.02/ci/checks/copyright.py) checks the Copyright header for all git-modified files Manually, you can run the following to bulk-fix the header if only the years need to be updated: ```bash @@ -229,7 +229,7 @@ Call CUDA APIs via the provided helper macros `RAFT_CUDA_TRY`, `RAFT_CUBLAS_TRY` ## Logging ### Introduction -Anything and everything about logging is defined inside [logger.hpp](https://github.com/rapidsai/raft/blob/branch-24.04/cpp/include/raft/core/logger.hpp). It uses [spdlog](https://github.com/gabime/spdlog) underneath, but this information is transparent to all. 
+Anything and everything about logging is defined inside [logger.hpp](https://github.com/rapidsai/raft/blob/branch-24.02/cpp/include/raft/core/logger.hpp). It uses [spdlog](https://github.com/gabime/spdlog) underneath, but this information is transparent to all. ### Usage ```cpp diff --git a/docs/source/raft_ann_benchmarks.md b/docs/source/raft_ann_benchmarks.md index 68fe80f9ce..3789c3f01f 100644 --- a/docs/source/raft_ann_benchmarks.md +++ b/docs/source/raft_ann_benchmarks.md @@ -62,7 +62,7 @@ Nightly images are located in [dockerhub](https://hub.docker.com/r/rapidsai/raft - The following command pulls the nightly container for python version 10, cuda version 12, and RAFT version 23.10: ```bash -docker pull rapidsai/raft-ann-bench:24.04a-cuda12.0-py3.10 #substitute raft-ann-bench for the exact desired container. +docker pull rapidsai/raft-ann-bench:24.02a-cuda12.0-py3.10 #substitute raft-ann-bench for the exact desired container. ``` The CUDA and python versions can be changed for the supported values: @@ -83,7 +83,7 @@ You can see the exact versions as well in the dockerhub site: [//]: # () [//]: # (```bash) -[//]: # (docker pull nvcr.io/nvidia/rapidsai/raft-ann-bench:24.04-cuda11.8-py3.10 #substitute raft-ann-bench for the exact desired container.) +[//]: # (docker pull nvcr.io/nvidia/rapidsai/raft-ann-bench:24.02-cuda11.8-py3.10 #substitute raft-ann-bench for the exact desired container.) 
[//]: # (```) @@ -344,7 +344,7 @@ For GPU-enabled systems, the `DATA_FOLDER` variable should be a local folder whe export DATA_FOLDER=path/to/store/datasets/and/results docker run --gpus all --rm -it -u $(id -u) \ -v $DATA_FOLDER:/data/benchmarks \ - rapidsai/raft-ann-bench:24.04a-cuda11.8-py3.10 \ + rapidsai/raft-ann-bench:24.02a-cuda11.8-py3.10 \ "--dataset deep-image-96-angular" \ "--normalize" \ "--algorithms raft_cagra,raft_ivf_pq --batch-size 10 -k 10" \ @@ -355,7 +355,7 @@ Usage of the above command is as follows: | Argument | Description | |-----------------------------------------------------------|----------------------------------------------------------------------------------------------------| -| `rapidsai/raft-ann-bench:24.04a-cuda11.8-py3.10` | Image to use. Can be either `raft-ann-bench` or `raft-ann-bench-datasets` | +| `rapidsai/raft-ann-bench:24.02a-cuda11.8-py3.10` | Image to use. Can be either `raft-ann-bench` or `raft-ann-bench-datasets` | | `"--dataset deep-image-96-angular"` | Dataset name | | `"--normalize"` | Whether to normalize the dataset | | `"--algorithms raft_cagra,hnswlib --batch-size 10 -k 10"` | Arguments passed to the `run` script, such as the algorithms to benchmark, the batch size, and `k` | @@ -372,7 +372,7 @@ The container arguments in the above section also be used for the CPU-only conta export DATA_FOLDER=path/to/store/datasets/and/results docker run --rm -it -u $(id -u) \ -v $DATA_FOLDER:/data/benchmarks \ - rapidsai/raft-ann-bench-cpu:24.04a-py3.10 \ + rapidsai/raft-ann-bench-cpu:24.02a-py3.10 \ "--dataset deep-image-96-angular" \ "--normalize" \ "--algorithms hnswlib --batch-size 10 -k 10" \ @@ -389,7 +389,7 @@ docker run --gpus all --rm -it -u $(id -u) \ --entrypoint /bin/bash \ --workdir /data/benchmarks \ -v $DATA_FOLDER:/data/benchmarks \ - rapidsai/raft-ann-bench:24.04a-cuda11.8-py3.10 + rapidsai/raft-ann-bench:24.02a-cuda11.8-py3.10 ``` This will drop you into a command line in the container, with the 
`raft-ann-bench` python package ready to use, as described in the [Running the benchmarks](#running-the-benchmarks) section above: diff --git a/fetch_rapids.cmake b/fetch_rapids.cmake index 1dca136c97..ca871c5759 100644 --- a/fetch_rapids.cmake +++ b/fetch_rapids.cmake @@ -12,7 +12,7 @@ # the License. # ============================================================================= if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake) - file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-24.04/RAPIDS.cmake + file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-24.02/RAPIDS.cmake ${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake ) endif() diff --git a/python/pylibraft/CMakeLists.txt b/python/pylibraft/CMakeLists.txt index c17243728e..f7e114ae66 100644 --- a/python/pylibraft/CMakeLists.txt +++ b/python/pylibraft/CMakeLists.txt @@ -16,7 +16,7 @@ cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) include(../../fetch_rapids.cmake) -set(pylibraft_version 24.04.00) +set(pylibraft_version 24.02.00) # We always need CUDA for pylibraft because the raft dependency brings in a header-only cuco # dependency that enables CUDA unconditionally. diff --git a/python/pylibraft/pyproject.toml b/python/pylibraft/pyproject.toml index ba45a6d33d..5070d6cf6f 100644 --- a/python/pylibraft/pyproject.toml +++ b/python/pylibraft/pyproject.toml @@ -19,7 +19,7 @@ requires = [ "cuda-python>=11.7.1,<12.0a0", "cython>=3.0.0", "ninja", - "rmm==24.4.*", + "rmm==24.2.*", "scikit-build-core[pyproject]>=0.7.0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. build-backend = "scikit_build_core.build" @@ -37,7 +37,7 @@ requires-python = ">=3.9" dependencies = [ "cuda-python>=11.7.1,<12.0a0", "numpy>=1.21", - "rmm==24.4.*", + "rmm==24.2.*", ] # This list was generated by `rapids-dependency-file-generator`. 
To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ "Intended Audience :: Developers", diff --git a/python/raft-ann-bench/pyproject.toml b/python/raft-ann-bench/pyproject.toml index 4a185b22ca..e3ae36ef62 100644 --- a/python/raft-ann-bench/pyproject.toml +++ b/python/raft-ann-bench/pyproject.toml @@ -9,7 +9,7 @@ requires = [ [project] name = "raft-ann-bench" -version = "24.04.00" +version = "24.02.00" description = "RAFT ANN benchmarks" authors = [ { name = "NVIDIA Corporation" }, diff --git a/python/raft-dask/CMakeLists.txt b/python/raft-dask/CMakeLists.txt index ff441e343e..1d27e49583 100644 --- a/python/raft-dask/CMakeLists.txt +++ b/python/raft-dask/CMakeLists.txt @@ -14,7 +14,7 @@ cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) -set(raft_dask_version 24.04.00) +set(raft_dask_version 24.02.00) include(../../fetch_rapids.cmake) include(rapids-cuda) diff --git a/python/raft-dask/pyproject.toml b/python/raft-dask/pyproject.toml index 33e0c15684..37d94be7f9 100644 --- a/python/raft-dask/pyproject.toml +++ b/python/raft-dask/pyproject.toml @@ -33,13 +33,13 @@ authors = [ license = { text = "Apache 2.0" } requires-python = ">=3.9" dependencies = [ - "dask-cuda==24.4.*", + "dask-cuda==24.2.*", "joblib>=0.11", "numba>=0.57", "numpy>=1.21", - "pylibraft==24.4.*", - "rapids-dask-dependency==24.4.*", - "ucx-py==0.37.*", + "pylibraft==24.2.*", + "rapids-dask-dependency==24.2.*", + "ucx-py==0.36.*", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ "Intended Audience :: Developers", From a70a745dbe3430b3d7f63823b65b0157a591eeff Mon Sep 17 00:00:00 2001 From: Rui Lan Date: Tue, 23 Jan 2024 17:34:25 -0800 Subject: [PATCH 09/14] Change to max point based subsampling. 
--- .../src/raft/raft_ann_bench_param_parser.h | 3 +++ .../raft/neighbors/detail/ivf_pq_build.cuh | 24 ++++++++++++------- cpp/include/raft/neighbors/ivf_pq_types.hpp | 8 +++++++ .../neighbors/ivf_pq/cpp/c_ivf_pq.pxd | 1 + .../pylibraft/neighbors/ivf_pq/ivf_pq.pyx | 16 ++++++++++++- 5 files changed, 43 insertions(+), 9 deletions(-) diff --git a/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h b/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h index 2a021a8a12..6324e11200 100644 --- a/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h +++ b/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h @@ -87,6 +87,9 @@ void parse_build_param(const nlohmann::json& conf, "', should be either 'cluster' or 'subspace'"); } } + if (conf.contains("max_train_points_per_pq_code")) { + param.max_train_points_per_pq_code = 1.0 / (double)conf.at("max_train_points_per_pq_code"); + } } template diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh index cc94511fe7..2bf588df8c 100644 --- a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh @@ -353,14 +353,18 @@ void train_per_subset(raft::resources const& handle, const float* trainset, // [n_rows, dim] const uint32_t* labels, // [n_rows] uint32_t kmeans_n_iters, + uint32_t max_train_points_per_pq_code, rmm::mr::device_memory_resource* managed_memory) { auto stream = resource::get_cuda_stream(handle); auto device_memory = resource::get_workspace_resource(handle); rmm::device_uvector pq_centers_tmp(index.pq_centers().size(), stream, device_memory); - rmm::device_uvector sub_trainset(n_rows * size_t(index.pq_len()), stream, device_memory); - rmm::device_uvector sub_labels(n_rows, stream, device_memory); + // Subsampling the train set for codebook generation based on max_train_points_per_pq_code. 
+ size_t big_enough = max_train_points_per_pq_code * size_t(index.pq_book_size()); + auto pq_n_rows = uint32_t(std::min(big_enough, n_rows)); + rmm::device_uvector sub_trainset(pq_n_rows * size_t(index.pq_len()), stream, device_memory); + rmm::device_uvector sub_labels(pq_n_rows, stream, device_memory); rmm::device_uvector pq_cluster_sizes(index.pq_book_size(), stream, device_memory); @@ -371,7 +375,7 @@ void train_per_subset(raft::resources const& handle, // Get the rotated cluster centers for each training vector. // This will be subtracted from the input vectors afterwards. utils::copy_selected( - n_rows, + pq_n_rows, index.pq_len(), index.centers_rot().data_handle() + index.pq_len() * j, labels, @@ -387,7 +391,7 @@ void train_per_subset(raft::resources const& handle, true, false, index.pq_len(), - n_rows, + pq_n_rows, index.dim(), &alpha, index.rotation_matrix().data_handle() + index.dim() * index.pq_len() * j, @@ -401,12 +405,12 @@ void train_per_subset(raft::resources const& handle, // train PQ codebook for this subspace auto sub_trainset_view = - raft::make_device_matrix_view(sub_trainset.data(), n_rows, index.pq_len()); + raft::make_device_matrix_view(sub_trainset.data(), pq_n_rows, index.pq_len()); auto centers_tmp_view = raft::make_device_matrix_view( pq_centers_tmp.data() + index.pq_book_size() * index.pq_len() * j, index.pq_book_size(), index.pq_len()); - auto sub_labels_view = raft::make_device_vector_view(sub_labels.data(), n_rows); + auto sub_labels_view = raft::make_device_vector_view(sub_labels.data(), pq_n_rows); auto cluster_sizes_view = raft::make_device_vector_view(pq_cluster_sizes.data(), index.pq_book_size()); raft::cluster::kmeans_balanced_params kmeans_params; @@ -430,6 +434,7 @@ void train_per_cluster(raft::resources const& handle, const float* trainset, // [n_rows, dim] const uint32_t* labels, // [n_rows] uint32_t kmeans_n_iters, + uint32_t max_train_points_per_pq_code, rmm::mr::device_memory_resource* managed_memory) { auto stream = 
resource::get_cuda_stream(handle); @@ -477,9 +482,10 @@ void train_per_cluster(raft::resources const& handle, indices + cluster_offsets[l], device_memory); - // limit the cluster size to bound the training time. + // limit the cluster size to bound the training time based on max_train_points_per_pq_code + // If pq_book_size is less than pq_dim, use max_train_points_per_pq_code per pq_dim instead // [sic] we interpret the data as pq_len-dimensional - size_t big_enough = 256ul * std::max(index.pq_book_size(), index.pq_dim()); + size_t big_enough = max_train_points_per_pq_code * std::max(index.pq_book_size(), index.pq_dim()); size_t available_rows = size_t(cluster_size) * size_t(index.pq_dim()); auto pq_n_rows = uint32_t(std::min(big_enough, available_rows)); // train PQ codebook for this cluster @@ -1788,6 +1794,7 @@ auto build(raft::resources const& handle, trainset.data_handle(), labels.data(), params.kmeans_n_iters, + params.max_train_points_per_pq_code, &managed_mr); break; case codebook_gen::PER_CLUSTER: @@ -1797,6 +1804,7 @@ auto build(raft::resources const& handle, trainset.data_handle(), labels.data(), params.kmeans_n_iters, + params.max_train_points_per_pq_code, &managed_mr); break; default: RAFT_FAIL("Unreachable code"); diff --git a/cpp/include/raft/neighbors/ivf_pq_types.hpp b/cpp/include/raft/neighbors/ivf_pq_types.hpp index 45ab18c84f..c3de6020c5 100644 --- a/cpp/include/raft/neighbors/ivf_pq_types.hpp +++ b/cpp/include/raft/neighbors/ivf_pq_types.hpp @@ -105,6 +105,14 @@ struct index_params : ann::index_params { * flag to `true` if you prefer to use as little GPU memory for the database as possible. */ bool conservative_memory_allocation = false; + /** + * The max number of data points to use per PQ code during PQ codebook training. Using more data + * points per PQ code may increase the quality of PQ codebook but may also increase the build time. 
+ * The parameter is applied to both PQ codebook generation methods, i.e., PER_SUBSPACE and + * PER_CLUSTER. For PER_CLUSTER, max_train_points_per_pq_code per pq_dim numbers of points would be + * used if pq_dim is more than pq_book_size. + */ + uint32_t max_train_points_per_pq_code = 256; }; struct search_params : ann::search_params { diff --git a/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd b/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd index 531c2428e9..8798b8bf60 100644 --- a/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd +++ b/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd @@ -78,6 +78,7 @@ cdef extern from "raft/neighbors/ivf_pq_types.hpp" \ codebook_gen codebook_kind bool force_random_rotation bool conservative_memory_allocation + uint32_t max_train_points_per_pq_code cdef cppclass index[IdxT](ann_index): index(const device_resources& handle, diff --git a/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx b/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx index 0c1bbf6b9c..4ee1785ca2 100644 --- a/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx +++ b/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx @@ -156,6 +156,14 @@ cdef class IndexParams: repeated calls to `extend` (extending the database). To disable this behavior and use as little GPU memory for the database as possible, set this flat to `True`. + max_train_points_per_pq_code : int, default = 256 + The max number of data points to use per PQ code during PQ codebook + training. Using more data points per PQ code may increase the + quality of PQ codebook but may also increase the build time. The + parameter is applied to both PQ codebook generation methods, i.e., + PER_SUBSPACE and PER_CLUSTER. For PER_CLUSTER, + max_train_points_per_pq_code per pq_dim numbers of points would be + used if pq_dim is more than pq_book_size. 
""" def __init__(self, *, n_lists=1024, @@ -167,7 +175,8 @@ cdef class IndexParams: codebook_kind="subspace", force_random_rotation=False, add_data_on_build=True, - conservative_memory_allocation=False): + conservative_memory_allocation=False, + max_train_points_per_pq_code=256): self.params.n_lists = n_lists self.params.metric = _get_metric(metric) self.params.metric_arg = 0 @@ -185,6 +194,8 @@ cdef class IndexParams: self.params.add_data_on_build = add_data_on_build self.params.conservative_memory_allocation = \ conservative_memory_allocation + self.params.max_train_points_per_pq_code = \ + max_train_points_per_pq_code @property def n_lists(self): @@ -226,6 +237,9 @@ cdef class IndexParams: def conservative_memory_allocation(self): return self.params.conservative_memory_allocation + @property + def max_train_points_per_pq_code(self): + return self.params.max_train_points_per_pq_code cdef class Index: # We store a pointer to the index because it dose not have a trivial From f480c13bf4b23d6ed616791e41409b2d1a97b894 Mon Sep 17 00:00:00 2001 From: abc99lr Date: Tue, 23 Jan 2024 21:58:57 -0800 Subject: [PATCH 10/14] Add max_train_points_per_pq_code row in benchmark tuning guide. --- docs/source/ann_benchmarks_param_tuning.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/ann_benchmarks_param_tuning.md b/docs/source/ann_benchmarks_param_tuning.md index afb4ed18ea..2620e67761 100644 --- a/docs/source/ann_benchmarks_param_tuning.md +++ b/docs/source/ann_benchmarks_param_tuning.md @@ -38,13 +38,13 @@ IVF-pq is an inverted-file index, which partitions the vectors into a series of | `pq_bits` | `build` | N | Positive Integer. [4-8] | 8 | Bit length of the vector element after quantization. | | `codebook_kind` | `build` | N | ["cluster", "subspace"] | "subspace" | Type of codebook. 
See the [API docs](https://docs.rapids.ai/api/raft/nightly/cpp_api/neighbors_ivf_pq/#_CPPv412codebook_gen) for more detail |
| `dataset_memory_type` | `build` | N | ["device", "host", "mmap"] | "host" | What memory type should the dataset reside? |
+| `max_train_points_per_pq_code` | `build` | N | Positive Integer >=1 | 256 | Max number of data points per PQ code used for PQ codebook creation. Depending on the input dataset size, fewer data points than the user specifies may be used. |
| `query_memory_type` | `search` | N | ["device", "host", "mmap"] | "device | What memory type should the queries reside? |
| `nprobe` | `search` | Y | Positive Integer >0 | | The closest number of clusters to search for each query vector. Larger values will improve recall but will search more points in the index. |
| `internalDistanceDtype` | `search` | N | [`float`, `half`] | `half` | The precision to use for the distance computations. Lower precision can increase performance at the cost of accuracy. |
| `smemLutDtype` | `search` | N | [`float`, `half`, `fp8`] | `half` | The precision to use for the lookup table in shared memory. Lower precision can increase performance at the cost of accuracy. |
| `refine_ratio` | `search` | N| Positive Number >=1 | 1 | `refine_ratio * k` nearest neighbors are queried from the index initially and an additional refinement step improves recall by selecting only the best `k` neighbors. |
-

### `raft_cagra`

CAGRA uses a graph-based index, which creates an intermediate, approximate kNN graph using IVF-PQ and then further refining and optimizing to create a final kNN graph. This kNN graph is used by CAGRA as an index for search.

From c2b271599cdd22b4aebe7f29a032f417a4df54c6 Mon Sep 17 00:00:00 2001
From: abc99lr
Date: Wed, 24 Jan 2024 11:18:06 -0800
Subject: [PATCH 11/14] Address comments.
--- cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h | 2 +- cpp/include/raft/neighbors/ivf_pq_types.hpp | 4 ++-- docs/source/ann_benchmarks_param_tuning.md | 1 + python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx | 6 +++--- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h b/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h index 6324e11200..feef5ee995 100644 --- a/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h +++ b/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h @@ -88,7 +88,7 @@ void parse_build_param(const nlohmann::json& conf, } } if (conf.contains("max_train_points_per_pq_code")) { - param.max_train_points_per_pq_code = 1.0 / (double)conf.at("max_train_points_per_pq_code"); + param.max_train_points_per_pq_code = conf.at("max_train_points_per_pq_code"); } } diff --git a/cpp/include/raft/neighbors/ivf_pq_types.hpp b/cpp/include/raft/neighbors/ivf_pq_types.hpp index c3de6020c5..35fb5fac69 100644 --- a/cpp/include/raft/neighbors/ivf_pq_types.hpp +++ b/cpp/include/raft/neighbors/ivf_pq_types.hpp @@ -109,8 +109,8 @@ struct index_params : ann::index_params { * The max number of data points to use per PQ code during PQ codebook training. Using more data * points per PQ code may increase the quality of PQ codebook but may also increase the build time. * The parameter is applied to both PQ codebook generation methods, i.e., PER_SUBSPACE and - * PER_CLUSTER. For PER_CLUSTER, max_train_points_per_pq_code per pq_dim numbers of points would be - * used if pq_dim is more than pq_book_size. + * PER_CLUSTER. In both cases, we will use `pq_book_size * max_train_points_per_pq_code` training + * points to train each codebook. 
*/ uint32_t max_train_points_per_pq_code = 256; }; diff --git a/docs/source/ann_benchmarks_param_tuning.md b/docs/source/ann_benchmarks_param_tuning.md index 2620e67761..e003aa879c 100644 --- a/docs/source/ann_benchmarks_param_tuning.md +++ b/docs/source/ann_benchmarks_param_tuning.md @@ -45,6 +45,7 @@ IVF-pq is an inverted-file index, which partitions the vectors into a series of | `smemLutDtype` | `search` | N | [`float`, `half`, `fp8`] | `half` | The precision to use for the lookup table in shared memory. Lower precision can increase performance at the cost of accuracy. | | `refine_ratio` | `search` | N| Positive Number >=1 | 1 | `refine_ratio * k` nearest neighbors are queried from the index initially and an additional refinement step improves recall by selecting only the best `k` neighbors. | + ### `raft_cagra` CAGRA uses a graph-based index, which creates an intermediate, approximate kNN graph using IVF-PQ and then further refining and optimizing to create a final kNN graph. This kNN graph is used by CAGRA as an index for search. diff --git a/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx b/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx index 4ee1785ca2..eb9ecb1de2 100644 --- a/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx +++ b/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx @@ -161,9 +161,9 @@ cdef class IndexParams: training. Using more data points per PQ code may increase the quality of PQ codebook but may also increase the build time. The parameter is applied to both PQ codebook generation methods, i.e., - PER_SUBSPACE and PER_CLUSTER. For PER_CLUSTER, - max_train_points_per_pq_code per pq_dim numbers of points would be - used if pq_dim is more than pq_book_size. + PER_SUBSPACE and PER_CLUSTER. In both cases, we will use + pq_book_size * max_train_points_per_pq_code training points to + train each codebook. 
""" def __init__(self, *, n_lists=1024, From aa1a3e5a72cc531d32effd80d8e1dda7cbc0b645 Mon Sep 17 00:00:00 2001 From: abc99lr Date: Wed, 24 Jan 2024 13:15:13 -0800 Subject: [PATCH 12/14] Run format checker. From cc88715ed62b4bb7ba3c17bbd81b9c4f5e4034d4 Mon Sep 17 00:00:00 2001 From: abc99lr Date: Wed, 24 Jan 2024 14:53:16 -0800 Subject: [PATCH 13/14] Fix formatting issue. --- cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h | 2 +- cpp/include/raft/neighbors/ivf_pq_types.hpp | 2 +- python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd | 2 +- python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h b/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h index feef5ee995..ae40deb50c 100644 --- a/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h +++ b/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/neighbors/ivf_pq_types.hpp b/cpp/include/raft/neighbors/ivf_pq_types.hpp index 35fb5fac69..c8d1d18d8b 100644 --- a/cpp/include/raft/neighbors/ivf_pq_types.hpp +++ b/cpp/include/raft/neighbors/ivf_pq_types.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd b/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd index 8798b8bf60..930c3245f1 100644 --- a/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd +++ b/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd @@ -1,5 +1,5 @@ # -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx b/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx index eb9ecb1de2..7081b65ce3 100644 --- a/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx +++ b/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx @@ -1,5 +1,5 @@ # -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 67205f92d57aaae6ecb2f6cd56c18ae460cd71c9 Mon Sep 17 00:00:00 2001 From: abc99lr Date: Wed, 24 Jan 2024 16:32:38 -0800 Subject: [PATCH 14/14] More format changes. --- .../raft/neighbors/detail/ivf_pq_build.cuh | 17 ++++++++++------- cpp/include/raft/neighbors/ivf_pq_types.hpp | 4 ++-- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh index 2bf588df8c..0ef6cb13fb 100644 --- a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh @@ -361,9 +361,10 @@ void train_per_subset(raft::resources const& handle, rmm::device_uvector pq_centers_tmp(index.pq_centers().size(), stream, device_memory); // Subsampling the train set for codebook generation based on max_train_points_per_pq_code. 
- size_t big_enough = max_train_points_per_pq_code * size_t(index.pq_book_size()); - auto pq_n_rows = uint32_t(std::min(big_enough, n_rows)); - rmm::device_uvector sub_trainset(pq_n_rows * size_t(index.pq_len()), stream, device_memory); + size_t big_enough = max_train_points_per_pq_code * size_t(index.pq_book_size()); + auto pq_n_rows = uint32_t(std::min(big_enough, n_rows)); + rmm::device_uvector sub_trainset( + pq_n_rows * size_t(index.pq_len()), stream, device_memory); rmm::device_uvector sub_labels(pq_n_rows, stream, device_memory); rmm::device_uvector pq_cluster_sizes(index.pq_book_size(), stream, device_memory); @@ -404,13 +405,14 @@ void train_per_subset(raft::resources const& handle, stream); // train PQ codebook for this subspace - auto sub_trainset_view = - raft::make_device_matrix_view(sub_trainset.data(), pq_n_rows, index.pq_len()); + auto sub_trainset_view = raft::make_device_matrix_view( + sub_trainset.data(), pq_n_rows, index.pq_len()); auto centers_tmp_view = raft::make_device_matrix_view( pq_centers_tmp.data() + index.pq_book_size() * index.pq_len() * j, index.pq_book_size(), index.pq_len()); - auto sub_labels_view = raft::make_device_vector_view(sub_labels.data(), pq_n_rows); + auto sub_labels_view = + raft::make_device_vector_view(sub_labels.data(), pq_n_rows); auto cluster_sizes_view = raft::make_device_vector_view(pq_cluster_sizes.data(), index.pq_book_size()); raft::cluster::kmeans_balanced_params kmeans_params; @@ -485,7 +487,8 @@ void train_per_cluster(raft::resources const& handle, // limit the cluster size to bound the training time based on max_train_points_per_pq_code // If pq_book_size is less than pq_dim, use max_train_points_per_pq_code per pq_dim instead // [sic] we interpret the data as pq_len-dimensional - size_t big_enough = max_train_points_per_pq_code * std::max(index.pq_book_size(), index.pq_dim()); + size_t big_enough = + max_train_points_per_pq_code * std::max(index.pq_book_size(), index.pq_dim()); size_t available_rows = 
size_t(cluster_size) * size_t(index.pq_dim()); auto pq_n_rows = uint32_t(std::min(big_enough, available_rows)); // train PQ codebook for this cluster diff --git a/cpp/include/raft/neighbors/ivf_pq_types.hpp b/cpp/include/raft/neighbors/ivf_pq_types.hpp index c8d1d18d8b..04c2354fbc 100644 --- a/cpp/include/raft/neighbors/ivf_pq_types.hpp +++ b/cpp/include/raft/neighbors/ivf_pq_types.hpp @@ -107,8 +107,8 @@ struct index_params : ann::index_params { bool conservative_memory_allocation = false; /** * The max number of data points to use per PQ code during PQ codebook training. Using more data - * points per PQ code may increase the quality of PQ codebook but may also increase the build time. - * The parameter is applied to both PQ codebook generation methods, i.e., PER_SUBSPACE and + * points per PQ code may increase the quality of PQ codebook but may also increase the build + * time. The parameter is applied to both PQ codebook generation methods, i.e., PER_SUBSPACE and * PER_CLUSTER. In both cases, we will use `pq_book_size * max_train_points_per_pq_code` training * points to train each codebook. */