diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index f489cc62c6..35df378438 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -58,10 +58,6 @@ if(BUILD_CPU_ONLY) set(RAFT_ANN_BENCH_USE_RAFT_BRUTE_FORCE OFF) set(RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB OFF) set(RAFT_ANN_BENCH_USE_GGNN OFF) -elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.0.0) - # Disable faiss benchmarks on CUDA 12 since faiss is not yet CUDA 12-enabled. - # https://github.com/rapidsai/raft/issues/1627 - set(RAFT_FAISS_ENABLE_GPU OFF) endif() set(RAFT_ANN_BENCH_USE_RAFT OFF) diff --git a/cpp/bench/ann/src/common/benchmark.hpp b/cpp/bench/ann/src/common/benchmark.hpp index 8762ccd1fe..185d54a0a3 100644 --- a/cpp/bench/ann/src/common/benchmark.hpp +++ b/cpp/bench/ann/src/common/benchmark.hpp @@ -459,8 +459,14 @@ void register_search(std::shared_ptr> dataset, */ ->MeasureProcessCPUTime() ->UseRealTime(); - - if (metric_objective == Objective::THROUGHPUT) { b->ThreadRange(threads[0], threads[1]); } + if (metric_objective == Objective::THROUGHPUT) { + if (index.algo.find("faiss_gpu") != std::string::npos) { + log_warn( + "FAISS GPU does not work in throughput mode because the underlying " + "StandardGpuResources object is not thread-safe. 
This will cause unexpected results"); + } + b->ThreadRange(threads[0], threads[1]); + } } } } diff --git a/cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp b/cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp index 965522d929..234b33d80a 100644 --- a/cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp +++ b/cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp @@ -51,10 +51,10 @@ void parse_build_param(const nlohmann::json& conf, { parse_base_build_param(conf, param); param.M = conf.at("M"); - if (conf.contains("usePrecomputed")) { - param.usePrecomputed = conf.at("usePrecomputed"); + if (conf.contains("use_precomputed_table")) { + param.use_precomputed_table = conf.at("use_precomputed_table"); } else { - param.usePrecomputed = false; + param.use_precomputed_table = false; } if (conf.contains("bitsPerCode")) { param.bitsPerCode = conf.at("bitsPerCode"); diff --git a/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h b/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h index 3caca15b7f..c7ce4595b5 100644 --- a/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h +++ b/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h @@ -229,7 +229,7 @@ class FaissCpuIVFPQ : public FaissCpu { struct BuildParam : public FaissCpu::BuildParam { int M; int bitsPerCode; - bool usePrecomputed; + bool use_precomputed_table; }; FaissCpuIVFPQ(Metric metric, int dim, const BuildParam& param) : FaissCpu(metric, dim, param) diff --git a/cpp/bench/ann/src/faiss/faiss_gpu_benchmark.cu b/cpp/bench/ann/src/faiss/faiss_gpu_benchmark.cu index c5056cb364..b47c497e3d 100644 --- a/cpp/bench/ann/src/faiss/faiss_gpu_benchmark.cu +++ b/cpp/bench/ann/src/faiss/faiss_gpu_benchmark.cu @@ -45,6 +45,11 @@ void parse_build_param(const nlohmann::json& conf, typename raft::bench::ann::FaissGpuIVFFlat::BuildParam& param) { parse_base_build_param(conf, param); + if (conf.contains("use_raft")) { + param.use_raft = conf.at("use_raft"); + } else { + param.use_raft = false; + } } template @@ -63,6 +68,16 @@ void parse_build_param(const nlohmann::json& conf, } 
else { param.useFloat16 = false; } + if (conf.contains("use_raft")) { + param.use_raft = conf.at("use_raft"); + } else { + param.use_raft = false; + } + if (conf.contains("bitsPerCode")) { + param.bitsPerCode = conf.at("bitsPerCode"); + } else { + param.bitsPerCode = 8; + } } template @@ -160,5 +175,18 @@ REGISTER_ALGO_INSTANCE(std::uint8_t); #ifdef ANN_BENCH_BUILD_MAIN #include "../common/benchmark.hpp" -int main(int argc, char** argv) { return raft::bench::ann::run_main(argc, argv); } +int main(int argc, char** argv) +{ + rmm::mr::cuda_memory_resource cuda_mr; + // Construct a resource that uses a coalescing best-fit pool allocator + // and is initially sized to half of free device memory. + rmm::mr::pool_memory_resource pool_mr{ + &cuda_mr, rmm::percent_of_free_device_memory(50)}; + // Updates the current device resource pointer to `pool_mr` + auto old_mr = rmm::mr::set_current_device_resource(&pool_mr); + auto ret = raft::bench::ann::run_main(argc, argv); + // Restores the current device resource pointer to its previous value + rmm::mr::set_current_device_resource(old_mr); + return ret; +} #endif diff --git a/cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h b/cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h index 2effe631e5..6955201c5d 100644 --- a/cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h +++ b/cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h @@ -17,15 +17,29 @@ #define FAISS_WRAPPER_H_ #include "../common/ann_types.hpp" +#include "../raft/raft_ann_bench_utils.h" +#include +#include +#include +#include #include +#include +#include #include +#include + +#include +#include +#include + #include #include #include #include #include +#include #include #include #include @@ -43,7 +57,7 @@ namespace { -faiss::MetricType parse_metric_type(raft::bench::ann::Metric metric) +faiss::MetricType parse_metric_faiss(raft::bench::ann::Metric metric) { if (metric == raft::bench::ann::Metric::kInnerProduct) { return faiss::METRIC_INNER_PRODUCT; @@ -95,7 +109,7 @@ class FaissGpu : public ANN, 
public AnnGPU { FaissGpu(Metric metric, int dim, const BuildParam& param) : ANN(metric, dim), gpu_resource_{std::make_shared()}, - metric_type_(parse_metric_type(metric)), + metric_type_(parse_metric_faiss(metric)), nlist_{param.nlist}, training_sample_fraction_{1.0 / double(param.ratio)} { @@ -127,7 +141,7 @@ class FaissGpu : public ANN, public AnnGPU { AlgoProperty property; // to enable building big dataset which is larger than GPU memory property.dataset_memory_type = MemoryType::Host; - property.query_memory_type = MemoryType::Host; + property.query_memory_type = MemoryType::Device; return property; } @@ -162,8 +176,10 @@ class FaissGpu : public ANN, public AnnGPU { int device_; double training_sample_fraction_; std::shared_ptr search_params_; + std::shared_ptr refine_search_params_{nullptr}; const T* dataset_; float refine_ratio_ = 1.0; + Objective metric_objective_; }; template @@ -201,19 +217,64 @@ template void FaissGpu::search( const T* queries, int batch_size, int k, AnnBase::index_type* neighbors, float* distances) const { + using IdxT = faiss::idx_t; static_assert(sizeof(size_t) == sizeof(faiss::idx_t), "sizes of size_t and faiss::idx_t are different"); - if (this->refine_ratio_ > 1.0) { - // TODO: FAISS changed their search APIs to accept the search parameters as a struct object - // but their refine API doesn't allow the struct to be passed in. 
Once this is fixed, we - // need to re-enable refinement below - // index_refine_->search(batch_size, queries, k, distances, - // reinterpret_cast(neighbors), this->search_params_.get()); Related FAISS issue: - // https://github.com/facebookresearch/faiss/issues/3118 - throw std::runtime_error( - "FAISS doesn't support refinement in their new APIs so this feature is disabled in the " - "benchmarks for the time being."); + if (refine_ratio_ > 1.0) { + if (raft::get_device_for_address(queries) >= 0) { + uint32_t k0 = static_cast(refine_ratio_ * k); + auto distances_tmp = raft::make_device_matrix( + gpu_resource_->getRaftHandle(device_), batch_size, k0); + auto candidates = + raft::make_device_matrix(gpu_resource_->getRaftHandle(device_), batch_size, k0); + index_->search(batch_size, + queries, + k0, + distances_tmp.data_handle(), + candidates.data_handle(), + this->search_params_.get()); + + auto queries_host = raft::make_host_matrix(batch_size, index_->d); + auto candidates_host = raft::make_host_matrix(batch_size, k0); + auto neighbors_host = raft::make_host_matrix(batch_size, k); + auto distances_host = raft::make_host_matrix(batch_size, k); + auto dataset_v = raft::make_host_matrix_view( + this->dataset_, index_->ntotal, index_->d); + + raft::device_resources handle_ = gpu_resource_->getRaftHandle(device_); + + raft::copy(queries_host.data_handle(), queries, queries_host.size(), handle_.get_stream()); + raft::copy(candidates_host.data_handle(), + candidates.data_handle(), + candidates_host.size(), + handle_.get_stream()); + + // wait for the queries to copy to host in 'stream` + handle_.sync_stream(); + + raft::runtime::neighbors::refine(handle_, + dataset_v, + queries_host.view(), + candidates_host.view(), + neighbors_host.view(), + distances_host.view(), + parse_metric_type(this->metric_)); + + raft::copy(neighbors, + (size_t*)neighbors_host.data_handle(), + neighbors_host.size(), + handle_.get_stream()); + raft::copy( + distances, distances_host.data_handle(), 
distances_host.size(), handle_.get_stream()); + } else { + index_refine_->search(batch_size, + queries, + k, + distances, + reinterpret_cast(neighbors), + this->refine_search_params_.get()); + } } else { index_->search(batch_size, queries, @@ -255,13 +317,16 @@ void FaissGpu::load_(const std::string& file) template class FaissGpuIVFFlat : public FaissGpu { public: - using typename FaissGpu::BuildParam; + struct BuildParam : public FaissGpu::BuildParam { + bool use_raft; + }; FaissGpuIVFFlat(Metric metric, int dim, const BuildParam& param) : FaissGpu(metric, dim, param) { faiss::gpu::GpuIndexIVFFlatConfig config; - config.device = this->device_; - this->index_ = std::make_shared( + config.device = this->device_; + config.use_raft = param.use_raft; + this->index_ = std::make_shared( this->gpu_resource_.get(), dim, param.nlist, this->metric_type_, config); } @@ -295,6 +360,8 @@ class FaissGpuIVFPQ : public FaissGpu { int M; bool useFloat16; bool usePrecomputed; + bool use_raft; + int bitsPerCode; }; FaissGpuIVFPQ(Metric metric, int dim, const BuildParam& param) : FaissGpu(metric, dim, param) @@ -302,16 +369,17 @@ class FaissGpuIVFPQ : public FaissGpu { faiss::gpu::GpuIndexIVFPQConfig config; config.useFloat16LookupTables = param.useFloat16; config.usePrecomputedTables = param.usePrecomputed; + config.use_raft = param.use_raft; + config.interleavedLayout = param.use_raft; config.device = this->device_; - this->index_ = - std::make_shared(this->gpu_resource_.get(), - dim, - param.nlist, - param.M, - 8, // FAISS only supports bitsPerCode=8 - this->metric_type_, - config); + this->index_ = std::make_shared(this->gpu_resource_.get(), + dim, + param.nlist, + param.M, + param.bitsPerCode, + this->metric_type_, + config); } void set_search_param(const typename FaissGpu::AnnSearchParam& param) override @@ -329,6 +397,11 @@ class FaissGpuIVFPQ : public FaissGpu { this->index_refine_ = std::make_shared(this->index_.get(), this->dataset_); this->index_refine_.get()->k_factor = 
search_param.refine_ratio; + faiss::IndexRefineSearchParameters faiss_refine_search_params; + faiss_refine_search_params.k_factor = this->index_refine_.get()->k_factor; + faiss_refine_search_params.base_index_params = this->search_params_.get(); + this->refine_search_params_ = + std::make_unique(faiss_refine_search_params); } } @@ -385,6 +458,11 @@ class FaissGpuIVFSQ : public FaissGpu { this->index_refine_ = std::make_shared(this->index_.get(), this->dataset_); this->index_refine_.get()->k_factor = search_param.refine_ratio; + faiss::IndexRefineSearchParameters faiss_refine_search_params; + faiss_refine_search_params.k_factor = this->index_refine_.get()->k_factor; + faiss_refine_search_params.base_index_params = this->search_params_.get(); + this->refine_search_params_ = + std::make_unique(faiss_refine_search_params); } } diff --git a/cpp/cmake/thirdparty/get_faiss.cmake b/cpp/cmake/thirdparty/get_faiss.cmake index 288da763bf..706b0c2f11 100644 --- a/cpp/cmake/thirdparty/get_faiss.cmake +++ b/cpp/cmake/thirdparty/get_faiss.cmake @@ -55,6 +55,7 @@ function(find_and_configure_faiss) EXCLUDE_FROM_ALL ${exclude} OPTIONS "FAISS_ENABLE_GPU ${PKG_ENABLE_GPU}" + "FAISS_ENABLE_RAFT ${PKG_ENABLE_GPU}" "FAISS_ENABLE_PYTHON OFF" "FAISS_OPT_LEVEL ${RAFT_FAISS_OPT_LEVEL}" "FAISS_USE_CUDA_TOOLKIT_STATIC ${CUDA_STATIC_RUNTIME}" @@ -115,4 +116,4 @@ endfunction() find_and_configure_faiss( BUILD_STATIC_LIBS ${RAFT_USE_FAISS_STATIC} ENABLE_GPU ${RAFT_FAISS_ENABLE_GPU} -) +) \ No newline at end of file diff --git a/cpp/include/raft_runtime/neighbors/refine.hpp b/cpp/include/raft_runtime/neighbors/refine.hpp index fba7d0fc0e..592c8be82b 100644 --- a/cpp/include/raft_runtime/neighbors/refine.hpp +++ b/cpp/include/raft_runtime/neighbors/refine.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,8 +17,9 @@ #pragma once #include +#include #include -// #include +#include namespace raft::runtime::neighbors { @@ -29,7 +30,7 @@ namespace raft::runtime::neighbors { raft::device_matrix_view neighbor_candidates, \ raft::device_matrix_view indices, \ raft::device_matrix_view distances, \ - distance::DistanceType metric); \ + raft::distance::DistanceType metric); \ \ void refine(raft::resources const& handle, \ raft::host_matrix_view dataset, \ @@ -37,7 +38,7 @@ namespace raft::runtime::neighbors { raft::host_matrix_view neighbor_candidates, \ raft::host_matrix_view indices, \ raft::host_matrix_view distances, \ - distance::DistanceType metric); + raft::distance::DistanceType metric); RAFT_INST_REFINE(int64_t, float); RAFT_INST_REFINE(int64_t, uint8_t); diff --git a/docs/source/ann_benchmarks_build.md b/docs/source/ann_benchmarks_build.md index 80730c5d68..56af8e555c 100644 --- a/docs/source/ann_benchmarks_build.md +++ b/docs/source/ann_benchmarks_build.md @@ -36,9 +36,12 @@ You can limit the algorithms that are built by providing a semicolon-delimited l ``` Available targets to use with `--limit-bench-ann` are: -- FAISS_IVF_FLAT_ANN_BENCH -- FAISS_IVF_PQ_ANN_BENCH -- FAISS_BFKNN_ANN_BENCH +- FAISS_GPU_IVF_FLAT_ANN_BENCH +- FAISS_GPU_IVF_PQ_ANN_BENCH +- FAISS_CPU_IVF_FLAT_ANN_BENCH +- FAISS_CPU_IVF_PQ_ANN_BENCH +- FAISS_GPU_FLAT_ANN_BENCH +- FAISS_CPU_FLAT_ANN_BENCH - GGNN_ANN_BENCH - HNSWLIB_ANN_BENCH - RAFT_CAGRA_ANN_BENCH diff --git a/python/raft-ann-bench/src/raft_ann_bench/constraints/__init__.py b/python/raft-ann-bench/src/raft_ann_bench/constraints/__init__.py index 2b7b2728fe..e94ee56c92 100644 --- a/python/raft-ann-bench/src/raft_ann_bench/constraints/__init__.py +++ b/python/raft-ann-bench/src/raft_ann_bench/constraints/__init__.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2023, NVIDIA CORPORATION. 
+# Copyright (c) 2023-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -51,3 +51,27 @@ def raft_cagra_search_constraints(params, build_params, k, batch_size): def hnswlib_search_constraints(params, build_params, k, batch_size): if "ef" in params: return params["ef"] >= k + + +def faiss_gpu_ivf_pq_build_constraints(params, dims): + ret = True + # M must be defined + ret = params["M"] <= dims and dims % params["M"] == 0 + if "use_raft" in params and params["use_raft"]: + return ret + pq_bits = 8 + if "bitsPerCode" in params: + pq_bits = params["bitsPerCode"] + lookup_table_size = 4 + if "useFloat16" in params and params["useFloat16"]: + lookup_table_size = 2 + # FAISS constraint to check if lookup table fits in shared memory + # for now hard code maximum shared memory per block to 49 kB (the value for A100 and V100) + return ret and lookup_table_size * params["M"] * (2**pq_bits) <= 49152 + + +def faiss_gpu_ivf_pq_search_constraints(params, build_params, k, batch_size): + ret = True + if "nlist" in build_params and "nprobe" in params: + ret = ret and build_params["nlist"] >= params["nprobe"] + return ret diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_cpu_ivf_flat.yaml b/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_cpu_ivf_flat.yaml new file mode 100644 index 0000000000..29c145f86d --- /dev/null +++ b/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_cpu_ivf_flat.yaml @@ -0,0 +1,10 @@ +name: faiss_cpu_ivf_flat +groups: + base: + build: + nlist: [2048] + ratio: [10] + useFloat16: [False] + search: + nprobe: [1, 5, 10, 50, 100, 200] + refine_ratio: [1] \ No newline at end of file diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_cpu_ivf_pq.yaml b/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_cpu_ivf_pq.yaml new file mode 100644 index 0000000000..a531ec8294 --- 
/dev/null +++ b/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_cpu_ivf_pq.yaml @@ -0,0 +1,18 @@ +name: faiss_cpu_ivf_pq +groups: + base: + build: + nlist: [1024, 2048, 4096, 8192] + M: [48, 32, 16] + ratio: [10] + bitsPerCode: [8, 6, 5, 4] + search: + nprobe: [1, 5, 10, 50, 100, 200] + large: + build: + nlist: [8192, 16384, 32768, 65536] + M: [48, 32, 16] + ratio: [10] + bitsPerCode: [8, 6, 5, 4] + search: + nprobe: [20, 30, 40, 50, 100, 200, 500, 1000] diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_gpu_ivf_flat.yaml b/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_gpu_ivf_flat.yaml index ed237becb3..e4abc35f5c 100644 --- a/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_gpu_ivf_flat.yaml +++ b/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_gpu_ivf_flat.yaml @@ -3,8 +3,18 @@ groups: base: build: nlist: [2048] - ratio: [1, 4, 10] - useFloat16: [False] + ratio: [10] + useFloat16: [False, True] + use_raft: [False] search: - nprobe: [2048] + nprobe: [1, 5, 10, 50, 100, 200] refine_ratio: [1] + baseraft: + build: + nlist: [2048] + ratio: [10] + useFloat16: [False, True] + use_raft: [True] + search: + nprobe: [1, 5, 10, 50, 100, 200] + refine_ratio: [1] \ No newline at end of file diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_gpu_ivf_pq.yaml b/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_gpu_ivf_pq.yaml index 87c3afc727..7560ceaa9c 100644 --- a/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_gpu_ivf_pq.yaml +++ b/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_gpu_ivf_pq.yaml @@ -1,12 +1,77 @@ name: faiss_gpu_ivf_pq +constraints: + build: raft-ann-bench.constraints.faiss_gpu_ivf_pq_build_constraints + search: raft-ann-bench.constraints.faiss_gpu_ivf_pq_search_constraints groups: base: build: nlist: [1024, 2048, 4096, 8192] - M: [8, 16] - ratio: [10, 25] + M: [64, 32, 16] + ratio: [10] + usePrecomputed: 
[False, True] + useFloat16: [False, True] + use_raft: [False] + bitsPerCode: [8] + search: + nprobe: [1, 5, 10, 50, 100, 200] + refine_ratio: [1, 2, 4] + baseraft: + build: + nlist: [1024, 2048, 4096, 8192] + M: [64, 32, 16] + ratio: [10] usePrecomputed: [False] - useFloat16: [False] + useFloat16: [False, True] + use_raft: [True] + bitsPerCode: [8, 6, 5, 4] search: nprobe: [1, 5, 10, 50, 100, 200] + refine_ratio: [1, 2, 4] + large: + build: + nlist: [8192, 16384, 32768, 65536] + M: [48, 32, 16] + ratio: [4] + usePrecomputed: [False, True] + useFloat16: [False, True] + use_raft: [False] + bitsPerCode: [8] + search: + nprobe: [20, 30, 40, 50, 100, 200, 500, 1000] + refine_ratio: [1, 2, 4] + largeraft: + build: + nlist: [8192, 16384, 32768, 65536] + M: [48, 32, 16] + ratio: [4] + usePrecomputed: [False] + useFloat16: [False, True] + use_raft: [True] + bitsPerCode: [8, 6, 5, 4] + search: + nprobe: [20, 30, 40, 50, 100, 200, 500, 1000] + refine_ratio: [1, 2, 4] + 100M: + build: + nlist: [50000] + M: [48] + ratio: [10] + usePrecomputed: [False, True] + useFloat16: [False, True] + use_raft: [False] + bitsPerCode: [8] + search: + nprobe: [20, 30, 40, 50, 100, 200, 500, 1000] + refine_ratio: [1] + 100Mraft: + build: + nlist: [50000] + M: [48] + ratio: [10] + usePrecomputed: [False] + useFloat16: [False, True] + use_raft: [True] + bitsPerCode: [8, 6, 5, 4] + search: + nprobe: [20, 30, 40, 50, 100, 200, 500, 1000] refine_ratio: [1] \ No newline at end of file diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/raft_ivf_pq.yaml b/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/raft_ivf_pq.yaml index 7eaec2b77b..bcdcde42a2 100644 --- a/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/raft_ivf_pq.yaml +++ b/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/raft_ivf_pq.yaml @@ -6,12 +6,36 @@ groups: base: build: nlist: [1024, 2048, 4096, 8192] - pq_dim: [64, 32] + pq_dim: [64, 32, 16] pq_bits: [8, 6, 5, 4] - ratio: [10, 25] + ratio: 
[10] niter: [25] search: nprobe: [1, 5, 10, 50, 100, 200] internalDistanceDtype: ["float"] smemLutDtype: ["float", "fp8", "half"] - refine_ratio: [1, 2, 4] \ No newline at end of file + refine_ratio: [1, 2, 4] + large: + build: + nlist: [8192, 16384, 32768, 65536] + pq_dim: [48, 32, 16] + pq_bits: [8, 6, 5, 4] + ratio: [4] + niter: [20] + search: + nprobe: [20, 30, 40, 50, 100, 200, 500, 1000] + internalDistanceDtype: ["float"] + smemLutDtype: ["float", "fp8", "half"] + refine_ratio: [1, 2, 4] + 100M: + build: + nlist: [50000] + pq_dim: [48] + pq_bits: [8, 6, 5, 4] + ratio: [10] + niter: [10] + search: + nprobe: [20, 30, 40, 50, 100, 200, 500, 1000] + internalDistanceDtype: ["float"] + smemLutDtype: ["float", "fp8", "half"] + refine_ratio: [1]