diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt
index f489cc62c6..35df378438 100644
--- a/cpp/bench/ann/CMakeLists.txt
+++ b/cpp/bench/ann/CMakeLists.txt
@@ -58,10 +58,6 @@ if(BUILD_CPU_ONLY)
   set(RAFT_ANN_BENCH_USE_RAFT_BRUTE_FORCE OFF)
   set(RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB OFF)
   set(RAFT_ANN_BENCH_USE_GGNN OFF)
-elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.0.0)
-  # Disable faiss benchmarks on CUDA 12 since faiss is not yet CUDA 12-enabled.
-  # https://github.com/rapidsai/raft/issues/1627
-  set(RAFT_FAISS_ENABLE_GPU OFF)
 endif()
 
 set(RAFT_ANN_BENCH_USE_RAFT OFF)
diff --git a/cpp/bench/ann/src/common/benchmark.hpp b/cpp/bench/ann/src/common/benchmark.hpp
index 8762ccd1fe..185d54a0a3 100644
--- a/cpp/bench/ann/src/common/benchmark.hpp
+++ b/cpp/bench/ann/src/common/benchmark.hpp
@@ -459,8 +459,14 @@ void register_search(std::shared_ptr<const Dataset<T>> dataset,
                    */
                   ->MeasureProcessCPUTime()
                   ->UseRealTime();
-
-      if (metric_objective == Objective::THROUGHPUT) { b->ThreadRange(threads[0], threads[1]); }
+      if (metric_objective == Objective::THROUGHPUT) {
+        if (index.algo.find("faiss_gpu") != std::string::npos) {
+          log_warn(
+            "FAISS GPU does not work in throughput mode because the underlying "
+            "StandardGpuResources object is not thread-safe. This will cause unexpected results");
+        }
+        b->ThreadRange(threads[0], threads[1]);
+      }
     }
   }
 }
diff --git a/cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp b/cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp
index 965522d929..234b33d80a 100644
--- a/cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp
+++ b/cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp
@@ -51,10 +51,10 @@ void parse_build_param(const nlohmann::json& conf,
 {
   parse_base_build_param<T>(conf, param);
   param.M = conf.at("M");
-  if (conf.contains("usePrecomputed")) {
-    param.usePrecomputed = conf.at("usePrecomputed");
+  if (conf.contains("use_precomputed_table")) {
+    param.use_precomputed_table = conf.at("use_precomputed_table");
   } else {
-    param.usePrecomputed = false;
+    param.use_precomputed_table = false;
   }
   if (conf.contains("bitsPerCode")) {
     param.bitsPerCode = conf.at("bitsPerCode");
diff --git a/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h b/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h
index 3caca15b7f..c7ce4595b5 100644
--- a/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h
+++ b/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h
@@ -229,7 +229,7 @@ class FaissCpuIVFPQ : public FaissCpu<T> {
   struct BuildParam : public FaissCpu<T>::BuildParam {
     int M;
     int bitsPerCode;
-    bool usePrecomputed;
+    bool use_precomputed_table;
   };
 
   FaissCpuIVFPQ(Metric metric, int dim, const BuildParam& param) : FaissCpu<T>(metric, dim, param)
diff --git a/cpp/bench/ann/src/faiss/faiss_gpu_benchmark.cu b/cpp/bench/ann/src/faiss/faiss_gpu_benchmark.cu
index c5056cb364..b47c497e3d 100644
--- a/cpp/bench/ann/src/faiss/faiss_gpu_benchmark.cu
+++ b/cpp/bench/ann/src/faiss/faiss_gpu_benchmark.cu
@@ -45,6 +45,11 @@ void parse_build_param(const nlohmann::json& conf,
                        typename raft::bench::ann::FaissGpuIVFFlat<T>::BuildParam& param)
 {
   parse_base_build_param<T>(conf, param);
+  if (conf.contains("use_raft")) {
+    param.use_raft = conf.at("use_raft");
+  } else {
+    param.use_raft = false;
+  }
 }
 
 template <typename T>
@@ -63,6 +68,16 @@ void parse_build_param(const nlohmann::json& conf,
   } else {
     param.useFloat16 = false;
   }
+  if (conf.contains("use_raft")) {
+    param.use_raft = conf.at("use_raft");
+  } else {
+    param.use_raft = false;
+  }
+  if (conf.contains("bitsPerCode")) {
+    param.bitsPerCode = conf.at("bitsPerCode");
+  } else {
+    param.bitsPerCode = 8;
+  }
 }
 
 template <typename T>
@@ -160,5 +175,18 @@ REGISTER_ALGO_INSTANCE(std::uint8_t);
 
 #ifdef ANN_BENCH_BUILD_MAIN
 #include "../common/benchmark.hpp"
-int main(int argc, char** argv) { return raft::bench::ann::run_main(argc, argv); }
+int main(int argc, char** argv)
+{
+  rmm::mr::cuda_memory_resource cuda_mr;
+  // Layer a pool allocator on top of `cuda_mr`; the pool is initially sized
+  // to 50% of the device memory that is free when the benchmark starts.
+  rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> pool_mr{
+    &cuda_mr, rmm::percent_of_free_device_memory(50)};
+  // Install `pool_mr` as the current device resource, keeping the previous one
+  auto old_mr = rmm::mr::set_current_device_resource(&pool_mr);
+  auto ret    = raft::bench::ann::run_main(argc, argv);
+  // Put the previous resource back before `pool_mr` is destroyed on return
+  rmm::mr::set_current_device_resource(old_mr);
+  return ret;
+}
 #endif
diff --git a/cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h b/cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h
index 2effe631e5..6955201c5d 100644
--- a/cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h
+++ b/cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h
@@ -17,15 +17,29 @@
 #define FAISS_WRAPPER_H_
 
 #include "../common/ann_types.hpp"
+#include "../raft/raft_ann_bench_utils.h"
 
+#include <raft/core/device_mdarray.hpp>
+#include <raft/core/device_resources.hpp>
+#include <raft/core/host_mdarray.hpp>
+#include <raft/core/host_mdspan.hpp>
 #include <raft/core/logger.hpp>
+#include <raft/core/resource/stream_view.hpp>
+#include <raft/distance/distance_types.hpp>
 #include <raft/util/cudart_utils.hpp>
 
+#include <raft_runtime/neighbors/refine.hpp>
+
+#include <rmm/cuda_device.hpp>
+#include <rmm/mr/device/device_memory_resource.hpp>
+#include <rmm/mr/device/per_device_resource.hpp>
+
 #include <faiss/IndexFlat.h>
 #include <faiss/IndexIVFFlat.h>
 #include <faiss/IndexIVFPQ.h>
 #include <faiss/IndexRefine.h>
 #include <faiss/IndexScalarQuantizer.h>
+#include <faiss/MetricType.h>
 #include <faiss/gpu/GpuIndexFlat.h>
 #include <faiss/gpu/GpuIndexIVFFlat.h>
 #include <faiss/gpu/GpuIndexIVFPQ.h>
@@ -43,7 +57,7 @@
 
 namespace {
 
-faiss::MetricType parse_metric_type(raft::bench::ann::Metric metric)
+faiss::MetricType parse_metric_faiss(raft::bench::ann::Metric metric)
 {
   if (metric == raft::bench::ann::Metric::kInnerProduct) {
     return faiss::METRIC_INNER_PRODUCT;
@@ -95,7 +109,7 @@ class FaissGpu : public ANN<T>, public AnnGPU {
   FaissGpu(Metric metric, int dim, const BuildParam& param)
     : ANN<T>(metric, dim),
       gpu_resource_{std::make_shared<faiss::gpu::StandardGpuResources>()},
-      metric_type_(parse_metric_type(metric)),
+      metric_type_(parse_metric_faiss(metric)),
       nlist_{param.nlist},
       training_sample_fraction_{1.0 / double(param.ratio)}
   {
@@ -127,7 +141,7 @@ class FaissGpu : public ANN<T>, public AnnGPU {
     AlgoProperty property;
     // to enable building big dataset which is larger than GPU memory
     property.dataset_memory_type = MemoryType::Host;
-    property.query_memory_type   = MemoryType::Host;
+    property.query_memory_type   = MemoryType::Device;
     return property;
   }
 
@@ -162,8 +176,10 @@ class FaissGpu : public ANN<T>, public AnnGPU {
   int device_;
   double training_sample_fraction_;
   std::shared_ptr<faiss::SearchParameters> search_params_;
+  std::shared_ptr<faiss::IndexRefineSearchParameters> refine_search_params_{nullptr};
   const T* dataset_;
   float refine_ratio_ = 1.0;
+  Objective metric_objective_;
 };
 
 template <typename T>
@@ -201,19 +217,65 @@ template <typename T>
 void FaissGpu<T>::search(
   const T* queries, int batch_size, int k, AnnBase::index_type* neighbors, float* distances) const
 {
+  // TODO(review): reject Objective::THROUGHPUT here once metric_objective_ is actually populated.
+  using IdxT = faiss::idx_t;
   static_assert(sizeof(size_t) == sizeof(faiss::idx_t),
                 "sizes of size_t and faiss::idx_t are different");
 
-  if (this->refine_ratio_ > 1.0) {
-    // TODO: FAISS changed their search APIs to accept the search parameters as a struct object
-    // but their refine API doesn't allow the struct to be passed in. Once this is fixed, we
-    // need to re-enable refinement below
-    // index_refine_->search(batch_size, queries, k, distances,
-    // reinterpret_cast<faiss::idx_t*>(neighbors), this->search_params_.get()); Related FAISS issue:
-    // https://github.com/facebookresearch/faiss/issues/3118
-    throw std::runtime_error(
-      "FAISS doesn't support refinement in their new APIs so this feature is disabled in the "
-      "benchmarks for the time being.");
+  if (refine_ratio_ > 1.0) {
+    if (raft::get_device_for_address(queries) >= 0) {
+      uint32_t k0        = static_cast<uint32_t>(refine_ratio_ * k);
+      auto distances_tmp = raft::make_device_matrix<float, IdxT>(
+        gpu_resource_->getRaftHandle(device_), batch_size, k0);
+      auto candidates =
+        raft::make_device_matrix<IdxT, IdxT>(gpu_resource_->getRaftHandle(device_), batch_size, k0);
+      index_->search(batch_size,
+                     queries,
+                     k0,
+                     distances_tmp.data_handle(),
+                     candidates.data_handle(),
+                     this->search_params_.get());
+
+      auto queries_host    = raft::make_host_matrix<T, IdxT>(batch_size, index_->d);
+      auto candidates_host = raft::make_host_matrix<IdxT, IdxT>(batch_size, k0);
+      auto neighbors_host  = raft::make_host_matrix<IdxT, IdxT>(batch_size, k);
+      auto distances_host  = raft::make_host_matrix<float, IdxT>(batch_size, k);
+      auto dataset_v       = raft::make_host_matrix_view<const T, faiss::idx_t>(
+        this->dataset_, index_->ntotal, index_->d);
+
+      raft::device_resources handle_ = gpu_resource_->getRaftHandle(device_);
+
+      raft::copy(queries_host.data_handle(), queries, queries_host.size(), handle_.get_stream());
+      raft::copy(candidates_host.data_handle(),
+                 candidates.data_handle(),
+                 candidates_host.size(),
+                 handle_.get_stream());
+
+      // wait for the queries to copy to host in 'stream`
+      handle_.sync_stream();
+
+      raft::runtime::neighbors::refine(handle_,
+                                       dataset_v,
+                                       queries_host.view(),
+                                       candidates_host.view(),
+                                       neighbors_host.view(),
+                                       distances_host.view(),
+                                       parse_metric_type(this->metric_));
+
+      raft::copy(neighbors,
+                 (size_t*)neighbors_host.data_handle(),
+                 neighbors_host.size(),
+                 handle_.get_stream());
+      raft::copy(
+        distances, distances_host.data_handle(), distances_host.size(), handle_.get_stream());
+    } else {
+      index_refine_->search(batch_size,
+                            queries,
+                            k,
+                            distances,
+                            reinterpret_cast<faiss::idx_t*>(neighbors),
+                            this->refine_search_params_.get());
+    }
   } else {
     index_->search(batch_size,
                    queries,
@@ -255,13 +317,16 @@ void FaissGpu<T>::load_(const std::string& file)
 template <typename T>
 class FaissGpuIVFFlat : public FaissGpu<T> {
  public:
-  using typename FaissGpu<T>::BuildParam;
+  struct BuildParam : public FaissGpu<T>::BuildParam {
+    bool use_raft;
+  };
 
   FaissGpuIVFFlat(Metric metric, int dim, const BuildParam& param) : FaissGpu<T>(metric, dim, param)
   {
     faiss::gpu::GpuIndexIVFFlatConfig config;
-    config.device = this->device_;
-    this->index_  = std::make_shared<faiss::gpu::GpuIndexIVFFlat>(
+    config.device   = this->device_;
+    config.use_raft = param.use_raft;
+    this->index_    = std::make_shared<faiss::gpu::GpuIndexIVFFlat>(
       this->gpu_resource_.get(), dim, param.nlist, this->metric_type_, config);
   }
 
@@ -295,6 +360,8 @@ class FaissGpuIVFPQ : public FaissGpu<T> {
     int M;
     bool useFloat16;
     bool usePrecomputed;
+    bool use_raft;
+    int bitsPerCode;
   };
 
   FaissGpuIVFPQ(Metric metric, int dim, const BuildParam& param) : FaissGpu<T>(metric, dim, param)
@@ -302,16 +369,17 @@ class FaissGpuIVFPQ : public FaissGpu<T> {
     faiss::gpu::GpuIndexIVFPQConfig config;
     config.useFloat16LookupTables = param.useFloat16;
     config.usePrecomputedTables   = param.usePrecomputed;
+    config.use_raft               = param.use_raft;
+    config.interleavedLayout      = param.use_raft;
     config.device                 = this->device_;
 
-    this->index_ =
-      std::make_shared<faiss::gpu::GpuIndexIVFPQ>(this->gpu_resource_.get(),
-                                                  dim,
-                                                  param.nlist,
-                                                  param.M,
-                                                  8,  // FAISS only supports bitsPerCode=8
-                                                  this->metric_type_,
-                                                  config);
+    this->index_ = std::make_shared<faiss::gpu::GpuIndexIVFPQ>(this->gpu_resource_.get(),
+                                                               dim,
+                                                               param.nlist,
+                                                               param.M,
+                                                               param.bitsPerCode,
+                                                               this->metric_type_,
+                                                               config);
   }
 
   void set_search_param(const typename FaissGpu<T>::AnnSearchParam& param) override
@@ -329,6 +397,11 @@ class FaissGpuIVFPQ : public FaissGpu<T> {
       this->index_refine_ =
         std::make_shared<faiss::IndexRefineFlat>(this->index_.get(), this->dataset_);
       this->index_refine_.get()->k_factor = search_param.refine_ratio;
+      // Non-owning link to search_params_, which must outlive refine_search_params_.
+      auto refine_params               = std::make_shared<faiss::IndexRefineSearchParameters>();
+      refine_params->k_factor          = this->index_refine_->k_factor;
+      refine_params->base_index_params = this->search_params_.get();
+      this->refine_search_params_      = refine_params;
     }
   }
 
@@ -385,6 +458,11 @@ class FaissGpuIVFSQ : public FaissGpu<T> {
       this->index_refine_ =
         std::make_shared<faiss::IndexRefineFlat>(this->index_.get(), this->dataset_);
       this->index_refine_.get()->k_factor = search_param.refine_ratio;
+      // Non-owning link to search_params_, which must outlive refine_search_params_.
+      auto refine_params               = std::make_shared<faiss::IndexRefineSearchParameters>();
+      refine_params->k_factor          = this->index_refine_->k_factor;
+      refine_params->base_index_params = this->search_params_.get();
+      this->refine_search_params_      = refine_params;
     }
   }
 
diff --git a/cpp/cmake/thirdparty/get_faiss.cmake b/cpp/cmake/thirdparty/get_faiss.cmake
index 288da763bf..706b0c2f11 100644
--- a/cpp/cmake/thirdparty/get_faiss.cmake
+++ b/cpp/cmake/thirdparty/get_faiss.cmake
@@ -55,6 +55,7 @@ function(find_and_configure_faiss)
     EXCLUDE_FROM_ALL ${exclude}
     OPTIONS
     "FAISS_ENABLE_GPU ${PKG_ENABLE_GPU}"
+    "FAISS_ENABLE_RAFT ${PKG_ENABLE_GPU}"
     "FAISS_ENABLE_PYTHON OFF"
     "FAISS_OPT_LEVEL ${RAFT_FAISS_OPT_LEVEL}"
     "FAISS_USE_CUDA_TOOLKIT_STATIC ${CUDA_STATIC_RUNTIME}"
@@ -115,4 +116,4 @@ endfunction()
 find_and_configure_faiss(
   BUILD_STATIC_LIBS ${RAFT_USE_FAISS_STATIC}
   ENABLE_GPU ${RAFT_FAISS_ENABLE_GPU}
-)
+)
\ No newline at end of file
diff --git a/cpp/include/raft_runtime/neighbors/refine.hpp b/cpp/include/raft_runtime/neighbors/refine.hpp
index fba7d0fc0e..592c8be82b 100644
--- a/cpp/include/raft_runtime/neighbors/refine.hpp
+++ b/cpp/include/raft_runtime/neighbors/refine.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,8 +17,9 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/host_mdspan.hpp>
 #include <raft/core/resources.hpp>
-// #include <raft/core/host_mdspan.hpp>
+#include <raft/distance/distance_types.hpp>
 
 namespace raft::runtime::neighbors {
 
@@ -29,7 +30,7 @@ namespace raft::runtime::neighbors {
               raft::device_matrix_view<const IDX_T, int64_t, row_major> neighbor_candidates, \
               raft::device_matrix_view<IDX_T, int64_t, row_major> indices,                   \
               raft::device_matrix_view<float, int64_t, row_major> distances,                 \
-              distance::DistanceType metric);                                                \
+              raft::distance::DistanceType metric);                                          \
                                                                                              \
   void refine(raft::resources const& handle,                                                 \
               raft::host_matrix_view<const DATA_T, int64_t, row_major> dataset,              \
@@ -37,7 +38,7 @@ namespace raft::runtime::neighbors {
               raft::host_matrix_view<const IDX_T, int64_t, row_major> neighbor_candidates,   \
               raft::host_matrix_view<IDX_T, int64_t, row_major> indices,                     \
               raft::host_matrix_view<float, int64_t, row_major> distances,                   \
-              distance::DistanceType metric);
+              raft::distance::DistanceType metric);
 
 RAFT_INST_REFINE(int64_t, float);
 RAFT_INST_REFINE(int64_t, uint8_t);
diff --git a/docs/source/ann_benchmarks_build.md b/docs/source/ann_benchmarks_build.md
index 80730c5d68..56af8e555c 100644
--- a/docs/source/ann_benchmarks_build.md
+++ b/docs/source/ann_benchmarks_build.md
@@ -36,9 +36,12 @@ You can limit the algorithms that are built by providing a semicolon-delimited l
 ```
 
 Available targets to use with `--limit-bench-ann` are:
-- FAISS_IVF_FLAT_ANN_BENCH
-- FAISS_IVF_PQ_ANN_BENCH
-- FAISS_BFKNN_ANN_BENCH
+- FAISS_GPU_IVF_FLAT_ANN_BENCH
+- FAISS_GPU_IVF_PQ_ANN_BENCH
+- FAISS_CPU_IVF_FLAT_ANN_BENCH
+- FAISS_CPU_IVF_PQ_ANN_BENCH
+- FAISS_GPU_FLAT_ANN_BENCH
+- FAISS_CPU_FLAT_ANN_BENCH
 - GGNN_ANN_BENCH
 - HNSWLIB_ANN_BENCH
 - RAFT_CAGRA_ANN_BENCH
diff --git a/python/raft-ann-bench/src/raft_ann_bench/constraints/__init__.py b/python/raft-ann-bench/src/raft_ann_bench/constraints/__init__.py
index 2b7b2728fe..e94ee56c92 100644
--- a/python/raft-ann-bench/src/raft_ann_bench/constraints/__init__.py
+++ b/python/raft-ann-bench/src/raft_ann_bench/constraints/__init__.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -51,3 +51,27 @@ def raft_cagra_search_constraints(params, build_params, k, batch_size):
 def hnswlib_search_constraints(params, build_params, k, batch_size):
     if "ef" in params:
         return params["ef"] >= k
+
+
+def faiss_gpu_ivf_pq_build_constraints(params, dims):
+    # "M" is a required build parameter and must evenly divide `dims`.
+    num_subq = params["M"]
+    dims_ok = num_subq <= dims and dims % num_subq == 0
+    # The lookup-table check below is skipped when use_raft is enabled.
+    if params.get("use_raft", False):
+        return dims_ok
+    code_bits = params.get("bitsPerCode", 8)
+    # Per-entry lookup-table size: 2 with useFloat16, otherwise 4
+    # (presumably bytes, since the product is compared to a byte limit).
+    entry_size = 2 if params.get("useFloat16", False) else 4
+    # FAISS constraint: the lookup table must fit in shared memory. The limit
+    # is hard-coded to 49152 bytes (48 KiB) per block for now.
+    smem_limit = 49152
+    return dims_ok and entry_size * num_subq * (2**code_bits) <= smem_limit
+
+
+def faiss_gpu_ivf_pq_search_constraints(params, build_params, k, batch_size):
+    # Unconstrained unless both nlist (build) and nprobe (search) are given.
+    if "nlist" not in build_params or "nprobe" not in params:
+        return True
+    return build_params["nlist"] >= params["nprobe"]
diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_cpu_ivf_flat.yaml b/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_cpu_ivf_flat.yaml
new file mode 100644
index 0000000000..29c145f86d
--- /dev/null
+++ b/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_cpu_ivf_flat.yaml
@@ -0,0 +1,10 @@
+name: faiss_cpu_ivf_flat
+groups:
+  base:
+    build:
+      nlist: [2048]
+      ratio: [10]
+      useFloat16: [False]
+    search:
+      nprobe: [1, 5, 10, 50, 100, 200]
+      refine_ratio: [1]
\ No newline at end of file
diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_cpu_ivf_pq.yaml b/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_cpu_ivf_pq.yaml
new file mode 100644
index 0000000000..a531ec8294
--- /dev/null
+++ b/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_cpu_ivf_pq.yaml
@@ -0,0 +1,18 @@
+name: faiss_cpu_ivf_pq
+groups:
+  base:
+    build:
+      nlist: [1024, 2048, 4096, 8192]
+      M: [48, 32, 16]
+      ratio: [10]
+      bitsPerCode: [8, 6, 5, 4]
+    search:
+      nprobe: [1, 5, 10, 50, 100, 200]
+  large:
+    build:
+      nlist: [8192, 16384, 32768, 65536]
+      M: [48, 32, 16]
+      ratio: [10]
+      bitsPerCode: [8, 6, 5, 4]
+    search:
+      nprobe: [20, 30, 40, 50, 100, 200, 500, 1000]
diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_gpu_ivf_flat.yaml b/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_gpu_ivf_flat.yaml
index ed237becb3..e4abc35f5c 100644
--- a/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_gpu_ivf_flat.yaml
+++ b/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_gpu_ivf_flat.yaml
@@ -3,8 +3,19 @@ groups:
   base:
     build:
       nlist: [2048]
-      ratio: [1, 4, 10]
-      useFloat16: [False]
+      ratio: [10]
+      useFloat16: [False, True]
+      use_raft: [False]
     search:
-      nprobe: [2048]
+      nprobe: [1, 5, 10, 50, 100, 200]
       refine_ratio: [1]
+  # Same sweep as `base`, but with use_raft enabled.
+  baseraft:
+    build:
+      nlist: [2048]
+      ratio: [10]
+      useFloat16: [False, True]
+      use_raft: [True]
+    search:
+      nprobe: [1, 5, 10, 50, 100, 200]
+      refine_ratio: [1]
\ No newline at end of file
diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_gpu_ivf_pq.yaml b/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_gpu_ivf_pq.yaml
index 87c3afc727..7560ceaa9c 100644
--- a/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_gpu_ivf_pq.yaml
+++ b/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_gpu_ivf_pq.yaml
@@ -1,12 +1,77 @@
 name: faiss_gpu_ivf_pq
+constraints:
+  build: raft-ann-bench.constraints.faiss_gpu_ivf_pq_build_constraints
+  search: raft-ann-bench.constraints.faiss_gpu_ivf_pq_search_constraints
 groups:
   base:
     build:
       nlist: [1024, 2048, 4096, 8192]
-      M: [8, 16]
-      ratio: [10, 25]
+      M: [64, 32, 16]
+      ratio: [10]
+      usePrecomputed: [False, True]
+      useFloat16: [False, True]
+      use_raft: [False]
+      bitsPerCode: [8]
+    search:
+      nprobe: [1, 5, 10, 50, 100, 200]
+      refine_ratio: [1, 2, 4]
+  baseraft:
+    build:
+      nlist: [1024, 2048, 4096, 8192]
+      M: [64, 32, 16]
+      ratio: [10]
       usePrecomputed: [False]
-      useFloat16: [False]
+      useFloat16: [False, True]
+      use_raft: [True]
+      bitsPerCode: [8, 6, 5, 4]
     search:
       nprobe: [1, 5, 10, 50, 100, 200]
+      refine_ratio: [1, 2, 4]
+  large:
+    build:
+      nlist: [8192, 16384, 32768, 65536]
+      M: [48, 32, 16]
+      ratio: [4]
+      usePrecomputed: [False, True]
+      useFloat16: [False, True]
+      use_raft: [False]
+      bitsPerCode: [8]
+    search:
+      nprobe: [20, 30, 40, 50, 100, 200, 500, 1000]
+      refine_ratio: [1, 2, 4]
+  largeraft:
+    build:
+      nlist: [8192, 16384, 32768, 65536]
+      M: [48, 32, 16]
+      ratio: [4]
+      usePrecomputed: [False]
+      useFloat16: [False, True]
+      use_raft: [True]
+      bitsPerCode: [8, 6, 5, 4]
+    search:
+      nprobe: [20, 30, 40, 50, 100, 200, 500, 1000]
+      refine_ratio: [1, 2, 4]
+  100M:
+    build:
+      nlist: [50000]
+      M: [48]
+      ratio: [10]
+      usePrecomputed: [False, True]
+      useFloat16: [False, True]
+      use_raft: [False]
+      bitsPerCode: [8]
+    search:
+      nprobe: [20, 30, 40, 50, 100, 200, 500, 1000]
+      refine_ratio: [1]
+  100Mraft:
+    build:
+      nlist: [50000]
+      M: [48]
+      ratio: [10]
+      usePrecomputed: [False, True]
+      useFloat16: [False, True]
+      use_raft: [True]
+      bitsPerCode: [8, 6, 5, 4]
+    search:
+      nprobe: [20, 30, 40, 50, 100, 200, 500, 1000]
       refine_ratio: [1]
\ No newline at end of file
diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/raft_ivf_pq.yaml b/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/raft_ivf_pq.yaml
index 7eaec2b77b..bcdcde42a2 100644
--- a/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/raft_ivf_pq.yaml
+++ b/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/raft_ivf_pq.yaml
@@ -6,12 +6,36 @@ groups:
   base:
     build:
       nlist: [1024, 2048, 4096, 8192]
-      pq_dim: [64, 32]
+      pq_dim: [64, 32, 16]
       pq_bits: [8, 6, 5, 4]
-      ratio: [10, 25]
+      ratio: [10]
       niter: [25]
     search:
       nprobe: [1, 5, 10, 50, 100, 200]
       internalDistanceDtype: ["float"]
       smemLutDtype: ["float", "fp8", "half"]
-      refine_ratio: [1, 2, 4]
\ No newline at end of file
+      refine_ratio: [1, 2, 4]
+  large:
+    build:
+      nlist: [8192, 16384, 32768, 65536]
+      pq_dim: [48, 32, 16]
+      pq_bits: [8, 6, 5, 4]
+      ratio: [4]
+      niter: [20]
+    search:
+      nprobe: [20, 30, 40, 50, 100, 200, 500, 1000]
+      internalDistanceDtype: ["float"]
+      smemLutDtype: ["float", "fp8", "half"]
+      refine_ratio: [1, 2, 4]
+  100M:
+    build:
+      nlist: [50000]
+      pq_dim: [48]
+      pq_bits: [8, 6, 5, 4]
+      ratio: [10]
+      niter: [10]
+    search:
+      nprobe: [20, 30, 40, 50, 100, 200, 500, 1000]
+      internalDistanceDtype: ["float"]
+      smemLutDtype: ["float", "fp8", "half"]
+      refine_ratio: [1]