diff --git a/cpp/bench/CMakeLists.txt b/cpp/bench/CMakeLists.txt
index 7a0f1d5201..51e1c41499 100644
--- a/cpp/bench/CMakeLists.txt
+++ b/cpp/bench/CMakeLists.txt
@@ -30,6 +30,7 @@ add_executable(${RAFT_CPP_BENCH_TARGET}
   bench/random/permute.cu
   bench/random/rng.cu
   bench/spatial/fused_l2_nn.cu
+  bench/spatial/knn.cu
   bench/spatial/selection.cu
   bench/main.cpp
 )
diff --git a/cpp/bench/common/benchmark.hpp b/cpp/bench/common/benchmark.hpp
index de34cf4f57..fb878a0c8d 100644
--- a/cpp/bench/common/benchmark.hpp
+++ b/cpp/bench/common/benchmark.hpp
@@ -40,7 +40,7 @@ struct using_pool_memory_res {
  private:
  rmm::mr::device_memory_resource* orig_res_;
  rmm::mr::cuda_memory_resource cuda_res_;
-  rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> pool_res_;
+  rmm::mr::pool_memory_resource<rmm::mr::device_memory_resource> pool_res_;

  public:
  using_pool_memory_res(size_t initial_size, size_t max_size)
@@ -115,13 +115,20 @@ class fixture {
    int device_id = 0;
    RAFT_CUDA_TRY(cudaGetDevice(&device_id));
    RAFT_CUDA_TRY(cudaDeviceGetAttribute(&l2_cache_size, cudaDevAttrL2CacheSize, device_id));
-    scratch_buf_ = rmm::device_buffer(l2_cache_size, stream);
+    scratch_buf_ = rmm::device_buffer(l2_cache_size * 3, stream);
  }

  // every benchmark should be overriding this
  virtual void run_benchmark(::benchmark::State& state) = 0;
  virtual void generate_metrics(::benchmark::State& state) {}

+ protected:
+  /** The helper that writes zeroes to some buffer in GPU memory to flush the L2 cache. */
+  void flush_L2_cache()
+  {
+    RAFT_CUDA_TRY(cudaMemsetAsync(scratch_buf_.data(), 0, scratch_buf_.size(), stream));
+  }
+
  /**
   * The helper to be used inside `run_benchmark`, to loop over the state and record time using the
   * cuda_event_timer.
@@ -130,9 +137,7 @@ class fixture {
  void loop_on_state(::benchmark::State& state, Lambda benchmark_func, bool flush_L2 = true)
  {
    for (auto _ : state) {
-      if (flush_L2) {
-        RAFT_CUDA_TRY(cudaMemsetAsync(scratch_buf_.data(), 0, scratch_buf_.size(), stream));
-      }
+      if (flush_L2) { flush_L2_cache(); }
      cuda_event_timer timer(state, stream);
      benchmark_func();
    }
@@ -147,9 +152,9 @@
 class Fixture : public ::benchmark::Fixture {
  public:
  explicit Fixture(const std::string name, const Params&... params)
-    : ::benchmark::Fixture(), params_(params...)
+    : ::benchmark::Fixture(), params_(params...), name_(name)
  {
-    SetName(name.c_str());
+    SetName(name_.c_str());
  }

  Fixture() = delete;
@@ -165,6 +170,7 @@ class Fixture : public ::benchmark::Fixture {
  private:
  std::unique_ptr<Class> fixture_;
  std::tuple<Params...> params_;
+  const std::string name_;

  protected:
  void BenchmarkCase(State& state) override
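Note (illustration, not part of the patch): benchmarks derive from this fixture and
implement run_benchmark; loop_on_state now calls the new flush_L2_cache() helper
before each timed iteration. A minimal sketch of a derived benchmark, assuming the
fixture is default-constructible and lives in namespace raft::bench as in this header
(sample_bench is a hypothetical name):

    struct sample_bench : public raft::bench::fixture {
      void run_benchmark(::benchmark::State& state) override
      {
        // per iteration: untimed L2 flush, then the timed lambda
        loop_on_state(state, [this]() { /* launch the kernel under test */ });
      }
    };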
diff --git a/cpp/bench/spatial/knn.cu b/cpp/bench/spatial/knn.cu
new file mode 100644
index 0000000000..72a1244269
--- /dev/null
+++ b/cpp/bench/spatial/knn.cu
@@ -0,0 +1,332 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <common/benchmark.hpp>
+
+#include <raft/random/rng.cuh>
+#include <raft/spatial/knn/knn.cuh>
+#if defined RAFT_NN_COMPILED
+#include <raft/spatial/knn/specializations.cuh>
+#endif
+
+#include <rmm/device_uvector.hpp>
+#include <rmm/mr/device/managed_memory_resource.hpp>
+
+#include <rmm/mr/host/new_delete_resource.hpp>
+#include <rmm/mr/host/pinned_memory_resource.hpp>
+
+#include <optional>
+
+namespace raft::bench::spatial {
+
+struct params {
+  /** Size of the dataset. */
+  size_t n_samples;
+  /** Number of dimensions in the dataset. */
+  size_t n_dims;
+  /** The batch size -- number of KNN searches. */
+  size_t n_queries;
+  /** Number of nearest neighbours to find for every probe. */
+  size_t k;
+};
+
+auto operator<<(std::ostream& os, const params& p) -> std::ostream&
+{
+  os << p.n_samples << "#" << p.n_dims << "#" << p.n_queries << "#" << p.k;
+  return os;
+}
+
+enum class TransferStrategy { NO_COPY, COPY_PLAIN, COPY_PINNED, MAP_PINNED, MANAGED };
+enum class Scope { BUILD, SEARCH, BUILD_SEARCH };
+
+auto operator<<(std::ostream& os, const TransferStrategy& ts) -> std::ostream&
+{
+  switch (ts) {
+    case TransferStrategy::NO_COPY: os << "NO_COPY"; break;
+    case TransferStrategy::COPY_PLAIN: os << "COPY_PLAIN"; break;
+    case TransferStrategy::COPY_PINNED: os << "COPY_PINNED"; break;
+    case TransferStrategy::MAP_PINNED: os << "MAP_PINNED"; break;
+    case TransferStrategy::MANAGED: os << "MANAGED"; break;
+    default: os << "UNKNOWN";
+  }
+  return os;
+}
+
+auto operator<<(std::ostream& os, const Scope& s) -> std::ostream&
+{
+  switch (s) {
+    case Scope::BUILD: os << "BUILD"; break;
+    case Scope::SEARCH: os << "SEARCH"; break;
+    case Scope::BUILD_SEARCH: os << "BUILD_SEARCH"; break;
+    default: os << "UNKNOWN";
+  }
+  return os;
+}
+
+struct device_resource {
+ public:
+  explicit device_resource(bool managed) : managed_(managed)
+  {
+    if (managed_) {
+      res_ = new rmm::mr::managed_memory_resource();
+    } else {
+      res_ = rmm::mr::get_current_device_resource();
+    }
+  }
+
+  ~device_resource()
+  {
+    if (managed_) { delete res_; }
+  }
+
+  [[nodiscard]] auto get() const -> rmm::mr::device_memory_resource* { return res_; }
+
+ private:
+  const bool managed_;
+  rmm::mr::device_memory_resource* res_;
+};
+
+template <typename T>
+struct host_uvector {
+  host_uvector(size_t n, bool pinned) : n_(n)
+  {
+    if (pinned) {
+      res_ = new rmm::mr::pinned_memory_resource();
+    } else {
+      res_ = new rmm::mr::new_delete_resource();
+    }
+    arr_ = static_cast<T*>(res_->allocate(n_ * sizeof(T)));
+  }
+
+  ~host_uvector() noexcept
+  {
+    res_->deallocate(arr_, n_ * sizeof(T));
+    delete res_;
+  }
+
+  auto data() -> T* { return arr_; }
+  [[nodiscard]] auto size() const -> size_t { return n_; }
+
+ private:
+  rmm::mr::host_memory_resource* res_;
+  size_t n_;
+  T* arr_;
+};
+
+template <typename ValT, typename IdxT>
+struct brute_force_knn {
+  using dist_t = ValT;
+
+  ValT* index;
+  params ps;
+
+  brute_force_knn(const raft::handle_t& handle, const params& ps, const ValT* data)
+    : index(const_cast<ValT*>(data)), ps(ps)
+  {
+  }
+
+  void search(const raft::handle_t& handle,
+              const ValT* search_items,
+              dist_t* out_dists,
+              IdxT* out_idxs)
+  {
+    std::vector<ValT*> input{index};
+    std::vector<size_t> sizes{ps.n_samples};
+    raft::spatial::knn::brute_force_knn(handle,
+                                        input,
+                                        sizes,
+                                        ps.n_dims,
+                                        const_cast<ValT*>(search_items),
+                                        ps.n_queries,
+                                        out_idxs,
+                                        out_dists,
+                                        ps.k);
+  }
+};
+
+template <typename ValT, typename IdxT, typename ImplT>
+struct knn : public fixture {
+  explicit knn(const params& p, const TransferStrategy& strategy, const Scope& scope)
+    : params_(p),
+      strategy_(strategy),
+      scope_(scope),
+      dev_mem_res_(strategy == TransferStrategy::MANAGED),
+      data_host_(0),
+      search_items_(p.n_queries * p.n_dims, stream),
+      out_dists_(p.n_queries * p.k, stream),
+      out_idxs_(p.n_queries * p.k, stream)
+  {
+    raft::random::RngState state{42};
+    gen_data(state, search_items_, search_items_.size(), stream);
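+    // The dataset is generated on the device in minibatches and copied to the
+    // host below, so inputs larger than the GPU memory can still be benchmarked.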
+    try {
+      size_t total_size = p.n_samples * p.n_dims;
+      data_host_.resize(total_size);
+      constexpr size_t kGenMinibatchSize = 1024 * 1024 * 1024;
+      rmm::device_uvector<ValT> d(std::min(kGenMinibatchSize, total_size), stream);
+      for (size_t offset = 0; offset < total_size; offset += kGenMinibatchSize) {
+        size_t actual_size = std::min(total_size - offset, kGenMinibatchSize);
+        gen_data(state, d, actual_size, stream);
+        copy(data_host_.data() + offset, d.data(), actual_size, stream);
+      }
+    } catch (std::bad_alloc& e) {
+      data_does_not_fit_ = true;
+    }
+  }
+
+  template <typename T>
+  void gen_data(raft::random::RngState& state,
+                rmm::device_uvector<T>& vec,
+                size_t n,
+                rmm::cuda_stream_view stream)
+  {
+    constexpr T kRangeMax = std::is_integral_v<T> ? std::numeric_limits<T>::max() : T(1);
+    constexpr T kRangeMin = std::is_integral_v<T> ? std::numeric_limits<T>::min() : T(-1);
+    if constexpr (std::is_integral_v<T>) {
+      raft::random::uniformInt(state, vec.data(), n, kRangeMin, kRangeMax, stream);
+    } else {
+      raft::random::uniform(state, vec.data(), n, kRangeMin, kRangeMax, stream);
+    }
+  }
+
+  void run_benchmark(::benchmark::State& state) override
+  {
+    if (data_does_not_fit_) {
+      state.SkipWithError("The data size is too big to fit into the host memory.");
+    }
+    if (scope_ == Scope::SEARCH && strategy_ != TransferStrategy::NO_COPY) {
+      state.SkipWithError(
+        "When benchmarking without index building (Scope::SEARCH), the data must be already on the "
+        "device (TransferStrategy::NO_COPY)");
+    }
+
+    using_pool_memory_res default_resource;
+
+    try {
+      std::ostringstream label_stream;
+      label_stream << params_ << "#" << strategy_ << "#" << scope_;
+      state.SetLabel(label_stream.str());
+      raft::handle_t handle(stream);
+      std::optional<ImplT> index;
+
+      if (scope_ == Scope::SEARCH) {  // also implies TransferStrategy::NO_COPY
+        rmm::device_uvector<ValT> data(data_host_.size(), stream);
+        copy(data.data(), data_host_.data(), data_host_.size(), stream);
+        index.emplace(handle, params_, data.data());
+        stream.synchronize();
+      }
+
+      // benchmark loop
+      for (auto _ : state) {
+        // managed or plain device memory initialized anew every time
+        rmm::device_uvector<ValT> data(data_host_.size(), stream, dev_mem_res_.get());
+        ValT* data_ptr         = data.data();
+        size_t allocation_size = data_host_.size() * sizeof(ValT);
+
+        // Non-benchmarked part: using different methods to copy the data if necessary
+        switch (strategy_) {
+          case TransferStrategy::NO_COPY:  // copy data to GPU before starting the timer.
+            copy(data_ptr, data_host_.data(), data_host_.size(), stream);
+            break;
+          case TransferStrategy::COPY_PINNED:
+            RAFT_CUDA_TRY(
+              cudaHostRegister(data_host_.data(), allocation_size, cudaHostRegisterDefault));
+            break;
+          case TransferStrategy::MAP_PINNED:
+            RAFT_CUDA_TRY(
+              cudaHostRegister(data_host_.data(), allocation_size, cudaHostRegisterMapped));
+            RAFT_CUDA_TRY(cudaHostGetDevicePointer(&data_ptr, data_host_.data(), 0));
+            break;
+          case TransferStrategy::MANAGED:  // sic! using std::memcpy rather than cuda copy
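+            // Placement hints for the managed allocation: prefer GPU residency and
+            // mark the data read-mostly, while the host fills it via std::memcpy below.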
+            RAFT_CUDA_TRY(cudaMemAdvise(
+              data_ptr, allocation_size, cudaMemAdviseSetPreferredLocation, handle.get_device()));
+            RAFT_CUDA_TRY(cudaMemAdvise(
+              data_ptr, allocation_size, cudaMemAdviseSetAccessedBy, handle.get_device()));
+            RAFT_CUDA_TRY(cudaMemAdvise(
+              data_ptr, allocation_size, cudaMemAdviseSetReadMostly, handle.get_device()));
+            std::memcpy(data_ptr, data_host_.data(), allocation_size);
+            break;
+          default: break;
+        }
+
+        flush_L2_cache();
+        {
+          // Timer synchronizes the stream, so all prior gpu work should be done before it sets off.
+          cuda_event_timer timer(state, stream);
+          switch (strategy_) {
+            case TransferStrategy::COPY_PLAIN:
+            case TransferStrategy::COPY_PINNED:
+              copy(data_ptr, data_host_.data(), data_host_.size(), stream);
+            default: break;
+          }
+
+          if (scope_ != Scope::SEARCH) { index.emplace(handle, params_, data_ptr); }
+          if (scope_ != Scope::BUILD) {
+            index->search(handle, search_items_.data(), out_dists_.data(), out_idxs_.data());
+          }
+        }
+
+        if (scope_ != Scope::SEARCH) { index.reset(); }
+
+        switch (strategy_) {
+          case TransferStrategy::COPY_PINNED:
+          case TransferStrategy::MAP_PINNED:
+            RAFT_CUDA_TRY(cudaHostUnregister(data_host_.data()));
+            break;
+          default: break;
+        }
+      }
+    } catch (raft::exception& e) {
+      state.SkipWithError(e.what());
+    } catch (std::bad_alloc& e) {
+      state.SkipWithError(e.what());
+    }
+  }
+
+ private:
+  const params params_;
+  const TransferStrategy strategy_;
+  const Scope scope_;
+  device_resource dev_mem_res_;
+  bool data_does_not_fit_ = false;
+
+  std::vector<ValT> data_host_;
+  rmm::device_uvector<ValT> search_items_;
+  rmm::device_uvector<typename ImplT::dist_t> out_dists_;
+  rmm::device_uvector<IdxT> out_idxs_;
+};
+
+const std::vector<params> kInputs{
+  {2000000, 128, 1000, 32}, {10000000, 128, 1000, 32}, {10000, 8192, 1000, 32}};
+
+const std::vector<TransferStrategy> kAllStrategies{
+  TransferStrategy::NO_COPY, TransferStrategy::MAP_PINNED, TransferStrategy::MANAGED};
+const std::vector<TransferStrategy> kNoCopyOnly{TransferStrategy::NO_COPY};
+
+const std::vector<Scope> kScopeFull{Scope::BUILD_SEARCH};
+const std::vector<Scope> kAllScopes{Scope::BUILD_SEARCH, Scope::SEARCH, Scope::BUILD};
+
+#define KNN_REGISTER(ValT, IdxT, ImplT, inputs, strats, scope)                     \
+  namespace BENCHMARK_PRIVATE_NAME(knn)                                            \
+  {                                                                                \
+    using KNN = knn<ValT, IdxT, ImplT<ValT, IdxT>>;                                \
+    RAFT_BENCH_REGISTER(KNN, #ValT "/" #IdxT "/" #ImplT, inputs, strats, scope);   \
+  }
+
+KNN_REGISTER(float, int64_t, brute_force_knn, kInputs, kAllStrategies, kScopeFull);
+
+KNN_REGISTER(float, uint32_t, brute_force_knn, kInputs, kNoCopyOnly, kScopeFull);
+
+}  // namespace raft::bench::spatial
diff --git a/cpp/include/raft/spatial/knn/detail/haversine_distance.cuh b/cpp/include/raft/spatial/knn/detail/haversine_distance.cuh
index c2d89aae7d..5d703bdb8d 100644
--- a/cpp/include/raft/spatial/knn/detail/haversine_distance.cuh
+++ b/cpp/include/raft/spatial/knn/detail/haversine_distance.cuh
@@ -72,7 +72,11 @@ __global__ void haversine_knn_kernel(value_idx* out_inds,
  faiss::gpu::
    BlockSelect<value_t, value_idx, false, faiss::gpu::Comparator<value_t>, warp_q, thread_q, tpb>
-      heap(faiss::gpu::Limits<value_t>::getMax(), -1, smemK, smemV, k);
+      heap(faiss::gpu::Limits<value_t>::getMax(),
+           std::numeric_limits<value_idx>::max(),
+           smemK,
+           smemV,
+           k);

  // Grid is exactly sized to rows available
  int limit = faiss::gpu::utils::roundDown(n_index_rows, faiss::gpu::kWarpSize);
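Note (illustration, not part of the patch): the heap filler id above changes from -1
to std::numeric_limits<value_idx>::max() so the "invalid index" sentinel stays
well-defined when value_idx is an unsigned type such as uint32_t. kInvalidId below
is a hypothetical name for that convention:

    #include <cstdint>
    #include <limits>

    // the sentinel pre-fills the k-selection heap; real candidates overwrite it
    template <typename value_idx>
    constexpr value_idx kInvalidId = std::numeric_limits<value_idx>::max();

    static_assert(kInvalidId<std::uint32_t> == 0xFFFFFFFFu, "all-ones for unsigned");
    static_assert(kInvalidId<std::int64_t> == 0x7FFFFFFFFFFFFFFFll, "max for signed");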
diff --git a/cpp/include/raft/spatial/knn/detail/knn_brute_force_faiss.cuh b/cpp/include/raft/spatial/knn/detail/knn_brute_force_faiss.cuh
index 196124352a..f78ffa84e1 100644
--- a/cpp/include/raft/spatial/knn/detail/knn_brute_force_faiss.cuh
+++ b/cpp/include/raft/spatial/knn/detail/knn_brute_force_faiss.cuh
@@ -268,11 +268,11 @@ void brute_force_knn_impl(
  int device;
  RAFT_CUDA_TRY(cudaGetDevice(&device));

-  rmm::device_uvector<int64_t> trans(id_ranges->size(), userStream);
+  rmm::device_uvector<IdxType> trans(id_ranges->size(), userStream);
  raft::update_device(trans.data(), id_ranges->data(), id_ranges->size(), userStream);

  rmm::device_uvector<value_t> all_D(0, userStream);
-  rmm::device_uvector<int64_t> all_I(0, userStream);
+  rmm::device_uvector<IdxType> all_I(0, userStream);

  value_t* out_D = res_D;
  IdxType* out_I = res_I;
@@ -342,6 +342,8 @@ void brute_force_knn_impl(
      args.numQueries   = n;
      args.outDistances = out_d_ptr;
      args.outIndices   = out_i_ptr;
+      args.outIndicesType = sizeof(IdxType) == 4 ? faiss::gpu::IndicesDataType::I32
+                                                 : faiss::gpu::IndicesDataType::I64;

      /**
       * @todo: Until FAISS supports pluggable allocation strategies,
diff --git a/cpp/include/raft/spatial/knn/specializations/knn.cuh b/cpp/include/raft/spatial/knn/specializations/knn.cuh
index 6cf2418d29..bbbbf67d71 100644
--- a/cpp/include/raft/spatial/knn/specializations/knn.cuh
+++ b/cpp/include/raft/spatial/knn/specializations/knn.cuh
@@ -50,6 +50,38 @@ extern template void brute_force_knn<int64_t, float, int>(raft::handle_t const& handle,
                                                           std::vector<int64_t>* translations,
                                                           distance::DistanceType metric,
                                                           float metric_arg);
+
+extern template void brute_force_knn<uint32_t, float, int>(raft::handle_t const& handle,
+                                                           std::vector<float*>& input,
+                                                           std::vector<int>& sizes,
+                                                           int D,
+                                                           float* search_items,
+                                                           int n,
+                                                           uint32_t* res_I,
+                                                           float* res_D,
+                                                           int k,
+                                                           bool rowMajorIndex,
+                                                           bool rowMajorQuery,
+                                                           std::vector<uint32_t>* translations,
+                                                           distance::DistanceType metric,
+                                                           float metric_arg);
+
+extern template void brute_force_knn<uint32_t, float, unsigned int>(
+  raft::handle_t const& handle,
+  std::vector<float*>& input,
+  std::vector<unsigned int>& sizes,
+  unsigned int D,
+  float* search_items,
+  unsigned int n,
+  uint32_t* res_I,
+  float* res_D,
+  unsigned int k,
+  bool rowMajorIndex,
+  bool rowMajorQuery,
+  std::vector<uint32_t>* translations,
+  distance::DistanceType metric,
+  float metric_arg);
+
 };  // namespace knn
 };  // namespace spatial
 };  // namespace raft
diff --git a/cpp/src/nn/specializations/knn.cu b/cpp/src/nn/specializations/knn.cu
index bb59e5b2ba..4e0a821c24 100644
--- a/cpp/src/nn/specializations/knn.cu
+++ b/cpp/src/nn/specializations/knn.cu
@@ -51,6 +51,36 @@ template void brute_force_knn<int64_t, float, int>(raft::handle_t const& handle,
                                                    distance::DistanceType metric,
                                                    float metric_arg);

+template void brute_force_knn<uint32_t, float, int>(raft::handle_t const& handle,
+                                                    std::vector<float*>& input,
+                                                    std::vector<int>& sizes,
+                                                    int D,
+                                                    float* search_items,
+                                                    int n,
+                                                    uint32_t* res_I,
+                                                    float* res_D,
+                                                    int k,
+                                                    bool rowMajorIndex,
+                                                    bool rowMajorQuery,
+                                                    std::vector<uint32_t>* translations,
+                                                    distance::DistanceType metric,
+                                                    float metric_arg);
+
+template void brute_force_knn<uint32_t, float, unsigned int>(raft::handle_t const& handle,
+                                                             std::vector<float*>& input,
+                                                             std::vector<unsigned int>& sizes,
+                                                             unsigned int D,
+                                                             float* search_items,
+                                                             unsigned int n,
+                                                             uint32_t* res_I,
+                                                             float* res_D,
+                                                             unsigned int k,
+                                                             bool rowMajorIndex,
+                                                             bool rowMajorQuery,
+                                                             std::vector<uint32_t>* translations,
+                                                             distance::DistanceType metric,
+                                                             float metric_arg);
+
 };  // namespace knn
 };  // namespace spatial
 };  // namespace raft
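Note (illustration, not part of the patch): the two new instantiations differ only in
the integer type used for the counts (the sizes vector, D, n and k) -- int vs
unsigned int -- while both emit uint32_t neighbour indices. A caller selects between
them through its argument types (all variables hypothetical):

    std::vector<int> sizes_i{n_rows};  // int counts
    raft::spatial::knn::brute_force_knn<uint32_t, float, int>(
      handle, input, sizes_i, dim, queries, n_queries, idx_u32, dist, k);

    std::vector<unsigned int> sizes_u{n_rows_u};  // unsigned counts
    raft::spatial::knn::brute_force_knn<uint32_t, float, unsigned int>(
      handle, input, sizes_u, dim_u, queries, n_queries_u, idx_u32, dist, k_u);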
diff --git a/cpp/test/spatial/knn.cu b/cpp/test/spatial/knn.cu
index bf13288f48..37e0edb6ab 100644
--- a/cpp/test/spatial/knn.cu
+++ b/cpp/test/spatial/knn.cu
@@ -16,8 +16,8 @@

 #include "../test_utils.h"

+#include <raft/common/logger.hpp>
 #include <raft/spatial/knn/knn.cuh>
-
 #include <rmm/device_uvector.hpp>
 #if defined RAFT_NN_COMPILED
 #include <raft/spatial/knn/specializations.cuh>
 #endif
@@ -40,8 +40,9 @@ struct KNNInputs {
  std::vector<int> labels;
 };

+template <typename IdxT>
 __global__ void build_actual_output(
-  int* output, int n_rows, int k, const int* idx_labels, const int64_t* indices)
+  int* output, int n_rows, int k, const int* idx_labels, const IdxT* indices)
 {
  int element = threadIdx.x + blockDim.x * blockIdx.x;
  if (element >= n_rows * k) return;
@@ -60,7 +61,7 @@ __global__ void build_expected_output(int* output, int n_rows, int k, const int* labels)
  }
 }

-template <typename T>
+template <typename T, typename IdxT>
 class KNNTest : public ::testing::TestWithParam<KNNInputs> {
  public:
  KNNTest()
@@ -79,9 +80,11 @@ class KNNTest : public ::testing::TestWithParam<KNNInputs> {
  protected:
  void testBruteForce()
  {
+#if (RAFT_ACTIVE_LEVEL >= RAFT_LEVEL_DEBUG)
    raft::print_device_vector("Input array: ", input_.data(), rows_ * cols_, std::cout);
-    std::cout << "K: " << k_ << "\n";
+    std::cout << "K: " << k_ << std::endl;
    raft::print_device_vector("Labels array: ", search_labels_.data(), rows_, std::cout);
+#endif

    std::vector<float*> input_vec;
    std::vector<int> sizes_vec;
@@ -131,7 +134,7 @@ class KNNTest : public ::testing::TestWithParam<KNNInputs> {
    RAFT_CUDA_TRY(cudaMemsetAsync(input_.data(), 0, input_.size() * sizeof(float), stream));
    RAFT_CUDA_TRY(
      cudaMemsetAsync(search_data_.data(), 0, search_data_.size() * sizeof(float), stream));
-    RAFT_CUDA_TRY(cudaMemsetAsync(indices_.data(), 0, indices_.size() * sizeof(int64_t), stream));
+    RAFT_CUDA_TRY(cudaMemsetAsync(indices_.data(), 0, indices_.size() * sizeof(IdxT), stream));
    RAFT_CUDA_TRY(cudaMemsetAsync(distances_.data(), 0, distances_.size() * sizeof(float), stream));
    RAFT_CUDA_TRY(
      cudaMemsetAsync(search_labels_.data(), 0, search_labels_.size() * sizeof(int), stream));
@@ -165,7 +168,7 @@ class KNNTest : public ::testing::TestWithParam<KNNInputs> {
  int cols_;
  rmm::device_uvector<float> input_;
  rmm::device_uvector<float> search_data_;
-  rmm::device_uvector<int64_t> indices_;
+  rmm::device_uvector<IdxT> indices_;
  rmm::device_uvector<float> distances_;
  int k_;

@@ -191,10 +194,13 @@ const std::vector<KNNInputs> inputs = {
   2,
   {0, 0, 0, 0, 0, 1, 1, 1, 1, 1}}};

-typedef KNNTest<float> KNNTestF;
-TEST_P(KNNTestF, BruteForce) { this->testBruteForce(); }
+typedef KNNTest<float, int64_t> KNNTestFint64_t;
+TEST_P(KNNTestFint64_t, BruteForce) { this->testBruteForce(); }
+typedef KNNTest<float, uint32_t> KNNTestFuint32_t;
+TEST_P(KNNTestFuint32_t, BruteForce) { this->testBruteForce(); }

-INSTANTIATE_TEST_CASE_P(KNNTest, KNNTestF, ::testing::ValuesIn(inputs));
+INSTANTIATE_TEST_CASE_P(KNNTest, KNNTestFint64_t, ::testing::ValuesIn(inputs));
+INSTANTIATE_TEST_CASE_P(KNNTest, KNNTestFuint32_t, ::testing::ValuesIn(inputs));

 }  // namespace knn
 }  // namespace spatial
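For reference, a minimal sketch of calling the new uint32_t-index path exercised by
these tests (illustration only; knn_u32 and its parameters are hypothetical, and the
buffers are assumed to be populated device memory):

    #include <raft/spatial/knn/knn.cuh>
    #include <vector>

    void knn_u32(raft::handle_t& handle,
                 float* index_data, int n_rows,       // index dataset, row-major
                 float* queries, int n_queries,       // query dataset, row-major
                 int dim, int k,
                 uint32_t* out_idx, float* out_dist)  // n_queries * k outputs each
    {
      std::vector<float*> input{index_data};
      std::vector<int> sizes{n_rows};
      // res_I being uint32_t* selects the 32-bit index instantiation; faiss then
      // writes I32 indices directly (see args.outIndicesType in the patch above).
      raft::spatial::knn::brute_force_knn(
        handle, input, sizes, dim, queries, n_queries, out_idx, out_dist, k);
    }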