diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 6f0afa4eb0..af08a1a2a4 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -332,6 +332,21 @@ if(RAFT_COMPILE_DIST_LIBRARY)
     src/distance/specializations/fused_l2_nn_double_int64.cu
     src/distance/specializations/fused_l2_nn_float_int.cu
     src/distance/specializations/fused_l2_nn_float_int64.cu
+    src/nn/specializations/detail/ivfpq_build.cu
+    src/nn/specializations/detail/ivfpq_compute_similarity_float_fast.cu
+    src/nn/specializations/detail/ivfpq_compute_similarity_float_no_basediff.cu
+    src/nn/specializations/detail/ivfpq_compute_similarity_float_no_smem_lut.cu
+    src/nn/specializations/detail/ivfpq_compute_similarity_fp8s_fast.cu
+    src/nn/specializations/detail/ivfpq_compute_similarity_fp8s_no_basediff.cu
+    src/nn/specializations/detail/ivfpq_compute_similarity_fp8s_no_smem_lut.cu
+    src/nn/specializations/detail/ivfpq_compute_similarity_fp8u_fast.cu
+    src/nn/specializations/detail/ivfpq_compute_similarity_fp8u_no_basediff.cu
+    src/nn/specializations/detail/ivfpq_compute_similarity_fp8u_no_smem_lut.cu
+    src/nn/specializations/detail/ivfpq_compute_similarity_half_fast.cu
+    src/nn/specializations/detail/ivfpq_compute_similarity_half_no_basediff.cu
+    src/nn/specializations/detail/ivfpq_compute_similarity_half_no_smem_lut.cu
+    src/nn/specializations/detail/ivfpq_search.cu
+    src/nn/specializations/detail/ivfpq_search_float_uint64_t.cu
     src/random/specializations/rmat_rectangular_generator_int_double.cu
     src/random/specializations/rmat_rectangular_generator_int64_double.cu
     src/random/specializations/rmat_rectangular_generator_int_float.cu
@@ -400,6 +415,8 @@ if(RAFT_COMPILE_NN_LIBRARY)
     src/nn/specializations/detail/ivfpq_compute_similarity_half_fast.cu
     src/nn/specializations/detail/ivfpq_compute_similarity_half_no_basediff.cu
     src/nn/specializations/detail/ivfpq_compute_similarity_half_no_smem_lut.cu
+    src/nn/specializations/detail/ivfpq_build.cu
+    src/nn/specializations/detail/ivfpq_search.cu
     src/nn/specializations/detail/ivfpq_search_float_int64_t.cu
     src/nn/specializations/detail/ivfpq_search_float_uint32_t.cu
     src/nn/specializations/detail/ivfpq_search_float_uint64_t.cu
diff --git a/cpp/include/raft/neighbors/specializations/ivf_pq_specialization.hpp b/cpp/include/raft/neighbors/specializations/ivf_pq_specialization.hpp
new file mode 100644
index 0000000000..2bce997e18
--- /dev/null
+++ b/cpp/include/raft/neighbors/specializations/ivf_pq_specialization.hpp
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <raft/neighbors/ivf_pq_types.hpp>
+
+namespace raft::neighbors::ivf_pq {
+
+// Declarations of the non-template search overloads for the supported (T, IdxT) combinations.
+// Parameter names are omitted here; see the definitions in ivfpq_search.cu for their meaning.
+#define RAFT_INST_SEARCH(T, IdxT)   \
+  void search(const handle_t&,      \
+              const search_params&, \
+              const index<IdxT>&,   \
+              const T*,             \
+              uint32_t,             \
+              uint32_t,             \
+              IdxT*,                \
+              float*,               \
+              rmm::mr::device_memory_resource*);
+
+RAFT_INST_SEARCH(float, uint64_t)
+RAFT_INST_SEARCH(int8_t, uint64_t)
+RAFT_INST_SEARCH(uint8_t, uint64_t)
+#undef RAFT_INST_SEARCH
+
+// We define overloads for build and extend with a void return type that take the index by pointer.
+// These are used in the Cython wrappers, where exception handling is not compatible with a return
+// type that has a nontrivial constructor.
+#define RAFT_INST_BUILD_EXTEND(T, IdxT)      \
+  auto build(const handle_t& handle,         \
+             const index_params& params,     \
+             const T* dataset,               \
+             IdxT n_rows,                    \
+             uint32_t dim)                   \
+    ->index<IdxT>;                           \
+                                             \
+  auto extend(const handle_t& handle,        \
+              const index<IdxT>& orig_index, \
+              const T* new_vectors,          \
+              const IdxT* new_indices,       \
+              IdxT n_rows)                   \
+    ->index<IdxT>;                           \
+                                             \
+  void build(const handle_t& handle,         \
+             const index_params& params,     \
+             const T* dataset,               \
+             IdxT n_rows,                    \
+             uint32_t dim,                   \
+             index<IdxT>* idx);              \
+                                             \
+  void extend(const handle_t& handle,        \
+              index<IdxT>* idx,              \
+              const T* new_vectors,          \
+              const IdxT* new_indices,       \
+              IdxT n_rows);
+
+RAFT_INST_BUILD_EXTEND(float, uint64_t)
+RAFT_INST_BUILD_EXTEND(int8_t, uint64_t)
+RAFT_INST_BUILD_EXTEND(uint8_t, uint64_t)
+#undef RAFT_INST_BUILD_EXTEND
+
+}  // namespace raft::neighbors::ivf_pq
diff --git a/cpp/include/raft/spatial/knn/detail/haversine_distance.cuh b/cpp/include/raft/spatial/knn/detail/haversine_distance.cuh
index b5ae9e7d5e..5c03f8f67c 100644
--- a/cpp/include/raft/spatial/knn/detail/haversine_distance.cuh
+++ b/cpp/include/raft/spatial/knn/detail/haversine_distance.cuh
@@ -27,7 +27,6 @@
 
 #include <raft/core/handle.hpp>
 #include <raft/distance/distance_types.hpp>
-#include <raft/spatial/knn/faiss_mr.hpp>
 
 namespace raft {
 namespace spatial {
diff --git a/cpp/src/nn/specializations/detail/ivfpq_build.cu b/cpp/src/nn/specializations/detail/ivfpq_build.cu
new file mode 100644
index 0000000000..9ff22a3729
--- /dev/null
+++ b/cpp/src/nn/specializations/detail/ivfpq_build.cu
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <raft/neighbors/ivf_pq.cuh>
+#include <raft/neighbors/specializations/ivf_pq_specialization.hpp>
+
+namespace raft::neighbors::ivf_pq {
+// Non-template build/extend overloads forwarding to the build<T, IdxT> / extend<T, IdxT> templates.
+#define RAFT_INST_BUILD_EXTEND(T, IdxT)                                           \
+  auto build(const handle_t& handle,                                              \
+             const index_params& params,                                          \
+             const T* dataset,                                                    \
+             IdxT n_rows,                                                         \
+             uint32_t dim)                                                        \
+    ->index<IdxT>                                                                 \
+  {                                                                               \
+    return build<T, IdxT>(handle, params, dataset, n_rows, dim);                  \
+  }                                                                               \
+  auto extend(const handle_t& handle,                                             \
+              const index<IdxT>& orig_index,                                      \
+              const T* new_vectors,                                               \
+              const IdxT* new_indices,                                            \
+              IdxT n_rows)                                                        \
+    ->index<IdxT>                                                                 \
+  {                                                                               \
+    return extend<T, IdxT>(handle, orig_index, new_vectors, new_indices, n_rows); \
+  }                                                                               \
+  /* Void-returning variants declared for the Cython wrappers. */                 \
+  void build(const handle_t& handle,                                              \
+             const index_params& params,                                          \
+             const T* dataset,                                                    \
+             IdxT n_rows,                                                         \
+             uint32_t dim,                                                        \
+             index<IdxT>* idx)                                                    \
+  {                                                                               \
+    *idx = build<T, IdxT>(handle, params, dataset, n_rows, dim);                  \
+  }                                                                               \
+  void extend(const handle_t& handle,                                             \
+              index<IdxT>* idx,                                                   \
+              const T* new_vectors,                                               \
+              const IdxT* new_indices,                                            \
+              IdxT n_rows)                                                        \
+  {                                                                               \
+    extend<T, IdxT>(handle, idx, new_vectors, new_indices, n_rows);               \
+  }
+
+RAFT_INST_BUILD_EXTEND(float, uint64_t);
+RAFT_INST_BUILD_EXTEND(int8_t, uint64_t);
+RAFT_INST_BUILD_EXTEND(uint8_t, uint64_t);
+
+#undef RAFT_INST_BUILD_EXTEND
+
+}  // namespace raft::neighbors::ivf_pq
diff --git a/cpp/src/nn/specializations/detail/ivfpq_search.cu b/cpp/src/nn/specializations/detail/ivfpq_search.cu
new file mode 100644
index 0000000000..80bf589803
--- /dev/null
+++ b/cpp/src/nn/specializations/detail/ivfpq_search.cu
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <raft/neighbors/ivf_pq.cuh>
+#include <raft/neighbors/specializations/detail/ivf_pq_search.cuh>
+#include <raft/neighbors/specializations/ivf_pq_specialization.hpp>
+
+namespace raft::neighbors::ivf_pq {
+// Non-template search overloads that forward to the search<T, IdxT> template; they are declared in
+// raft/neighbors/specializations/ivf_pq_specialization.hpp.
+#define RAFT_SEARCH_INST(T, IdxT)                                                          \
+  void search(const handle_t& handle,                                                      \
+              const search_params& params,                                                 \
+              const index<IdxT>& idx,                                                      \
+              const T* queries,                                                            \
+              uint32_t n_queries,                                                          \
+              uint32_t k,                                                                  \
+              IdxT* neighbors,                                                             \
+              float* distances,                                                            \
+              rmm::mr::device_memory_resource* mr)                                         \
+  {                                                                                        \
+    search<T, IdxT>(handle, params, idx, queries, n_queries, k, neighbors, distances, mr); \
+  }
+
+RAFT_SEARCH_INST(float, uint64_t);
+RAFT_SEARCH_INST(int8_t, uint64_t);
+RAFT_SEARCH_INST(uint8_t, uint64_t);
+#undef RAFT_SEARCH_INST
+
+}  // namespace raft::neighbors::ivf_pq
diff --git a/python/pylibraft/CMakeLists.txt b/python/pylibraft/CMakeLists.txt
index 0c69634d16..0a7d700c91 100644
--- a/python/pylibraft/CMakeLists.txt
+++ b/python/pylibraft/CMakeLists.txt
@@ -75,6 +75,7 @@ rapids_cython_init()
 
 add_subdirectory(pylibraft/common)
 add_subdirectory(pylibraft/distance)
+add_subdirectory(pylibraft/neighbors)
 add_subdirectory(pylibraft/random)
 add_subdirectory(pylibraft/cluster)
 
diff --git a/python/pylibraft/pylibraft/common/input_validation.py b/python/pylibraft/pylibraft/common/input_validation.py
index d5556a79dc..61435a859c 100644
--- a/python/pylibraft/pylibraft/common/input_validation.py
+++ b/python/pylibraft/pylibraft/common/input_validation.py
@@ -18,6 +18,8 @@
 # cython: embedsignature = True
 # cython: language_level = 3
 
+import numpy as np
+
 
 def do_dtypes_match(*cais):
     last_dtype = cais[0].__cuda_array_interface__["typestr"]
@@ -57,3 +59,37 @@ def do_shapes_match(*cais):
             return False
         last_shape = shape
     return True
+
+
+def is_c_contiguous(cai):
+    """
+    Checks whether an array is C contiguous (row-major and densely packed).
+
+    Parameters
+    ----------
+    cai : CUDA array interface
+        Dictionary with the ``typestr`` and ``shape`` entries and an optional
+        ``strides`` entry given in bytes.
+
+    Returns
+    -------
+    bool
+        True if ``strides`` is missing or None, if the array is empty, or if
+        the strides match those of a dense row-major array of that shape.
+    """
+    dt = np.dtype(cai["typestr"])
+    if "strides" not in cai or cai["strides"] is None:
+        # Per the CUDA array interface, absent strides mean C contiguous.
+        return True
+    shape = cai["shape"]
+    if 0 in shape:
+        return True
+    # Walk from the innermost dimension outwards; the stride of each
+    # dimension must equal the byte size of everything nested inside it.
+    expected = dt.itemsize
+    for extent, stride in zip(reversed(shape), reversed(cai["strides"])):
+        # The stride of a dimension of length 1 is never used for addressing.
+        if extent != 1 and stride != expected:
+            return False
+        expected *= extent
+    return True
diff --git a/python/pylibraft/pylibraft/neighbors/CMakeLists.txt b/python/pylibraft/pylibraft/neighbors/CMakeLists.txt
new file mode 100644
index 0000000000..995f698f2c
--- /dev/null
+++ b/python/pylibraft/pylibraft/neighbors/CMakeLists.txt
@@ -0,0 +1,29 @@
+# =============================================================================
+# Copyright (c) 2022, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+# =============================================================================
+
+# Libraries to link; this directory lists no Cython sources itself (SOURCE_FILES is empty below)
+set(linked_libraries raft::raft raft::distance)
+
+# No modules are listed here; the ivf_pq sources are built from the subdirectory added at the end
+rapids_cython_create_modules(
+  CXX
+  SOURCE_FILES ""
+  LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX neighbors_
+)
+
+foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS)
+  set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../library")
+endforeach()
+
+add_subdirectory(ivf_pq)
diff --git a/python/pylibraft/pylibraft/neighbors/__init__.pxd b/python/pylibraft/pylibraft/neighbors/__init__.pxd
new file mode 100644
index 0000000000..273b4497cc
--- /dev/null
+++ b/python/pylibraft/pylibraft/neighbors/__init__.pxd
@@ -0,0 +1,14 @@
+# Copyright (c) 2022, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
diff --git a/python/pylibraft/pylibraft/neighbors/__init__.py b/python/pylibraft/pylibraft/neighbors/__init__.py
new file mode 100644
index 0000000000..273b4497cc
--- /dev/null
+++ b/python/pylibraft/pylibraft/neighbors/__init__.py
@@ -0,0 +1,14 @@
+# Copyright (c) 2022, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
diff --git a/python/pylibraft/pylibraft/neighbors/ivf_pq/CMakeLists.txt b/python/pylibraft/pylibraft/neighbors/ivf_pq/CMakeLists.txt
new file mode 100644
index 0000000000..4d51915008
--- /dev/null
+++ b/python/pylibraft/pylibraft/neighbors/ivf_pq/CMakeLists.txt
@@ -0,0 +1,28 @@
+# =============================================================================
+# Copyright (c) 2022, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+# =============================================================================
+
+# Set the list of Cython files to build and the libraries they link against
+set(cython_sources ivf_pq.pyx)
+set(linked_libraries raft::raft raft::distance)
+
+# Build all of the Cython targets
+rapids_cython_create_modules(
+  CXX
+  SOURCE_FILES "${cython_sources}"
+  LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX neighbors_ivfpq_
+)
+# NOTE(review): if modules install under neighbors/ivf_pq, ../library is neighbors/library; confirm
+foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS)
+  set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../library")
+endforeach()
diff --git a/python/pylibraft/pylibraft/neighbors/ivf_pq/__init__.pxd b/python/pylibraft/pylibraft/neighbors/ivf_pq/__init__.pxd
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/python/pylibraft/pylibraft/neighbors/ivf_pq/__init__.py b/python/pylibraft/pylibraft/neighbors/ivf_pq/__init__.py
new file mode 100644
index 0000000000..8a231b2c8c
--- /dev/null
+++ b/python/pylibraft/pylibraft/neighbors/ivf_pq/__init__.py
@@ -0,0 +1,16 @@
+# Copyright (c) 2022, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from .ivf_pq import Index, IndexParams, SearchParams, build, extend, search
diff --git a/python/pylibraft/pylibraft/neighbors/ivf_pq/c_ivf_pq.pxd b/python/pylibraft/pylibraft/neighbors/ivf_pq/c_ivf_pq.pxd
new file mode 100644
index 0000000000..9728495bf8
--- /dev/null
+++ b/python/pylibraft/pylibraft/neighbors/ivf_pq/c_ivf_pq.pxd
@@ -0,0 +1,177 @@
+#
+# Copyright (c) 2022, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# cython: profile=False
+# distutils: language = c++
+# cython: embedsignature = True
+# cython: language_level = 3
+
+import numpy as np
+
+import pylibraft.common.handle
+
+from cython.operator cimport dereference as deref
+from libc.stdint cimport (
+    int8_t,
+    int64_t,
+    uint8_t,
+    uint32_t,
+    uint64_t,
+    uintptr_t,
+)
+from libcpp cimport bool, nullptr
+
+from rmm._lib.memory_resource cimport device_memory_resource
+
+from pylibraft.common.handle cimport handle_t
+from pylibraft.distance.distance_type cimport DistanceType
+
+
+cdef extern from "library_types.h":
+    ctypedef enum cudaDataType_t:  # type of the search_params dtype fields
+        CUDA_R_32F "CUDA_R_32F"  # float
+        CUDA_R_16F "CUDA_R_16F"  # half
+
+        # uint8 - used to refer to IVF-PQ's fp8 storage type
+        CUDA_R_8U "CUDA_R_8U"
+
+cdef extern from "raft/neighbors/ann_types.hpp" \
+        namespace "raft::neighbors::ann" nogil:
+    # NOTE(review): the C names below are not in raft::neighbors::ann; confirm.
+    cdef cppclass ann_index "raft::neighbors::index":
+        pass
+
+    cdef cppclass ann_index_params "raft::spatial::knn::index_params":
+        DistanceType metric
+        float metric_arg
+        bool add_data_on_build
+
+    cdef cppclass ann_search_params "raft::spatial::knn::search_params":
+        pass
+
+
+cdef extern from "raft/neighbors/ivf_pq_types.hpp" \
+        namespace "raft::neighbors::ivf_pq" nogil:
+    # index_params / search_params derive from the ann_* types declared above.
+    ctypedef enum codebook_gen:
+        PER_SUBSPACE "raft::neighbors::ivf_pq::codebook_gen::PER_SUBSPACE",
+        PER_CLUSTER "raft::neighbors::ivf_pq::codebook_gen::PER_CLUSTER"
+
+    cpdef cppclass index_params(ann_index_params):
+        uint32_t n_lists
+        uint32_t kmeans_n_iters
+        double kmeans_trainset_fraction
+        uint32_t pq_bits
+        uint32_t pq_dim
+        codebook_gen codebook_kind
+        bool force_random_rotation
+
+    cdef cppclass index[IdxT](ann_index):
+        index(const handle_t& handle,
+              DistanceType metric,
+              codebook_gen codebook_kind,
+              uint32_t n_lists,
+              uint32_t dim,
+              uint32_t pq_bits,
+              uint32_t pq_dim,
+              uint32_t n_nonempty_lists)
+
+        IdxT size()
+        uint32_t dim()
+        uint32_t pq_dim()
+        uint32_t pq_len()
+        uint32_t pq_bits()
+        DistanceType metric()
+        uint32_t n_lists()
+        uint32_t rot_dim()
+        codebook_gen codebook_kind()
+
+    cpdef cppclass search_params(ann_search_params):
+        uint32_t n_probes
+        cudaDataType_t lut_dtype
+        cudaDataType_t internal_distance_dtype
+
+
+cdef extern from "raft/neighbors/specializations/ivf_pq_specialization.hpp" \
+        namespace "raft::neighbors::ivf_pq" nogil:
+    # Void-returning overloads for IdxT = uint64_t and float/int8/uint8 data.
+    cdef void build(const handle_t& handle,
+                    const index_params& params,
+                    const float* dataset,
+                    uint64_t n_rows,
+                    uint32_t dim,
+                    index[uint64_t]* index) except +
+
+    cdef void build(const handle_t& handle,
+                    const index_params& params,
+                    const int8_t* dataset,
+                    uint64_t n_rows,
+                    uint32_t dim,
+                    index[uint64_t]* index) except +
+
+    cdef void build(const handle_t& handle,
+                    const index_params& params,
+                    const uint8_t* dataset,
+                    uint64_t n_rows,
+                    uint32_t dim,
+                    index[uint64_t]* index) except +
+
+    cdef void extend(const handle_t& handle,
+                     index[uint64_t]* index,
+                     const float* new_vectors,
+                     const uint64_t* new_indices,
+                     uint64_t n_rows) except +
+
+    cdef void extend(const handle_t& handle,
+                     index[uint64_t]* index,
+                     const int8_t* new_vectors,
+                     const uint64_t* new_indices,
+                     uint64_t n_rows) except +
+
+    cdef void extend(const handle_t& handle,
+                     index[uint64_t]* index,
+                     const uint8_t* new_vectors,
+                     const uint64_t* new_indices,
+                     uint64_t n_rows) except +
+
+    cdef void search(const handle_t& handle,
+                     const search_params& params,
+                     const index[uint64_t]& index,
+                     const float* queries,
+                     uint32_t n_queries,
+                     uint32_t k,
+                     uint64_t* neighbors,
+                     float* distances,
+                     device_memory_resource* mr) except +
+
+    cdef void search(const handle_t& handle,
+                     const search_params& params,
+                     const index[uint64_t]& index,
+                     const int8_t* queries,
+                     uint32_t n_queries,
+                     uint32_t k,
+                     uint64_t* neighbors,
+                     float* distances,
+                     device_memory_resource* mr) except +
+
+    cdef void search(const handle_t& handle,
+                     const search_params& params,
+                     const index[uint64_t]& index,
+                     const uint8_t* queries,
+                     uint32_t n_queries,
+                     uint32_t k,
+                     uint64_t* neighbors,
+                     float* distances,
+                     device_memory_resource* mr) except +
diff --git a/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx b/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx
new file mode 100644
index 0000000000..f178eecb1f
--- /dev/null
+++ b/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx
@@ -0,0 +1,727 @@
+#
+# Copyright (c) 2022, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# cython: profile=False
+# distutils: language = c++
+# cython: embedsignature = True
+# cython: language_level = 3
+
+import numpy as np
+
+from cython.operator cimport dereference as deref
+from libc.stdint cimport (
+    int8_t,
+    int64_t,
+    uint8_t,
+    uint32_t,
+    uint64_t,
+    uintptr_t,
+)
+from libcpp cimport bool, nullptr
+
+from pylibraft.distance.distance_type cimport DistanceType
+
+from pylibraft.common import Handle, device_ndarray
+from pylibraft.common.interruptible import cuda_interruptible
+
+from pylibraft.common.handle cimport handle_t
+
+from pylibraft.common.handle import auto_sync_handle
+from pylibraft.common.input_validation import is_c_contiguous
+
+from rmm._lib.memory_resource cimport (
+    DeviceMemoryResource,
+    device_memory_resource,
+)
+
+cimport pylibraft.neighbors.ivf_pq.c_ivf_pq as c_ivf_pq
+from pylibraft.neighbors.ivf_pq.c_ivf_pq cimport index_params, search_params
+
+
+def _get_metric(metric):
+    """Translate a metric name into the matching DistanceType value.
+
+    Raises ValueError when `metric` is not one of the supported names.
+    """
+    distance_by_name = {
+        "l2_expanded": DistanceType.L2Expanded,
+        # TODO(tfeher): fix inconsistency: index building for L2SqrtExpanded is
+        # only supported by build, not by search.
+        # "euclidean": DistanceType.L2SqrtExpanded
+        "inner_product": DistanceType.InnerProduct
+    }
+    if metric in distance_by_name:
+        return distance_by_name[metric]
+    raise ValueError("metric %s is not supported" % metric)
+
+
+cdef _get_metric_string(DistanceType metric):
+    # Map a DistanceType value back to its user-facing name; a value that
+    # is not in the table raises KeyError.
+    name_by_metric = {
+        DistanceType.L2Expanded: "l2_expanded",
+        DistanceType.InnerProduct: "inner_product",
+    }
+    return name_by_metric[metric]
+
+
+cdef _get_codebook_string(c_ivf_pq.codebook_gen codebook):
+    # Map a codebook_gen value to the name accepted by IndexParams; a value
+    # that is not in the table raises KeyError.
+    name_by_codebook = {
+        c_ivf_pq.codebook_gen.PER_SUBSPACE: "subspace",
+        c_ivf_pq.codebook_gen.PER_CLUSTER: "cluster",
+    }
+    return name_by_codebook[codebook]
+
+
+cdef _map_dtype_np_to_cuda(dtype, supported_dtypes=None):
+    # Convert a numpy scalar type into the corresponding cudaDataType_t.
+    # When an allow-list is given, it is checked first and a TypeError is
+    # raised for anything outside it; the table lookup itself raises
+    # KeyError for a type it does not know.
+    has_allow_list = supported_dtypes is not None
+    if has_allow_list and dtype not in supported_dtypes:
+        raise TypeError("Type %s is not supported" % str(dtype))
+    cuda_type_of = {
+        np.float32: c_ivf_pq.cudaDataType_t.CUDA_R_32F,
+        np.float16: c_ivf_pq.cudaDataType_t.CUDA_R_16F,
+        np.uint8: c_ivf_pq.cudaDataType_t.CUDA_R_8U,
+    }
+    return cuda_type_of[dtype]
+
+
+cdef _get_dtype_string(dtype):
+    # Return str() of the numpy scalar type that corresponds to the given
+    # cudaDataType_t value; unknown values raise KeyError.
+    numpy_type_of = {
+        c_ivf_pq.cudaDataType_t.CUDA_R_32F: np.float32,
+        c_ivf_pq.cudaDataType_t.CUDA_R_16F: np.float16,
+        c_ivf_pq.cudaDataType_t.CUDA_R_8U: np.uint8,
+    }
+    return str(numpy_type_of[dtype])
+
+
+def _check_input_array(cai, exp_dt, exp_rows=None, exp_cols=None):
+    """Validate a __cuda_array_interface__ dictionary.
+
+    Checks, in this order: the dtype is one of `exp_dt` (TypeError), the
+    array is C contiguous (ValueError), and, when requested, that the
+    number of columns and rows match the expectation (ValueError).
+    """
+    typestr = cai["typestr"]
+    if typestr not in exp_dt:
+        raise TypeError("dtype %s not supported" % typestr)
+
+    if not is_c_contiguous(cai):
+        raise ValueError("Row major input is expected")
+
+    if exp_cols is not None:
+        n_cols = cai["shape"][1]
+        if n_cols != exp_cols:
+            raise ValueError("Incorrect number of columns, expected {} got {}"
+                             .format(exp_cols, n_cols))
+
+    if exp_rows is not None:
+        n_rows = cai["shape"][0]
+        if n_rows != exp_rows:
+            raise ValueError("Incorrect number of rows, expected {} , got {}"
+                             .format(exp_rows, n_rows))
+
+
+cdef class IndexParams:
+    # C++ index_params struct that __init__ fills in and build() reads.
+    cdef c_ivf_pq.index_params params
+
+    def __init__(self, *,
+                 n_lists=1024,
+                 metric="l2_expanded",
+                 kmeans_n_iters=20,
+                 kmeans_trainset_fraction=0.5,
+                 pq_bits=8,
+                 pq_dim=0,
+                 codebook_kind="subspace",
+                 force_random_rotation=False,
+                 add_data_on_build=True):
+        """
+        Parameters to build index for IVF-PQ nearest neighbor search
+
+        Parameters
+        ----------
+        n_lists : int, default = 1024
+            The number of clusters used in the coarse quantizer.
+        metric : string denoting the metric type, default="l2_expanded"
+            Valid values for metric: ["l2_expanded", "inner_product"], where
+            - l2_expanded is the Euclidean distance without the square root
+              operation, i.e.: distance(a,b) = \\sum_i (a_i - b_i)^2,
+            - inner product distance is defined as
+              distance(a, b) = \\sum_i a_i * b_i.
+        kmeans_n_iters : int, default = 20
+            The number of iterations searching for kmeans centers during index
+            building.
+        kmeans_trainset_fraction : float, default = 0.5
+            If kmeans_trainset_fraction is less than 1, then the dataset is
+            subsampled, and only n_samples * kmeans_trainset_fraction rows
+            are used for training.
+        pq_bits : int, default = 8
+            The bit length of the vector element after quantization.
+        pq_dim : int, default = 0
+            The dimensionality of the vector after product quantization.
+            When zero, an optimal value is selected using a heuristic. Note
+            pq_dim * pq_bits must be a multiple of 8. Hint: a smaller 'pq_dim'
+            results in a smaller index size and better search performance, but
+            lower recall. If 'pq_bits' is 8, 'pq_dim' can be set to any number,
+            but multiples of 8 are desirable for good performance. If 'pq_bits'
+            is not 8, 'pq_dim' should be a multiple of 8. For good performance,
+            it is desirable that 'pq_dim' is a multiple of 32. Ideally,
+            'pq_dim' should also be a divisor of the dataset dim.
+        codebook_kind : string, default = "subspace"
+            Valid values ["subspace", "cluster"]
+        force_random_rotation : bool, default = False
+            Apply a random rotation matrix on the input data and queries even
+            if `dim % pq_dim == 0`. Note: if `dim` is not a multiple of
+            `pq_dim`, a random rotation is always applied to the input data
+            and queries to transform the working space from `dim` to
+            `rot_dim`, which may be slightly larger than the original space
+            and is a multiple of `pq_dim` (`rot_dim % pq_dim == 0`). However,
+            this transform is not necessary when `dim` is a multiple of
+            `pq_dim` (`dim == rot_dim`, hence no need for adding "extra" data
+            columns / features). By default, if `dim == rot_dim`, the
+            rotation transform is initialized with the identity matrix. When
+            `force_random_rotation == True`, a random orthogonal transform
+            matrix is generated regardless of the values of `dim` and `pq_dim`.
+        add_data_on_build : bool, default = True
+            After training the coarse and fine quantizers, we will populate
+            the index with the dataset if add_data_on_build == True, otherwise
+            the index is left empty, and the extend method can be used
+            to add new vectors to the index.
+
+        Raises
+        ------
+        ValueError
+            If `metric` or `codebook_kind` is not one of the valid values.
+
+        """
+        self.params.n_lists = n_lists
+        self.params.metric = _get_metric(metric)
+        # metric_arg is not exposed through this wrapper; it is always zero.
+        self.params.metric_arg = 0
+        self.params.kmeans_n_iters = kmeans_n_iters
+        self.params.kmeans_trainset_fraction = kmeans_trainset_fraction
+        self.params.pq_bits = pq_bits
+        self.params.pq_dim = pq_dim
+        # Translate the codebook name into the C++ enum value.
+        if codebook_kind == "subspace":
+            self.params.codebook_kind = c_ivf_pq.codebook_gen.PER_SUBSPACE
+        elif codebook_kind == "cluster":
+            self.params.codebook_kind = c_ivf_pq.codebook_gen.PER_CLUSTER
+        else:
+            raise ValueError("Incorrect codebook kind %s" % codebook_kind)
+        self.params.force_random_rotation = force_random_rotation
+        self.params.add_data_on_build = add_data_on_build
+
+    # The read-only properties below return the values stored in the
+    # underlying C++ struct. `metric` and `codebook_kind` return the stored
+    # enum value, not the string that was passed to __init__.
+    @property
+    def n_lists(self):
+        return self.params.n_lists
+
+    @property
+    def metric(self):
+        return self.params.metric
+
+    @property
+    def kmeans_n_iters(self):
+        return self.params.kmeans_n_iters
+
+    @property
+    def kmeans_trainset_fraction(self):
+        return self.params.kmeans_trainset_fraction
+
+    @property
+    def pq_bits(self):
+        return self.params.pq_bits
+
+    @property
+    def pq_dim(self):
+        return self.params.pq_dim
+
+    @property
+    def codebook_kind(self):
+        return self.params.codebook_kind
+
+    @property
+    def force_random_rotation(self):
+        return self.params.force_random_rotation
+
+    @property
+    def add_data_on_build(self):
+        return self.params.add_data_on_build
+
+
+cdef class Index:
+    """
+    Python-side owner of a C++ IVF-PQ index with uint64 indices.
+
+    A new object holds a placeholder index and has ``trained == False``;
+    ``build`` fills the index in and sets ``trained`` to True. The
+    properties forward to the accessors of the underlying C++ index.
+    """
+    # We store a pointer to the index because it does not have a trivial
+    # constructor.
+    cdef c_ivf_pq.index[uint64_t] * index
+    # Readable from Python, writable only from Cython code.
+    cdef readonly bool trained
+
+    def __cinit__(self, handle=None):
+        self.trained = False
+        self.index = NULL
+        if handle is None:
+            handle = Handle()
+        cdef handle_t* handle_ = <handle_t*><size_t>handle.getHandle()
+
+        # We create a placeholder object. The actual parameter values do
+        # not matter, it will be replaced with a built index object later.
+        self.index = new c_ivf_pq.index[uint64_t](
+            deref(handle_), _get_metric("l2_expanded"),
+            c_ivf_pq.codebook_gen.PER_SUBSPACE,
+            <uint32_t>1,
+            <uint32_t>4,
+            <uint32_t>8,
+            <uint32_t>0,
+            <uint32_t>0)
+
+    def __dealloc__(self):
+        # Release the heap-allocated C++ index created in __cinit__.
+        if self.index is not NULL:
+            del self.index
+
+    def __repr__(self):
+        # metric and codebook kind are rendered through their string names,
+        # the remaining attributes through the properties defined below.
+        m_str = "metric=" + _get_metric_string(self.index.metric())
+        code_str = "codebook=" + _get_codebook_string(
+            self.index.codebook_kind())
+        attr_str = [attr + "=" + str(getattr(self, attr))
+                    for attr in ["size", "dim", "pq_dim", "pq_bits",
+                                 "n_lists", "rot_dim"]]
+        attr_str = [m_str, code_str] + attr_str
+        return "Index(type=IVF-PQ, " + (", ".join(attr_str)) + ")"
+
+    @property
+    def dim(self):
+        return self.index[0].dim()
+
+    @property
+    def size(self):
+        return self.index[0].size()
+
+    @property
+    def pq_dim(self):
+        return self.index[0].pq_dim()
+
+    @property
+    def pq_len(self):
+        return self.index[0].pq_len()
+
+    @property
+    def pq_bits(self):
+        return self.index[0].pq_bits()
+
+    @property
+    def metric(self):
+        return self.index[0].metric()
+
+    @property
+    def n_lists(self):
+        return self.index[0].n_lists()
+
+    @property
+    def rot_dim(self):
+        return self.index[0].rot_dim()
+
+    @property
+    def codebook_kind(self):
+        return self.index[0].codebook_kind()
+
+
+@auto_sync_handle
+def build(IndexParams index_params, dataset, handle=None):
+    """
+    Builds an IVF-PQ index that can be later used for nearest neighbor search.
+
+    Parameters
+    ----------
+    index_params : IndexParams object
+    dataset : CUDA array interface compliant matrix shape (n_samples, dim)
+        Supported dtype [float, int8, uint8]. Must be row major
+        (C contiguous).
+    {handle_docstring}
+
+    Returns
+    -------
+    index: ivf_pq.Index
+        A new index object with its ``trained`` flag set to True.
+
+    Raises
+    ------
+    TypeError
+        If the dtype of `dataset` is not supported.
+    ValueError
+        If `dataset` is not C contiguous.
+
+    Examples
+    --------
+
+    .. code-block:: python
+
+        import cupy as cp
+
+        from pylibraft.common import Handle
+        from pylibraft.neighbors import ivf_pq
+
+        n_samples = 50000
+        n_features = 50
+        n_queries = 1000
+
+        dataset = cp.random.random_sample((n_samples, n_features),
+            dtype=cp.float32)
+        handle = Handle()
+        index_params = ivf_pq.IndexParams(
+            n_lists=1024,
+            metric="l2_expanded",
+            pq_dim=10)
+        index = ivf_pq.build(index_params, dataset, handle=handle)
+
+        # Search using the built index
+        queries = cp.random.random_sample((n_queries, n_features),
+                                          dtype=cp.float32)
+        k = 10
+        distances, neighbors = ivf_pq.search(ivf_pq.SearchParams(), index,
+                                             queries, k, handle=handle)
+
+        distances = cp.asarray(distances)
+        neighbors = cp.asarray(neighbors)
+
+        # pylibraft functions are often asynchronous so the
+        # handle needs to be explicitly synchronized
+        handle.sync()
+
+    """
+    dataset_cai = dataset.__cuda_array_interface__
+    dataset_dt = np.dtype(dataset_cai["typestr"])
+    # Validate dtype and memory layout before touching the device pointer.
+    _check_input_array(dataset_cai, [np.dtype('float32'), np.dtype('byte'),
+                                     np.dtype('ubyte')])
+    cdef uintptr_t dataset_ptr = dataset_cai["data"][0]
+
+    cdef uint64_t n_rows = dataset_cai["shape"][0]
+    cdef uint32_t dim = dataset_cai["shape"][1]
+
+    if handle is None:
+        handle = Handle()
+    cdef handle_t* handle_ = <handle_t*><size_t>handle.getHandle()
+
+    # Placeholder index; the C++ build call below writes into idx.index.
+    idx = Index()
+
+    # Dispatch on the element type: the raw pointer is cast to the matching
+    # C type so that the corresponding C++ overload is selected.
+    if dataset_dt == np.float32:
+        with cuda_interruptible():
+            c_ivf_pq.build(deref(handle_),
+                           index_params.params,
+                           <float*> dataset_ptr,
+                           n_rows,
+                           dim,
+                           idx.index)
+        idx.trained = True
+    elif dataset_dt == np.byte:
+        with cuda_interruptible():
+            c_ivf_pq.build(deref(handle_),
+                           index_params.params,
+                           <int8_t*> dataset_ptr,
+                           n_rows,
+                           dim,
+                           idx.index)
+        idx.trained = True
+    elif dataset_dt == np.ubyte:
+        with cuda_interruptible():
+            c_ivf_pq.build(deref(handle_),
+                           index_params.params,
+                           <uint8_t*> dataset_ptr,
+                           n_rows,
+                           dim,
+                           idx.index)
+        idx.trained = True
+    else:
+        # Defensive fallback: _check_input_array above already rejects
+        # every dtype that would reach this branch.
+        raise TypeError("dtype %s not supported" % dataset_dt)
+
+    return idx
+
+
+@auto_sync_handle
+def extend(Index index, new_vectors, new_indices, handle=None):
+    """
+    Extend an existing index with new vectors.
+
+    The same Index object that was passed in is returned.
+
+    Parameters
+    ----------
+    index : ivf_pq.Index
+        Trained ivf_pq object.
+    new_vectors : CUDA array interface compliant matrix shape (n_samples, dim)
+        Supported dtype [float, int8, uint8]. The number of columns must
+        equal `index.dim`.
+    new_indices : CUDA array interface compliant vector shape (n_samples,)
+        Supported dtype [uint64]
+    {handle_docstring}
+
+    Returns
+    -------
+    index: ivf_pq.Index
+
+    Raises
+    ------
+    ValueError
+        If the index has not been built, an input is not C contiguous, the
+        shapes do not match, or `new_indices` is not one dimensional.
+    TypeError
+        If the dtype of an input is not supported.
+
+    Examples
+    --------
+
+    .. code-block:: python
+
+        import cupy as cp
+
+        from pylibraft.common import Handle
+        from pylibraft.neighbors import ivf_pq
+
+        n_samples = 50000
+        n_features = 50
+        n_queries = 1000
+
+        dataset = cp.random.random_sample((n_samples, n_features),
+                                          dtype=cp.float32)
+        handle = Handle()
+        index = ivf_pq.build(ivf_pq.IndexParams(), dataset, handle=handle)
+
+        n_rows = 100
+        more_data = cp.random.random_sample((n_rows, n_features),
+                                            dtype=cp.float32)
+        indices = index.size + cp.arange(n_rows, dtype=cp.uint64)
+        index = ivf_pq.extend(index, more_data, indices, handle=handle)
+
+        # Search using the built index
+        queries = cp.random.random_sample((n_queries, n_features),
+                                          dtype=cp.float32)
+        k = 10
+        distances, neighbors = ivf_pq.search(ivf_pq.SearchParams(),
+                                             index, queries,
+                                             k, handle=handle)
+
+        # pylibraft functions are often asynchronous so the
+        # handle needs to be explicitly synchronized
+        handle.sync()
+
+        distances = cp.asarray(distances)
+        neighbors = cp.asarray(neighbors)
+    """
+    if not index.trained:
+        raise ValueError("Index need to be built before calling extend.")
+
+    if handle is None:
+        handle = Handle()
+    cdef handle_t* handle_ = <handle_t*><size_t>handle.getHandle()
+
+    vecs_cai = new_vectors.__cuda_array_interface__
+    vecs_dt = np.dtype(vecs_cai["typestr"])
+    cdef uint64_t n_rows = vecs_cai["shape"][0]
+
+    # The new vectors must have the dimensionality the index was built with.
+    _check_input_array(vecs_cai, [np.dtype('float32'), np.dtype('byte'),
+                                  np.dtype('ubyte')],
+                       exp_cols=index.dim)
+
+    # One uint64 id is required for every new vector.
+    idx_cai = new_indices.__cuda_array_interface__
+    _check_input_array(idx_cai, [np.dtype('uint64')], exp_rows=n_rows)
+    if len(idx_cai["shape"])!=1:
+        raise ValueError("Indices array is expected to be 1D")
+
+    cdef uintptr_t vecs_ptr = vecs_cai["data"][0]
+    cdef uintptr_t idx_ptr = idx_cai["data"][0]
+
+    # Dispatch on the element type: the raw pointer is cast to the matching
+    # C type so that the corresponding C++ overload is selected.
+    if vecs_dt == np.float32:
+        with cuda_interruptible():
+            c_ivf_pq.extend(deref(handle_),
+                            index.index,
+                            <float*>vecs_ptr,
+                            <uint64_t*> idx_ptr,
+                            <uint64_t> n_rows)
+    elif vecs_dt == np.int8:
+        with cuda_interruptible():
+            c_ivf_pq.extend(deref(handle_),
+                            index.index,
+                            <int8_t*>vecs_ptr,
+                            <uint64_t*> idx_ptr,
+                            <uint64_t> n_rows)
+    elif vecs_dt == np.uint8:
+        with cuda_interruptible():
+            c_ivf_pq.extend(deref(handle_),
+                            index.index,
+                            <uint8_t*>vecs_ptr,
+                            <uint64_t*> idx_ptr,
+                            <uint64_t> n_rows)
+    else:
+        # Defensive fallback: _check_input_array above already rejects
+        # every dtype that would reach this branch.
+        raise TypeError("dtype %s not supported" % vecs_dt)
+
+    return index
+
+
+cdef class SearchParams:
+    # C++ search_params struct that __init__ fills in and search() reads.
+    cdef c_ivf_pq.search_params params
+
+    def __init__(self, *, n_probes=20,
+                 lut_dtype=np.float32,
+                 internal_distance_dtype=np.float32):
+        """
+        IVF-PQ search parameters
+
+        Parameters
+        ----------
+        n_probes: int, default = 20
+            The number of coarse clusters to select for the fine search.
+        lut_dtype: default = np.float32
+            Data type of look up table to be created dynamically at search
+            time. The use of low-precision types reduces the amount of shared
+            memory required at search time, so fast shared memory kernels can
+            be used even for datasets with large dimensionality. Note that
+            the recall is slightly degraded when low-precision type is
+            selected. Possible values [np.float32, np.float16, np.uint8]
+        internal_distance_dtype: default = np.float32
+            Storage data type for distance/similarity computation.
+            Possible values [np.float32, np.float16]
+
+        Raises
+        ------
+        TypeError
+            If `internal_distance_dtype` is not np.float32 or np.float16.
+        """
+
+        self.params.n_probes = n_probes
+        self.params.lut_dtype = _map_dtype_np_to_cuda(lut_dtype)
+        # Reject np.uint8 here: it is a valid look up table type but not a
+        # documented internal distance type, so fail at construction time
+        # instead of handing an unsupported value to the search call.
+        self.params.internal_distance_dtype = \
+            _map_dtype_np_to_cuda(internal_distance_dtype,
+                                  [np.float32, np.float16])
+        # TODO(tfeher): enable if #926 adds this
+        # self.params.shmem_carveout = self.shmem_carveout
+
+    def __repr__(self):
+        lut_str = "lut_dtype=" + _get_dtype_string(self.params.lut_dtype)
+        idt_str = "internal_distance_dtype=" + \
+            _get_dtype_string(self.params.internal_distance_dtype)
+        attr_str = [attr + "=" + str(getattr(self, attr))
+                    for attr in ["n_probes"]]
+        # TODO (tfeher) add "shmem_carveout"
+        attr_str = attr_str + [lut_str, idt_str]
+        return "SearchParams(type=IVF-PQ, " + (", ".join(attr_str)) + ")"
+
+    # The properties below return the values stored in the C++ struct; the
+    # two dtype properties return the stored cudaDataType_t value, not the
+    # numpy type that was passed to __init__.
+    @property
+    def n_probes(self):
+        return self.params.n_probes
+
+    @property
+    def lut_dtype(self):
+        return self.params.lut_dtype
+
+    @property
+    def internal_distance_dtype(self):
+        return self.params.internal_distance_dtype
+
+
+@auto_sync_handle
+def search(SearchParams search_params,
+           Index index,
+           queries,
+           k,
+           neighbors=None,
+           distances=None,
+           DeviceMemoryResource memory_resource=None,
+           handle=None):
+    """
+    Find the k nearest neighbors for each query.
+
+    Parameters
+    ----------
+    search_params : SearchParams
+    index : Index
+        Trained IVF-PQ index.
+    queries : CUDA array interface compliant matrix shape (n_queries, dim)
+        Supported dtype [float, int8, uint8]. The number of columns must
+        equal `index.dim`.
+    k : int
+        The number of neighbors.
+    neighbors : Optional CUDA array interface compliant matrix shape
+                (n_queries, k), dtype uint64_t. If supplied, neighbor
+                indices will be written here in-place. (default None)
+    distances : Optional CUDA array interface compliant matrix shape
+                (n_queries, k), dtype float32. If supplied, the distances
+                to the neighbors will be written here in-place.
+                (default None)
+    memory_resource : RMM DeviceMemoryResource object, optional
+        This can be used to explicitly manage the temporary memory
+        allocation during search. Passing a pooling allocator can reduce
+        memory allocation overhead. If not specified, then the memory
+        resource from the raft handle is used.
+    {handle_docstring}
+
+    Returns
+    -------
+    (distances, neighbors) : tuple
+        The arrays passed in by the caller, or newly allocated
+        device_ndarray objects when they were not supplied.
+
+    Examples
+    --------
+    .. code-block:: python
+
+        import cupy as cp
+        import numpy as np
+        import rmm
+
+        from pylibraft.common import Handle
+        from pylibraft.neighbors import ivf_pq
+
+        n_samples = 50000
+        n_features = 50
+        n_queries = 1000
+        dataset = cp.random.random_sample((n_samples, n_features),
+                                          dtype=cp.float32)
+
+        # Build index
+        handle = Handle()
+        index = ivf_pq.build(ivf_pq.IndexParams(), dataset, handle=handle)
+
+        # Search using the built index
+        queries = cp.random.random_sample((n_queries, n_features),
+                                          dtype=cp.float32)
+        k = 10
+        search_params = ivf_pq.SearchParams(
+            n_probes=20,
+            lut_dtype=np.float16,
+            internal_distance_dtype=np.float32
+        )
+
+        # Using a pooling allocator reduces overhead of temporary array
+        # creation during search. This is useful if multiple searches
+        # are performed with same query size.
+        mr = rmm.mr.PoolMemoryResource(
+            rmm.mr.CudaMemoryResource(),
+            initial_pool_size=2**29,
+            maximum_pool_size=2**31
+        )
+        distances, neighbors = ivf_pq.search(search_params, index, queries,
+                                             k, memory_resource=mr,
+                                             handle=handle)
+
+        # pylibraft functions are often asynchronous so the
+        # handle needs to be explicitly synchronized
+        handle.sync()
+
+        neighbors = cp.asarray(neighbors)
+        distances = cp.asarray(distances)
+    """
+
+    if not index.trained:
+        raise ValueError("Index need to be built before calling search.")
+
+    if handle is None:
+        handle = Handle()
+    cdef handle_t* handle_ = <handle_t*><size_t>handle.getHandle()
+
+    queries_cai = queries.__cuda_array_interface__
+    queries_dt = np.dtype(queries_cai["typestr"])
+    cdef uint32_t n_queries = queries_cai["shape"][0]
+
+    # Queries must have the dimensionality the index was built with.
+    _check_input_array(queries_cai, [np.dtype('float32'), np.dtype('byte'),
+                                     np.dtype('ubyte')],
+                       exp_cols=index.dim)
+
+    # Allocate the output arrays unless the caller supplied them; either
+    # way they are validated to be (n_queries, k) with the expected dtype.
+    if neighbors is None:
+        neighbors = device_ndarray.empty((n_queries, k), dtype='uint64')
+
+    neighbors_cai = neighbors.__cuda_array_interface__
+    _check_input_array(neighbors_cai, [np.dtype('uint64')],
+                       exp_rows=n_queries, exp_cols=k)
+
+    if distances is None:
+        distances = device_ndarray.empty((n_queries, k), dtype='float32')
+
+    distances_cai = distances.__cuda_array_interface__
+    _check_input_array(distances_cai, [np.dtype('float32')],
+                       exp_rows=n_queries, exp_cols=k)
+
+    cdef c_ivf_pq.search_params params = search_params.params
+
+    cdef uintptr_t queries_ptr = queries_cai["data"][0]
+    cdef uintptr_t neighbors_ptr = neighbors_cai["data"][0]
+    cdef uintptr_t distances_ptr = distances_cai["data"][0]
+    # A null memory resource pointer is handed to the C++ search call
+    # unless the caller supplied an RMM memory resource.
+    cdef device_memory_resource* mr_ptr = <device_memory_resource*> nullptr
+    if memory_resource is not None:
+        mr_ptr = memory_resource.get_mr()
+
+    # Dispatch on the element type: the raw pointer is cast to the matching
+    # C type so that the corresponding C++ overload is selected.
+    if queries_dt == np.float32:
+        with cuda_interruptible():
+            c_ivf_pq.search(deref(handle_),
+                            params,
+                            deref(index.index),
+                            <float*>queries_ptr,
+                            <uint32_t> n_queries,
+                            <uint32_t> k,
+                            <uint64_t*> neighbors_ptr,
+                            <float*> distances_ptr,
+                            mr_ptr)
+    elif queries_dt == np.byte:
+        with cuda_interruptible():
+            c_ivf_pq.search(deref(handle_),
+                            params,
+                            deref(index.index),
+                            <int8_t*>queries_ptr,
+                            <uint32_t> n_queries,
+                            <uint32_t> k,
+                            <uint64_t*> neighbors_ptr,
+                            <float*> distances_ptr,
+                            mr_ptr)
+    elif queries_dt == np.ubyte:
+        with cuda_interruptible():
+            c_ivf_pq.search(deref(handle_),
+                            params,
+                            deref(index.index),
+                            <uint8_t*>queries_ptr,
+                            <uint32_t> n_queries,
+                            <uint32_t> k,
+                            <uint64_t*> neighbors_ptr,
+                            <float*> distances_ptr,
+                            mr_ptr)
+    else:
+        # Defensive fallback: _check_input_array above already rejects
+        # every dtype that would reach this branch.
+        raise ValueError("query dtype %s not supported" % queries_dt)
+
+    return (distances, neighbors)
diff --git a/python/pylibraft/pylibraft/test/test_ivf_pq.py b/python/pylibraft/pylibraft/test/test_ivf_pq.py
new file mode 100644
index 0000000000..367ff6d44a
--- /dev/null
+++ b/python/pylibraft/pylibraft/test/test_ivf_pq.py
@@ -0,0 +1,482 @@
+# Copyright (c) 2022, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import numpy as np
+import pytest
+from sklearn.metrics import pairwise_distances
+from sklearn.neighbors import NearestNeighbors
+from sklearn.preprocessing import normalize
+
+from pylibraft.common import device_ndarray
+from pylibraft.neighbors import ivf_pq
+
+
+def generate_data(shape, dtype):
+    # Integer dtypes draw uniform integers; anything else draws from [0, 1).
+    if dtype == np.byte:
+        return np.random.randint(-127, 128, size=shape, dtype=np.byte)
+    if dtype == np.ubyte:
+        return np.random.randint(0, 255, size=shape, dtype=np.ubyte)
+
+    # Remaining dtypes: sample floats first, then cast to the requested type.
+    return np.random.random_sample(shape).astype(dtype)
+
+
+def calc_recall(ann_idx, true_nn_idx):
+    assert ann_idx.shape == true_nn_idx.shape
+    overlap = [
+        np.intersect1d(ann_idx[row, :], true_nn_idx[row, :]).size
+        for row in range(ann_idx.shape[0])
+    ]
+    return sum(overlap) / ann_idx.size  # share of ids found in both rows
+
+
+def check_distances(dataset, queries, metric, out_idx, out_dist):
+    """Recompute query-to-neighbor distances on the host and assert that
+    their mean relative error with respect to out_dist stays below 0.1.
+    """
+    dist = np.empty(out_dist.shape, out_dist.dtype)
+    for i in range(queries.shape[0]):
+        X = queries[np.newaxis, i, :]
+        Y = dataset[out_idx[i, :], :]
+        if metric == "l2_expanded":
+            dist[i, :] = pairwise_distances(X, Y, "euclidean")
+        elif metric == "inner_product":
+            dist[i, :] = np.matmul(X, Y.T)
+        else:
+            raise ValueError("Invalid metric")
+
+    # Note: raft l2 metric does not include the square root operation like
+    # sklearn's euclidean.
+    if metric == "l2_expanded":
+        dist = np.power(dist, 2)
+
+    # Floor the denominator by magnitude: a negative reference value (possible
+    # for inner_product) must keep its own scale, not the 1e-3 floor.
+    dist_eps = abs(dist)
+    dist_eps[dist_eps < 1e-3] = 1e-3
+    diff = abs(out_dist - dist) / dist_eps
+
+    # Quantization leads to errors in the distance calculation; this check
+    # is meant to catch obvious errors, not to measure precision.
+    assert np.mean(diff) < 0.1
+
+
+def run_ivf_pq_build_search_test(
+    n_rows,
+    n_cols,
+    n_queries,
+    k,
+    n_lists,
+    metric,
+    dtype,
+    pq_bits=8,
+    pq_dim=0,
+    codebook_kind="subspace",
+    add_data_on_build=True,
+    n_probes=100,
+    lut_dtype=np.float32,
+    internal_distance_dtype=np.float32,
+    force_random_rotation=False,
+    kmeans_trainset_fraction=1,
+    kmeans_n_iters=20,
+    compare=True,
+    inplace=True,
+):
+    """Build an IVF-PQ index (extended afterwards if add_data_on_build is
+    false), search it and, if compare is set, validate against sklearn.
+    """
+    dataset = generate_data((n_rows, n_cols), dtype)
+    if metric == "inner_product":
+        dataset = normalize(dataset, norm="l2", axis=1)
+    dataset_device = device_ndarray(dataset)
+
+    build_params = ivf_pq.IndexParams(
+        n_lists=n_lists,
+        metric=metric,
+        kmeans_n_iters=kmeans_n_iters,
+        kmeans_trainset_fraction=kmeans_trainset_fraction,
+        pq_bits=pq_bits,
+        pq_dim=pq_dim,
+        codebook_kind=codebook_kind,
+        force_random_rotation=force_random_rotation,
+        add_data_on_build=add_data_on_build,
+    )
+
+    index = ivf_pq.build(build_params, dataset_device)
+
+    assert index.trained
+    assert pq_dim == 0 or index.pq_dim == build_params.pq_dim
+    assert index.pq_bits == build_params.pq_bits
+    assert index.metric == build_params.metric
+    assert index.n_lists == build_params.n_lists
+
+    if not add_data_on_build:
+        dataset_1_device = device_ndarray(dataset[: n_rows // 2, :])
+        dataset_2_device = device_ndarray(dataset[n_rows // 2 :, :])
+        indices_1 = np.arange(n_rows // 2, dtype=np.uint64)
+        indices_1_device = device_ndarray(indices_1)
+        indices_2 = np.arange(n_rows // 2, n_rows, dtype=np.uint64)
+        indices_2_device = device_ndarray(indices_2)
+        index = ivf_pq.extend(index, dataset_1_device, indices_1_device)
+        index = ivf_pq.extend(index, dataset_2_device, indices_2_device)
+
+    assert index.size == n_rows
+
+    queries = generate_data((n_queries, n_cols), dtype)
+    out_idx = np.zeros((n_queries, k), dtype=np.uint64)
+    out_dist = np.zeros((n_queries, k), dtype=np.float32)
+
+    queries_device = device_ndarray(queries)
+    out_idx_device = device_ndarray(out_idx) if inplace else None
+    out_dist_device = device_ndarray(out_dist) if inplace else None
+
+    search_params = ivf_pq.SearchParams(
+        n_probes=n_probes,
+        lut_dtype=lut_dtype,
+        internal_distance_dtype=internal_distance_dtype,
+    )
+
+    ret_output = ivf_pq.search(
+        search_params,
+        index,
+        queries_device,
+        k,
+        neighbors=out_idx_device,
+        distances=out_dist_device,
+    )
+
+    if not inplace:
+        out_dist_device, out_idx_device = ret_output
+
+    if not compare:
+        return
+
+    out_idx = out_idx_device.copy_to_host()
+    out_dist = out_dist_device.copy_to_host()
+
+    # Calculate reference values with sklearn
+    metric_map = {"l2_expanded": "euclidean", "inner_product": "cosine"}
+    skl_metric = metric_map[metric]
+    nn_skl = NearestNeighbors(
+        n_neighbors=k, algorithm="brute", metric=skl_metric
+    )
+    nn_skl.fit(dataset)
+    skl_idx = nn_skl.kneighbors(queries, return_distance=False)
+
+    assert calc_recall(out_idx, skl_idx) > 0.7
+
+    check_distances(dataset, queries, metric, out_idx, out_dist)
+
+
+@pytest.mark.parametrize("inplace", [True, False])
+@pytest.mark.parametrize("n_rows", [10000])
+@pytest.mark.parametrize("n_cols", [10])
+@pytest.mark.parametrize("n_queries", [100])
+@pytest.mark.parametrize("n_lists", [100])
+@pytest.mark.parametrize("dtype", [np.float32, np.int8, np.uint8])
+def test_ivf_pq_dtypes(n_rows, n_cols, n_queries, n_lists, dtype, inplace):
+    # inner_product runs normalize the dataset in the helper, which int8/uint8
+    # data cannot represent, so this dtype sweep uses only l2_expanded.
+    run_ivf_pq_build_search_test(
+        n_rows=n_rows,
+        n_cols=n_cols,
+        n_queries=n_queries,
+        k=10,  # fixed; not part of the parametrization
+        n_lists=n_lists,
+        metric="l2_expanded",
+        dtype=dtype,
+        inplace=inplace,
+    )
+
+
+@pytest.mark.parametrize(
+    "params",
+    [
+        pytest.param(
+            {
+                "n_rows": 0,
+                "n_cols": 10,
+                "n_queries": 10,
+                "k": 1,
+                "n_lists": 10,
+            },
+            marks=pytest.mark.xfail(reason="empty dataset"),
+        ),
+        {"n_rows": 1, "n_cols": 10, "n_queries": 10, "k": 1, "n_lists": 10},
+        {"n_rows": 10, "n_cols": 1, "n_queries": 10, "k": 10, "n_lists": 10},
+        # {"n_rows": 999, "n_cols": 42, "n_queries": 453, "k": 137,
+        #  "n_lists": 53},
+    ],
+)
+def test_ivf_pq_n(params):
+    """Smoke-test edge-case sizes for the dataset, query batch and k.
+
+    Recall is not evaluated (compare=False); the empty-dataset case is
+    marked xfail in the parametrization above.
+    """
+    # Every entry of params maps one-to-one onto a keyword of the helper.
+    run_ivf_pq_build_search_test(
+        metric="l2_expanded",
+        dtype=np.float32,
+        compare=False,
+        **params,
+    )
+
+
+@pytest.mark.parametrize("metric", ["l2_expanded", "inner_product"])
+@pytest.mark.parametrize("dtype", [np.float32])
+@pytest.mark.parametrize("codebook_kind", ["subspace", "cluster"])
+@pytest.mark.parametrize("rotation", [True, False])
+def test_ivf_pq_build_params(metric, dtype, codebook_kind, rotation):
+    run_ivf_pq_build_search_test(
+        n_rows=10000,
+        n_cols=10,
+        n_queries=1000,
+        k=10,
+        n_lists=100,
+        metric=metric,
+        dtype=dtype,
+        pq_bits=8,
+        pq_dim=0,  # 0 skips the helper's index.pq_dim equality assertion
+        codebook_kind=codebook_kind,  # "subspace" or "cluster"
+        add_data_on_build=True,
+        n_probes=100,
+        force_random_rotation=rotation,  # swept over True and False
+    )
+
+
+@pytest.mark.parametrize(
+    "params",
+    [
+        {"pq_dims": 10, "pq_bits": 8, "n_lists": 100},
+        {"pq_dims": 16, "pq_bits": 7, "n_lists": 100},
+        {"pq_dims": 0, "pq_bits": 8, "n_lists": 90},
+        {
+            "pq_dims": 0,
+            "pq_bits": 8,
+            "n_lists": 100,
+            "trainset_fraction": 0.9,  # optional key, read with .get() below
+            "n_iters": 30,  # optional key, read with .get() below
+        },
+    ],
+)
+def test_ivf_pq_params(params):
+    run_ivf_pq_build_search_test(
+        n_rows=10000,
+        n_cols=16,
+        n_queries=1000,
+        k=10,
+        n_lists=params["n_lists"],
+        metric="l2_expanded",
+        dtype=np.float32,
+        pq_bits=params["pq_bits"],
+        pq_dim=params["pq_dims"],  # dict key is plural, keyword is singular
+        kmeans_trainset_fraction=params.get("trainset_fraction", 1.0),
+        kmeans_n_iters=params.get("n_iters", 20),
+    )
+
+
+@pytest.mark.parametrize(
+    "params",
+    [
+        {
+            "k": 10,
+            "n_probes": 100,
+            "lut": np.float16,  # forwarded as lut_dtype
+            "idd": np.float32,  # forwarded as internal_distance_dtype
+        },
+        {
+            "k": 10,
+            "n_probes": 99,  # the only case with n_probes != 100
+            "lut": np.uint8,
+            "idd": np.float32,
+        },
+        {
+            "k": 10,
+            "n_probes": 100,
+            "lut": np.float32,
+            "idd": np.float16,
+        },
+        {
+            "k": 129,  # the only case with k != 10
+            "n_probes": 100,
+            "lut": np.float32,
+            "idd": np.float32,
+        },
+    ],
+)
+def test_ivf_pq_search_params(params):
+    run_ivf_pq_build_search_test(
+        n_rows=10000,
+        n_cols=16,
+        n_queries=1000,
+        k=params["k"],
+        n_lists=100,
+        n_probes=params["n_probes"],
+        metric="l2_expanded",
+        dtype=np.float32,
+        lut_dtype=params["lut"],
+        internal_distance_dtype=params["idd"],
+    )
+
+
+@pytest.mark.parametrize("dtype", [np.float32, np.int8, np.uint8])
+def test_extend(dtype):
+    run_ivf_pq_build_search_test(
+        n_rows=10000,
+        n_cols=10,
+        n_queries=100,
+        k=10,
+        n_lists=100,
+        metric="l2_expanded",
+        dtype=dtype,
+        add_data_on_build=False,  # helper then fills the index via extend()
+    )
+
+
+def test_build_assertions():
+    with pytest.raises(TypeError):  # a float64 dataset is expected to fail
+        run_ivf_pq_build_search_test(
+            n_rows=1000,
+            n_cols=10,
+            n_queries=100,
+            k=10,
+            n_lists=100,
+            metric="l2_expanded",
+            dtype=np.float64,
+        )
+
+    n_rows = 1000
+    n_cols = 100
+    n_queries = 212
+    k = 10
+    dataset = generate_data((n_rows, n_cols), np.float32)
+    dataset_device = device_ndarray(dataset)
+
+    index_params = ivf_pq.IndexParams(
+        n_lists=50,
+        metric="l2_expanded",
+        kmeans_n_iters=20,
+        kmeans_trainset_fraction=1,
+        add_data_on_build=False,
+    )
+
+    index = ivf_pq.Index()
+
+    queries = generate_data((n_queries, n_cols), np.float32)
+    out_idx = np.zeros((n_queries, k), dtype=np.uint64)
+    out_dist = np.zeros((n_queries, k), dtype=np.float32)
+
+    queries_device = device_ndarray(queries)
+    out_idx_device = device_ndarray(out_idx)
+    out_dist_device = device_ndarray(out_dist)
+
+    search_params = ivf_pq.SearchParams(n_probes=50)
+
+    with pytest.raises(ValueError):
+        # Searching a default-constructed (not yet built) index must fail
+        ivf_pq.search(
+            search_params,
+            index,
+            queries_device,
+            k,
+            out_idx_device,
+            out_dist_device,
+        )
+
+    index = ivf_pq.build(index_params, dataset_device)
+    assert index.trained
+
+    indices = np.arange(n_rows + 1, dtype=np.uint64)  # one more than n_rows
+    indices_device = device_ndarray(indices)
+
+    with pytest.raises(ValueError):
+        # Dataset mismatch; NOTE(review): n_cols is equal, only rows differ
+        ivf_pq.extend(index, queries_device, indices_device)
+
+    with pytest.raises(ValueError):
+        # indices has n_rows + 1 entries while the dataset has n_rows rows
+        ivf_pq.extend(index, dataset_device, indices_device)
+
+
+@pytest.mark.parametrize(
+    "params",
+    [
+        {"q_dt": np.float64},
+        {"q_order": "F"},
+        {"q_cols": 101},
+        {"idx_dt": np.uint32},
+        {"idx_order": "F"},
+        {"idx_rows": 42},
+        {"idx_cols": 137},
+        {"dist_dt": np.float64},
+        {"dist_order": "F"},
+        {"dist_rows": 42},
+        {"dist_cols": 137},
+    ],
+)
+def test_search_inputs(params):
+    """Search must raise when one input has a bad dtype, order or shape."""
+    n_rows = 1000
+    n_cols = 100
+    n_queries = 256
+    k = 10
+
+    q_dt = params.get("q_dt", np.float32)
+    q_order = params.get("q_order", "C")
+    queries = generate_data(
+        (n_queries, params.get("q_cols", n_cols)), q_dt
+    ).astype(q_dt, order=q_order)
+    queries_device = device_ndarray(queries)
+
+    idx_dt = params.get("idx_dt", np.uint64)
+    idx_order = params.get("idx_order", "C")
+    out_idx = np.zeros(
+        (params.get("idx_rows", n_queries), params.get("idx_cols", k)),
+        dtype=idx_dt,
+        order=idx_order,
+    )
+    out_idx_device = device_ndarray(out_idx)
+
+    dist_dt = params.get("dist_dt", np.float32)
+    dist_order = params.get("dist_order", "C")
+    out_dist = np.zeros(
+        (params.get("dist_rows", n_queries), params.get("dist_cols", k)),
+        dtype=dist_dt,
+        order=dist_order,
+    )
+    out_dist_device = device_ndarray(out_dist)
+
+    index_params = ivf_pq.IndexParams(
+        n_lists=50, metric="l2_expanded", add_data_on_build=True
+    )
+
+    dataset = generate_data((n_rows, n_cols), np.float32)
+    dataset_device = device_ndarray(dataset)
+    index = ivf_pq.build(index_params, dataset_device)
+    assert index.trained
+
+    # Built outside the raises block so that only search() can satisfy it.
+    search_params = ivf_pq.SearchParams(n_probes=50)
+    with pytest.raises(Exception):
+        ivf_pq.search(
+            search_params,
+            index,
+            queries_device,
+            k,
+            out_idx_device,
+            out_dist_device,
+        )