diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 6f0afa4eb0..af08a1a2a4 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -332,6 +332,21 @@ if(RAFT_COMPILE_DIST_LIBRARY) src/distance/specializations/fused_l2_nn_double_int64.cu src/distance/specializations/fused_l2_nn_float_int.cu src/distance/specializations/fused_l2_nn_float_int64.cu + src/nn/specializations/detail/ivfpq_build.cu + src/nn/specializations/detail/ivfpq_compute_similarity_float_fast.cu + src/nn/specializations/detail/ivfpq_compute_similarity_float_no_basediff.cu + src/nn/specializations/detail/ivfpq_compute_similarity_float_no_smem_lut.cu + src/nn/specializations/detail/ivfpq_compute_similarity_fp8s_fast.cu + src/nn/specializations/detail/ivfpq_compute_similarity_fp8s_no_basediff.cu + src/nn/specializations/detail/ivfpq_compute_similarity_fp8s_no_smem_lut.cu + src/nn/specializations/detail/ivfpq_compute_similarity_fp8u_fast.cu + src/nn/specializations/detail/ivfpq_compute_similarity_fp8u_no_basediff.cu + src/nn/specializations/detail/ivfpq_compute_similarity_fp8u_no_smem_lut.cu + src/nn/specializations/detail/ivfpq_compute_similarity_half_fast.cu + src/nn/specializations/detail/ivfpq_compute_similarity_half_no_basediff.cu + src/nn/specializations/detail/ivfpq_compute_similarity_half_no_smem_lut.cu + src/nn/specializations/detail/ivfpq_search.cu + src/nn/specializations/detail/ivfpq_search_float_uint64_t.cu src/random/specializations/rmat_rectangular_generator_int_double.cu src/random/specializations/rmat_rectangular_generator_int64_double.cu src/random/specializations/rmat_rectangular_generator_int_float.cu @@ -400,6 +415,8 @@ if(RAFT_COMPILE_NN_LIBRARY) src/nn/specializations/detail/ivfpq_compute_similarity_half_fast.cu src/nn/specializations/detail/ivfpq_compute_similarity_half_no_basediff.cu src/nn/specializations/detail/ivfpq_compute_similarity_half_no_smem_lut.cu + src/nn/specializations/detail/ivfpq_build.cu + src/nn/specializations/detail/ivfpq_search.cu 
src/nn/specializations/detail/ivfpq_search_float_int64_t.cu src/nn/specializations/detail/ivfpq_search_float_uint32_t.cu src/nn/specializations/detail/ivfpq_search_float_uint64_t.cu diff --git a/cpp/include/raft/neighbors/specializations/ivf_pq_specialization.hpp b/cpp/include/raft/neighbors/specializations/ivf_pq_specialization.hpp new file mode 100644 index 0000000000..2bce997e18 --- /dev/null +++ b/cpp/include/raft/neighbors/specializations/ivf_pq_specialization.hpp @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace raft::neighbors::ivf_pq { + +#define RAFT_INST_SEARCH(T, IdxT) \ + void search(const handle_t&, \ + const search_params&, \ + const index<IdxT>&, \ + const T*, \ + uint32_t, \ + uint32_t, \ + IdxT*, \ + float*, \ + rmm::mr::device_memory_resource*); + +RAFT_INST_SEARCH(float, uint64_t); +RAFT_INST_SEARCH(int8_t, uint64_t); +RAFT_INST_SEARCH(uint8_t, uint64_t); + +#undef RAFT_INST_SEARCH + +// We define overloads for build and extend with void return type. This is used in the Cython +// wrappers, where exception handling is not compatible with return type that has nontrivial +// constructor. 
+#define RAFT_INST_BUILD_EXTEND(T, IdxT) \ + auto build(const handle_t& handle, \ + const index_params& params, \ + const T* dataset, \ + IdxT n_rows, \ + uint32_t dim) \ + ->index<IdxT>; \ + \ + auto extend(const handle_t& handle, \ + const index<IdxT>& orig_index, \ + const T* new_vectors, \ + const IdxT* new_indices, \ + IdxT n_rows) \ + ->index<IdxT>; \ + \ + void build(const handle_t& handle, \ + const index_params& params, \ + const T* dataset, \ + IdxT n_rows, \ + uint32_t dim, \ + index<IdxT>* idx); \ + \ + void extend(const handle_t& handle, \ + index<IdxT>* idx, \ + const T* new_vectors, \ + const IdxT* new_indices, \ + IdxT n_rows); + +RAFT_INST_BUILD_EXTEND(float, uint64_t) +RAFT_INST_BUILD_EXTEND(int8_t, uint64_t) +RAFT_INST_BUILD_EXTEND(uint8_t, uint64_t) + +#undef RAFT_INST_BUILD_EXTEND + +} // namespace raft::neighbors::ivf_pq diff --git a/cpp/include/raft/spatial/knn/detail/haversine_distance.cuh b/cpp/include/raft/spatial/knn/detail/haversine_distance.cuh index b5ae9e7d5e..5c03f8f67c 100644 --- a/cpp/include/raft/spatial/knn/detail/haversine_distance.cuh +++ b/cpp/include/raft/spatial/knn/detail/haversine_distance.cuh @@ -27,7 +27,6 @@ #include #include -#include namespace raft { namespace spatial { diff --git a/cpp/src/nn/specializations/detail/ivfpq_build.cu b/cpp/src/nn/specializations/detail/ivfpq_build.cu new file mode 100644 index 0000000000..9ff22a3729 --- /dev/null +++ b/cpp/src/nn/specializations/detail/ivfpq_build.cu @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +namespace raft::neighbors::ivf_pq { + +#define RAFT_INST_BUILD_EXTEND(T, IdxT) \ + auto build(const handle_t& handle, \ + const index_params& params, \ + const T* dataset, \ + IdxT n_rows, \ + uint32_t dim) \ + ->index<IdxT> \ + { \ + return build<T, IdxT>(handle, params, dataset, n_rows, dim); \ + } \ + auto extend(const handle_t& handle, \ + const index<IdxT>& orig_index, \ + const T* new_vectors, \ + const IdxT* new_indices, \ + IdxT n_rows) \ + ->index<IdxT> \ + { \ + return extend<T, IdxT>(handle, orig_index, new_vectors, new_indices, n_rows); \ + } \ + \ + void build(const handle_t& handle, \ + const index_params& params, \ + const T* dataset, \ + IdxT n_rows, \ + uint32_t dim, \ + index<IdxT>* idx) \ + { \ + *idx = build<T, IdxT>(handle, params, dataset, n_rows, dim); \ + } \ + void extend(const handle_t& handle, \ + index<IdxT>* idx, \ + const T* new_vectors, \ + const IdxT* new_indices, \ + IdxT n_rows) \ + { \ + extend<T, IdxT>(handle, idx, new_vectors, new_indices, n_rows); \ + } + +RAFT_INST_BUILD_EXTEND(float, uint64_t); +RAFT_INST_BUILD_EXTEND(int8_t, uint64_t); +RAFT_INST_BUILD_EXTEND(uint8_t, uint64_t); + +#undef RAFT_INST_BUILD_EXTEND + +} // namespace raft::neighbors::ivf_pq diff --git a/cpp/src/nn/specializations/detail/ivfpq_search.cu b/cpp/src/nn/specializations/detail/ivfpq_search.cu new file mode 100644 index 0000000000..80bf589803 --- /dev/null +++ b/cpp/src/nn/specializations/detail/ivfpq_search.cu @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +namespace raft::neighbors::ivf_pq { + +#define RAFT_SEARCH_INST(T, IdxT) \ + void search(const handle_t& handle, \ + const search_params& params, \ + const index<IdxT>& idx, \ + const T* queries, \ + uint32_t n_queries, \ + uint32_t k, \ + IdxT* neighbors, \ + float* distances, \ + rmm::mr::device_memory_resource* mr) \ + { \ + search<T, IdxT>(handle, params, idx, queries, n_queries, k, neighbors, distances, mr); \ + } + +RAFT_SEARCH_INST(float, uint64_t); +RAFT_SEARCH_INST(int8_t, uint64_t); +RAFT_SEARCH_INST(uint8_t, uint64_t); + +#undef RAFT_SEARCH_INST + +} // namespace raft::neighbors::ivf_pq diff --git a/python/pylibraft/CMakeLists.txt b/python/pylibraft/CMakeLists.txt index 0c69634d16..0a7d700c91 100644 --- a/python/pylibraft/CMakeLists.txt +++ b/python/pylibraft/CMakeLists.txt @@ -75,6 +75,7 @@ rapids_cython_init() add_subdirectory(pylibraft/common) add_subdirectory(pylibraft/distance) +add_subdirectory(pylibraft/neighbors) add_subdirectory(pylibraft/random) add_subdirectory(pylibraft/cluster) diff --git a/python/pylibraft/pylibraft/common/input_validation.py b/python/pylibraft/pylibraft/common/input_validation.py index d5556a79dc..61435a859c 100644 --- a/python/pylibraft/pylibraft/common/input_validation.py +++ b/python/pylibraft/pylibraft/common/input_validation.py @@ -18,6 +18,8 @@ # cython: embedsignature = True # cython: language_level = 3 +import numpy as np + def do_dtypes_match(*cais): last_dtype = cais[0].__cuda_array_interface__["typestr"] @@ -57,3 +59,20 @@ def do_shapes_match(*cais): return False 
last_shape = shape return True + + +def is_c_contiguous(cai): + """ + Checks whether an array is C contiguous, i.e. densely packed row-major. + An empty array, or an extent of size 1, places no constraint on strides. + + Parameters + ---------- + cai : CUDA array interface + """ + shape, strides = cai["shape"], cai.get("strides") + if strides is None or 0 in shape: + return True + itemsize = np.dtype(cai["typestr"]).itemsize + dense = itemsize * np.cumprod((1,) + tuple(shape[:0:-1]))[::-1] + return all(n == 1 or s == d for n, s, d in zip(shape, strides, dense)) diff --git a/python/pylibraft/pylibraft/neighbors/CMakeLists.txt b/python/pylibraft/pylibraft/neighbors/CMakeLists.txt new file mode 100644 index 0000000000..995f698f2c --- /dev/null +++ b/python/pylibraft/pylibraft/neighbors/CMakeLists.txt @@ -0,0 +1,29 @@ +# ============================================================================= +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. 
+# ============================================================================= + +# Set the list of Cython files to build +set(linked_libraries raft::raft raft::distance) + +# Build all of the Cython targets +rapids_cython_create_modules( + CXX + SOURCE_FILES "" + LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX neighbors_ +) + +foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../library") +endforeach() + +add_subdirectory(ivf_pq) diff --git a/python/pylibraft/pylibraft/neighbors/__init__.pxd b/python/pylibraft/pylibraft/neighbors/__init__.pxd new file mode 100644 index 0000000000..273b4497cc --- /dev/null +++ b/python/pylibraft/pylibraft/neighbors/__init__.pxd @@ -0,0 +1,14 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/python/pylibraft/pylibraft/neighbors/__init__.py b/python/pylibraft/pylibraft/neighbors/__init__.py new file mode 100644 index 0000000000..273b4497cc --- /dev/null +++ b/python/pylibraft/pylibraft/neighbors/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/python/pylibraft/pylibraft/neighbors/ivf_pq/CMakeLists.txt b/python/pylibraft/pylibraft/neighbors/ivf_pq/CMakeLists.txt new file mode 100644 index 0000000000..4d51915008 --- /dev/null +++ b/python/pylibraft/pylibraft/neighbors/ivf_pq/CMakeLists.txt @@ -0,0 +1,28 @@ +# ============================================================================= +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. 
+# ============================================================================= + +# Set the list of Cython files to build +set(cython_sources ivf_pq.pyx) +set(linked_libraries raft::raft raft::distance) + +# Build all of the Cython targets +rapids_cython_create_modules( + CXX + SOURCE_FILES "${cython_sources}" + LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX neighbors_ivfpq_ +) + +foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../library") +endforeach() diff --git a/python/pylibraft/pylibraft/neighbors/ivf_pq/__init__.pxd b/python/pylibraft/pylibraft/neighbors/ivf_pq/__init__.pxd new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/pylibraft/pylibraft/neighbors/ivf_pq/__init__.py b/python/pylibraft/pylibraft/neighbors/ivf_pq/__init__.py new file mode 100644 index 0000000000..8a231b2c8c --- /dev/null +++ b/python/pylibraft/pylibraft/neighbors/ivf_pq/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from .ivf_pq import Index, IndexParams, SearchParams, build, extend, search diff --git a/python/pylibraft/pylibraft/neighbors/ivf_pq/c_ivf_pq.pxd b/python/pylibraft/pylibraft/neighbors/ivf_pq/c_ivf_pq.pxd new file mode 100644 index 0000000000..9728495bf8 --- /dev/null +++ b/python/pylibraft/pylibraft/neighbors/ivf_pq/c_ivf_pq.pxd @@ -0,0 +1,177 @@ +# +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True +# cython: language_level = 3 + +import numpy as np + +import pylibraft.common.handle + +from cython.operator cimport dereference as deref +from libc.stdint cimport ( + int8_t, + int64_t, + uint8_t, + uint32_t, + uint64_t, + uintptr_t, +) +from libcpp cimport bool, nullptr + +from rmm._lib.memory_resource cimport device_memory_resource + +from pylibraft.common.handle cimport handle_t +from pylibraft.distance.distance_type cimport DistanceType + + +cdef extern from "library_types.h": + ctypedef enum cudaDataType_t: + CUDA_R_32F "CUDA_R_32F" # float + CUDA_R_16F "CUDA_R_16F" # half + + # uint8 - used to refer to IVF-PQ's fp8 storage type + CUDA_R_8U "CUDA_R_8U" + +cdef extern from "raft/neighbors/ann_types.hpp" \ + namespace "raft::neighbors::ann" nogil: + + cdef cppclass ann_index "raft::neighbors::index": + pass + + cdef cppclass ann_index_params "raft::spatial::knn::index_params": + DistanceType metric + float metric_arg + bool 
add_data_on_build + + cdef cppclass ann_search_params "raft::spatial::knn::search_params": + pass + + +cdef extern from "raft/neighbors/ivf_pq_types.hpp" \ + namespace "raft::neighbors::ivf_pq" nogil: + + ctypedef enum codebook_gen: + PER_SUBSPACE "raft::neighbors::ivf_pq::codebook_gen::PER_SUBSPACE", + PER_CLUSTER "raft::neighbors::ivf_pq::codebook_gen::PER_CLUSTER" + + cpdef cppclass index_params(ann_index_params): + uint32_t n_lists + uint32_t kmeans_n_iters + double kmeans_trainset_fraction + uint32_t pq_bits + uint32_t pq_dim + codebook_gen codebook_kind + bool force_random_rotation + + cdef cppclass index[IdxT](ann_index): + index(const handle_t& handle, + DistanceType metric, + codebook_gen codebook_kind, + uint32_t n_lists, + uint32_t dim, + uint32_t pq_bits, + uint32_t pq_dim, + uint32_t n_nonempty_lists) + + IdxT size() + uint32_t dim() + uint32_t pq_dim() + uint32_t pq_len() + uint32_t pq_bits() + DistanceType metric() + uint32_t n_lists() + uint32_t rot_dim() + codebook_gen codebook_kind() + + cpdef cppclass search_params(ann_search_params): + uint32_t n_probes + cudaDataType_t lut_dtype + cudaDataType_t internal_distance_dtype + + +cdef extern from "raft/neighbors/specializations/ivf_pq_specialization.hpp" \ + namespace "raft::neighbors::ivf_pq" nogil: + + cdef void build(const handle_t& handle, + const index_params& params, + const float* dataset, + uint64_t n_rows, + uint32_t dim, + index[uint64_t]* index) except + + + cdef void build(const handle_t& handle, + const index_params& params, + const int8_t* dataset, + uint64_t n_rows, + uint32_t dim, + index[uint64_t]* index) except + + + cdef void build(const handle_t& handle, + const index_params& params, + const uint8_t* dataset, + uint64_t n_rows, + uint32_t dim, + index[uint64_t]* index) except + + + cdef void extend(const handle_t& handle, + index[uint64_t]* index, + const float* new_vectors, + const uint64_t* new_indices, + uint64_t n_rows) except + + + cdef void extend(const handle_t& handle, + 
index[uint64_t]* index, + const int8_t* new_vectors, + const uint64_t* new_indices, + uint64_t n_rows) except + + + cdef void extend(const handle_t& handle, + index[uint64_t]* index, + const uint8_t* new_vectors, + const uint64_t* new_indices, + uint64_t n_rows) except + + + cdef void search(const handle_t& handle, + const search_params& params, + const index[uint64_t]& index, + const float* queries, + uint32_t n_queries, + uint32_t k, + uint64_t* neighbors, + float* distances, + device_memory_resource* mr) except + + + cdef void search(const handle_t& handle, + const search_params& params, + const index[uint64_t]& index, + const int8_t* queries, + uint32_t n_queries, + uint32_t k, + uint64_t* neighbors, + float* distances, + device_memory_resource* mr) except + + + cdef void search(const handle_t& handle, + const search_params& params, + const index[uint64_t]& index, + const uint8_t* queries, + uint32_t n_queries, + uint32_t k, + uint64_t* neighbors, + float* distances, + device_memory_resource* mr) except + diff --git a/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx b/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx new file mode 100644 index 0000000000..f178eecb1f --- /dev/null +++ b/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx @@ -0,0 +1,727 @@ +# +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True +# cython: language_level = 3 + +import numpy as np + +from cython.operator cimport dereference as deref +from libc.stdint cimport ( + int8_t, + int64_t, + uint8_t, + uint32_t, + uint64_t, + uintptr_t, +) +from libcpp cimport bool, nullptr + +from pylibraft.distance.distance_type cimport DistanceType + +from pylibraft.common import Handle, device_ndarray +from pylibraft.common.interruptible import cuda_interruptible + +from pylibraft.common.handle cimport handle_t + +from pylibraft.common.handle import auto_sync_handle +from pylibraft.common.input_validation import is_c_contiguous + +from rmm._lib.memory_resource cimport ( + DeviceMemoryResource, + device_memory_resource, +) + +cimport pylibraft.neighbors.ivf_pq.c_ivf_pq as c_ivf_pq +from pylibraft.neighbors.ivf_pq.c_ivf_pq cimport index_params, search_params + + +def _get_metric(metric): + SUPPORTED_DISTANCES = { + "l2_expanded": DistanceType.L2Expanded, + # TODO(tfeher): fix inconsistency: index building for L2SqrtExpanded is + # only supported by build, not by search. 
+ # "euclidean": DistanceType.L2SqrtExpanded + "inner_product": DistanceType.InnerProduct + } + if metric not in SUPPORTED_DISTANCES: + raise ValueError("metric %s is not supported" % metric) + return SUPPORTED_DISTANCES[metric] + + +cdef _get_metric_string(DistanceType metric): + return {DistanceType.L2Expanded : "l2_expanded", + DistanceType.InnerProduct: "inner_product"}[metric] + + +cdef _get_codebook_string(c_ivf_pq.codebook_gen codebook): + return {c_ivf_pq.codebook_gen.PER_SUBSPACE: "subspace", + c_ivf_pq.codebook_gen.PER_CLUSTER: "cluster"}[codebook] + + +cdef _map_dtype_np_to_cuda(dtype, supported_dtypes=None): + if supported_dtypes is not None and dtype not in supported_dtypes: + raise TypeError("Type %s is not supported" % str(dtype)) + return {np.float32: c_ivf_pq.cudaDataType_t.CUDA_R_32F, + np.float16: c_ivf_pq.cudaDataType_t.CUDA_R_16F, + np.uint8: c_ivf_pq.cudaDataType_t.CUDA_R_8U}[dtype] + + +cdef _get_dtype_string(dtype): + return str({c_ivf_pq.cudaDataType_t.CUDA_R_32F: np.float32, + c_ivf_pq.cudaDataType_t.CUDA_R_16F: np.float16, + c_ivf_pq.cudaDataType_t.CUDA_R_8U: np.uint8}[dtype]) + + +def _check_input_array(cai, exp_dt, exp_rows=None, exp_cols=None): + if cai["typestr"] not in exp_dt: + raise TypeError("dtype %s not supported" % cai["typestr"]) + + if not is_c_contiguous(cai): + raise ValueError("Row major input is expected") + + if exp_cols is not None and cai["shape"][1] != exp_cols: + raise ValueError("Incorrect number of columns, expected {} got {}" + .format(exp_cols, cai["shape"][1])) + + if exp_rows is not None and cai["shape"][0] != exp_rows: + raise ValueError("Incorrect number of rows, expected {} , got {}" + .format(exp_rows, cai["shape"][0])) + + +cdef class IndexParams: + cdef c_ivf_pq.index_params params + + def __init__(self, *, + n_lists=1024, + metric="l2_expanded", + kmeans_n_iters=20, + kmeans_trainset_fraction=0.5, + pq_bits=8, + pq_dim=0, + codebook_kind="subspace", + force_random_rotation=False, + 
add_data_on_build=True): + """ + Parameters to build index for IVF-PQ nearest neighbor search + + Parameters + ---------- + n_lists : int, default = 1024 + The number of clusters used in the coarse quantizer. + metric : string denoting the metric type, default="l2_expanded" + Valid values for metric: ["l2_expanded", "inner_product"], where + - l2_expanded is the euclidean distance without the square root + operation, i.e.: distance(a,b) = \\sum_i (a_i - b_i)^2, + - inner product distance is defined as + distance(a, b) = \\sum_i a_i * b_i. + kmeans_n_iters : int, default = 20 + The number of iterations searching for kmeans centers during index + building. + kmeans_trainset_fraction : float, default = 0.5 + If kmeans_trainset_fraction is less than 1, then the dataset is + subsampled, and only n_samples * kmeans_trainset_fraction rows + are used for training. + pq_bits : int, default = 8 + The bit length of the vector element after quantization. + pq_dim : int, default = 0 + The dimensionality of the vector after product quantization. + When zero, an optimal value is selected using a heuristic. Note + pq_dim * pq_bits must be a multiple of 8. Hint: a smaller 'pq_dim' + results in a smaller index size and better search performance, but + lower recall. If 'pq_bits' is 8, 'pq_dim' can be set to any number, + but multiples of 8 are desirable for good performance. If 'pq_bits' + is not 8, 'pq_dim' should be a multiple of 8. For good performance, + it is desirable that 'pq_dim' is a multiple of 32. Ideally, + 'pq_dim' should be also a divisor of the dataset dim. + codebook_kind : string, default = "subspace" + Valid values ["subspace", "cluster"] + force_random_rotation : bool, default = False + Apply a random rotation matrix on the input data and queries even + if `dim % pq_dim == 0`. 
Note: if `dim` is not multiple of `pq_dim`, + a random rotation is always applied to the input data and queries + to transform the working space from `dim` to `rot_dim`, which may + be slightly larger than the original space and is a multiple + of `pq_dim` (`rot_dim % pq_dim == 0`). However, this transform is + not necessary when `dim` is multiple of `pq_dim` (`dim == rot_dim`, + hence no need in adding "extra" data columns / features). By + default, if `dim == rot_dim`, the rotation transform is + initialized with the identity matrix. When + `force_random_rotation == True`, a random orthogonal transform + matrix is generated regardless of the values of `dim` and `pq_dim`. + add_data_on_build : bool, default = True + After training the coarse and fine quantizers, we will populate + the index with the dataset if add_data_on_build == True, otherwise + the index is left empty, and the extend method can be used + to add new vectors to the index. + + """ + self.params.n_lists = n_lists + self.params.metric = _get_metric(metric) + self.params.metric_arg = 0 + self.params.kmeans_n_iters = kmeans_n_iters + self.params.kmeans_trainset_fraction = kmeans_trainset_fraction + self.params.pq_bits = pq_bits + self.params.pq_dim = pq_dim + if codebook_kind == "subspace": + self.params.codebook_kind = c_ivf_pq.codebook_gen.PER_SUBSPACE + elif codebook_kind == "cluster": + self.params.codebook_kind = c_ivf_pq.codebook_gen.PER_CLUSTER + else: + raise ValueError("Incorrect codebook kind %s" % codebook_kind) + self.params.force_random_rotation = force_random_rotation + self.params.add_data_on_build = add_data_on_build + + @property + def n_lists(self): + return self.params.n_lists + + @property + def metric(self): + return self.params.metric + + @property + def kmeans_n_iters(self): + return self.params.kmeans_n_iters + + @property + def kmeans_trainset_fraction(self): + return self.params.kmeans_trainset_fraction + + @property + def pq_bits(self): + return self.params.pq_bits + + 
@property + def pq_dim(self): + return self.params.pq_dim + + @property + def codebook_kind(self): + return self.params.codebook_kind + + @property + def force_random_rotation(self): + return self.params.force_random_rotation + + @property + def add_data_on_build(self): + return self.params.add_data_on_build + + +cdef class Index: + # We store a pointer to the index because it does not have a trivial + # constructor. + cdef c_ivf_pq.index[uint64_t] * index + cdef readonly bool trained + + def __cinit__(self, handle=None): + self.trained = False + self.index = NULL + if handle is None: + handle = Handle() + cdef handle_t* handle_ = handle.getHandle() + + # We create a placeholder object. The actual parameter values do + # not matter, it will be replaced with a built index object later. + self.index = new c_ivf_pq.index[uint64_t]( + deref(handle_), _get_metric("l2_expanded"), + c_ivf_pq.codebook_gen.PER_SUBSPACE, + 1, + 4, + 8, + 0, + 0) + + def __dealloc__(self): + if self.index is not NULL: + del self.index + + def __repr__(self): + m_str = "metric=" + _get_metric_string(self.index.metric()) + code_str = "codebook=" + _get_codebook_string( + self.index.codebook_kind()) + attr_str = [attr + "=" + str(getattr(self, attr)) + for attr in ["size", "dim", "pq_dim", "pq_bits", + "n_lists", "rot_dim"]] + attr_str = [m_str, code_str] + attr_str + return "Index(type=IVF-PQ, " + (", ".join(attr_str)) + ")" + + @property + def dim(self): + return self.index[0].dim() + + @property + def size(self): + return self.index[0].size() + + @property + def pq_dim(self): + return self.index[0].pq_dim() + + @property + def pq_len(self): + return self.index[0].pq_len() + + @property + def pq_bits(self): + return self.index[0].pq_bits() + + @property + def metric(self): + return self.index[0].metric() + + @property + def n_lists(self): + return self.index[0].n_lists() + + @property + def rot_dim(self): + return self.index[0].rot_dim() + + @property + def codebook_kind(self): + return
self.index[0].codebook_kind() + + +@auto_sync_handle +def build(IndexParams index_params, dataset, handle=None): + """ + Builds an IVF-PQ index that can be later used for nearest neighbor search. + + Parameters + ---------- + index_params : IndexParams object + dataset : CUDA array interface compliant matrix shape (n_samples, dim) + Supported dtype [float, int8, uint8] + {handle_docstring} + + Returns + ------- + index: ivf_pq.Index + + Examples + -------- + + .. code-block:: python + + import cupy as cp + + from pylibraft.common import Handle + from pylibraft.neighbors import ivf_pq + + n_samples = 50000 + n_features = 50 + n_queries = 1000 + + dataset = cp.random.random_sample((n_samples, n_features), + dtype=cp.float32) + handle = Handle() + index_params = ivf_pq.IndexParams( + n_lists=1024, + metric="l2_expanded", + pq_dim=10) + index = ivf_pq.build(index_params, dataset, handle=handle) + + # Search using the built index + queries = cp.random.random_sample((n_queries, n_features), + dtype=cp.float32) + k = 10 + distances, neighbors = ivf_pq.search(ivf_pq.SearchParams(), index, + queries, k, handle=handle) + + distances = cp.asarray(distances) + neighbors = cp.asarray(neighbors) + + # pylibraft functions are often asynchronous so the + # handle needs to be explicitly synchronized + handle.sync() + + """ + dataset_cai = dataset.__cuda_array_interface__ + dataset_dt = np.dtype(dataset_cai["typestr"]) + _check_input_array(dataset_cai, [np.dtype('float32'), np.dtype('byte'), + np.dtype('ubyte')]) + cdef uintptr_t dataset_ptr = dataset_cai["data"][0] + + cdef uint64_t n_rows = dataset_cai["shape"][0] + cdef uint32_t dim = dataset_cai["shape"][1] + + if handle is None: + handle = Handle() + cdef handle_t* handle_ = handle.getHandle() + + idx = Index() + + if dataset_dt == np.float32: + with cuda_interruptible(): + c_ivf_pq.build(deref(handle_), + index_params.params, + dataset_ptr, + n_rows, + dim, + idx.index) + idx.trained = True + elif dataset_dt == np.byte: + with 
cuda_interruptible(): + c_ivf_pq.build(deref(handle_), + index_params.params, + dataset_ptr, + n_rows, + dim, + idx.index) + idx.trained = True + elif dataset_dt == np.ubyte: + with cuda_interruptible(): + c_ivf_pq.build(deref(handle_), + index_params.params, + dataset_ptr, + n_rows, + dim, + idx.index) + idx.trained = True + else: + raise TypeError("dtype %s not supported" % dataset_dt) + + return idx + + +@auto_sync_handle +def extend(Index index, new_vectors, new_indices, handle=None): + """ + Extend an existing index with new vectors. + + + Parameters + ---------- + index : ivf_pq.Index + Trained ivf_pq object. + new_vectors : CUDA array interface compliant matrix shape (n_samples, dim) + Supported dtype [float, int8, uint8] + new_indices : CUDA array interface compliant vector shape (n_samples) + Supported dtype [uint64] + {handle_docstring} + + Returns + ------- + index: ivf_pq.Index + + Examples + -------- + + .. code-block:: python + + import cupy as cp + + from pylibraft.common import Handle + from pylibraft.neighbors import ivf_pq + + n_samples = 50000 + n_features = 50 + n_queries = 1000 + + dataset = cp.random.random_sample((n_samples, n_features), + dtype=cp.float32) + handle = Handle() + index = ivf_pq.build(ivf_pq.IndexParams(), dataset, handle=handle) + + n_rows = 100 + more_data = cp.random.random_sample((n_rows, n_features), + dtype=cp.float32) + indices = index.size + cp.arange(n_rows, dtype=cp.uint64) + index = ivf_pq.extend(index, more_data, indices) + + # Search using the built index + queries = cp.random.random_sample((n_queries, n_features), + dtype=cp.float32) + k = 10 + distances, neighbors = ivf_pq.search(ivf_pq.SearchParams(), + index, queries, + k, handle=handle) + + # pylibraft functions are often asynchronous so the + # handle needs to be explicitly synchronized + handle.sync() + + distances = cp.asarray(distances) + neighbors = cp.asarray(neighbors) + """ + if not index.trained: + raise ValueError("Index need to be built before
calling extend.") + + if handle is None: + handle = Handle() + cdef handle_t* handle_ = handle.getHandle() + + vecs_cai = new_vectors.__cuda_array_interface__ + vecs_dt = np.dtype(vecs_cai["typestr"]) + cdef uint64_t n_rows = vecs_cai["shape"][0] + cdef uint32_t dim = vecs_cai["shape"][1] + + _check_input_array(vecs_cai, [np.dtype('float32'), np.dtype('byte'), + np.dtype('ubyte')], + exp_cols=index.dim) + + idx_cai = new_indices.__cuda_array_interface__ + _check_input_array(idx_cai, [np.dtype('uint64')], exp_rows=n_rows) + if len(idx_cai["shape"])!=1: + raise ValueError("Indices array is expected to be 1D") + + cdef uintptr_t vecs_ptr = vecs_cai["data"][0] + cdef uintptr_t idx_ptr = idx_cai["data"][0] + + if vecs_dt == np.float32: + with cuda_interruptible(): + c_ivf_pq.extend(deref(handle_), + index.index, + vecs_ptr, + idx_ptr, + n_rows) + elif vecs_dt == np.int8: + with cuda_interruptible(): + c_ivf_pq.extend(deref(handle_), + index.index, + vecs_ptr, + idx_ptr, + n_rows) + elif vecs_dt == np.uint8: + with cuda_interruptible(): + c_ivf_pq.extend(deref(handle_), + index.index, + vecs_ptr, + idx_ptr, + n_rows) + else: + raise TypeError("query dtype %s not supported" % vecs_dt) + + return index + + +cdef class SearchParams: + cdef c_ivf_pq.search_params params + + def __init__(self, *, n_probes=20, + lut_dtype=np.float32, + internal_distance_dtype=np.float32): + """ + IVF-PQ search parameters + + Parameters + ---------- + n_probes: int, default = 20 + The number of coarse clusters to select for the fine search. + lut_dtype: default = np.float32 + Data type of look up table to be created dynamically at search + time. The use of low-precision types reduces the amount of shared + memory required at search time, so fast shared memory kernels can + be used even for datasets with large dimensionality. Note that + the recall is slightly degraded when low-precision type is + selected.
Possible values [np.float32, np.float16, np.uint8] + internal_distance_dtype: default = np.float32 + Storage data type for distance/similarity computation. + Possible values [np.float32, np.float16] + """ + + self.params.n_probes = n_probes + self.params.lut_dtype = _map_dtype_np_to_cuda(lut_dtype) + self.params.internal_distance_dtype = \ + _map_dtype_np_to_cuda(internal_distance_dtype) + # TODO(tfeher): enable if #926 adds this + # self.params.shmem_carveout = self.shmem_carveout + + def __repr__(self): + lut_str = "lut_dtype=" + _get_dtype_string(self.params.lut_dtype) + idt_str = "internal_distance_dtype=" + \ + _get_dtype_string(self.params.internal_distance_dtype) + attr_str = [attr + "=" + str(getattr(self, attr)) + for attr in ["n_probes"]] + # TODO (tfeher) add "shmem_carveout" + attr_str = attr_str + [lut_str, idt_str] + return "SearchParams(type=IVF-PQ, " + (", ".join(attr_str)) + ")" + + @property + def n_probes(self): + return self.params.n_probes + + @property + def lut_dtype(self): + return self.params.lut_dtype + + @property + def internal_distance_dtype(self): + return self.params.internal_distance_dtype + + +@auto_sync_handle +def search(SearchParams search_params, + Index index, + queries, + k, + neighbors=None, + distances=None, + DeviceMemoryResource memory_resource=None, + handle=None): + """ + Find the k nearest neighbors for each query. + + Parameters + ---------- + search_params : SearchParams + index : Index + Trained IVF-PQ index. + queries : CUDA array interface compliant matrix shape (n_samples, dim) + Supported dtype [float, int8, uint8] + k : int + The number of neighbors. + neighbors : Optional CUDA array interface compliant matrix shape + (n_queries, k), dtype uint64_t. If supplied, neighbor + indices will be written here in-place. (default None) + distances : Optional CUDA array interface compliant matrix shape + (n_queries, k) If supplied, the distances to the + neighbors will be written here in-place. 
(default None) + memory_resource : RMM DeviceMemoryResource object, optional + This can be used to explicitly manage the temporary memory + allocation during search. Passing a pooling allocator can reduce + memory allocation overhead. If not specified, then the memory + resource from the raft handle is used. + {handle_docstring} + + Examples + -------- + .. code-block:: python + + import cupy as cp + + from pylibraft.common import Handle + from pylibraft.neighbors import ivf_pq + + n_samples = 50000 + n_features = 50 + n_queries = 1000 + dataset = cp.random.random_sample((n_samples, n_features), + dtype=cp.float32) + + # Build index + handle = Handle() + index = ivf_pq.build(ivf_pq.IndexParams(), dataset, handle=handle) + + # Search using the built index + queries = cp.random.random_sample((n_queries, n_features), + dtype=cp.float32) + k = 10 + search_params = ivf_pq.SearchParams( + n_probes=20, + lut_dtype=ivf_pq.np.float16, + internal_distance_dtype=ivf_pq.np.float32 + ) + + # Using a pooling allocator reduces overhead of temporary array + # creation during search. This is useful if multiple searches + # are performed with the same query size.
+ mr = rmm.mr.PoolMemoryResource( + rmm.mr.CudaMemoryResource(), + initial_pool_size=2**29, + maximum_pool_size=2**31 + ) + distances, neighbors = ivf_pq.search(search_params, index, queries, + k, memory_resource=mr, + handle=handle) + + # pylibraft functions are often asynchronous so the + # handle needs to be explicitly synchronized + handle.sync() + + neighbors = cp.asarray(neighbors) + distances = cp.asarray(distances) + """ + + if not index.trained: + raise ValueError("Index need to be built before calling search.") + + if handle is None: + handle = Handle() + cdef handle_t* handle_ = handle.getHandle() + + queries_cai = queries.__cuda_array_interface__ + queries_dt = np.dtype(queries_cai["typestr"]) + cdef uint32_t n_queries = queries_cai["shape"][0] + + _check_input_array(queries_cai, [np.dtype('float32'), np.dtype('byte'), + np.dtype('ubyte')], + exp_cols=index.dim) + + if neighbors is None: + neighbors = device_ndarray.empty((n_queries, k), dtype='uint64') + + neighbors_cai = neighbors.__cuda_array_interface__ + _check_input_array(neighbors_cai, [np.dtype('uint64')], + exp_rows=n_queries, exp_cols=k) + + if distances is None: + distances = device_ndarray.empty((n_queries, k), dtype='float32') + + distances_cai = distances.__cuda_array_interface__ + _check_input_array(distances_cai, [np.dtype('float32')], + exp_rows=n_queries, exp_cols=k) + + cdef c_ivf_pq.search_params params = search_params.params + + cdef uintptr_t queries_ptr = queries_cai["data"][0] + cdef uintptr_t neighbors_ptr = neighbors_cai["data"][0] + cdef uintptr_t distances_ptr = distances_cai["data"][0] + # TODO(tfeher) pass mr_ptr arg + cdef device_memory_resource* mr_ptr = nullptr + if memory_resource is not None: + mr_ptr = memory_resource.get_mr() + + if queries_dt == np.float32: + with cuda_interruptible(): + c_ivf_pq.search(deref(handle_), + params, + deref(index.index), + queries_ptr, + n_queries, + k, + neighbors_ptr, + distances_ptr, + mr_ptr) + elif queries_dt == np.byte: + with 
cuda_interruptible(): + c_ivf_pq.search(deref(handle_), + params, + deref(index.index), + queries_ptr, + n_queries, + k, + neighbors_ptr, + distances_ptr, + mr_ptr) + elif queries_dt == np.ubyte: + with cuda_interruptible(): + c_ivf_pq.search(deref(handle_), + params, + deref(index.index), + queries_ptr, + n_queries, + k, + neighbors_ptr, + distances_ptr, + mr_ptr) + else: + raise ValueError("query dtype %s not supported" % queries_dt) + + return (distances, neighbors) diff --git a/python/pylibraft/pylibraft/test/test_ivf_pq.py b/python/pylibraft/pylibraft/test/test_ivf_pq.py new file mode 100644 index 0000000000..367ff6d44a --- /dev/null +++ b/python/pylibraft/pylibraft/test/test_ivf_pq.py @@ -0,0 +1,482 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# h ttp://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import numpy as np +import pytest +from sklearn.metrics import pairwise_distances +from sklearn.neighbors import NearestNeighbors +from sklearn.preprocessing import normalize + +from pylibraft.common import device_ndarray +from pylibraft.neighbors import ivf_pq + + +def generate_data(shape, dtype): + if dtype == np.byte: + x = np.random.randint(-127, 128, size=shape, dtype=np.byte) + elif dtype == np.ubyte: + x = np.random.randint(0, 255, size=shape, dtype=np.ubyte) + else: + x = np.random.random_sample(shape).astype(dtype) + + return x + + +def calc_recall(ann_idx, true_nn_idx): + assert ann_idx.shape == true_nn_idx.shape + n = 0 + for i in range(ann_idx.shape[0]): + n += np.intersect1d(ann_idx[i, :], true_nn_idx[i, :]).size + recall = n / ann_idx.size + return recall + + +def check_distances(dataset, queries, metric, out_idx, out_dist): + """ + Calculate the real distance between queries and dataset[out_idx], + and compare it to out_dist. + """ + dist = np.empty(out_dist.shape, out_dist.dtype) + for i in range(queries.shape[0]): + X = queries[np.newaxis, i, :] + Y = dataset[out_idx[i, :], :] + if metric == "l2_expanded": + dist[i, :] = pairwise_distances(X, Y, "euclidean") + elif metric == "inner_product": + dist[i, :] = np.matmul(X, Y.T) + else: + raise ValueError("Invali metric") + + # Note: raft l2 metric does not include the square root operation like + # sklearn's euclidean. + if metric == "l2_expanded": + dist = np.power(dist, 2) + + dist_eps = abs(dist) + dist_eps[dist < 1e-3] = 1e-3 + diff = abs(out_dist - dist) / dist_eps + + # Quantization leads to errors in the distance calculation. + # The aim of this test is not to test precision, but to catch obvious + # errors. 
+ assert np.mean(diff) < 0.1 + + +def run_ivf_pq_build_search_test( + n_rows, + n_cols, + n_queries, + k, + n_lists, + metric, + dtype, + pq_bits=8, + pq_dim=0, + codebook_kind="subspace", + add_data_on_build="True", + n_probes=100, + lut_dtype=np.float32, + internal_distance_dtype=np.float32, + force_random_rotation=False, + kmeans_trainset_fraction=1, + kmeans_n_iters=20, + compare=True, + inplace=True, +): + dataset = generate_data((n_rows, n_cols), dtype) + if metric == "inner_product": + dataset = normalize(dataset, norm="l2", axis=1) + dataset_device = device_ndarray(dataset) + + build_params = ivf_pq.IndexParams( + n_lists=n_lists, + metric=metric, + kmeans_n_iters=kmeans_n_iters, + kmeans_trainset_fraction=kmeans_trainset_fraction, + pq_bits=pq_bits, + pq_dim=pq_dim, + codebook_kind=codebook_kind, + force_random_rotation=force_random_rotation, + add_data_on_build=add_data_on_build, + ) + + index = ivf_pq.build(build_params, dataset_device) + + assert index.trained + if pq_dim != 0: + assert index.pq_dim == build_params.pq_dim + assert index.pq_bits == build_params.pq_bits + assert index.metric == build_params.metric + assert index.n_lists == build_params.n_lists + + if not add_data_on_build: + dataset_1_device = device_ndarray(dataset[: n_rows // 2, :]) + dataset_2_device = device_ndarray(dataset[n_rows // 2 :, :]) + indices_1 = np.arange(n_rows // 2, dtype=np.uint64) + indices_1_device = device_ndarray(indices_1) + indices_2 = np.arange(n_rows // 2, n_rows, dtype=np.uint64) + indices_2_device = device_ndarray(indices_2) + index = ivf_pq.extend(index, dataset_1_device, indices_1_device) + index = ivf_pq.extend(index, dataset_2_device, indices_2_device) + + assert index.size == n_rows + + queries = generate_data((n_queries, n_cols), dtype) + out_idx = np.zeros((n_queries, k), dtype=np.uint64) + out_dist = np.zeros((n_queries, k), dtype=np.float32) + + queries_device = device_ndarray(queries) + out_idx_device = device_ndarray(out_idx) if inplace else None + 
out_dist_device = device_ndarray(out_dist) if inplace else None + + search_params = ivf_pq.SearchParams( + n_probes=n_probes, + lut_dtype=lut_dtype, + internal_distance_dtype=internal_distance_dtype, + ) + + ret_output = ivf_pq.search( + search_params, + index, + queries_device, + k, + neighbors=out_idx_device, + distances=out_dist_device, + ) + + if not inplace: + out_dist_device, out_idx_device = ret_output + + if not compare: + return + + out_idx = out_idx_device.copy_to_host() + out_dist = out_dist_device.copy_to_host() + + # Calculate reference values with sklearn + skl_metric = {"l2_expanded": "euclidean", "inner_product": "cosine"}[ + metric + ] + nn_skl = NearestNeighbors( + n_neighbors=k, algorithm="brute", metric=skl_metric + ) + nn_skl.fit(dataset) + skl_idx = nn_skl.kneighbors(queries, return_distance=False) + + recall = calc_recall(out_idx, skl_idx) + assert recall > 0.7 + + check_distances(dataset, queries, metric, out_idx, out_dist) + + +@pytest.mark.parametrize("inplace", [True, False]) +@pytest.mark.parametrize("n_rows", [10000]) +@pytest.mark.parametrize("n_cols", [10]) +@pytest.mark.parametrize("n_queries", [100]) +@pytest.mark.parametrize("n_lists", [100]) +@pytest.mark.parametrize("dtype", [np.float32, np.int8, np.uint8]) +def test_ivf_pq_dtypes(n_rows, n_cols, n_queries, n_lists, dtype, inplace): + # Note that inner_product tests use normalized input which we cannot + # represent in int8, therefore we test only l2_expanded metric here. 
+ run_ivf_pq_build_search_test( + n_rows=n_rows, + n_cols=n_cols, + n_queries=n_queries, + k=10, + n_lists=n_lists, + metric="l2_expanded", + dtype=dtype, + inplace=inplace, + ) + + +@pytest.mark.parametrize( + "params", + [ + pytest.param( + { + "n_rows": 0, + "n_cols": 10, + "n_queries": 10, + "k": 1, + "n_lists": 10, + }, + marks=pytest.mark.xfail(reason="empty dataset"), + ), + {"n_rows": 1, "n_cols": 10, "n_queries": 10, "k": 1, "n_lists": 10}, + {"n_rows": 10, "n_cols": 1, "n_queries": 10, "k": 10, "n_lists": 10}, + # {"n_rows": 999, "n_cols": 42, "n_queries": 453, "k": 137, + # "n_lists": 53}, + ], +) +def test_ivf_pq_n(params): + # We do not test recall, just confirm that we can handle edge cases for + # certain parameters + run_ivf_pq_build_search_test( + n_rows=params["n_rows"], + n_cols=params["n_cols"], + n_queries=params["n_queries"], + k=params["k"], + n_lists=params["n_lists"], + metric="l2_expanded", + dtype=np.float32, + compare=False, + ) + + +@pytest.mark.parametrize("metric", ["l2_expanded", "inner_product"]) +@pytest.mark.parametrize("dtype", [np.float32]) +@pytest.mark.parametrize("codebook_kind", ["subspace", "cluster"]) +@pytest.mark.parametrize("rotation", [True, False]) +def test_ivf_pq_build_params(metric, dtype, codebook_kind, rotation): + run_ivf_pq_build_search_test( + n_rows=10000, + n_cols=10, + n_queries=1000, + k=10, + n_lists=100, + metric=metric, + dtype=dtype, + pq_bits=8, + pq_dim=0, + codebook_kind=codebook_kind, + add_data_on_build=True, + n_probes=100, + force_random_rotation=rotation, + ) + + +@pytest.mark.parametrize( + "params", + [ + {"pq_dims": 10, "pq_bits": 8, "n_lists": 100}, + {"pq_dims": 16, "pq_bits": 7, "n_lists": 100}, + {"pq_dims": 0, "pq_bits": 8, "n_lists": 90}, + { + "pq_dims": 0, + "pq_bits": 8, + "n_lists": 100, + "trainset_fraction": 0.9, + "n_iters": 30, + }, + ], +) +def test_ivf_pq_params(params): + run_ivf_pq_build_search_test( + n_rows=10000, + n_cols=16, + n_queries=1000, + k=10, + 
n_lists=params["n_lists"], + metric="l2_expanded", + dtype=np.float32, + pq_bits=params["pq_bits"], + pq_dim=params["pq_dims"], + kmeans_trainset_fraction=params.get("trainset_fraction", 1.0), + kmeans_n_iters=params.get("n_iters", 20), + ) + + +@pytest.mark.parametrize( + "params", + [ + { + "k": 10, + "n_probes": 100, + "lut": np.float16, + "idd": np.float32, + }, + { + "k": 10, + "n_probes": 99, + "lut": np.uint8, + "idd": np.float32, + }, + { + "k": 10, + "n_probes": 100, + "lut": np.float32, + "idd": np.float16, + }, + { + "k": 129, + "n_probes": 100, + "lut": np.float32, + "idd": np.float32, + }, + ], +) +def test_ivf_pq_search_params(params): + run_ivf_pq_build_search_test( + n_rows=10000, + n_cols=16, + n_queries=1000, + k=params["k"], + n_lists=100, + n_probes=params["n_probes"], + metric="l2_expanded", + dtype=np.float32, + lut_dtype=params["lut"], + internal_distance_dtype=params["idd"], + ) + + +@pytest.mark.parametrize("dtype", [np.float32, np.int8, np.uint8]) +def test_extend(dtype): + run_ivf_pq_build_search_test( + n_rows=10000, + n_cols=10, + n_queries=100, + k=10, + n_lists=100, + metric="l2_expanded", + dtype=dtype, + add_data_on_build=False, + ) + + +def test_build_assertions(): + with pytest.raises(TypeError): + run_ivf_pq_build_search_test( + n_rows=1000, + n_cols=10, + n_queries=100, + k=10, + n_lists=100, + metric="l2_expanded", + dtype=np.float64, + ) + + n_rows = 1000 + n_cols = 100 + n_queries = 212 + k = 10 + dataset = generate_data((n_rows, n_cols), np.float32) + dataset_device = device_ndarray(dataset) + + index_params = ivf_pq.IndexParams( + n_lists=50, + metric="l2_expanded", + kmeans_n_iters=20, + kmeans_trainset_fraction=1, + add_data_on_build=False, + ) + + index = ivf_pq.Index() + + queries = generate_data((n_queries, n_cols), np.float32) + out_idx = np.zeros((n_queries, k), dtype=np.uint64) + out_dist = np.zeros((n_queries, k), dtype=np.float32) + + queries_device = device_ndarray(queries) + out_idx_device = 
device_ndarray(out_idx) + out_dist_device = device_ndarray(out_dist) + + search_params = ivf_pq.SearchParams(n_probes=50) + + with pytest.raises(ValueError): + # Index must be built before search + ivf_pq.search( + search_params, + index, + queries_device, + k, + out_idx_device, + out_dist_device, + ) + + index = ivf_pq.build(index_params, dataset_device) + assert index.trained + + indices = np.arange(n_rows + 1, dtype=np.uint64) + indices_device = device_ndarray(indices) + + with pytest.raises(ValueError): + # Dataset dimension mismatch + ivf_pq.extend(index, queries_device, indices_device) + + with pytest.raises(ValueError): + # indices dimension mismatch + ivf_pq.extend(index, dataset_device, indices_device) + + +@pytest.mark.parametrize( + "params", + [ + {"q_dt": np.float64}, + {"q_order": "F"}, + {"q_cols": 101}, + {"idx_dt": np.uint32}, + {"idx_order": "F"}, + {"idx_rows": 42}, + {"idx_cols": 137}, + {"dist_dt": np.float64}, + {"dist_order": "F"}, + {"dist_rows": 42}, + {"dist_cols": 137}, + ], +) +def test_search_inputs(params): + """Test with invalid input dtype, order, or dimension.""" + n_rows = 1000 + n_cols = 100 + n_queries = 256 + k = 10 + dtype = np.float32 + + q_dt = params.get("q_dt", np.float32) + q_order = params.get("q_order", "C") + queries = generate_data( + (n_queries, params.get("q_cols", n_cols)), q_dt + ).astype(q_dt, order=q_order) + queries_device = device_ndarray(queries) + + idx_dt = params.get("idx_dt", np.uint64) + idx_order = params.get("idx_order", "C") + out_idx = np.zeros( + (params.get("idx_rows", n_queries), params.get("idx_cols", k)), + dtype=idx_dt, + order=idx_order, + ) + out_idx_device = device_ndarray(out_idx) + + dist_dt = params.get("dist_dt", np.float32) + dist_order = params.get("dist_order", "C") + out_dist = np.zeros( + (params.get("dist_rows", n_queries), params.get("dist_cols", k)), + dtype=dist_dt, + order=dist_order, + ) + out_dist_device = device_ndarray(out_dist) + + index_params = ivf_pq.IndexParams( + 
n_lists=50, metric="l2_expanded", add_data_on_build=True + ) + + dataset = generate_data((n_rows, n_cols), dtype) + dataset_device = device_ndarray(dataset) + index = ivf_pq.build(index_params, dataset_device) + assert index.trained + + with pytest.raises(Exception): + search_params = ivf_pq.SearchParams(n_probes=50) + ivf_pq.search( + search_params, + index, + queries_device, + k, + out_idx_device, + out_dist_device, + )