diff --git a/cpp/include/raft/neighbors/specializations/ivf_pq_specialization.hpp b/cpp/include/raft/neighbors/specializations/ivf_pq_specialization.hpp index c91ee90eed..2bce997e18 100644 --- a/cpp/include/raft/neighbors/specializations/ivf_pq_specialization.hpp +++ b/cpp/include/raft/neighbors/specializations/ivf_pq_specialization.hpp @@ -37,6 +37,9 @@ RAFT_INST_SEARCH(uint8_t, uint64_t); #undef RAFT_INST_SEARCH +// We define overloads for build and extend with void return type. This is used in the Cython +// wrappers, where exception handling is not compatible with return type that has nontrivial +// constructor. #define RAFT_INST_BUILD_EXTEND(T, IdxT) \ auto build(const handle_t& handle, \ const index_params& params, \ @@ -50,7 +53,20 @@ RAFT_INST_SEARCH(uint8_t, uint64_t); const T* new_vectors, \ const IdxT* new_indices, \ IdxT n_rows) \ - ->index; + ->index; \ + \ + void build(const handle_t& handle, \ + const index_params& params, \ + const T* dataset, \ + IdxT n_rows, \ + uint32_t dim, \ + index* idx); \ + \ + void extend(const handle_t& handle, \ + index* idx, \ + const T* new_vectors, \ + const IdxT* new_indices, \ + IdxT n_rows); RAFT_INST_BUILD_EXTEND(float, uint64_t) RAFT_INST_BUILD_EXTEND(int8_t, uint64_t) diff --git a/cpp/src/nn/specializations/detail/ivfpq_build.cu b/cpp/src/nn/specializations/detail/ivfpq_build.cu index 37e17e35b1..44a245d112 100644 --- a/cpp/src/nn/specializations/detail/ivfpq_build.cu +++ b/cpp/src/nn/specializations/detail/ivfpq_build.cu @@ -37,6 +37,24 @@ namespace raft::neighbors::ivf_pq { ->index \ { \ return extend(handle, orig_index, new_vectors, new_indices, n_rows); \ + } \ + \ + void build(const handle_t& handle, \ + const index_params& params, \ + const T* dataset, \ + IdxT n_rows, \ + uint32_t dim, \ + index* idx) \ + { \ + *idx = build(handle, params, dataset, n_rows, dim); \ + } \ + void extend(const handle_t& handle, \ + index* idx, \ + const T* new_vectors, \ + const IdxT* new_indices, \ + IdxT n_rows) \ + { \ + 
*idx = extend(handle, *idx, new_vectors, new_indices, n_rows); /* extend() returns the extended index by value; store it back into *idx */ \ } RAFT_INST_BUILD_EXTEND(float, uint64_t); diff --git a/python/pylibraft/pylibraft/neighbors/c_ivf_pq.pxd b/python/pylibraft/pylibraft/neighbors/c_ivf_pq.pxd index dfc416b054..894100de45 100644 --- a/python/pylibraft/pylibraft/neighbors/c_ivf_pq.pxd +++ b/python/pylibraft/pylibraft/neighbors/c_ivf_pq.pxd @@ -58,7 +58,6 @@ cdef extern from "raft/neighbors/ivf_pq_types.hpp" \ PER_SUBSPACE "raft::neighbors::ivf_pq::codebook_gen::PER_SUBSPACE", PER_CLUSTER "raft::neighbors::ivf_pq::codebook_gen::PER_CLUSTER" - cpdef cppclass index_params(ann_index_params): uint32_t n_lists uint32_t kmeans_n_iters @@ -87,52 +86,53 @@ cdef extern from "raft/neighbors/ivf_pq_types.hpp" \ uint32_t n_lists() uint32_t rot_dim() - - - cpdef cppclass search_params(ann_search_params): uint32_t n_probes cudaDataType_t lut_dtype cudaDataType_t internal_distance_dtype + cdef extern from "raft/neighbors/specializations/ivf_pq_specialization.hpp" \ namespace "raft::neighbors::ivf_pq": - cdef index[uint64_t] build(const handle_t& handle, + cdef void build(const handle_t& handle, const index_params& params, const float* dataset, uint64_t n_rows, - uint32_t dim) #except + + uint32_t dim, + index[uint64_t]* index) except + - cdef index[uint64_t] build(const handle_t& handle, + cdef void build(const handle_t& handle, const index_params& params, const int8_t* dataset, uint64_t n_rows, - uint32_t dim) #except + + uint32_t dim, + index[uint64_t]* index) except + - cdef index[uint64_t] build(const handle_t& handle, + cdef void build(const handle_t& handle, const index_params& params, const uint8_t* dataset, uint64_t n_rows, - uint32_t dim) #except + + uint32_t dim, + index[uint64_t]* index) except + - cdef index[uint64_t] extend(const handle_t& handle, - const index[uint64_t]& orig_index, + cdef void extend(const handle_t& handle, + index[uint64_t]* index, const float* new_vectors, const uint64_t* new_indices, - uint64_t n_rows) # except + + uint64_t 
n_rows) except + - cdef index[uint64_t] extend(const handle_t& handle, - const index[uint64_t]& orig_index, + cdef void extend(const handle_t& handle, + index[uint64_t]* index, const int8_t* new_vectors, const uint64_t* new_indices, - uint64_t n_rows) #except + + uint64_t n_rows) except + - cdef index[uint64_t] extend(const handle_t& handle, - const index[uint64_t]& orig_index, + cdef void extend(const handle_t& handle, + index[uint64_t]* index, const uint8_t* new_vectors, const uint64_t* new_indices, - uint64_t n_rows) #except + + uint64_t n_rows) except + cdef void search(const handle_t& handle, const search_params& params, diff --git a/python/pylibraft/pylibraft/neighbors/ivf_pq.pyx b/python/pylibraft/pylibraft/neighbors/ivf_pq.pyx index 6b2e943929..3e012395a8 100644 --- a/python/pylibraft/pylibraft/neighbors/ivf_pq.pyx +++ b/python/pylibraft/pylibraft/neighbors/ivf_pq.pyx @@ -261,6 +261,10 @@ def build(IndexParams index_params, dataset, handle=None): dataset : CUDA array interface compliant matrix shape (n_samples, dim) Supported dtype [float, int8, uint8] + Returns + ------- + index: ivf_pq.Index + Examples -------- @@ -308,25 +312,28 @@ def build(IndexParams index_params, dataset, handle=None): idx = Index() if dataset_dt == np.float32: - idx.index[0] = c_ivf_pq.build(deref(handle_), - index_params.params, - dataset_ptr, - n_rows, - dim) + c_ivf_pq.build(deref(handle_), + index_params.params, + dataset_ptr, + n_rows, + dim, + idx.index) idx.trained = True elif dataset_dt == np.byte: - idx.index[0] = c_ivf_pq.build(deref(handle_), - index_params.params, - dataset_ptr, - n_rows, - dim) + c_ivf_pq.build(deref(handle_), + index_params.params, + dataset_ptr, + n_rows, + dim, + idx.index) idx.trained = True elif dataset_dt == np.ubyte: - idx.index[0] = c_ivf_pq.build(deref(handle_), - index_params.params, - dataset_ptr, - n_rows, - dim) + c_ivf_pq.build(deref(handle_), + index_params.params, + dataset_ptr, + n_rows, + dim, + idx.index) idx.trained = True else: 
raise TypeError("dtype %s not supported" % dataset_dt) @@ -376,29 +383,28 @@ def extend(Index index, new_vectors, new_indices, handle=None): cdef uintptr_t idx_ptr = idx_cai["data"][0] if vecs_dt == np.float32: - index.index[0] = c_ivf_pq.extend(deref(handle_), - deref(index.index), - vecs_ptr, - idx_ptr, - n_rows) + c_ivf_pq.extend(deref(handle_), + index.index, + vecs_ptr, + idx_ptr, + n_rows) elif vecs_dt == np.int8: - index.index[0] = c_ivf_pq.extend(deref(handle_), - deref(index.index), - vecs_ptr, - idx_ptr, - n_rows) + c_ivf_pq.extend(deref(handle_), + index.index, + vecs_ptr, + idx_ptr, + n_rows) elif vecs_dt == np.uint8: - index.index[0] = c_ivf_pq.extend(deref(handle_), - deref(index.index), - vecs_ptr, - idx_ptr, - n_rows) + c_ivf_pq.extend(deref(handle_), + index.index, + vecs_ptr, + idx_ptr, + n_rows) else: raise TypeError("query dtype %s not supported" % vecs_dt) - handle.sync() - - return index + handle.sync() + return index cdef class SearchParams: @@ -449,8 +455,7 @@ def search(SearchParams search_params, k, neighbors, distances, - handle=None - ): + handle=None): """ Find the k nearest neighbors for each query. @@ -469,6 +474,7 @@ def search(SearchParams search_params, distances : CUDA array interface compliant matrix shape (n_queries, k) If this parameter is specified, then the distances to the neighbors will be returned here. Otherwise a new array is created. 
+ mr_ptr : pointer to a raft device_memory_resource (not yet exposed as an argument; nullptr is currently used) Returns ------- @@ -501,6 +507,7 @@ def search(SearchParams search_params, cdef uintptr_t queries_ptr = queries_cai["data"][0] cdef uintptr_t neighbors_ptr = neighbors_cai["data"][0] cdef uintptr_t distances_ptr = distances_cai["data"][0] + # TODO(tfeher) pass mr_ptr arg cdef device_memory_resource* mr_ptr = nullptr if queries_dt == np.float32: @@ -538,5 +545,3 @@ def search(SearchParams search_params, handle.sync() - return (neighbors, distances) - diff --git a/python/pylibraft/pylibraft/test/test_ivf_pq.py b/python/pylibraft/pylibraft/test/test_ivf_pq.py index dc83a9ad83..ee8cbb3a3e 100644 --- a/python/pylibraft/pylibraft/test/test_ivf_pq.py +++ b/python/pylibraft/pylibraft/test/test_ivf_pq.py @@ -128,6 +128,8 @@ def run_ivf_pq_build_search_test( index = ivf_pq.extend(index, dataset_1_device, indices_1_device) index = ivf_pq.extend(index, dataset_2_device, indices_2_device) + assert index.size == n_rows + queries = generate_data((n_queries, n_cols), dtype) out_idx = np.zeros((n_queries, k), dtype=np.uint64) out_dist = np.zeros((n_queries, k), dtype=np.float32) @@ -188,26 +190,30 @@ def test_ivf_pq_dtypes(n_rows, n_cols, n_queries, n_lists, dtype): ) -# @pytest.mark.parametrize( -# "params", -# [ -# {"n_rows": 1, "n_cols": 10, "n_queries": 10, "k": 1, "n_lists": 10}, -# {"n_rows": 10, "n_cols": 1, "n_queries": 10, "k": 10, "n_lists": 10}, -# {"n_rows": 999, "n_cols": 42, "n_queries": 4953, "k": 137, "n_lists": 53}, -# ], -# ) -# def test_ivf_pq_n(params): -# # We do not test recall, just confirm that we can handle edge cases for certain parameters -# run_ivf_pq_build_search_test( -# n_rows=params["n_rows"], -# n_cols=params["n_cols"], -# n_queries=params["n_queries"], -# k=params["k"], -# n_lists=params["n_lists"], -# metric="l2_expanded", -# dtype=np.float32, -# compare=False, -# ) +@pytest.mark.parametrize( + "params", + [ + pytest.param( + {"n_rows": 0, "n_cols": 10, "n_queries": 10, "k": 1, "n_lists": 10}, + 
marks=pytest.mark.xfail(reason="empty dataset"), + ), + {"n_rows": 1, "n_cols": 10, "n_queries": 10, "k": 1, "n_lists": 10}, + {"n_rows": 10, "n_cols": 1, "n_queries": 10, "k": 10, "n_lists": 10}, + # {"n_rows": 999, "n_cols": 42, "n_queries": 453, "k": 137, "n_lists": 53}, + ], +) +def test_ivf_pq_n(params): + # We do not test recall, just confirm that we can handle edge cases for certain parameters + run_ivf_pq_build_search_test( + n_rows=params["n_rows"], + n_cols=params["n_cols"], + n_queries=params["n_queries"], + k=params["k"], + n_lists=params["n_lists"], + metric="l2_expanded", + dtype=np.float32, + compare=False, + ) @pytest.mark.parametrize("metric", ["l2_expanded", "inner_product"])