Skip to content

Commit

Permalink
Forward exceptions from build and extend to Python
Browse files Browse the repository at this point in the history
  • Loading branch information
tfeher committed Nov 9, 2022
1 parent d1c0f1b commit 2d6da94
Show file tree
Hide file tree
Showing 5 changed files with 122 additions and 77 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ RAFT_INST_SEARCH(uint8_t, uint64_t);

#undef RAFT_INST_SEARCH

// We define overloads for build and extend with a void return type. These are used in the Cython
// wrappers, where exception handling is not compatible with a return type that has a nontrivial
// constructor.
#define RAFT_INST_BUILD_EXTEND(T, IdxT) \
auto build(const handle_t& handle, \
const index_params& params, \
Expand All @@ -50,7 +53,20 @@ RAFT_INST_SEARCH(uint8_t, uint64_t);
const T* new_vectors, \
const IdxT* new_indices, \
IdxT n_rows) \
->index<IdxT>;
->index<IdxT>; \
\
void build(const handle_t& handle, \
const index_params& params, \
const T* dataset, \
IdxT n_rows, \
uint32_t dim, \
index<IdxT>* idx); \
\
void extend(const handle_t& handle, \
index<IdxT>* idx, \
const T* new_vectors, \
const IdxT* new_indices, \
IdxT n_rows);

RAFT_INST_BUILD_EXTEND(float, uint64_t)
RAFT_INST_BUILD_EXTEND(int8_t, uint64_t)
Expand Down
18 changes: 18 additions & 0 deletions cpp/src/nn/specializations/detail/ivfpq_build.cu
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,24 @@ namespace raft::neighbors::ivf_pq {
->index<IdxT> \
{ \
return extend<T, IdxT>(handle, orig_index, new_vectors, new_indices, n_rows); \
} \
\
void build(const handle_t& handle, \
const index_params& params, \
const T* dataset, \
IdxT n_rows, \
uint32_t dim, \
index<IdxT>* idx) \
{ \
*idx = build<T, IdxT>(handle, params, dataset, n_rows, dim); \
} \
void extend(const handle_t& handle, \
index<IdxT>* idx, \
const T* new_vectors, \
const IdxT* new_indices, \
IdxT n_rows) \
{ \
extend<T, IdxT>(handle, *idx, new_vectors, new_indices, n_rows); \
}

RAFT_INST_BUILD_EXTEND(float, uint64_t);
Expand Down
38 changes: 19 additions & 19 deletions python/pylibraft/pylibraft/neighbors/c_ivf_pq.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ cdef extern from "raft/neighbors/ivf_pq_types.hpp" \
PER_SUBSPACE "raft::neighbors::ivf_pq::codebook_gen::PER_SUBSPACE",
PER_CLUSTER "raft::neighbors::ivf_pq::codebook_gen::PER_CLUSTER"


cpdef cppclass index_params(ann_index_params):
uint32_t n_lists
uint32_t kmeans_n_iters
Expand Down Expand Up @@ -87,52 +86,53 @@ cdef extern from "raft/neighbors/ivf_pq_types.hpp" \
uint32_t n_lists()
uint32_t rot_dim()




cpdef cppclass search_params(ann_search_params):
uint32_t n_probes
cudaDataType_t lut_dtype
cudaDataType_t internal_distance_dtype


cdef extern from "raft/neighbors/specializations/ivf_pq_specialization.hpp" \
namespace "raft::neighbors::ivf_pq":

cdef index[uint64_t] build(const handle_t& handle,
cdef void build(const handle_t& handle,
const index_params& params,
const float* dataset,
uint64_t n_rows,
uint32_t dim) #except +
uint32_t dim,
index[uint64_t]* index) except +

cdef index[uint64_t] build(const handle_t& handle,
cdef void build(const handle_t& handle,
const index_params& params,
const int8_t* dataset,
uint64_t n_rows,
uint32_t dim) #except +
uint32_t dim,
index[uint64_t]* index) except +

cdef index[uint64_t] build(const handle_t& handle,
cdef void build(const handle_t& handle,
const index_params& params,
const uint8_t* dataset,
uint64_t n_rows,
uint32_t dim) #except +
uint32_t dim,
index[uint64_t]* index) except +

cdef index[uint64_t] extend(const handle_t& handle,
const index[uint64_t]& orig_index,
cdef void extend(const handle_t& handle,
index[uint64_t]* index,
const float* new_vectors,
const uint64_t* new_indices,
uint64_t n_rows) # except +
uint64_t n_rows) except +

cdef index[uint64_t] extend(const handle_t& handle,
const index[uint64_t]& orig_index,
cdef void extend(const handle_t& handle,
index[uint64_t]* index,
const int8_t* new_vectors,
const uint64_t* new_indices,
uint64_t n_rows) #except +
uint64_t n_rows) except +

cdef index[uint64_t] extend(const handle_t& handle,
const index[uint64_t]& orig_index,
cdef void extend(const handle_t& handle,
index[uint64_t]* index,
const uint8_t* new_vectors,
const uint64_t* new_indices,
uint64_t n_rows) #except +
uint64_t n_rows) except +

cdef void search(const handle_t& handle,
const search_params& params,
Expand Down
79 changes: 42 additions & 37 deletions python/pylibraft/pylibraft/neighbors/ivf_pq.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,10 @@ def build(IndexParams index_params, dataset, handle=None):
dataset : CUDA array interface compliant matrix shape (n_samples, dim)
Supported dtype [float, int8, uint8]
Returns
-------
index: ivf_pq.Index
Examples
--------
Expand Down Expand Up @@ -308,25 +312,28 @@ def build(IndexParams index_params, dataset, handle=None):
idx = Index()

if dataset_dt == np.float32:
idx.index[0] = c_ivf_pq.build(deref(handle_),
index_params.params,
<float*> dataset_ptr,
n_rows,
dim)
c_ivf_pq.build(deref(handle_),
index_params.params,
<float*> dataset_ptr,
n_rows,
dim,
idx.index)
idx.trained = True
elif dataset_dt == np.byte:
idx.index[0] = c_ivf_pq.build(deref(handle_),
index_params.params,
<int8_t*> dataset_ptr,
n_rows,
dim)
c_ivf_pq.build(deref(handle_),
index_params.params,
<int8_t*> dataset_ptr,
n_rows,
dim,
idx.index)
idx.trained = True
elif dataset_dt == np.ubyte:
idx.index[0] = c_ivf_pq.build(deref(handle_),
index_params.params,
<uint8_t*> dataset_ptr,
n_rows,
dim)
c_ivf_pq.build(deref(handle_),
index_params.params,
<uint8_t*> dataset_ptr,
n_rows,
dim,
idx.index)
idx.trained = True
else:
raise TypeError("dtype %s not supported" % dataset_dt)
Expand Down Expand Up @@ -376,29 +383,28 @@ def extend(Index index, new_vectors, new_indices, handle=None):
cdef uintptr_t idx_ptr = idx_cai["data"][0]

if vecs_dt == np.float32:
index.index[0] = c_ivf_pq.extend(deref(handle_),
deref(index.index),
<float*>vecs_ptr,
<uint64_t*> idx_ptr,
<uint64_t> n_rows)
c_ivf_pq.extend(deref(handle_),
index.index,
<float*>vecs_ptr,
<uint64_t*> idx_ptr,
<uint64_t> n_rows)
elif vecs_dt == np.int8:
index.index[0] = c_ivf_pq.extend(deref(handle_),
deref(index.index),
<int8_t*>vecs_ptr,
<uint64_t*> idx_ptr,
<uint64_t> n_rows)
c_ivf_pq.extend(deref(handle_),
index.index,
<int8_t*>vecs_ptr,
<uint64_t*> idx_ptr,
<uint64_t> n_rows)
elif vecs_dt == np.uint8:
index.index[0] = c_ivf_pq.extend(deref(handle_),
deref(index.index),
<uint8_t*>vecs_ptr,
<uint64_t*> idx_ptr,
<uint64_t> n_rows)
c_ivf_pq.extend(deref(handle_),
index.index,
<uint8_t*>vecs_ptr,
<uint64_t*> idx_ptr,
<uint64_t> n_rows)
else:
raise TypeError("query dtype %s not supported" % vecs_dt)

handle.sync()

return index
handle.sync()
return index


cdef class SearchParams:
Expand Down Expand Up @@ -449,8 +455,7 @@ def search(SearchParams search_params,
k,
neighbors,
distances,
handle=None
):
handle=None):
"""
Find the k nearest neighbors for each query.
Expand All @@ -469,6 +474,7 @@ def search(SearchParams search_params,
distances : CUDA array interface compliant matrix shape (n_queries, k)
If this parameter is specified, then the distances to the neighbors will be returned here.
Otherwise a new array is created.
mr_ptr : pointer to a raft device_memory_resource
Returns
-------
Expand Down Expand Up @@ -501,6 +507,7 @@ def search(SearchParams search_params,
cdef uintptr_t queries_ptr = queries_cai["data"][0]
cdef uintptr_t neighbors_ptr = neighbors_cai["data"][0]
cdef uintptr_t distances_ptr = distances_cai["data"][0]
# TODO(tfeher) pass mr_ptr arg
cdef device_memory_resource* mr_ptr = <device_memory_resource*> nullptr

if queries_dt == np.float32:
Expand Down Expand Up @@ -538,5 +545,3 @@ def search(SearchParams search_params,

handle.sync()

return (neighbors, distances)

46 changes: 26 additions & 20 deletions python/pylibraft/pylibraft/test/test_ivf_pq.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,8 @@ def run_ivf_pq_build_search_test(
index = ivf_pq.extend(index, dataset_1_device, indices_1_device)
index = ivf_pq.extend(index, dataset_2_device, indices_2_device)

assert index.size == n_rows

queries = generate_data((n_queries, n_cols), dtype)
out_idx = np.zeros((n_queries, k), dtype=np.uint64)
out_dist = np.zeros((n_queries, k), dtype=np.float32)
Expand Down Expand Up @@ -188,26 +190,30 @@ def test_ivf_pq_dtypes(n_rows, n_cols, n_queries, n_lists, dtype):
)


# @pytest.mark.parametrize(
# "params",
# [
# {"n_rows": 1, "n_cols": 10, "n_queries": 10, "k": 1, "n_lists": 10},
# {"n_rows": 10, "n_cols": 1, "n_queries": 10, "k": 10, "n_lists": 10},
# {"n_rows": 999, "n_cols": 42, "n_queries": 4953, "k": 137, "n_lists": 53},
# ],
# )
# def test_ivf_pq_n(params):
# # We do not test recall, just confirm that we can handle edge cases for certain parameters
# run_ivf_pq_build_search_test(
# n_rows=params["n_rows"],
# n_cols=params["n_cols"],
# n_queries=params["n_queries"],
# k=params["k"],
# n_lists=params["n_lists"],
# metric="l2_expanded",
# dtype=np.float32,
# compare=False,
# )
@pytest.mark.parametrize(
    "params",
    [
        pytest.param(
            {"n_rows": 0, "n_cols": 10, "n_queries": 10, "k": 1, "n_lists": 10},
            marks=pytest.mark.xfail(reason="empty dataset"),
        ),
        {"n_rows": 1, "n_cols": 10, "n_queries": 10, "k": 1, "n_lists": 10},
        {"n_rows": 10, "n_cols": 1, "n_queries": 10, "k": 10, "n_lists": 10},
        # {"n_rows": 999, "n_cols": 42, "n_queries": 453, "k": 137, "n_lists": 53},
    ],
)
def test_ivf_pq_n(params):
    """Run the build/search helper on edge-case problem sizes.

    Recall is not checked here (``compare=False``); the goal is only to
    confirm that unusual size parameters are handled. The ``n_rows == 0``
    case is marked as an expected failure ("empty dataset").

    Parameters
    ----------
    params : dict
        Must provide ``n_rows``, ``n_cols``, ``n_queries``, ``k`` and
        ``n_lists``; each is forwarded to the helper as a keyword argument.
    """
    size_keys = ("n_rows", "n_cols", "n_queries", "k", "n_lists")
    # Pick the keys explicitly so a missing one still raises KeyError and
    # any extra entries in ``params`` are ignored.
    sizes = {key: params[key] for key in size_keys}
    run_ivf_pq_build_search_test(
        **sizes,
        metric="l2_expanded",
        dtype=np.float32,
        compare=False,
    )


@pytest.mark.parametrize("metric", ["l2_expanded", "inner_product"])
Expand Down

0 comments on commit 2d6da94

Please sign in to comment.