diff --git a/python/pylibraft/pylibraft/common/__init__.py b/python/pylibraft/pylibraft/common/__init__.py index 4f87720030..f8f9b58426 100644 --- a/python/pylibraft/pylibraft/common/__init__.py +++ b/python/pylibraft/pylibraft/common/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ # limitations under the License. # +from .ai_wrapper import ai_wrapper from .cai_wrapper import cai_wrapper from .cuda import Stream from .device_ndarray import device_ndarray diff --git a/python/pylibraft/pylibraft/common/ai_wrapper.py b/python/pylibraft/pylibraft/common/ai_wrapper.py new file mode 100644 index 0000000000..b6b1f02187 --- /dev/null +++ b/python/pylibraft/pylibraft/common/ai_wrapper.py @@ -0,0 +1,89 @@ +# +# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import numpy as np + +from pylibraft.common import input_validation + + +class ai_wrapper: + """ + Simple wrapper around an array interface object to reduce + boilerplate for extracting common information from the underlying + dictionary. 
+ """ + + def __init__(self, ai_arr): + """ + Constructor accepts an array interface compliant array + + Parameters + ---------- + ai_arr : array interface array + """ + self.ai_ = ai_arr.__array_interface__ + + @property + def dtype(self): + """ + Returns the dtype of the underlying array interface + """ + return np.dtype(self.ai_["typestr"]) + + @property + def shape(self): + """ + Returns the shape of the underlying array interface + """ + return self.ai_["shape"] + + @property + def c_contiguous(self): + """ + Returns whether the underlying array interface has + c-ordered (row-major) layout + """ + return input_validation.is_c_contiguous(self.ai_) + + @property + def f_contiguous(self): + """ + Returns whether the underlying array interface has + f-ordered (column-major) layout + """ + return not input_validation.is_c_contiguous(self.ai_) + + @property + def data(self): + """ + Returns the data pointer of the underlying array interface + """ + return self.ai_["data"][0] + + def validate_shape_dtype(self, expected_dims=None, expected_dtype=None): + """Checks to see if the shape, dtype, and strides match expectations""" + if expected_dims is not None and len(self.shape) != expected_dims: + raise ValueError( + f"unexpected shape {self.shape} - " + f"expected {expected_dims} dimensions" + ) + + if expected_dtype is not None and self.dtype != expected_dtype: + raise ValueError( + f"invalid dtype {self.dtype}: expected " f"{expected_dtype}" + ) + + if not self.c_contiguous: + raise ValueError("input must be c-contiguous") diff --git a/python/pylibraft/pylibraft/common/cai_wrapper.py b/python/pylibraft/pylibraft/common/cai_wrapper.py index 5851821f57..cf11ea29ce 100644 --- a/python/pylibraft/pylibraft/common/cai_wrapper.py +++ b/python/pylibraft/pylibraft/common/cai_wrapper.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,12 +13,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # -import numpy as np +from types import SimpleNamespace -from pylibraft.common import input_validation +from pylibraft.common.ai_wrapper import ai_wrapper -class cai_wrapper: +class cai_wrapper(ai_wrapper): """ Simple wrapper around a CUDA array interface object to reduce boilerplate for extracting common information from the underlying @@ -33,57 +33,14 @@ def __init__(self, cai_arr): ---------- cai_arr : CUDA array interface array """ - self.cai_ = cai_arr.__cuda_array_interface__ + helper = SimpleNamespace( + __array_interface__=cai_arr.__cuda_array_interface__ + ) + super().__init__(helper) - @property - def dtype(self): - """ - Returns the dtype of the underlying CUDA array interface - """ - return np.dtype(self.cai_["typestr"]) - - @property - def shape(self): - """ - Returns the shape of the underlying CUDA array interface - """ - return self.cai_["shape"] - - @property - def c_contiguous(self): - """ - Returns whether the underlying CUDA array interface has - c-ordered (row-major) layout - """ - return input_validation.is_c_contiguous(self.cai_) - - @property - def f_contiguous(self): - """ - Returns whether the underlying CUDA array interface has - f-ordered (column-major) layout - """ - return not input_validation.is_c_contiguous(self.cai_) - - @property - def data(self): - """ - Returns the data pointer of the underlying CUDA array interface - """ - return self.cai_["data"][0] - - def validate_shape_dtype(self, expected_dims=None, expected_dtype=None): - """Checks to see if the shape, dtype, and strides match expectations""" - if expected_dims is not None and len(self.shape) != expected_dims: - raise ValueError( - f"unexpected shape {self.shape} - " - f"expected {expected_dims} dimensions" - ) - - if 
expected_dtype is not None and self.dtype != expected_dtype: - raise ValueError( - f"invalid dtype {self.dtype}: expected " f"{expected_dtype}" - ) - if not self.c_contiguous: - raise ValueError("input must be c-contiguous") +def wrap_array(array): + try: + return cai_wrapper(array) + except AttributeError: + return ai_wrapper(array) diff --git a/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx b/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx index a7137e4d08..002a097d0f 100644 --- a/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx +++ b/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx @@ -1,5 +1,5 @@ # -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -36,10 +36,12 @@ from pylibraft.distance.distance_type cimport DistanceType from pylibraft.common import ( Handle, + ai_wrapper, auto_convert_output, cai_wrapper, device_ndarray, ) +from pylibraft.common.cai_wrapper import wrap_array from pylibraft.common.interruptible import cuda_interruptible from pylibraft.common.handle cimport handle_t @@ -313,10 +315,13 @@ def build(IndexParams index_params, dataset, handle=None): """ Builds an IVF-PQ index that can be later used for nearest neighbor search. + The input array can be either a CUDA array interface compliant matrix or + an array interface compliant matrix in host memory. 
+ + Parameters ---------- index_params : IndexParams object - dataset : CUDA array interface compliant matrix shape (n_samples, dim) + dataset : array interface compliant matrix shape (n_samples, dim) Supported dtype [float, int8, uint8] {handle_docstring} @@ -359,7 +364,7 @@ def build(IndexParams index_params, dataset, handle=None): >>> # handle needs to be explicitly synchronized >>> handle.sync() """ - dataset_cai = cai_wrapper(dataset) + dataset_cai = wrap_array(dataset) dataset_dt = dataset_cai.dtype _check_input_array(dataset_cai, [np.dtype('float32'), np.dtype('byte'), np.dtype('ubyte')]) @@ -413,14 +418,16 @@ def extend(Index index, new_vectors, new_indices, handle=None): """ Extend an existing index with new vectors. + The input array can be either a CUDA array interface compliant matrix or + an array interface compliant matrix in host memory. Parameters ---------- index : ivf_pq.Index Trained ivf_pq object. - new_vectors : CUDA array interface compliant matrix shape (n_samples, dim) + new_vectors : array interface compliant matrix shape (n_samples, dim) Supported dtype [float, int8, uint8] - new_indices : CUDA array interface compliant matrix shape (n_samples, dim) + new_indices : array interface compliant vector shape (n_samples) Supported dtype [uint64] {handle_docstring} @@ -473,7 +480,7 @@ def extend(Index index, new_vectors, new_indices, handle=None): handle = Handle() cdef handle_t* handle_ = handle.getHandle() - vecs_cai = cai_wrapper(new_vectors) + vecs_cai = wrap_array(new_vectors) vecs_dt = vecs_cai.dtype cdef uint64_t n_rows = vecs_cai.shape[0] cdef uint32_t dim = vecs_cai.shape[1] @@ -482,7 +489,7 @@ def extend(Index index, new_vectors, new_indices, handle=None): np.dtype('ubyte')], exp_cols=index.dim) - idx_cai = cai_wrapper(new_indices) + idx_cai = wrap_array(new_indices) _check_input_array(idx_cai, [np.dtype('uint64')], exp_rows=n_rows) if len(idx_cai.shape)!=1: raise ValueError("Indices array is expected to be 1D") diff --git 
a/python/pylibraft/pylibraft/test/test_ivf_pq.py b/python/pylibraft/pylibraft/test/test_ivf_pq.py index 2c6e0dd14c..35738cd471 100644 --- a/python/pylibraft/pylibraft/test/test_ivf_pq.py +++ b/python/pylibraft/pylibraft/test/test_ivf_pq.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -97,6 +97,7 @@ def run_ivf_pq_build_search_test( kmeans_n_iters=20, compare=True, inplace=True, + array_type="device", ): dataset = generate_data((n_rows, n_cols), dtype) if metric == "inner_product": @@ -115,7 +116,10 @@ def run_ivf_pq_build_search_test( add_data_on_build=add_data_on_build, ) - index = ivf_pq.build(build_params, dataset_device) + if array_type == "device": + index = ivf_pq.build(build_params, dataset_device) + else: + index = ivf_pq.build(build_params, dataset) assert index.trained if pq_dim != 0: @@ -125,14 +129,20 @@ def run_ivf_pq_build_search_test( assert index.n_lists == build_params.n_lists if not add_data_on_build: - dataset_1_device = device_ndarray(dataset[: n_rows // 2, :]) - dataset_2_device = device_ndarray(dataset[n_rows // 2 :, :]) + dataset_1 = dataset[: n_rows // 2, :] + dataset_2 = dataset[n_rows // 2 :, :] indices_1 = np.arange(n_rows // 2, dtype=np.uint64) - indices_1_device = device_ndarray(indices_1) indices_2 = np.arange(n_rows // 2, n_rows, dtype=np.uint64) - indices_2_device = device_ndarray(indices_2) - index = ivf_pq.extend(index, dataset_1_device, indices_1_device) - index = ivf_pq.extend(index, dataset_2_device, indices_2_device) + if array_type == "device": + dataset_1_device = device_ndarray(dataset_1) + dataset_2_device = device_ndarray(dataset_2) + indices_1_device = device_ndarray(indices_1) + indices_2_device = device_ndarray(indices_2) + index = ivf_pq.extend(index, dataset_1_device, indices_1_device) + index = ivf_pq.extend(index, 
dataset_2_device, indices_2_device) + else: + index = ivf_pq.extend(index, dataset_1, indices_1) + index = ivf_pq.extend(index, dataset_2, indices_2) assert index.size >= n_rows @@ -190,7 +200,10 @@ def run_ivf_pq_build_search_test( @pytest.mark.parametrize("n_queries", [100]) @pytest.mark.parametrize("n_lists", [100]) @pytest.mark.parametrize("dtype", [np.float32, np.int8, np.uint8]) -def test_ivf_pq_dtypes(n_rows, n_cols, n_queries, n_lists, dtype, inplace): +@pytest.mark.parametrize("array_type", ["host", "device"]) +def test_ivf_pq_dtypes( + n_rows, n_cols, n_queries, n_lists, dtype, inplace, array_type +): # Note that inner_product tests use normalized input which we cannot # represent in int8, therefore we test only l2_expanded metric here. run_ivf_pq_build_search_test( @@ -202,6 +215,7 @@ def test_ivf_pq_dtypes(n_rows, n_cols, n_queries, n_lists, dtype, inplace): metric="l2_expanded", dtype=dtype, inplace=inplace, + array_type=array_type, ) @@ -337,7 +351,8 @@ def test_ivf_pq_search_params(params): @pytest.mark.parametrize("dtype", [np.float32, np.int8, np.uint8]) -def test_extend(dtype): +@pytest.mark.parametrize("array_type", ["host", "device"]) +def test_extend(dtype, array_type): run_ivf_pq_build_search_test( n_rows=10000, n_cols=10, @@ -347,6 +362,7 @@ def test_extend(dtype): metric="l2_expanded", dtype=dtype, add_data_on_build=False, + array_type=array_type, )