diff --git a/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx b/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx index 5fb837c114..880bb46a05 100644 --- a/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx +++ b/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx @@ -25,6 +25,7 @@ from cython.operator cimport dereference as deref from libcpp cimport bool from .distance_type cimport DistanceType +from pylibraft.common import Handle from pylibraft.common.handle cimport handle_t @@ -58,7 +59,7 @@ cdef extern from "raft_distance/fused_l2_min_arg.hpp" \ bool sqrt) -def fused_l2_nn_argmin(X, Y, output, sqrt=True): +def fused_l2_nn_argmin(X, Y, output, sqrt=True, handle=None): """ Compute the 1-nearest neighbors between X and Y using the L2 distance @@ -68,6 +69,7 @@ def fused_l2_nn_argmin(X, Y, output, sqrt=True): X : CUDA array interface compliant matrix shape (m, k) Y : CUDA array interface compliant matrix shape (n, k) output : Writable CUDA array interface matrix shape (m, 1) + handle : Optional RAFT handle for reusing expensive CUDA resources Examples -------- @@ -76,6 +78,7 @@ def fused_l2_nn_argmin(X, Y, output, sqrt=True): import cupy as cp - from pylibraft.distance import fused_l2_nn + from pylibraft.common import Handle + from pylibraft.distance import fused_l2_nn_argmin n_samples = 5000 @@ -88,7 +91,15 @@ def fused_l2_nn_argmin(X, Y, output, sqrt=True): dtype=cp.float32) output = cp.empty((n_samples, 1), dtype=cp.int32) - fused_l2_nn_argmin(in1, in2, output) + # A single RAFT handle can optionally be reused across + # pylibraft functions. + handle = Handle() + ... + fused_l2_nn_argmin(in1, in2, output, handle=handle) + ...
+ # pylibraft functions are often asynchronous so the + # handle needs to be explicitly synchronized + handle.sync() """ x_cai = X.__cuda_array_interface__ @@ -110,7 +121,8 @@ def fused_l2_nn_argmin(X, Y, output, sqrt=True): d_ptr = output_cai["data"][0] - cdef handle_t *h = new handle_t() + handle = handle if handle is not None else Handle() + cdef handle_t *h = <handle_t*><size_t>handle.getHandle() x_dt = np.dtype(x_cai["typestr"]) y_dt = np.dtype(y_cai["typestr"]) diff --git a/python/pylibraft/pylibraft/distance/pairwise_distance.pyx b/python/pylibraft/pylibraft/distance/pairwise_distance.pyx index 8d55402e23..0f7626e8d1 100644 --- a/python/pylibraft/pylibraft/distance/pairwise_distance.pyx +++ b/python/pylibraft/pylibraft/distance/pairwise_distance.pyx @@ -25,6 +25,8 @@ from cython.operator cimport dereference as deref from libcpp cimport bool from .distance_type cimport DistanceType + +from pylibraft.common import Handle from pylibraft.common.handle cimport handle_t @@ -88,7 +90,7 @@ SUPPORTED_DISTANCES = ["euclidean", "l1", "cityblock", "l2", "inner_product", "hamming", "jensenshannon", "cosine", "sqeuclidean"] -def distance(X, Y, dists, metric="euclidean", p=2.0): +def distance(X, Y, dists, metric="euclidean", p=2.0, handle=None): """ Compute pairwise distances between X and Y @@ -106,6 +108,7 @@ def distance(X, Y, dists, metric="euclidean", p=2.0): dists : Writable CUDA array interface matrix shape (m, n) metric : string denoting the metric type (default="euclidean") p : metric parameter (currently used only for "minkowski") + handle : Optional RAFT handle for reusing expensive CUDA resources Examples -------- @@ -114,6 +117,7 @@ def distance(X, Y, dists, metric="euclidean", p=2.0): import cupy as cp + from pylibraft.common import Handle from pylibraft.distance import pairwise_distance n_samples = 5000 @@ -125,7 +129,15 @@ def distance(X, Y, dists, metric="euclidean", p=2.0): dtype=cp.float32) output = cp.empty((n_samples, n_samples), dtype=cp.float32) -
pairwise_distance(in1, in2, output, metric="euclidean") + # A single RAFT handle can optionally be reused across + # pylibraft functions. + handle = Handle() + ... + pairwise_distance(in1, in2, output, metric="euclidean", handle=handle) + ... + # pylibraft functions are often asynchronous so the + # handle needs to be explicitly synchronized + handle.sync() """ x_cai = X.__cuda_array_interface__ @@ -146,7 +158,8 @@ def distance(X, Y, dists, metric="euclidean", p=2.0): y_ptr = y_cai["data"][0] d_ptr = dists_cai["data"][0] - cdef handle_t *h = new handle_t() + handle = handle if handle is not None else Handle() + cdef handle_t *h = <handle_t*><size_t>handle.getHandle() x_dt = np.dtype(x_cai["typestr"]) y_dt = np.dtype(y_cai["typestr"]) diff --git a/python/pylibraft/pylibraft/random/rmat_rectangular_generator.pyx b/python/pylibraft/pylibraft/random/rmat_rectangular_generator.pyx index ea28357201..cef19295ac 100644 --- a/python/pylibraft/pylibraft/random/rmat_rectangular_generator.pyx +++ b/python/pylibraft/pylibraft/random/rmat_rectangular_generator.pyx @@ -22,6 +22,7 @@ import numpy as np from libc.stdint cimport uintptr_t, int64_t from cython.operator cimport dereference as deref +from pylibraft.common import Handle from pylibraft.common.handle cimport handle_t from .rng_state cimport RngState @@ -72,7 +73,7 @@ cdef extern from "raft_distance/random/rmat_rectangular_generator.hpp" \ RngState& r) -def rmat(out, theta, r_scale, c_scale, seed=12345): +def rmat(out, theta, r_scale, c_scale, seed=12345, handle=None): """ Generate RMAT adjacency list based on the input distribution.
@@ -87,6 +88,7 @@ def rmat(out, theta, r_scale, c_scale, seed=12345): r_scale: log2 of number of source nodes c_scale: log2 of number of destination nodes seed: random seed used for reproducibility + handle : Optional RAFT handle for reusing expensive CUDA resources Examples -------- @@ -95,6 +97,7 @@ def rmat(out, theta, r_scale, c_scale, seed=12345): import cupy as cp + from pylibraft.common import Handle from pylibraft.random import rmat n_edges = 5000 @@ -105,7 +108,15 @@ def rmat(out, theta, r_scale, c_scale, seed=12345): out = cp.empty((n_edges, 2), dtype=cp.int32) theta = cp.random.random_sample(theta_len, dtype=cp.float32) - rmat(out, theta, r_scale, c_scale) + # A single RAFT handle can optionally be reused across + # pylibraft functions. + handle = Handle() + ... + rmat(out, theta, r_scale, c_scale, handle=handle) + ... + # pylibraft functions are often asynchronous so the + # handle needs to be explicitly synchronized + handle.sync() """ if theta is None: @@ -123,7 +134,9 @@ def rmat(out, theta, r_scale, c_scale, seed=12345): theta_dt = np.dtype(theta_cai["typestr"]) cdef RngState *rng = new RngState(seed) - cdef handle_t *h = new handle_t() + + handle = handle if handle is not None else Handle() + cdef handle_t *h = <handle_t*><size_t>handle.getHandle() if out_dt == np.int32 and theta_dt == np.float32: rmat_rectangular_gen(deref(h), diff --git a/python/pylibraft/pylibraft/test/test_distance.py b/python/pylibraft/pylibraft/test/test_distance.py index b9b4ba9e30..7f35a25493 100644 --- a/python/pylibraft/pylibraft/test/test_distance.py +++ b/python/pylibraft/pylibraft/test/test_distance.py @@ -17,6 +17,7 @@ import pytest import numpy as np +from pylibraft.common import Handle from pylibraft.distance import pairwise_distance from pylibraft.testing.utils import TestDeviceBuffer @@ -53,7 +54,10 @@ def test_distance(n_rows, n_cols, metric, order, dtype): input1_device = TestDeviceBuffer(input1, order) output_device = TestDeviceBuffer(output, order) + handle = Handle()
- pairwise_distance(input1_device, input1_device, output_device, metric) + pairwise_distance(input1_device, input1_device, output_device, + metric, handle=handle) + handle.sync() actual = output_device.copy_to_host() actual[actual <= 1e-5] = 0.0 diff --git a/python/pylibraft/pylibraft/test/test_fused_l2_argmin.py b/python/pylibraft/pylibraft/test/test_fused_l2_argmin.py index b12cc30472..1ce1ee2d1f 100644 --- a/python/pylibraft/pylibraft/test/test_fused_l2_argmin.py +++ b/python/pylibraft/pylibraft/test/test_fused_l2_argmin.py @@ -17,6 +17,7 @@ import pytest import numpy as np +from pylibraft.common import Handle from pylibraft.distance import fused_l2_nn_argmin from pylibraft.testing.utils import TestDeviceBuffer @@ -41,7 +42,10 @@ def test_fused_l2_nn_minarg(n_rows, n_cols, n_clusters, dtype): input2_device = TestDeviceBuffer(input2, "C") output_device = TestDeviceBuffer(output, "C") - fused_l2_nn_argmin(input1_device, input2_device, output_device, True) + handle = Handle() + fused_l2_nn_argmin(input1_device, input2_device, output_device, + True, handle=handle) + handle.sync() actual = output_device.copy_to_host() assert np.allclose(expected, actual, rtol=1e-4) diff --git a/python/pylibraft/pylibraft/test/test_random.py b/python/pylibraft/pylibraft/test/test_random.py index 8a04f707de..e0b7140f1c 100644 --- a/python/pylibraft/pylibraft/test/test_random.py +++ b/python/pylibraft/pylibraft/test/test_random.py @@ -16,6 +16,7 @@ import pytest import numpy as np +from pylibraft.common import Handle from pylibraft.random import rmat from pylibraft.testing.utils import TestDeviceBuffer @@ -46,14 +47,18 @@ def test_rmat(n_edges, r_scale, c_scale, dtype): theta, theta_device = generate_theta(r_scale, c_scale) out_buff = np.empty((n_edges, 2), dtype=dtype) output_device = TestDeviceBuffer(out_buff, "C") - rmat(output_device, theta_device, r_scale, c_scale, 12345) + + handle = Handle() + rmat(output_device, theta_device, r_scale, c_scale, 12345, handle=handle) + handle.sync() output = output_device.copy_to_host() # a more rigorous tests have been done at the c++
level assert np.all(output[:, 0] >= 0) assert np.all(output[:, 0] < 2**r_scale) assert np.all(output[:, 1] >= 0) assert np.all(output[:, 1] < 2**c_scale) - rmat(output_device, theta_device, r_scale, c_scale, 12345) + rmat(output_device, theta_device, r_scale, c_scale, 12345, handle=handle) + handle.sync() output1 = output_device.copy_to_host() assert np.all(np.equal(output, output1)) diff --git a/python/pylibraft/pylibraft/test/test_interruptible.py b/python/pylibraft/pylibraft/test/test_z_interruptible.py similarity index 100% rename from python/pylibraft/pylibraft/test/test_interruptible.py rename to python/pylibraft/pylibraft/test/test_z_interruptible.py