Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Moving TestDeviceBuffer to pylibraft.common.device_ndarray #1008

Merged
merged 14 commits into from
Nov 14, 2022
Merged
2 changes: 2 additions & 0 deletions python/pylibraft/pylibraft/common/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,7 @@
# limitations under the License.
#


from .cuda import Stream
from .device_ndarray import device_ndarray
from .handle import Handle
147 changes: 147 additions & 0 deletions python/pylibraft/pylibraft/common/device_ndarray.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
# Copyright (c) 2022, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import numpy as np

import rmm


class device_ndarray:
    """
    pylibraft.device_ndarray is meant to be a very lightweight
    __cuda_array_interface__ wrapper around a numpy.ndarray.

    The wrapped host array is kept in ``ndarray_`` and is the source of all
    metadata (shape, dtype, strides); the bytes uploaded to the device are
    kept in ``device_buffer_``.
    """

    def __init__(self, np_ndarray):
        """
        Construct a pylibraft.device_ndarray wrapper around a numpy.ndarray

        Parameters
        ----------
        np_ndarray : A numpy.ndarray which will be copied and moved to the
                     device. Inputs that are neither C- nor F-contiguous
                     (e.g. strided views) are first compacted into a
                     C-ordered host copy so that the reported strides
                     always describe the device buffer.

        Examples
        --------
        The device_ndarray is __cuda_array_interface__ compliant so it is
        interoperable with other libraries that also support it, such as
        CuPy and PyTorch. The following usage example demonstrates
        converting a pylibraft.device_ndarray to a cupy.ndarray:

        .. code-block:: python

            import cupy as cp
            from pylibraft.common import device_ndarray

            raft_array = device_ndarray.empty((100, 50))
            cupy_array = cp.asarray(raft_array)
        """
        self.ndarray_ = self._contiguous_host_array(np_ndarray)
        # Serialize in the array's own memory order so the byte layout on
        # the device matches the strides exposed through the interface.
        order = "C" if self.c_contiguous else "F"
        self.device_buffer_ = rmm.DeviceBuffer.to_device(
            self.ndarray_.tobytes(order=order)
        )

    @staticmethod
    def _contiguous_host_array(np_ndarray):
        """
        Return `np_ndarray` unchanged when it is C- or F-contiguous,
        otherwise return a C-ordered copy of it.

        `tobytes` always emits a compact buffer, so keeping the strides of a
        non-contiguous view would describe memory outside that buffer.
        """
        flags = np_ndarray.flags
        if flags.c_contiguous or flags.f_contiguous:
            return np_ndarray
        return np.ascontiguousarray(np_ndarray)

    @staticmethod
    def empty(shape, dtype=np.float32, order="C"):
        """
        Return a new device_ndarray of given shape and type, without
        initializing entries.

        Parameters
        ----------
        shape : int or tuple of int
                Shape of the empty array, e.g., (2, 3) or 2.
        dtype : data-type, optional
                Desired output data-type for the array, e.g, numpy.int8.
                Default is numpy.float32.
        order : {'C', 'F'}, optional (default: 'C')
                Whether to store multi-dimensional data in row-major
                (C-style) or column-major (Fortran-style) order in memory

        Returns
        -------
        device_ndarray wrapping a freshly allocated, uninitialized array
        """
        arr = np.empty(shape, dtype=dtype, order=order)
        return device_ndarray(arr)

    @property
    def c_contiguous(self):
        """
        Is the current device_ndarray laid out in row-major format?
        """
        # Ask numpy directly rather than inspecting strides[1]: that index
        # does not exist for 1-D strided arrays and is not a sufficient
        # test for arrays with more than two dimensions.
        return bool(self.ndarray_.flags.c_contiguous)

    @property
    def f_contiguous(self):
        """
        Is the current device_ndarray laid out in column-major format?

        This is the complement of `c_contiguous`, so an array that numpy
        considers both C- and F-contiguous (e.g. 1-D) reports row-major.
        """
        return not self.c_contiguous

    @property
    def dtype(self):
        """
        Datatype of the current device_ndarray instance
        """
        array_interface = self.ndarray_.__array_interface__
        return np.dtype(array_interface["typestr"])

    @property
    def shape(self):
        """
        Shape of the current device_ndarray instance
        """
        array_interface = self.ndarray_.__array_interface__
        return array_interface["shape"]

    @property
    def strides(self):
        """
        Strides of the current device_ndarray instance, as reported by the
        host array's __array_interface__ (None when the entry is absent or
        the interface itself reports None).
        """
        return self.ndarray_.__array_interface__.get("strides")

    @property
    def __cuda_array_interface__(self):
        """
        Returns the __cuda_array_interface__ compliant dict for
        integrating with other device-enabled libraries using
        zero-copy semantics.
        """
        device_cai = self.device_buffer_.__cuda_array_interface__
        # Start from a copy of the host metadata (shape, typestr, strides)
        # and swap in the device buffer's data entry.
        host_cai = self.ndarray_.__array_interface__.copy()
        host_cai["data"] = (device_cai["data"][0], device_cai["data"][1])

        return host_cai

    def copy_to_host(self):
        """
        Returns a new numpy.ndarray object on host with the current contents of
        this device_ndarray
        """
        ret = np.frombuffer(
            self.device_buffer_.tobytes(),
            dtype=self.dtype,
            like=self.ndarray_,
        ).astype(self.dtype)
        # Re-apply the original shape and memory order on the flat buffer.
        ret = np.lib.stride_tricks.as_strided(ret, self.shape, self.strides)
        return ret
27 changes: 22 additions & 5 deletions python/pylibraft/pylibraft/distance/pairwise_distance.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,11 @@ from .distance_type cimport DistanceType

from pylibraft.common import Handle
from pylibraft.common.handle import auto_sync_handle

from pylibraft.common.handle cimport handle_t

from pylibraft.common import device_ndarray


def is_c_cont(cai, dt):
return "strides" not in cai or \
Expand Down Expand Up @@ -92,7 +95,7 @@ SUPPORTED_DISTANCES = ["euclidean", "l1", "cityblock", "l2", "inner_product",


@auto_sync_handle
def distance(X, Y, dists, metric="euclidean", p=2.0, handle=None):
def distance(X, Y, out=None, metric="euclidean", p=2.0, handle=None):
"""
Compute pairwise distances between X and Y

Expand All @@ -107,11 +110,16 @@ def distance(X, Y, dists, metric="euclidean", p=2.0, handle=None):

X : CUDA array interface compliant matrix shape (m, k)
Y : CUDA array interface compliant matrix shape (n, k)
dists : Writable CUDA array interface matrix shape (m, n)
out : Optional writable CUDA array interface matrix shape (m, n)
metric : string denoting the metric type (default="euclidean")
p : metric parameter (currently used only for "minkowski")
{handle_docstring}

Returns
-------

raft.device_ndarray containing pairwise distances

Examples
--------

Expand Down Expand Up @@ -144,14 +152,23 @@ def distance(X, Y, dists, metric="euclidean", p=2.0, handle=None):

x_cai = X.__cuda_array_interface__
y_cai = Y.__cuda_array_interface__
dists_cai = dists.__cuda_array_interface__

m = x_cai["shape"][0]
n = y_cai["shape"][0]

x_dt = np.dtype(x_cai["typestr"])
y_dt = np.dtype(y_cai["typestr"])

if out is None:
dists = device_ndarray.empty((m, n), dtype=y_dt)
else:
dists = out

x_k = x_cai["shape"][1]
y_k = y_cai["shape"][1]

dists_cai = dists.__cuda_array_interface__

if x_k != y_k:
raise ValueError("Inputs must have same number of columns. "
"a=%s, b=%s" % (x_k, y_k))
Expand All @@ -163,8 +180,6 @@ def distance(X, Y, dists, metric="euclidean", p=2.0, handle=None):
handle = handle if handle is not None else Handle()
cdef handle_t *h = <handle_t*><size_t>handle.getHandle()

x_dt = np.dtype(x_cai["typestr"])
y_dt = np.dtype(y_cai["typestr"])
d_dt = np.dtype(dists_cai["typestr"])

x_c_contiguous = is_c_cont(x_cai, x_dt)
Expand Down Expand Up @@ -205,3 +220,5 @@ def distance(X, Y, dists, metric="euclidean", p=2.0, handle=None):
<float>p)
else:
raise ValueError("dtype %s not supported" % x_dt)

return dists
63 changes: 63 additions & 0 deletions python/pylibraft/pylibraft/test/test_device_ndarray.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Copyright (c) 2022, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import numpy as np
import pytest

from pylibraft.common import device_ndarray


@pytest.mark.parametrize("order", ["F", "C"])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_basic_attributes(order, dtype):
    """
    Wrap a random (500, 2) host array in a device_ndarray, copy it back,
    and check that shape, dtype, layout flags and contents all agree with
    the original host array.
    """
    to_layout = np.ascontiguousarray if order == "C" else np.asfortranarray
    host_in = to_layout(np.random.random((500, 2)).astype(dtype))

    wrapped = device_ndarray(host_in)
    host_out = wrapped.copy_to_host()

    assert host_in.shape == wrapped.shape
    assert host_in.dtype == wrapped.dtype

    # The layout must match both on the wrapper and on the round-tripped
    # host copy.
    for flag in ("f_contiguous", "c_contiguous"):
        assert getattr(host_in.data, flag) == getattr(wrapped, flag)
        assert getattr(host_in.data, flag) == getattr(host_out.data, flag)

    np.testing.assert_array_equal(host_in.tolist(), host_out.tolist())


@pytest.mark.parametrize("order", ["F", "C"])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_empty(order, dtype):
    """
    Check that device_ndarray.empty produces an array whose shape, dtype
    and layout flags match a reference host array of the same order, both
    on the wrapper and after copying back to host.
    """
    to_layout = np.ascontiguousarray if order == "C" else np.asfortranarray
    reference = to_layout(np.random.random((500, 2)).astype(dtype))

    wrapped = device_ndarray.empty(reference.shape, dtype=dtype, order=order)
    host_out = wrapped.copy_to_host()

    assert reference.shape == wrapped.shape
    assert reference.dtype == wrapped.dtype

    # Contents are uninitialized, so only metadata is compared.
    for flag in ("f_contiguous", "c_contiguous"):
        assert getattr(reference.data, flag) == getattr(wrapped, flag)
        assert getattr(reference.data, flag) == getattr(host_out.data, flag)
16 changes: 10 additions & 6 deletions python/pylibraft/pylibraft/test/test_distance.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,8 @@
import pytest
from scipy.spatial.distance import cdist

from pylibraft.common import Handle
from pylibraft.common import Handle, device_ndarray
from pylibraft.distance import pairwise_distance
from pylibraft.testing.utils import TestDeviceBuffer


@pytest.mark.parametrize("n_rows", [100])
Expand All @@ -39,9 +38,10 @@
"sqeuclidean",
],
)
@pytest.mark.parametrize("inplace", [True, False])
@pytest.mark.parametrize("order", ["F", "C"])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_distance(n_rows, n_cols, metric, order, dtype):
def test_distance(n_rows, n_cols, inplace, metric, order, dtype):
input1 = np.random.random_sample((n_rows, n_cols))
input1 = np.asarray(input1, order=order).astype(dtype)

Expand All @@ -61,13 +61,17 @@ def test_distance(n_rows, n_cols, metric, order, dtype):

expected[expected <= 1e-5] = 0.0

input1_device = TestDeviceBuffer(input1, order)
output_device = TestDeviceBuffer(output, order)
input1_device = device_ndarray(input1)
output_device = device_ndarray(output) if inplace else None

handle = Handle()
pairwise_distance(input1_device, input1_device, output_device, metric)
ret_output = pairwise_distance(
input1_device, input1_device, output_device, metric
)
handle.sync()

output_device = ret_output if not inplace else output_device

actual = output_device.copy_to_host()

actual[actual <= 1e-5] = 0.0
Expand Down
9 changes: 4 additions & 5 deletions python/pylibraft/pylibraft/test/test_fused_l2_argmin.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,8 @@
import pytest
from scipy.spatial.distance import cdist

from pylibraft.common import Handle
from pylibraft.common import Handle, device_ndarray
from pylibraft.distance import fused_l2_nn_argmin
from pylibraft.testing.utils import TestDeviceBuffer


@pytest.mark.parametrize("n_rows", [10, 100])
Expand All @@ -38,9 +37,9 @@ def test_fused_l2_nn_minarg(n_rows, n_cols, n_clusters, dtype):

expected = expected.argmin(axis=1)

input1_device = TestDeviceBuffer(input1, "C")
input2_device = TestDeviceBuffer(input2, "C")
output_device = TestDeviceBuffer(output, "C")
input1_device = device_ndarray(input1)
input2_device = device_ndarray(input2)
output_device = device_ndarray(output)

handle = Handle()
fused_l2_nn_argmin(
Expand Down
Loading