Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Cython/Python copy_to_host and to_device #268

Merged
merged 32 commits into from
Feb 3, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
de7c128
Adjust `copy_to_host` to take pointer directly
jakirkham Jan 30, 2020
93742ea
Rename `copy_to_host` in Cython
jakirkham Jan 30, 2020
7d7b987
Add Cython wrapper around `copy_to_host`
jakirkham Jan 30, 2020
cd2f0dc
Call Cython `copy_to_host` wrapper in `tobytes`
jakirkham Jan 30, 2020
bfb828a
Workaround assignment issue with empty strings
jakirkham Jan 30, 2020
71f9e3a
Skip copy `rmm::device_buffer` when it is trivial
jakirkham Jan 30, 2020
f51bd64
Add cudaMemcpyKind and cudaMemcpyAsync in Cython
jakirkham Jan 30, 2020
24faec8
Call cudaMemcpyAsync in `copy_to_host`
jakirkham Jan 30, 2020
05dd6bf
Drop unused import
jakirkham Jan 30, 2020
76b6b03
Drop `copy_to_host` from C++
jakirkham Jan 30, 2020
d21f7aa
Note Cython/Python `copy_to_host` function
jakirkham Jan 30, 2020
7e1a15f
Add `copy_to_host` method to `DeviceBuffer`
jakirkham Jan 30, 2020
d5c775f
Call `copy_to_host` method in `tobytes`
jakirkham Jan 30, 2020
80bd86c
Test round-trip `DeviceBuffer` with output buffers
jakirkham Jan 30, 2020
6142676
Make `copy_to_host`'s `stream` argument optional
jakirkham Jan 30, 2020
70bea9f
Make `black` happy
jakirkham Jan 30, 2020
7f996a7
Update python/rmm/_lib/device_buffer.pyx
jakirkham Jan 30, 2020
38c04b1
Rename `frombytes` to `to_device`
jakirkham Jan 30, 2020
2c581f0
Drop explicit synchronization when copying
jakirkham Jan 30, 2020
50d48e5
Refactor out `to_device` function
jakirkham Jan 30, 2020
0888170
Update changelog entry
jakirkham Jan 31, 2020
f50da49
Comment on NumPy's use of huge pages
jakirkham Jan 31, 2020
a0eee55
Synchronize on the default stream
jakirkham Jan 31, 2020
3f9de93
Merge rapidsai/branch-0.13 into jakirkham/add_cy_copy_to_host
jakirkham Jan 31, 2020
71fc46e
Fix error message to say "bytes-like"
jakirkham Jan 31, 2020
480a3fa
Drop `RuntimeWarning` for larger buffers
jakirkham Jan 31, 2020
db4be74
Rename `copy_to_host` func to `copy_ptr_to_host`
jakirkham Jan 31, 2020
f75ebaf
Just pass the subselection to `copy_ptr_to_host`
jakirkham Jan 31, 2020
17c7bb9
Merge rapidsai/branch-0.13 into jakirkham/add_cy_copy_to_host
jakirkham Feb 3, 2020
c9c4cfe
Add docstrings to new functions/methods
jakirkham Feb 3, 2020
02c45da
Match Numba signature with `ary`
jakirkham Feb 3, 2020
3f6d997
Verify `copy_to_host` returns the same object
jakirkham Feb 3, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
- PR #261 Add `__bytes__` method to `DeviceBuffer`
- PR #262 Moved device memory resource files to `mr/device` directory
- PR #266 Drop `rmm.auto_device`
- PR #268 Add Cython/Python `copy_to_host` and `to_device`

## Improvements

Expand Down
1 change: 0 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,6 @@ link_directories("${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}" # CMAKE_CUDA_IMPLICIT

add_library(rmm SHARED
src/rmm.cpp
src/device_buffer.cpp
src/memory_manager.cpp
thirdparty/cnmem/src/cnmem.cpp
src/mr/device/default_memory_resource.cpp)
Expand Down
13 changes: 0 additions & 13 deletions include/rmm/device_buffer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -392,17 +392,4 @@ class device_buffer {
mr::get_default_resource()}; ///< The memory resource used to
///< allocate/deallocate device memory
};

/**--------------------------------------------------------------------------*
* @brief Copies rmm::device_buffer to a preallocated host buffer.
*
* Copies device memory asynchronously on the specified stream
*
* @throws std::runtime_error if `hb` is `nullptr` or copy fails
*
* @param db `rmm::device_buffer` to copy to host
* @param hb host allocated buffer to copy data to
* @param stream CUDA stream on which the device to host copy will be performed
*-------------------------------------------------------------------------**/
void copy_to_host(const device_buffer& db, void* hb, cudaStream_t stream = 0);
} // namespace rmm
15 changes: 9 additions & 6 deletions python/rmm/_lib/device_buffer.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from libcpp.memory cimport unique_ptr
from libc.stdint cimport uintptr_t

from rmm._lib.lib cimport cudaStream_t
from rmm._lib.lib cimport cudaStream_t, cudaMemcpyAsync, cudaMemcpyDeviceToHost

cdef extern from "rmm/device_buffer.hpp" namespace "rmm" nogil:
cdef cppclass device_buffer:
Expand All @@ -37,10 +37,6 @@ cdef extern from "rmm/device_buffer.hpp" namespace "rmm" nogil:
size_t size()
size_t capacity()

void copy_to_host(const device_buffer& db, void* hb) except +
void copy_to_host(const device_buffer& db,
void* hb,
cudaStream_t stream) except +

cdef class DeviceBuffer:
cdef unique_ptr[device_buffer] c_obj
Expand All @@ -49,8 +45,9 @@ cdef class DeviceBuffer:
cdef DeviceBuffer c_from_unique_ptr(unique_ptr[device_buffer] ptr)

@staticmethod
cdef DeviceBuffer c_frombytes(const unsigned char[::1] b,
cdef DeviceBuffer c_to_device(const unsigned char[::1] b,
uintptr_t stream=*)
cpdef copy_to_host(self, ary=*, uintptr_t stream=*)
cpdef bytes tobytes(self, uintptr_t stream=*)

cdef size_t c_size(self)
Expand All @@ -59,5 +56,11 @@ cdef class DeviceBuffer:
cdef void* c_data(self)


cpdef DeviceBuffer to_device(const unsigned char[::1] b, uintptr_t stream=*)
cpdef void copy_ptr_to_host(uintptr_t db,
unsigned char[::1] hb,
uintptr_t stream=*) nogil except *


cdef extern from "<utility>" namespace "std" nogil:
cdef unique_ptr[device_buffer] move(unique_ptr[device_buffer])
150 changes: 126 additions & 24 deletions python/rmm/_lib/device_buffer.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
# cython: embedsignature = True
# cython: language_level = 3


import numpy as np

from libcpp.memory cimport unique_ptr
from libc.stdint cimport uintptr_t

Expand Down Expand Up @@ -71,7 +74,7 @@ cdef class DeviceBuffer:
return self.tobytes()

def __setstate__(self, state):
cdef DeviceBuffer other = DeviceBuffer.c_frombytes(state)
cdef DeviceBuffer other = DeviceBuffer.c_to_device(state)
self.c_obj = move(other.c_obj)

@property
Expand All @@ -92,39 +95,62 @@ cdef class DeviceBuffer:
return buf

@staticmethod
@cython.boundscheck(False)
cdef DeviceBuffer c_frombytes(const unsigned char[::1] b,
cdef DeviceBuffer c_to_device(const unsigned char[::1] b,
uintptr_t stream=0):
if b is None:
raise TypeError(
"Argument 'b' has incorrect type"
" (expected bytes, got NoneType)"
"""Calls ``to_device`` function on arguments provided"""
return to_device(b, stream)

@staticmethod
def to_device(const unsigned char[::1] b, uintptr_t stream=0):
"""Calls ``to_device`` function on arguments provided"""
return to_device(b, stream)

cpdef copy_to_host(self, ary=None, uintptr_t stream=0):
"""Copy from a ``DeviceBuffer`` to a buffer on host

Parameters
----------
hb : ``bytes``-like buffer to write into
stream : CUDA stream to use for copying, default 0

Examples
--------
>>> import rmm
>>> db = rmm.DeviceBuffer.to_device(b"abc")
>>> hb = bytearray(db.nbytes)
>>> db.copy_to_host(hb)
>>> print(hb)
bytearray(b'abc')
>>> hb = db.copy_to_host()
>>> print(hb)
bytearray(b'abc')
"""
cdef const device_buffer* dbp = self.c_obj.get()
cdef size_t s = dbp.size()

cdef unsigned char[::1] hb = ary
if hb is None:
# NumPy leverages huge pages under-the-hood,
# which speeds up the copy from device to host.
hb = ary = np.empty((s,), dtype="u1")
elif len(hb) < s:
raise ValueError(
"Argument `hb` is too small. Need space for %i bytes." % s
)

cdef uintptr_t p = <uintptr_t>&b[0]
cdef size_t s = len(b)
return DeviceBuffer(ptr=p, size=s, stream=stream)
with nogil:
copy_ptr_to_host(<uintptr_t>dbp.data(), hb[:s], stream)

@staticmethod
def frombytes(const unsigned char[::1] b, uintptr_t stream=0):
return DeviceBuffer.c_frombytes(b, stream)
return ary

cpdef bytes tobytes(self, uintptr_t stream=0):
cdef const device_buffer* dbp = self.c_obj.get()
cdef size_t s = dbp.size()
if s == 0:
return b""

cdef bytes b = PyBytes_FromStringAndSize(NULL, s)
cdef void* p = <void*>PyBytes_AS_STRING(b)
cdef cudaStream_t c_stream
cdef cudaError_t err
with nogil:
c_stream = <cudaStream_t>stream
copy_to_host(dbp[0], p, c_stream)
err = cudaStreamSynchronize(c_stream)
if err != cudaSuccess:
raise RuntimeError(f"Stream sync failed with error: {err}")
cdef unsigned char* p = <unsigned char*>PyBytes_AS_STRING(b)
cdef unsigned char[::1] mv = (<unsigned char[:(s + 1):1]>p)[:s]
self.copy_to_host(mv, stream)

return b

Expand All @@ -139,3 +165,79 @@ cdef class DeviceBuffer:

cdef void* c_data(self):
return self.c_obj.get()[0].data()


@cython.boundscheck(False)
cpdef DeviceBuffer to_device(const unsigned char[::1] b, uintptr_t stream=0):
"""Return a new ``DeviceBuffer`` with a copy of the data

Parameters
----------
b : ``bytes``-like data on host to copy to device
stream : CUDA stream to use for copying, default 0

Returns
-------
``DeviceBuffer`` with copy of data from host

Examples
--------
>>> import rmm
>>> db = rmm._lib.device_buffer.to_device(b"abc")
>>> print(bytes(db))
b'abc'
"""

if b is None:
raise TypeError(
"Argument 'b' has incorrect type"
" (expected bytes-like, got NoneType)"
)

cdef uintptr_t p = <uintptr_t>&b[0]
cdef size_t s = len(b)
return DeviceBuffer(ptr=p, size=s, stream=stream)


@cython.boundscheck(False)
cpdef void copy_ptr_to_host(uintptr_t db,
unsigned char[::1] hb,
uintptr_t stream=0) nogil except *:
"""Copy from a device pointer to a buffer on host

Parameters
----------
db : pointer to data on device to copy
hb : ``bytes``-like buffer to write into
stream : CUDA stream to use for copying, default 0

Examples
--------
>>> import rmm
>>> db = rmm.DeviceBuffer.to_device(b"abc")
>>> hb = bytearray(db.nbytes)
>>> rmm._lib.device_buffer.copy_ptr_to_host(db.ptr, hb)
>>> print(hb)
bytearray(b'abc')
"""

if hb is None:
with gil:
raise TypeError(
"Argument `hb` has incorrect type"
" (expected bytes-like, got NoneType)"
)

cdef cudaError_t err

err = cudaMemcpyAsync(<void*>&hb[0], <const void*>db, len(hb),
cudaMemcpyDeviceToHost, <cudaStream_t>stream)
if err != cudaSuccess:
with gil:
raise RuntimeError(f"Memcpy failed with error: {err}")

if stream == 0:
err = cudaStreamSynchronize(<cudaStream_t>stream)
kkraus14 marked this conversation as resolved.
Show resolved Hide resolved
if err != cudaSuccess:
with gil:
raise RuntimeError(f"Stream sync failed with error: {err}")
11 changes: 11 additions & 0 deletions python/rmm/_lib/lib.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,17 @@ cdef extern from * nogil:

ctypedef void* cudaStream_t "cudaStream_t"

ctypedef enum cudaMemcpyKind "cudaMemcpyKind":
cudaMemcpyHostToHost = 0
cudaMemcpyHostToDevice = 1
cudaMemcpyDeviceToHost = 2
cudaMemcpyDeviceToDevice = 3
cudaMemcpyDefault = 4

cudaError_t cudaMemcpyAsync(void* dst, const void* src, size_t count,
cudaMemcpyKind kind)
cudaError_t cudaMemcpyAsync(void* dst, const void* src, size_t count,
cudaMemcpyKind kind, cudaStream_t stream)
cudaError_t cudaStreamSynchronize(cudaStream_t stream)


Expand Down
43 changes: 36 additions & 7 deletions python/rmm/tests/test_rmm.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def test_rmm_device_buffer(size):
assert len(s) == len(b)

# Test conversion from bytes
b2 = rmm.DeviceBuffer.frombytes(s)
b2 = rmm.DeviceBuffer.to_device(s)
assert isinstance(b2, rmm.DeviceBuffer)
assert len(b2) == len(s)

Expand All @@ -124,6 +124,35 @@ def test_rmm_device_buffer(size):
assert b.capacity() >= b.size


@pytest.mark.parametrize(
"hb",
[
b"abc",
bytearray(b"abc"),
memoryview(b"abc"),
np.asarray(memoryview(b"abc")),
np.arange(3, dtype="u1"),
],
)
def test_rmm_device_buffer_memoryview_roundtrip(hb):
mv = memoryview(hb)
db = rmm.DeviceBuffer.to_device(hb)
hb2 = db.copy_to_host()
assert isinstance(hb2, np.ndarray)
mv2 = memoryview(hb2)
assert mv == mv2
hb3a = bytearray(mv.nbytes)
hb3b = db.copy_to_host(hb3a)
assert hb3a is hb3b
mv3 = memoryview(hb3b)
assert mv == mv3
hb4a = np.empty_like(mv)
hb4b = db.copy_to_host(hb4a)
assert hb4a is hb4b
mv4 = memoryview(hb4b)
assert mv == mv4


@pytest.mark.parametrize(
"hb",
[
Expand All @@ -146,19 +175,19 @@ def test_rmm_device_buffer_bytes_roundtrip(hb):
mv = memoryview(hb)
except TypeError:
with pytest.raises(TypeError):
rmm.DeviceBuffer.frombytes(hb)
rmm.DeviceBuffer.to_device(hb)
else:
if mv.format != "B":
with pytest.raises(ValueError):
rmm.DeviceBuffer.frombytes(hb)
rmm.DeviceBuffer.to_device(hb)
elif len(mv.strides) != 1:
with pytest.raises(ValueError):
rmm.DeviceBuffer.frombytes(hb)
rmm.DeviceBuffer.to_device(hb)
elif mv.strides[0] != 1:
with pytest.raises(ValueError):
rmm.DeviceBuffer.frombytes(hb)
rmm.DeviceBuffer.to_device(hb)
else:
db = rmm.DeviceBuffer.frombytes(hb)
db = rmm.DeviceBuffer.to_device(hb)
hb2 = db.tobytes()
mv2 = memoryview(hb2)
assert mv == mv2
Expand All @@ -169,7 +198,7 @@ def test_rmm_device_buffer_bytes_roundtrip(hb):

@pytest.mark.parametrize("hb", [b"", b"123", b"abc"])
def test_rmm_device_buffer_pickle_roundtrip(hb):
db = rmm.DeviceBuffer.frombytes(hb)
db = rmm.DeviceBuffer.to_device(hb)
pb = pickle.dumps(db)
del db
db2 = pickle.loads(pb)
Expand Down
33 changes: 0 additions & 33 deletions src/device_buffer.cpp

This file was deleted.

Loading