Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Cython/Python copy_to_host and to_device #268

Merged
merged 32 commits into from
Feb 3, 2020
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
de7c128
Adjust `copy_to_host` to take pointer directly
jakirkham Jan 30, 2020
93742ea
Rename `copy_to_host` in Cython
jakirkham Jan 30, 2020
7d7b987
Add Cython wrapper around `copy_to_host`
jakirkham Jan 30, 2020
cd2f0dc
Call Cython `copy_to_host` wrapper in `tobytes`
jakirkham Jan 30, 2020
bfb828a
Workaround assignment issue with empty strings
jakirkham Jan 30, 2020
71f9e3a
Skip copy `rmm::device_buffer` when it is trivial
jakirkham Jan 30, 2020
f51bd64
Add cudaMemcpyKind and cudaMemcpyAsync in Cython
jakirkham Jan 30, 2020
24faec8
Call cudaMemcpyAsync in `copy_to_host`
jakirkham Jan 30, 2020
05dd6bf
Drop unused import
jakirkham Jan 30, 2020
76b6b03
Drop `copy_to_host` from C++
jakirkham Jan 30, 2020
d21f7aa
Note Cython/Python `copy_to_host` function
jakirkham Jan 30, 2020
7e1a15f
Add `copy_to_host` method to `DeviceBuffer`
jakirkham Jan 30, 2020
d5c775f
Call `copy_to_host` method in `tobytes`
jakirkham Jan 30, 2020
80bd86c
Test round-trip `DeviceBuffer` with output buffers
jakirkham Jan 30, 2020
6142676
Make `copy_to_host`'s `stream` argument optional
jakirkham Jan 30, 2020
70bea9f
Make `black` happy
jakirkham Jan 30, 2020
7f996a7
Update python/rmm/_lib/device_buffer.pyx
jakirkham Jan 30, 2020
38c04b1
Rename `frombytes` to `to_device`
jakirkham Jan 30, 2020
2c581f0
Drop explicit synchronization when copying
jakirkham Jan 30, 2020
50d48e5
Refactor out `to_device` function
jakirkham Jan 30, 2020
0888170
Update changelog entry
jakirkham Jan 31, 2020
f50da49
Comment on NumPy's use of huge pages
jakirkham Jan 31, 2020
a0eee55
Synchronize on the default stream
jakirkham Jan 31, 2020
3f9de93
Merge rapidsai/branch-0.13 into jakirkham/add_cy_copy_to_host
jakirkham Jan 31, 2020
71fc46e
Fix error message to say "bytes-like"
jakirkham Jan 31, 2020
480a3fa
Drop `RuntimeWarning` for larger buffers
jakirkham Jan 31, 2020
db4be74
Rename `copy_to_host` func to `copy_ptr_to_host`
jakirkham Jan 31, 2020
f75ebaf
Just pass the subselection to `copy_ptr_to_host`
jakirkham Jan 31, 2020
17c7bb9
Merge rapidsai/branch-0.13 into jakirkham/add_cy_copy_to_host
jakirkham Feb 3, 2020
c9c4cfe
Add docstrings to new functions/methods
jakirkham Feb 3, 2020
02c45da
Match Numba signature with `ary`
jakirkham Feb 3, 2020
3f6d997
Verify `copy_to_host` returns the same object
jakirkham Feb 3, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
- PR #252 Add `__sizeof__` method to `DeviceBuffer`
- PR #258 Define pickling behavior for `DeviceBuffer`
- PR #261 Add `__bytes__` method to `DeviceBuffer`
- PR #268 Add Cython/Python `copy_to_host` function

## Improvements

Expand Down
1 change: 0 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,6 @@ link_directories("${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}" # CMAKE_CUDA_IMPLICIT

add_library(rmm SHARED
src/rmm.cpp
src/device_buffer.cpp
src/memory_manager.cpp
thirdparty/cnmem/src/cnmem.cpp
src/mr/default_memory_resource.cpp)
Expand Down
13 changes: 0 additions & 13 deletions include/rmm/device_buffer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -392,17 +392,4 @@ class device_buffer {
mr::get_default_resource()}; ///< The memory resource used to
///< allocate/deallocate device memory
};

/**--------------------------------------------------------------------------*
* @brief Copies rmm::device_buffer to a preallocated host buffer.
*
* Copies device memory asynchronously on the specified stream
*
* @throws std::runtime_error if `hb` is `nullptr` or copy fails
*
* @param db `rmm::device_buffer` to copy to host
* @param hb host allocated buffer to copy data to
* @param stream CUDA stream on which the device to host copy will be performed
*-------------------------------------------------------------------------**/
void copy_to_host(const device_buffer& db, void* hb, cudaStream_t stream = 0);
} // namespace rmm
12 changes: 7 additions & 5 deletions python/rmm/_lib/device_buffer.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from libcpp.memory cimport unique_ptr
from libc.stdint cimport uintptr_t

from rmm._lib.lib cimport cudaStream_t
from rmm._lib.lib cimport cudaStream_t, cudaMemcpyAsync, cudaMemcpyDeviceToHost

cdef extern from "rmm/device_buffer.hpp" namespace "rmm" nogil:
cdef cppclass device_buffer:
Expand All @@ -37,10 +37,6 @@ cdef extern from "rmm/device_buffer.hpp" namespace "rmm" nogil:
size_t size()
size_t capacity()

void copy_to_host(const device_buffer& db, void* hb) except +
void copy_to_host(const device_buffer& db,
void* hb,
cudaStream_t stream) except +

cdef class DeviceBuffer:
cdef unique_ptr[device_buffer] c_obj
Expand All @@ -51,6 +47,7 @@ cdef class DeviceBuffer:
@staticmethod
cdef DeviceBuffer c_frombytes(const unsigned char[::1] b,
uintptr_t stream=*)
cpdef copy_to_host(self, unsigned char[::1] hb=*, uintptr_t stream=*)
kkraus14 marked this conversation as resolved.
Show resolved Hide resolved
cpdef bytes tobytes(self, uintptr_t stream=*)

cdef size_t c_size(self)
Expand All @@ -59,5 +56,10 @@ cdef class DeviceBuffer:
cdef void* c_data(self)


cpdef void copy_to_host(uintptr_t db,
unsigned char[::1] hb,
uintptr_t stream=*) nogil except *


cdef extern from "<utility>" namespace "std" nogil:
cdef unique_ptr[device_buffer] move(unique_ptr[device_buffer])
67 changes: 56 additions & 11 deletions python/rmm/_lib/device_buffer.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@
# cython: embedsignature = True
# cython: language_level = 3


import warnings

import numpy as np

from libcpp.memory cimport unique_ptr
from libc.stdint cimport uintptr_t

Expand Down Expand Up @@ -109,22 +114,37 @@ cdef class DeviceBuffer:
def frombytes(const unsigned char[::1] b, uintptr_t stream=0):
return DeviceBuffer.c_frombytes(b, stream)

cpdef copy_to_host(self, unsigned char[::1] hb=None, uintptr_t stream=0):
cdef const device_buffer* dbp = self.c_obj.get()
cdef size_t s = dbp.size()

if hb is None:
hb = np.empty((s,), dtype="u1")
kkraus14 marked this conversation as resolved.
Show resolved Hide resolved
kkraus14 marked this conversation as resolved.
Show resolved Hide resolved
elif len(hb) < s:
raise ValueError(
"Argument `hb` is too small. Need space for %i bytes." % s
)
elif len(hb) > s:
hb = hb[:s]
jakirkham marked this conversation as resolved.
Show resolved Hide resolved
warnings.warn(
"Argument `hb` larger than needed."
" Will fill only first %i bytes." % s,
RuntimeWarning
jakirkham marked this conversation as resolved.
Show resolved Hide resolved
)

with nogil:
copy_to_host(<uintptr_t>dbp.data(), hb, stream)
jakirkham marked this conversation as resolved.
Show resolved Hide resolved

return hb
kkraus14 marked this conversation as resolved.
Show resolved Hide resolved

cpdef bytes tobytes(self, uintptr_t stream=0):
cdef const device_buffer* dbp = self.c_obj.get()
cdef size_t s = dbp.size()
if s == 0:
return b""

cdef bytes b = PyBytes_FromStringAndSize(NULL, s)
cdef void* p = <void*>PyBytes_AS_STRING(b)
cdef cudaStream_t c_stream
cdef cudaError_t err
with nogil:
c_stream = <cudaStream_t>stream
copy_to_host(dbp[0], p, c_stream)
err = cudaStreamSynchronize(c_stream)
if err != cudaSuccess:
raise RuntimeError(f"Stream sync failed with error: {err}")
cdef unsigned char* p = <unsigned char*>PyBytes_AS_STRING(b)
cdef unsigned char[::1] mv = (<unsigned char[:(s + 1):1]>p)[:s]
self.copy_to_host(mv, stream)

return b

Expand All @@ -139,3 +159,28 @@ cdef class DeviceBuffer:

cdef void* c_data(self):
return self.c_obj.get()[0].data()


@cython.boundscheck(False)
cpdef void copy_to_host(uintptr_t db,
unsigned char[::1] hb,
uintptr_t stream=0) nogil except *:
if hb is None:
with gil:
raise TypeError(
"Argument `hb` has incorrect type"
" (expected bytes-like, got NoneType)"
)

cdef cudaError_t err

err = cudaMemcpyAsync(<void*>&hb[0], <const void*>db, len(hb),
cudaMemcpyDeviceToHost, <cudaStream_t>stream)
if err != cudaSuccess:
with gil:
raise RuntimeError(f"Memcpy failed with error: {err}")

err = cudaStreamSynchronize(<cudaStream_t>stream)
jakirkham marked this conversation as resolved.
Show resolved Hide resolved
if err != cudaSuccess:
with gil:
raise RuntimeError(f"Stream sync failed with error: {err}")
11 changes: 11 additions & 0 deletions python/rmm/_lib/lib.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,17 @@ cdef extern from * nogil:

ctypedef void* cudaStream_t "cudaStream_t"

ctypedef enum cudaMemcpyKind "cudaMemcpyKind":
cudaMemcpyHostToHost = 0
cudaMemcpyHostToDevice = 1
cudaMemcpyDeviceToHost = 2
cudaMemcpyDeviceToDevice = 3
cudaMemcpyDefault = 4

cudaError_t cudaMemcpyAsync(void* dst, const void* src, size_t count,
cudaMemcpyKind kind)
cudaError_t cudaMemcpyAsync(void* dst, const void* src, size_t count,
cudaMemcpyKind kind, cudaStream_t stream)
cudaError_t cudaStreamSynchronize(cudaStream_t stream)


Expand Down
26 changes: 26 additions & 0 deletions python/rmm/tests/test_rmm.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,32 @@ def test_rmm_device_buffer(size):
assert b.capacity() >= b.size


@pytest.mark.parametrize(
"hb",
[
b"abc",
bytearray(b"abc"),
memoryview(b"abc"),
np.asarray(memoryview(b"abc")),
np.arange(3, dtype="u1"),
],
)
def test_rmm_device_buffer_memoryview_roundtrip(hb):
mv = memoryview(hb)
db = rmm.DeviceBuffer.frombytes(hb)
hb2 = db.copy_to_host()
mv2 = memoryview(hb2)
assert mv == mv2
hb3 = bytearray(mv.nbytes)
hb3 = db.copy_to_host(hb3)
mv3 = memoryview(hb3)
assert mv == mv3
hb4 = np.empty_like(mv)
hb4 = db.copy_to_host(hb4)
mv4 = memoryview(hb4)
assert mv == mv4


@pytest.mark.parametrize(
"hb",
[
Expand Down
33 changes: 0 additions & 33 deletions src/device_buffer.cpp

This file was deleted.

9 changes: 0 additions & 9 deletions tests/device_buffer_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,15 +111,6 @@ TYPED_TEST(DeviceBufferTest, CopyFromRawDevicePointer) {
EXPECT_EQ(cudaSuccess, cudaFree(device_memory));
}

TYPED_TEST(DeviceBufferTest, CopyToRawHostPointer) {
rmm::device_buffer buff(this->size);
std::vector<uint8_t> host_data(this->size);
uint8_t* host_data_ptr = host_data.data();
rmm::copy_to_host(buff, host_data_ptr);
EXPECT_EQ(0, buff.stream());
// TODO check for equality between the contents of the two allocations
}

TYPED_TEST(DeviceBufferTest, CopyFromRawHostPointer) {
std::vector<uint8_t> host_data(this->size);
rmm::device_buffer buff(static_cast<void*>(host_data.data()), this->size);
Expand Down