From de7c12833f1d127426dbd81023fe808a699f812c Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Wed, 29 Jan 2020 19:48:21 -0800 Subject: [PATCH 01/30] Adjust `copy_to_host` to take pointer directly --- include/rmm/device_buffer.hpp | 5 +++-- python/rmm/_lib/device_buffer.pxd | 7 +++---- python/rmm/_lib/device_buffer.pyx | 2 +- src/device_buffer.cpp | 9 ++++++--- tests/device_buffer_tests.cpp | 2 +- 5 files changed, 14 insertions(+), 11 deletions(-) diff --git a/include/rmm/device_buffer.hpp b/include/rmm/device_buffer.hpp index 410edc1e2..fd1e01faa 100644 --- a/include/rmm/device_buffer.hpp +++ b/include/rmm/device_buffer.hpp @@ -400,9 +400,10 @@ class device_buffer { * * @throws std::runtime_error if `hb` is `nullptr` or copy fails * - * @param db `rmm::device_buffer` to copy to host + * @param db device allocated buffer to copy to host * @param hb host allocated buffer to copy data to + * @param s number of bytes to copy * @param stream CUDA stream on which the device to host copy will be performed *-------------------------------------------------------------------------**/ -void copy_to_host(const device_buffer& db, void* hb, cudaStream_t stream = 0); +void copy_to_host(const void* db, void* hb, size_t s, cudaStream_t stream = 0); } // namespace rmm diff --git a/python/rmm/_lib/device_buffer.pxd b/python/rmm/_lib/device_buffer.pxd index 96745e811..400806a5a 100644 --- a/python/rmm/_lib/device_buffer.pxd +++ b/python/rmm/_lib/device_buffer.pxd @@ -37,10 +37,9 @@ cdef extern from "rmm/device_buffer.hpp" namespace "rmm" nogil: size_t size() size_t capacity() - void copy_to_host(const device_buffer& db, void* hb) except + - void copy_to_host(const device_buffer& db, - void* hb, - cudaStream_t stream) except + + void copy_to_host(const void* db, void* hb, size_t s) except + + void copy_to_host(const void* db, void* hb, + size_t s, cudaStream_t stream) except + cdef class DeviceBuffer: cdef unique_ptr[device_buffer] c_obj diff --git a/python/rmm/_lib/device_buffer.pyx b/python/rmm/_lib/device_buffer.pyx index 28b817d8b..0bc0c6ffb 100644 --- a/python/rmm/_lib/device_buffer.pyx +++ b/python/rmm/_lib/device_buffer.pyx @@ -121,7 +121,7 @@ cdef class DeviceBuffer: cdef cudaError_t err with nogil: c_stream = stream - copy_to_host(dbp[0], p, c_stream) + copy_to_host(dbp.data(), p, s, c_stream) err = cudaStreamSynchronize(c_stream) if err != cudaSuccess: raise RuntimeError(f"Stream sync failed with error: {err}") diff --git a/src/device_buffer.cpp b/src/device_buffer.cpp index b28cee70e..918625c6a 100644 --- a/src/device_buffer.cpp +++ b/src/device_buffer.cpp @@ -17,13 +17,16 @@ #include namespace rmm { -void copy_to_host(const device_buffer& db, void* hb, cudaStream_t stream) { +void copy_to_host(const void* db, void* hb, size_t s, cudaStream_t stream) { + if (db == nullptr) { + throw std::runtime_error{"Cannot copy from `nullptr`."}; + } if (hb == nullptr) { throw std::runtime_error{"Cannot copy to `nullptr`."}; } cudaError_t err = cudaMemcpyAsync(hb, - db.data(), - db.size(), + db, + s, cudaMemcpyDeviceToHost, stream); if (err != cudaSuccess) { diff --git a/tests/device_buffer_tests.cpp b/tests/device_buffer_tests.cpp index 0b32e70c0..6109cd789 100644 --- a/tests/device_buffer_tests.cpp +++ b/tests/device_buffer_tests.cpp @@ -115,7 +115,7 @@ TYPED_TEST(DeviceBufferTest, CopyToRawHostPointer) { rmm::device_buffer buff(this->size); std::vector host_data(this->size); uint8_t* host_data_ptr = host_data.data(); - rmm::copy_to_host(buff, host_data_ptr); + rmm::copy_to_host(buff.data(), host_data_ptr, buff.size()); EXPECT_EQ(0, buff.stream()); // TODO check for equality between the contents of the two allocations } From 93742eac0a545019bbf0107587ecb1b5847e1d34 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Wed, 29 Jan 2020 19:59:00 -0800 Subject: [PATCH 02/30] Rename `copy_to_host` in Cython --- python/rmm/_lib/device_buffer.pxd | 10 +++++++--- python/rmm/_lib/device_buffer.pyx | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/python/rmm/_lib/device_buffer.pxd b/python/rmm/_lib/device_buffer.pxd index 400806a5a..8f7746793 100644 --- a/python/rmm/_lib/device_buffer.pxd +++ b/python/rmm/_lib/device_buffer.pxd @@ -37,9 +37,13 @@ cdef extern from "rmm/device_buffer.hpp" namespace "rmm" nogil: size_t size() size_t capacity() - void copy_to_host(const void* db, void* hb, size_t s) except + - void copy_to_host(const void* db, void* hb, - size_t s, cudaStream_t stream) except + + void cpp_copy_to_host "rmm::copy_to_host"(const void* db, + void* hb, + size_t s) except + + void cpp_copy_to_host "rmm::copy_to_host"(const void* db, + void* hb, + size_t s, + cudaStream_t stream) except + cdef class DeviceBuffer: cdef unique_ptr[device_buffer] c_obj diff --git a/python/rmm/_lib/device_buffer.pyx b/python/rmm/_lib/device_buffer.pyx index 0bc0c6ffb..c49bb7928 100644 --- a/python/rmm/_lib/device_buffer.pyx +++ b/python/rmm/_lib/device_buffer.pyx @@ -121,7 +121,7 @@ cdef class DeviceBuffer: cdef cudaError_t err with nogil: c_stream = stream - copy_to_host(dbp.data(), p, s, c_stream) + cpp_copy_to_host(dbp.data(), p, s, c_stream) err = cudaStreamSynchronize(c_stream) if err != cudaSuccess: raise RuntimeError(f"Stream sync failed with error: {err}") From 7d7b9876e913b4dbf7faae8c00fc0ea9dd2fd2c9 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Wed, 29 Jan 2020 20:11:48 -0800 Subject: [PATCH 03/30] Add Cython wrapper around `copy_to_host` --- python/rmm/_lib/device_buffer.pxd | 5 +++++ python/rmm/_lib/device_buffer.pyx | 21 +++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/python/rmm/_lib/device_buffer.pxd b/python/rmm/_lib/device_buffer.pxd index 8f7746793..a437fce92 100644 --- a/python/rmm/_lib/device_buffer.pxd +++ b/python/rmm/_lib/device_buffer.pxd @@ -62,5 +62,10 @@ cdef class DeviceBuffer: cdef void* c_data(self) +cpdef void copy_to_host(uintptr_t db, + unsigned char[::1] hb, + uintptr_t stream) nogil except * + + cdef extern from "" namespace "std" nogil: cdef unique_ptr[device_buffer] move(unique_ptr[device_buffer]) diff --git a/python/rmm/_lib/device_buffer.pyx b/python/rmm/_lib/device_buffer.pyx index c49bb7928..99b7bf637 100644 --- a/python/rmm/_lib/device_buffer.pyx +++ b/python/rmm/_lib/device_buffer.pyx @@ -27,6 +27,8 @@ from rmm._lib.lib cimport (cudaError_t, cudaSuccess, cimport cython +import warnings + cdef class DeviceBuffer: @@ -139,3 +141,22 @@ cdef class DeviceBuffer: cdef void* c_data(self): return self.c_obj.get()[0].data() + + +@cython.boundscheck(False) +cpdef void copy_to_host(uintptr_t db, + unsigned char[::1] hb, + uintptr_t stream) nogil except *: + if hb is None: + with gil: + raise TypeError( + "Argument `hb` has incorrect type" + " (expected bytes-like, got NoneType)" + ) + + cpp_copy_to_host(db, &hb[0], len(hb), stream) + + cdef cudaError_t err = cudaStreamSynchronize(stream) + if err != cudaSuccess: + with gil: + raise RuntimeError(f"Stream sync failed with error: {err}") From cd2f0dcbc307433abb008c0eedb4eec69ed1f3b5 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Wed, 29 Jan 2020 20:17:30 -0800 Subject: [PATCH 04/30] Call Cython `copy_to_host` wrapper in `tobytes` --- python/rmm/_lib/device_buffer.pyx | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/python/rmm/_lib/device_buffer.pyx b/python/rmm/_lib/device_buffer.pyx index 99b7bf637..6c12467fb 100644 --- a/python/rmm/_lib/device_buffer.pyx +++ b/python/rmm/_lib/device_buffer.pyx @@ -118,15 +118,10 @@ cdef class DeviceBuffer: return b"" cdef bytes b = PyBytes_FromStringAndSize(NULL, s) - cdef void* p = PyBytes_AS_STRING(b) - cdef cudaStream_t c_stream - cdef cudaError_t err + cdef unsigned char* p = PyBytes_AS_STRING(b) + cdef unsigned char[::1] mv = p with nogil: - c_stream = stream - cpp_copy_to_host(dbp.data(), p, s, c_stream) - err = cudaStreamSynchronize(c_stream) - if err != cudaSuccess: - raise RuntimeError(f"Stream sync failed with error: {err}") + copy_to_host(dbp.data(), mv, stream) return b From bfb828a87cd531a78295ca6334a46c13557809ce Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Wed, 29 Jan 2020 20:19:33 -0800 Subject: [PATCH 05/30] Workaround assignment issue with empty strings Pad the size of the memoryview to workaround Cython issue creating size 0 memoryviews. Then trim the added length afterwards. Should allow us to pass size 0 memoryviews to `copy_to_host`. --- python/rmm/_lib/device_buffer.pyx | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python/rmm/_lib/device_buffer.pyx b/python/rmm/_lib/device_buffer.pyx index 6c12467fb..72d010110 100644 --- a/python/rmm/_lib/device_buffer.pyx +++ b/python/rmm/_lib/device_buffer.pyx @@ -114,12 +114,10 @@ cdef class DeviceBuffer: cpdef bytes tobytes(self, uintptr_t stream=0): cdef const device_buffer* dbp = self.c_obj.get() cdef size_t s = dbp.size() - if s == 0: - return b"" cdef bytes b = PyBytes_FromStringAndSize(NULL, s) cdef unsigned char* p = PyBytes_AS_STRING(b) - cdef unsigned char[::1] mv = p + cdef unsigned char[::1] mv = (p)[:s] with nogil: copy_to_host(dbp.data(), mv, stream) From 71f9e3af07c06b0a85bc98dc0966a7ba6ee4efda Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Wed, 29 Jan 2020 20:26:39 -0800 Subject: [PATCH 06/30] Skip copy `rmm::device_buffer` when it is trivial This should avoid raising unnecessarily when the host and/or device pointers are `nullptr`. --- src/device_buffer.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/device_buffer.cpp b/src/device_buffer.cpp index 918625c6a..2876dfca5 100644 --- a/src/device_buffer.cpp +++ b/src/device_buffer.cpp @@ -18,6 +18,9 @@ namespace rmm { void copy_to_host(const void* db, void* hb, size_t s, cudaStream_t stream) { + if (s == 0) { + return; + } if (db == nullptr) { throw std::runtime_error{"Cannot copy from `nullptr`."}; } From f51bd644db86f2fe35b484cfd351676bf6d19001 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Wed, 29 Jan 2020 20:41:16 -0800 Subject: [PATCH 07/30] Add cudaMemcpyKind and cudaMemcpyAsync in Cython Go ahead and pull these into Cython for simplicity. After all we are basically just calling `cudaMemcpyAsync` at this point. --- python/rmm/_lib/lib.pxd | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/python/rmm/_lib/lib.pxd b/python/rmm/_lib/lib.pxd index 5805bf44b..9f26326c0 100644 --- a/python/rmm/_lib/lib.pxd +++ b/python/rmm/_lib/lib.pxd @@ -33,6 +33,17 @@ cdef extern from * nogil: ctypedef void* cudaStream_t "cudaStream_t" + ctypedef enum cudaMemcpyKind "cudaMemcpyKind": + cudaMemcpyHostToHost = 0 + cudaMemcpyHostToDevice = 1 + cudaMemcpyDeviceToHost = 2 + cudaMemcpyDeviceToDevice = 3 + cudaMemcpyDefault = 4 + + cudaError_t cudaMemcpyAsync(void* dst, const void* src, size_t count, + cudaMemcpyKind kind) + cudaError_t cudaMemcpyAsync(void* dst, const void* src, size_t count, + cudaMemcpyKind kind, cudaStream_t stream) cudaError_t cudaStreamSynchronize(cudaStream_t stream) From 24faec8233ab11dfc41c9bfeca8b4c9d6294a4f5 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Wed, 29 Jan 2020 20:52:10 -0800 Subject: [PATCH 08/30] Call cudaMemcpyAsync in `copy_to_host` --- python/rmm/_lib/device_buffer.pxd | 2 +- python/rmm/_lib/device_buffer.pyx | 10 ++++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/python/rmm/_lib/device_buffer.pxd b/python/rmm/_lib/device_buffer.pxd index a437fce92..1799648fd 100644 --- a/python/rmm/_lib/device_buffer.pxd +++ b/python/rmm/_lib/device_buffer.pxd @@ -20,7 +20,7 @@ from libcpp.memory cimport unique_ptr from libc.stdint cimport uintptr_t -from rmm._lib.lib cimport cudaStream_t +from rmm._lib.lib cimport cudaStream_t, cudaMemcpyAsync, cudaMemcpyDeviceToHost cdef extern from "rmm/device_buffer.hpp" namespace "rmm" nogil: cdef cppclass device_buffer: diff --git a/python/rmm/_lib/device_buffer.pyx b/python/rmm/_lib/device_buffer.pyx index 72d010110..6ff132aab 100644 --- a/python/rmm/_lib/device_buffer.pyx +++ b/python/rmm/_lib/device_buffer.pyx @@ -147,9 +147,15 @@ cpdef void copy_to_host(uintptr_t db, " (expected bytes-like, got NoneType)" ) - cpp_copy_to_host(db, &hb[0], len(hb), stream) + cdef cudaError_t err - cdef cudaError_t err = cudaStreamSynchronize(stream) + err = cudaMemcpyAsync(&hb[0], db, len(hb), + cudaMemcpyDeviceToHost, stream) + if err != cudaSuccess: + with gil: + raise RuntimeError(f"Memcpy failed with error: {err}") + + err = cudaStreamSynchronize(stream) if err != cudaSuccess: with gil: raise RuntimeError(f"Stream sync failed with error: {err}") From 05dd6bfa07c1496dd044ea6ad882dcdc43cbc4b3 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Wed, 29 Jan 2020 21:05:05 -0800 Subject: [PATCH 09/30] Drop unused import --- python/rmm/_lib/device_buffer.pyx | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/rmm/_lib/device_buffer.pyx b/python/rmm/_lib/device_buffer.pyx index 6ff132aab..a8acfa192 100644 --- a/python/rmm/_lib/device_buffer.pyx +++ b/python/rmm/_lib/device_buffer.pyx @@ -27,8 +27,6 @@ from rmm._lib.lib cimport (cudaError_t, cudaSuccess, cimport cython -import warnings - cdef class DeviceBuffer: From 76b6b034823177da7cf0bafc29aa36aa7ef45a22 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Wed, 29 Jan 2020 21:13:39 -0800 Subject: [PATCH 10/30] Drop `copy_to_host` from C++ --- CMakeLists.txt | 1 - include/rmm/device_buffer.hpp | 14 ----------- python/rmm/_lib/device_buffer.pxd | 7 ------ src/device_buffer.cpp | 39 ------------------------------- tests/device_buffer_tests.cpp | 9 ------- 5 files changed, 70 deletions(-) delete mode 100644 src/device_buffer.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 8ff74dc40..4dd269302 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -109,7 +109,6 @@ link_directories("${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}" # CMAKE_CUDA_IMPLICIT add_library(rmm SHARED src/rmm.cpp - src/device_buffer.cpp src/memory_manager.cpp thirdparty/cnmem/src/cnmem.cpp src/mr/default_memory_resource.cpp) diff --git a/include/rmm/device_buffer.hpp b/include/rmm/device_buffer.hpp index fd1e01faa..b2bcc4936 100644 --- a/include/rmm/device_buffer.hpp +++ b/include/rmm/device_buffer.hpp @@ -392,18 +392,4 @@ class device_buffer { mr::get_default_resource()}; ///< The memory resource used to ///< allocate/deallocate device memory }; - -/**--------------------------------------------------------------------------* - * @brief Copies rmm::device_buffer to a preallocated host buffer. - * - * Copies device memory asynchronously on the specified stream - * - * @throws std::runtime_error if `hb` is `nullptr` or copy fails - * - * @param db device allocated buffer to copy to host - * @param hb host allocated buffer to copy data to - * @param s number of bytes to copy - * @param stream CUDA stream on which the device to host copy will be performed - *-------------------------------------------------------------------------**/ -void copy_to_host(const void* db, void* hb, size_t s, cudaStream_t stream = 0); } // namespace rmm diff --git a/python/rmm/_lib/device_buffer.pxd b/python/rmm/_lib/device_buffer.pxd index 1799648fd..575d01624 100644 --- a/python/rmm/_lib/device_buffer.pxd +++ b/python/rmm/_lib/device_buffer.pxd @@ -37,13 +37,6 @@ cdef extern from "rmm/device_buffer.hpp" namespace "rmm" nogil: size_t size() size_t capacity() - void cpp_copy_to_host "rmm::copy_to_host"(const void* db, - void* hb, - size_t s) except + - void cpp_copy_to_host "rmm::copy_to_host"(const void* db, - void* hb, - size_t s, - cudaStream_t stream) except + cdef class DeviceBuffer: cdef unique_ptr[device_buffer] c_obj diff --git a/src/device_buffer.cpp b/src/device_buffer.cpp deleted file mode 100644 index 2876dfca5..000000000 --- a/src/device_buffer.cpp +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2018, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -namespace rmm { -void copy_to_host(const void* db, void* hb, size_t s, cudaStream_t stream) { - if (s == 0) { - return; - } - if (db == nullptr) { - throw std::runtime_error{"Cannot copy from `nullptr`."}; - } - if (hb == nullptr) { - throw std::runtime_error{"Cannot copy to `nullptr`."}; - } - cudaError_t err = cudaMemcpyAsync(hb, - db, - s, - cudaMemcpyDeviceToHost, - stream); - if (err != cudaSuccess) { - throw std::runtime_error{"Failed to copy to host."}; - } -} -} // namespace rmm diff --git a/tests/device_buffer_tests.cpp b/tests/device_buffer_tests.cpp index 6109cd789..45ebd9de8 100644 --- a/tests/device_buffer_tests.cpp +++ b/tests/device_buffer_tests.cpp @@ -111,15 +111,6 @@ TYPED_TEST(DeviceBufferTest, CopyFromRawDevicePointer) { EXPECT_EQ(cudaSuccess, cudaFree(device_memory)); } -TYPED_TEST(DeviceBufferTest, CopyToRawHostPointer) { - rmm::device_buffer buff(this->size); - std::vector host_data(this->size); - uint8_t* host_data_ptr = host_data.data(); - rmm::copy_to_host(buff.data(), host_data_ptr, buff.size()); - EXPECT_EQ(0, buff.stream()); - // TODO check for equality between the contents of the two allocations -} - TYPED_TEST(DeviceBufferTest, CopyFromRawHostPointer) { std::vector host_data(this->size); rmm::device_buffer buff(static_cast(host_data.data()), this->size); From d21f7aac3f0fd296bdaf33f32a5900a87a9dc465 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Wed, 29 Jan 2020 21:14:07 -0800 Subject: [PATCH 11/30] Note Cython/Python `copy_to_host` function --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c00209c00..212a1d251 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ - PR #252 Add `__sizeof__` method to `DeviceBuffer` - PR #258 Define pickling behavior for `DeviceBuffer` - PR #261 Add `__bytes__` method to `DeviceBuffer` +- PR #268 Add Cython/Python `copy_to_host` function ## Improvements From 7e1a15f27c69e454a112b05be7db7025c1dde2df Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Wed, 29 Jan 2020 21:38:42 -0800 Subject: [PATCH 12/30] Add `copy_to_host` method to `DeviceBuffer` Creates an analogous method to `copy_to_host` on `DeviceNDArray`s. --- python/rmm/_lib/device_buffer.pxd | 1 + python/rmm/_lib/device_buffer.pyx | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/python/rmm/_lib/device_buffer.pxd b/python/rmm/_lib/device_buffer.pxd index 575d01624..81cb173db 100644 --- a/python/rmm/_lib/device_buffer.pxd +++ b/python/rmm/_lib/device_buffer.pxd @@ -47,6 +47,7 @@ cdef class DeviceBuffer: @staticmethod cdef DeviceBuffer c_frombytes(const unsigned char[::1] b, uintptr_t stream=*) + cpdef copy_to_host(self, unsigned char[::1] hb, uintptr_t stream=*) cpdef bytes tobytes(self, uintptr_t stream=*) cdef size_t c_size(self) diff --git a/python/rmm/_lib/device_buffer.pyx b/python/rmm/_lib/device_buffer.pyx index a8acfa192..bb368560b 100644 --- a/python/rmm/_lib/device_buffer.pyx +++ b/python/rmm/_lib/device_buffer.pyx @@ -17,6 +17,11 @@ # cython: embedsignature = True # cython: language_level = 3 + +import warnings + +import numpy as np + from libcpp.memory cimport unique_ptr from libc.stdint cimport uintptr_t @@ -109,6 +114,27 @@ cdef class DeviceBuffer: def frombytes(const unsigned char[::1] b, uintptr_t stream=0): return DeviceBuffer.c_frombytes(b, stream) + cpdef copy_to_host(self, unsigned char[::1] hb, uintptr_t stream=0): + cdef const device_buffer* dbp = self.c_obj.get() + cdef size_t s = dbp.size() + + if hb is None: + hb = np.empty((s,), dtype="u1") + elif len(hb) < s: + raise ValueError( + "Argument `hb` is to small. Need space for %i bytes." % s + ) + elif len(hb) > s: + hb = hb[:s] + warnings.warn( + "Argument `hb` larger than needed." + " Will fill only first %i bytes." % s, + RuntimeWarning + ) + + with nogil: + copy_to_host(dbp.data(), hb, stream) + cpdef bytes tobytes(self, uintptr_t stream=0): cdef const device_buffer* dbp = self.c_obj.get() cdef size_t s = dbp.size() From d5c775f9a1b84dec0702060d43b3de6e0e183bbe Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Wed, 29 Jan 2020 21:56:08 -0800 Subject: [PATCH 13/30] Call `copy_to_host` method in `tobytes` --- python/rmm/_lib/device_buffer.pxd | 2 +- python/rmm/_lib/device_buffer.pyx | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/python/rmm/_lib/device_buffer.pxd b/python/rmm/_lib/device_buffer.pxd index 81cb173db..22730099f 100644 --- a/python/rmm/_lib/device_buffer.pxd +++ b/python/rmm/_lib/device_buffer.pxd @@ -47,7 +47,7 @@ cdef class DeviceBuffer: @staticmethod cdef DeviceBuffer c_frombytes(const unsigned char[::1] b, uintptr_t stream=*) - cpdef copy_to_host(self, unsigned char[::1] hb, uintptr_t stream=*) + cpdef copy_to_host(self, unsigned char[::1] hb=*, uintptr_t stream=*) cpdef bytes tobytes(self, uintptr_t stream=*) cdef size_t c_size(self) diff --git a/python/rmm/_lib/device_buffer.pyx b/python/rmm/_lib/device_buffer.pyx index bb368560b..d6dd9aa59 100644 --- a/python/rmm/_lib/device_buffer.pyx +++ b/python/rmm/_lib/device_buffer.pyx @@ -114,7 +114,7 @@ cdef class DeviceBuffer: def frombytes(const unsigned char[::1] b, uintptr_t stream=0): return DeviceBuffer.c_frombytes(b, stream) - cpdef copy_to_host(self, unsigned char[::1] hb, uintptr_t stream=0): + cpdef copy_to_host(self, unsigned char[::1] hb=None, uintptr_t stream=0): cdef const device_buffer* dbp = self.c_obj.get() cdef size_t s = dbp.size() @@ -135,6 +135,8 @@ cdef class DeviceBuffer: with nogil: copy_to_host(dbp.data(), hb, stream) + return hb + cpdef bytes tobytes(self, uintptr_t stream=0): cdef const device_buffer* dbp = self.c_obj.get() cdef size_t s = dbp.size() @@ -142,8 +144,7 @@ cdef class DeviceBuffer: cdef bytes b = PyBytes_FromStringAndSize(NULL, s) cdef unsigned char* p = PyBytes_AS_STRING(b) cdef unsigned char[::1] mv = (p)[:s] - with nogil: - copy_to_host(dbp.data(), mv, stream) + self.copy_to_host(mv, stream) return b From 80bd86c1c41b151c56b214c680ff076d031881e6 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Wed, 29 Jan 2020 21:56:08 -0800 Subject: [PATCH 14/30] Test round-trip `DeviceBuffer` with output buffers --- python/rmm/tests/test_rmm.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/python/rmm/tests/test_rmm.py b/python/rmm/tests/test_rmm.py index 2c64aee94..593ec0f80 100644 --- a/python/rmm/tests/test_rmm.py +++ b/python/rmm/tests/test_rmm.py @@ -124,6 +124,32 @@ def test_rmm_device_buffer(size): assert b.capacity() >= b.size +@pytest.mark.parametrize( + "hb", + [ + b"abc", + bytearray(b"abc"), + memoryview(b"abc"), + np.asarray(memoryview(b"abc")), + np.arange(3, dtype="u1"), + ], +) +def test_rmm_device_buffer_memoryview_roundtrip(hb): + mv = memoryview(hb) + db = rmm.DeviceBuffer.frombytes(hb) + hb2 = db.copy_to_host() + mv2 = memoryview(hb2) + assert mv == mv2 + hb3 = bytearray(mv.nbytes) + hb3 = db.copy_to_host(hb3) + mv3 = memoryview(hb3) + assert mv == mv3 + hb4 = np.empty_like(mv) + hb4 = db.copy_to_host(hb4) + mv4 = memoryview(hb4) + assert mv == mv4 + + @pytest.mark.parametrize( "hb", [ From 61426760f0d6ba107345c78616b65a8a35f0128d Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Wed, 29 Jan 2020 22:16:33 -0800 Subject: [PATCH 15/30] Make `copy_to_host`'s `stream` argument optional --- python/rmm/_lib/device_buffer.pxd | 2 +- python/rmm/_lib/device_buffer.pyx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/rmm/_lib/device_buffer.pxd b/python/rmm/_lib/device_buffer.pxd index 22730099f..65ca1e22e 100644 --- a/python/rmm/_lib/device_buffer.pxd +++ b/python/rmm/_lib/device_buffer.pxd @@ -58,7 +58,7 @@ cdef class DeviceBuffer: cpdef void copy_to_host(uintptr_t db, unsigned char[::1] hb, - uintptr_t stream) nogil except * + uintptr_t stream=*) nogil except * cdef extern from "" namespace "std" nogil: diff --git a/python/rmm/_lib/device_buffer.pyx b/python/rmm/_lib/device_buffer.pyx index d6dd9aa59..0e54c8219 100644 --- a/python/rmm/_lib/device_buffer.pyx +++ b/python/rmm/_lib/device_buffer.pyx @@ -164,7 +164,7 @@ cdef class DeviceBuffer: @cython.boundscheck(False) cpdef void copy_to_host(uintptr_t db, unsigned char[::1] hb, - uintptr_t stream) nogil except *: + uintptr_t stream=0) nogil except *: if hb is None: with gil: raise TypeError( From 70bea9f8b9645d250b9a391d5f32a54ad8fef4eb Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Wed, 29 Jan 2020 22:21:26 -0800 Subject: [PATCH 16/30] Make `black` happy --- python/rmm/tests/test_rmm.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/python/rmm/tests/test_rmm.py b/python/rmm/tests/test_rmm.py index 593ec0f80..adc70cf73 100644 --- a/python/rmm/tests/test_rmm.py +++ b/python/rmm/tests/test_rmm.py @@ -135,19 +135,19 @@ def test_rmm_device_buffer(size): ], ) def test_rmm_device_buffer_memoryview_roundtrip(hb): - mv = memoryview(hb) - db = rmm.DeviceBuffer.frombytes(hb) - hb2 = db.copy_to_host() - mv2 = memoryview(hb2) - assert mv == mv2 - hb3 = bytearray(mv.nbytes) - hb3 = db.copy_to_host(hb3) - mv3 = memoryview(hb3) - assert mv == mv3 - hb4 = np.empty_like(mv) - hb4 = db.copy_to_host(hb4) - mv4 = memoryview(hb4) - assert mv == mv4 + mv = memoryview(hb) + db = rmm.DeviceBuffer.frombytes(hb) + hb2 = db.copy_to_host() + mv2 = memoryview(hb2) + assert mv == mv2 + hb3 = bytearray(mv.nbytes) + hb3 = db.copy_to_host(hb3) + mv3 = memoryview(hb3) + assert mv == mv3 + hb4 = np.empty_like(mv) + hb4 = db.copy_to_host(hb4) + mv4 = memoryview(hb4) + assert mv == mv4 @pytest.mark.parametrize( From 7f996a7460dc33de8c1cb8955d4c3ea5644eef9c Mon Sep 17 00:00:00 2001 From: jakirkham Date: Thu, 30 Jan 2020 10:18:10 -0800 Subject: [PATCH 17/30] Update python/rmm/_lib/device_buffer.pyx Co-Authored-By: Peter Andreas Entschev --- python/rmm/_lib/device_buffer.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/rmm/_lib/device_buffer.pyx b/python/rmm/_lib/device_buffer.pyx index 0e54c8219..eabe0c77f 100644 --- a/python/rmm/_lib/device_buffer.pyx +++ b/python/rmm/_lib/device_buffer.pyx @@ -122,7 +122,7 @@ cdef class DeviceBuffer: hb = np.empty((s,), dtype="u1") elif len(hb) < s: raise ValueError( - "Argument `hb` is to small. Need space for %i bytes." % s + "Argument `hb` is too small. Need space for %i bytes." % s ) elif len(hb) > s: hb = hb[:s] From 38c04b14a14e5a1a1202be825f5062042f9e248c Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Thu, 30 Jan 2020 10:35:15 -0800 Subject: [PATCH 18/30] Rename `frombytes` to `to_device` --- python/rmm/_lib/device_buffer.pxd | 2 +- python/rmm/_lib/device_buffer.pyx | 8 ++++---- python/rmm/tests/test_rmm.py | 16 ++++++++-------- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/python/rmm/_lib/device_buffer.pxd b/python/rmm/_lib/device_buffer.pxd index 65ca1e22e..7f7afa258 100644 --- a/python/rmm/_lib/device_buffer.pxd +++ b/python/rmm/_lib/device_buffer.pxd @@ -45,7 +45,7 @@ cdef class DeviceBuffer: cdef DeviceBuffer c_from_unique_ptr(unique_ptr[device_buffer] ptr) @staticmethod - cdef DeviceBuffer c_frombytes(const unsigned char[::1] b, + cdef DeviceBuffer c_to_device(const unsigned char[::1] b, uintptr_t stream=*) cpdef copy_to_host(self, unsigned char[::1] hb=*, uintptr_t stream=*) cpdef bytes tobytes(self, uintptr_t stream=*) diff --git a/python/rmm/_lib/device_buffer.pyx b/python/rmm/_lib/device_buffer.pyx index eabe0c77f..7dd7ce3f9 100644 --- a/python/rmm/_lib/device_buffer.pyx +++ b/python/rmm/_lib/device_buffer.pyx @@ -76,7 +76,7 @@ cdef class DeviceBuffer: return self.tobytes() def __setstate__(self, state): - cdef DeviceBuffer other = DeviceBuffer.c_frombytes(state) + cdef DeviceBuffer other = DeviceBuffer.c_to_device(state) self.c_obj = move(other.c_obj) @property @@ -98,7 +98,7 @@ cdef class DeviceBuffer: @staticmethod @cython.boundscheck(False) - cdef DeviceBuffer c_frombytes(const unsigned char[::1] b, + cdef DeviceBuffer c_to_device(const unsigned char[::1] b, uintptr_t stream=0): if b is None: raise TypeError( @@ -111,8 +111,8 @@ cdef class DeviceBuffer: return DeviceBuffer(ptr=p, size=s, stream=stream) @staticmethod - def frombytes(const unsigned char[::1] b, uintptr_t stream=0): - return DeviceBuffer.c_frombytes(b, stream) + def to_device(const unsigned char[::1] b, uintptr_t stream=0): + return DeviceBuffer.c_to_device(b, stream) cpdef copy_to_host(self, unsigned char[::1] hb=None, uintptr_t stream=0): cdef const device_buffer* dbp = self.c_obj.get() diff --git a/python/rmm/tests/test_rmm.py b/python/rmm/tests/test_rmm.py index adc70cf73..638e79e34 100644 --- a/python/rmm/tests/test_rmm.py +++ b/python/rmm/tests/test_rmm.py @@ -114,7 +114,7 @@ def test_rmm_device_buffer(size): assert len(s) == len(b) # Test conversion from bytes - b2 = rmm.DeviceBuffer.frombytes(s) + b2 = rmm.DeviceBuffer.to_device(s) assert isinstance(b2, rmm.DeviceBuffer) assert len(b2) == len(s) @@ -136,7 +136,7 @@ def test_rmm_device_buffer(size): ) def test_rmm_device_buffer_memoryview_roundtrip(hb): mv = memoryview(hb) - db = rmm.DeviceBuffer.frombytes(hb) + db = rmm.DeviceBuffer.to_device(hb) hb2 = db.copy_to_host() mv2 = memoryview(hb2) assert mv == mv2 @@ -172,19 +172,19 @@ def test_rmm_device_buffer_bytes_roundtrip(hb): mv = memoryview(hb) except TypeError: with pytest.raises(TypeError): - rmm.DeviceBuffer.frombytes(hb) + rmm.DeviceBuffer.to_device(hb) else: if mv.format != "B": with pytest.raises(ValueError): - rmm.DeviceBuffer.frombytes(hb) + rmm.DeviceBuffer.to_device(hb) elif len(mv.strides) != 1: with pytest.raises(ValueError): - rmm.DeviceBuffer.frombytes(hb) + rmm.DeviceBuffer.to_device(hb) elif mv.strides[0] != 1: with pytest.raises(ValueError): - rmm.DeviceBuffer.frombytes(hb) + rmm.DeviceBuffer.to_device(hb) else: - db = rmm.DeviceBuffer.frombytes(hb) + db = rmm.DeviceBuffer.to_device(hb) hb2 = db.tobytes() mv2 = memoryview(hb2) assert mv == mv2 @@ -195,7 +195,7 @@ def test_rmm_device_buffer_bytes_roundtrip(hb): @pytest.mark.parametrize("hb", [b"", b"123", b"abc"]) def test_rmm_device_buffer_pickle_roundtrip(hb): - db = rmm.DeviceBuffer.frombytes(hb) + db = rmm.DeviceBuffer.to_device(hb) pb = pickle.dumps(db) del db db2 = pickle.loads(pb) From 2c581f05aac0c66f63b169892205d41e47b8f535 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Thu, 30 Jan 2020 15:53:49 -0800 Subject: [PATCH 19/30] Drop explicit synchronization when copying To better align with how other libraries handle streams, skip the synchronization step. This should happen anyways for the default stream (if the user doesn't specify one). Also should give users who want to manage their concurrency more control. --- python/rmm/_lib/device_buffer.pyx | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/python/rmm/_lib/device_buffer.pyx b/python/rmm/_lib/device_buffer.pyx index 7dd7ce3f9..74a412e58 100644 --- a/python/rmm/_lib/device_buffer.pyx +++ b/python/rmm/_lib/device_buffer.pyx @@ -27,8 +27,7 @@ from libc.stdint cimport uintptr_t from cpython.bytes cimport PyBytes_FromStringAndSize, PyBytes_AS_STRING -from rmm._lib.lib cimport (cudaError_t, cudaSuccess, - cudaStream_t, cudaStreamSynchronize) +from rmm._lib.lib cimport cudaError_t, cudaSuccess, cudaStream_t cimport cython @@ -179,8 +178,3 @@ cpdef void copy_to_host(uintptr_t db, if err != cudaSuccess: with gil: raise RuntimeError(f"Memcpy failed with error: {err}") - - err = cudaStreamSynchronize(stream) - if err != cudaSuccess: - with gil: - raise RuntimeError(f"Stream sync failed with error: {err}") From 50d48e5edeacdcc7d0998c0030b3e7bfd415d466 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Thu, 30 Jan 2020 15:59:24 -0800 Subject: [PATCH 20/30] Refactor out `to_device` function Add a `to_device` function that the static method uses. --- python/rmm/_lib/device_buffer.pxd | 1 + python/rmm/_lib/device_buffer.pyx | 26 +++++++++++++++----------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/python/rmm/_lib/device_buffer.pxd b/python/rmm/_lib/device_buffer.pxd index 7f7afa258..f602353a9 100644 --- a/python/rmm/_lib/device_buffer.pxd +++ b/python/rmm/_lib/device_buffer.pxd @@ -56,6 +56,7 @@ cdef class DeviceBuffer: cdef void* c_data(self) +cpdef DeviceBuffer to_device(const unsigned char[::1] b, uintptr_t stream=*) cpdef void copy_to_host(uintptr_t db, unsigned char[::1] hb, uintptr_t stream=*) nogil except * diff --git a/python/rmm/_lib/device_buffer.pyx b/python/rmm/_lib/device_buffer.pyx index 74a412e58..57e180f3f 100644 --- a/python/rmm/_lib/device_buffer.pyx +++ b/python/rmm/_lib/device_buffer.pyx @@ -96,22 +96,13 @@ cdef class DeviceBuffer: return buf @staticmethod - @cython.boundscheck(False) cdef DeviceBuffer c_to_device(const unsigned char[::1] b, uintptr_t stream=0): - if b is None: - raise TypeError( - "Argument 'b' has incorrect type" - " (expected bytes, got NoneType)" - ) - - cdef uintptr_t p = &b[0] - cdef size_t s = len(b) - return DeviceBuffer(ptr=p, size=s, stream=stream) + return to_device(b, stream) @staticmethod def to_device(const unsigned char[::1] b, uintptr_t stream=0): - return DeviceBuffer.c_to_device(b, stream) + return to_device(b, stream) cpdef copy_to_host(self, unsigned char[::1] hb=None, uintptr_t stream=0): cdef const device_buffer* dbp = self.c_obj.get() @@ -160,6 +151,19 @@ cdef class DeviceBuffer: return self.c_obj.get()[0].data() +@cython.boundscheck(False) +cpdef DeviceBuffer to_device(const unsigned char[::1] b, uintptr_t stream=0): + if b is None: + raise TypeError( + "Argument 'b' has incorrect type" + " (expected bytes, got NoneType)" + ) + + cdef uintptr_t p = &b[0] + cdef size_t s = len(b) + return DeviceBuffer(ptr=p, size=s, stream=stream) + + @cython.boundscheck(False) cpdef void copy_to_host(uintptr_t db, unsigned char[::1] hb, From 08881701566a1d461ec22c9fdf6db7377ee69784 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Thu, 30 Jan 2020 16:50:12 -0800 Subject: [PATCH 21/30] Update changelog entry --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 212a1d251..999514f6d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,7 @@ - PR #252 Add `__sizeof__` method to `DeviceBuffer` - PR #258 Define pickling behavior for `DeviceBuffer` - PR #261 Add `__bytes__` method to `DeviceBuffer` -- PR #268 Add Cython/Python `copy_to_host` function +- PR #268 Add Cython/Python `copy_to_host` and `to_device` ## Improvements From f50da49a5ad4e2623e76744d14978dfa178ee3f5 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Thu, 30 Jan 2020 18:07:18 -0800 Subject: [PATCH 22/30] Comment on NumPy's use of huge pages --- python/rmm/_lib/device_buffer.pyx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/rmm/_lib/device_buffer.pyx b/python/rmm/_lib/device_buffer.pyx index 57e180f3f..2c7d86b6c 100644 --- a/python/rmm/_lib/device_buffer.pyx +++ b/python/rmm/_lib/device_buffer.pyx @@ -109,6 +109,8 @@ cdef class DeviceBuffer: cdef size_t s = dbp.size() if hb is None: + # NumPy leverages huge pages under-the-hood, + # which speeds up the copy from device to host. hb = np.empty((s,), dtype="u1") elif len(hb) < s: raise ValueError( From a0eee55482d495aed1982f8f15e7c504601a4906 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Thu, 30 Jan 2020 18:09:58 -0800 Subject: [PATCH 23/30] Synchronize on the default stream Make sure to synchronize when the default stream is used. Otherwise leave it up to the user to synchronize. --- python/rmm/_lib/device_buffer.pyx | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/python/rmm/_lib/device_buffer.pyx b/python/rmm/_lib/device_buffer.pyx index 2c7d86b6c..1f1064ac0 100644 --- a/python/rmm/_lib/device_buffer.pyx +++ b/python/rmm/_lib/device_buffer.pyx @@ -27,7 +27,8 @@ from libc.stdint cimport uintptr_t from cpython.bytes cimport PyBytes_FromStringAndSize, PyBytes_AS_STRING -from rmm._lib.lib cimport cudaError_t, cudaSuccess, cudaStream_t +from rmm._lib.lib cimport (cudaError_t, cudaSuccess, + cudaStream_t, cudaStreamSynchronize) cimport cython @@ -184,3 +185,9 @@ cpdef void copy_to_host(uintptr_t db, if err != cudaSuccess: with gil: raise RuntimeError(f"Memcpy failed with error: {err}") + + if stream == 0: + err = cudaStreamSynchronize(stream) + if err != cudaSuccess: + with gil: + raise RuntimeError(f"Stream sync failed with error: {err}") From 71fc46edfd9269029c77cf2ea894c5ad78f0d00f Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Fri, 31 Jan 2020 11:55:25 -0800 Subject: [PATCH 24/30] Fix error message to say "bytes-like" --- python/rmm/_lib/device_buffer.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/rmm/_lib/device_buffer.pyx b/python/rmm/_lib/device_buffer.pyx index 1f1064ac0..4dfede4c0 100644 --- a/python/rmm/_lib/device_buffer.pyx +++ b/python/rmm/_lib/device_buffer.pyx @@ -159,7 +159,7 @@ cpdef DeviceBuffer to_device(const unsigned char[::1] b, uintptr_t stream=0): if b is None: raise TypeError( "Argument 'b' has incorrect type" - " (expected bytes, got NoneType)" + " (expected bytes-like, got NoneType)" ) cdef uintptr_t p = &b[0] From 480a3fa4ea003575b172fbcac6155aff9eb314e9 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Fri, 31 Jan 2020 11:56:10 -0800 Subject: [PATCH 25/30] Drop `RuntimeWarning` for larger buffers --- python/rmm/_lib/device_buffer.pyx | 7 ------- 1 file changed, 7 deletions(-) diff --git a/python/rmm/_lib/device_buffer.pyx b/python/rmm/_lib/device_buffer.pyx index 4dfede4c0..0f3f19bbe 100644 --- a/python/rmm/_lib/device_buffer.pyx +++ b/python/rmm/_lib/device_buffer.pyx @@ -18,8 +18,6 @@ # cython: language_level = 3 -import warnings - import numpy as np from libcpp.memory cimport unique_ptr @@ -119,11 +117,6 @@ cdef class DeviceBuffer: ) elif len(hb) > s: hb = hb[:s] - warnings.warn( - "Argument `hb` larger than needed." - " Will fill only first %i bytes." % s, - RuntimeWarning - ) with nogil: copy_to_host(dbp.data(), hb, stream) From db4be746fee217224caa84e9a18c370a93e673df Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Fri, 31 Jan 2020 11:57:35 -0800 Subject: [PATCH 26/30] Rename `copy_to_host` func to `copy_ptr_to_host` --- python/rmm/_lib/device_buffer.pxd | 6 +++--- python/rmm/_lib/device_buffer.pyx | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/python/rmm/_lib/device_buffer.pxd b/python/rmm/_lib/device_buffer.pxd index f602353a9..b8943b9a6 100644 --- a/python/rmm/_lib/device_buffer.pxd +++ b/python/rmm/_lib/device_buffer.pxd @@ -57,9 +57,9 @@ cdef class DeviceBuffer: cpdef DeviceBuffer to_device(const unsigned char[::1] b, uintptr_t stream=*) -cpdef void copy_to_host(uintptr_t db, - unsigned char[::1] hb, - uintptr_t stream=*) nogil except * +cpdef void copy_ptr_to_host(uintptr_t db, + unsigned char[::1] hb, + uintptr_t stream=*) nogil except * cdef extern from "" namespace "std" nogil: diff --git a/python/rmm/_lib/device_buffer.pyx b/python/rmm/_lib/device_buffer.pyx index 0f3f19bbe..02e2fafa0 100644 --- a/python/rmm/_lib/device_buffer.pyx +++ b/python/rmm/_lib/device_buffer.pyx @@ -119,7 +119,7 @@ cdef class DeviceBuffer: hb = hb[:s] with nogil: - copy_to_host(dbp.data(), hb, stream) + copy_ptr_to_host(dbp.data(), hb, stream) return hb @@ -161,9 +161,9 @@ cpdef DeviceBuffer to_device(const unsigned char[::1] b, uintptr_t stream=0): @cython.boundscheck(False) -cpdef void copy_to_host(uintptr_t db, - unsigned char[::1] hb, - uintptr_t stream=0) nogil except *: +cpdef void copy_ptr_to_host(uintptr_t db, + unsigned char[::1] hb, + uintptr_t stream=0) nogil except *: if hb is None: with gil: raise TypeError( From f75ebaf6092e311339c9fd434cb26e9a16a6afa7 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Fri, 31 Jan 2020 12:35:15 -0800 Subject: [PATCH 27/30] Just pass the subselection to `copy_ptr_to_host` --- python/rmm/_lib/device_buffer.pyx | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python/rmm/_lib/device_buffer.pyx b/python/rmm/_lib/device_buffer.pyx index 02e2fafa0..cf2847959 100644 --- a/python/rmm/_lib/device_buffer.pyx +++ b/python/rmm/_lib/device_buffer.pyx @@ -115,11 +115,9 @@ cdef class DeviceBuffer: raise ValueError( "Argument `hb` is too small. Need space for %i bytes." % s ) - elif len(hb) > s: - hb = hb[:s] with nogil: - copy_ptr_to_host(dbp.data(), hb, stream) + copy_ptr_to_host(dbp.data(), hb[:s], stream) return hb From c9c4cfea2624192a0948f5a661c2eaf80512e7ec Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Mon, 3 Feb 2020 14:03:07 -0800 Subject: [PATCH 28/30] Add docstrings to new functions/methods --- python/rmm/_lib/device_buffer.pyx | 58 +++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/python/rmm/_lib/device_buffer.pyx b/python/rmm/_lib/device_buffer.pyx index cf2847959..439740bfc 100644 --- a/python/rmm/_lib/device_buffer.pyx +++ b/python/rmm/_lib/device_buffer.pyx @@ -97,13 +97,34 @@ cdef class DeviceBuffer: @staticmethod cdef DeviceBuffer c_to_device(const unsigned char[::1] b, uintptr_t stream=0): + """Calls ``to_device`` function on arguments provided""" return to_device(b, stream) @staticmethod def to_device(const unsigned char[::1] b, uintptr_t stream=0): + """Calls ``to_device`` function on arguments provided""" return to_device(b, stream) cpdef copy_to_host(self, unsigned char[::1] hb=None, uintptr_t stream=0): + """Copy from a ``DeviceBuffer`` to a buffer on host + + Parameters + ---------- + hb : ``bytes``-like buffer to write into + stream : CUDA stream to use for copying, default 0 + + Examples + -------- + >>> import rmm + >>> db = rmm.DeviceBuffer.to_device(b"abc") + >>> hb = bytearray(db.nbytes) + >>> db.copy_to_host(hb) + >>> print(hb) + bytearray(b'abc') + >>> hb = db.copy_to_host() + >>> print(hb) + bytearray(b'abc') + """ cdef const device_buffer* dbp = self.c_obj.get() cdef size_t s = dbp.size() @@ -147,6 +168,25 @@ cdef class DeviceBuffer: @cython.boundscheck(False) cpdef DeviceBuffer to_device(const unsigned char[::1] b, uintptr_t stream=0): + """Return a new ``DeviceBuffer`` with a copy of the data + + Parameters + ---------- + b : ``bytes``-like data on host to copy to device + stream : CUDA stream to use for copying, default 0 + + Returns + ------- + ``DeviceBuffer`` with copy of data from host + + Examples + -------- + >>> import rmm + >>> db = rmm._lib.device_buffer.to_device(b"abc") + >>> print(bytes(db)) + b'abc' + """ + if b is None: raise TypeError( "Argument 'b' has incorrect type" @@ -162,6 +202,24 @@ cpdef DeviceBuffer to_device(const unsigned char[::1] b, uintptr_t stream=0): cpdef void copy_ptr_to_host(uintptr_t db, unsigned char[::1] hb, uintptr_t stream=0) nogil except *: + """Copy from a device pointer to a buffer on host + + Parameters + ---------- + db : pointer to data on device to copy + hb : ``bytes``-like buffer to write into + stream : CUDA stream to use for copying, default 0 + + Examples + -------- + >>> import rmm + >>> db = rmm.DeviceBuffer.to_device(b"abc") + >>> hb = bytearray(db.nbytes) + >>> rmm._lib.device_buffer.copy_ptr_to_host(db.ptr, hb) + >>> print(hb) + bytearray(b'abc') + """ + if hb is None: with gil: raise TypeError( From 02c45daae881738a715beb2a2234650d3be115bf Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Mon, 3 Feb 2020 14:10:34 -0800 Subject: [PATCH 29/30] Match Numba signature with `ary` Also make sure we return the exact same input provided to us (as opposed to a Cython memoryview). --- python/rmm/_lib/device_buffer.pxd | 2 +- python/rmm/_lib/device_buffer.pyx | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/python/rmm/_lib/device_buffer.pxd b/python/rmm/_lib/device_buffer.pxd index b8943b9a6..70f21852d 100644 --- a/python/rmm/_lib/device_buffer.pxd +++ b/python/rmm/_lib/device_buffer.pxd @@ -47,7 +47,7 @@ cdef class DeviceBuffer: @staticmethod cdef DeviceBuffer c_to_device(const unsigned char[::1] b, uintptr_t stream=*) - cpdef copy_to_host(self, unsigned char[::1] hb=*, uintptr_t stream=*) + cpdef copy_to_host(self, ary=*, uintptr_t stream=*) cpdef bytes tobytes(self, uintptr_t stream=*) cdef size_t c_size(self) diff --git a/python/rmm/_lib/device_buffer.pyx b/python/rmm/_lib/device_buffer.pyx index 439740bfc..855d50f97 100644 --- a/python/rmm/_lib/device_buffer.pyx +++ b/python/rmm/_lib/device_buffer.pyx @@ -105,7 +105,7 @@ cdef class DeviceBuffer: """Calls ``to_device`` function on arguments provided""" return to_device(b, stream) - cpdef copy_to_host(self, unsigned char[::1] hb=None, uintptr_t stream=0): + cpdef copy_to_host(self, ary=None, uintptr_t stream=0): """Copy from a ``DeviceBuffer`` to a buffer on host Parameters @@ -128,10 +128,11 @@ cdef class DeviceBuffer: cdef const device_buffer* dbp = self.c_obj.get() cdef size_t s = dbp.size() + cdef unsigned char[::1] hb = ary if hb is None: # NumPy leverages huge pages under-the-hood, # which speeds up the copy from device to host. - hb = np.empty((s,), dtype="u1") + hb = ary = np.empty((s,), dtype="u1") elif len(hb) < s: raise ValueError( "Argument `hb` is too small. Need space for %i bytes." % s @@ -140,7 +141,7 @@ cdef class DeviceBuffer: with nogil: copy_ptr_to_host(dbp.data(), hb[:s], stream) - return hb + return ary cpdef bytes tobytes(self, uintptr_t stream=0): cdef const device_buffer* dbp = self.c_obj.get() From 3f6d997c21222f1bb01584854de3b84d6fc783d8 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Mon, 3 Feb 2020 14:36:47 -0800 Subject: [PATCH 30/30] Verify `copy_to_host` returns the same object --- python/rmm/tests/test_rmm.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/python/rmm/tests/test_rmm.py b/python/rmm/tests/test_rmm.py index 638e79e34..ba79049ee 100644 --- a/python/rmm/tests/test_rmm.py +++ b/python/rmm/tests/test_rmm.py @@ -138,15 +138,18 @@ def test_rmm_device_buffer_memoryview_roundtrip(hb): mv = memoryview(hb) db = rmm.DeviceBuffer.to_device(hb) hb2 = db.copy_to_host() + assert isinstance(hb2, np.ndarray) mv2 = memoryview(hb2) assert mv == mv2 - hb3 = bytearray(mv.nbytes) - hb3 = db.copy_to_host(hb3) - mv3 = memoryview(hb3) + hb3a = bytearray(mv.nbytes) + hb3b = db.copy_to_host(hb3a) + assert hb3a is hb3b + mv3 = memoryview(hb3b) assert mv == mv3 - hb4 = np.empty_like(mv) - hb4 = db.copy_to_host(hb4) - mv4 = memoryview(hb4) + hb4a = np.empty_like(mv) + hb4b = db.copy_to_host(hb4a) + assert hb4a is hb4b + mv4 = memoryview(hb4b) assert mv == mv4