From 7cbcd97586bcc48a39787de0c80e5e8e7e175c46 Mon Sep 17 00:00:00 2001
From: Mark Harris
Date: Wed, 2 Jun 2021 10:57:46 +1000
Subject: [PATCH] Explicit streams in device_buffer (#775)

Removes default stream arguments from `rmm::device_buffer`. Copy construction
now requires a stream argument (the copy constructor without a stream is
deleted), and copy assignment is disallowed (the copy assignment operator is
deleted). Move construction and assignment are still supported, and move
assignment still uses the most recently used stream for deallocating any
previously held data.

Also improves device_buffer tests (implements TODOs in code).

I don't think this should be merged until dependent RAPIDS libraries are ready
for it. I have a libcudf PR in progress for this.

Fixes #418

- [x] cuDF PR: https://github.com/rapidsai/cudf/pull/8280
- [x] cuGraph PR: https://github.com/rapidsai/cugraph/pull/1609
- [x] cuSpatial PR: https://github.com/rapidsai/cuspatial/pull/403
- [x] cuML does not yet use device_buffer

Authors:
  - Mark Harris (https://github.com/harrism)

Approvers:
  - Rong Ou (https://github.com/rongou)
  - Keith Kraus (https://github.com/kkraus14)
  - Conor Hoekstra (https://github.com/codereport)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/rmm/pull/775
---
 include/rmm/device_buffer.hpp                | 103 ++++------
 include/rmm/device_scalar.hpp                |   4 +-
 python/rmm/_lib/device_buffer.pxd            |   9 +-
 python/rmm/_lib/device_buffer.pyx            |   6 +-
 python/rmm/_lib/tests/test_device_buffer.pyx |   5 +-
 tests/cuda_stream_tests.cpp                  |   4 +-
 tests/device_buffer_tests.cu                 | 200 +++++++------
 7 files changed, 127 insertions(+), 204 deletions(-)

diff --git a/include/rmm/device_buffer.hpp b/include/rmm/device_buffer.hpp
index d9ba0a4b8..ad8655180 100644
--- a/include/rmm/device_buffer.hpp
+++ b/include/rmm/device_buffer.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -50,8 +50,12 @@ namespace rmm {
 * cuda_stream_view stream = cuda_stream_view{};
 * device_buffer custom_buff(100, stream, &mr);
 *
- * // deep copies `buff` into a new device buffer using the default stream
- * device_buffer buff_copy(buff);
+ * // deep copies `buff` into a new device buffer using the specified stream
+ * device_buffer buff_copy(buff, stream);
+ *
+ * // moves the memory in `from_buff` to `to_buff`. Deallocates previously allocated
+ * // to_buff memory on `to_buff.stream()`.
+ * device_buffer to_buff(std::move(from_buff));
 *
 * // deep copies `buff` into a new device buffer using the specified stream
 * device_buffer buff_copy(buff, stream);
@@ -62,16 +66,19 @@ namespace rmm {
 * // Default construction. Buffer is empty
 * device_buffer buff_default{};
 *
- * // If the requested size is larger than the current size, resizes allocation
- * // to the new size and deep copies any previous contents. Otherwise, simply
- * // updates the value of `size()` to the newly requested size without any
- * // allocations or copies. Uses the optionally specified stream or the default
- * // stream if none specified.
+ * // If the requested size is larger than the current size, resizes allocation to the new size and
+ * // deep copies any previous contents. Otherwise, simply updates the value of `size()` to the
+ * // newly requested size without any allocations or copies. Uses the specified stream.
* buff_default.resize(100, stream); *``` */ class device_buffer { public: + // The copy constructor and copy assignment operator without a stream are deleted because they + // provide no way to specify an explicit stream + device_buffer(device_buffer const& other) = delete; + device_buffer& operator=(device_buffer const& other) = delete; + /** * @brief Default constructor creates an empty `device_buffer` */ @@ -95,11 +102,11 @@ class device_buffer { * @param mr Memory resource to use for the device memory allocation. */ explicit device_buffer(std::size_t size, - cuda_stream_view stream = cuda_stream_view{}, + cuda_stream_view stream, mr::device_memory_resource* mr = mr::get_current_device_resource()) : _stream{stream}, _mr{mr} { - allocate(size); + allocate_async(size); } /** @@ -123,12 +130,12 @@ class device_buffer { */ device_buffer(void const* source_data, std::size_t size, - cuda_stream_view stream = cuda_stream_view{}, + cuda_stream_view stream, mr::device_memory_resource* mr = mr::get_current_device_resource()) : _stream{stream}, _mr{mr} { - allocate(size); - copy(source_data, size); + allocate_async(size); + copy_async(source_data, size); } /** @@ -153,7 +160,7 @@ class device_buffer { * @param mr The resource to use for allocating the new `device_buffer` */ device_buffer(device_buffer const& other, - cuda_stream_view stream = cuda_stream_view{}, + cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) : device_buffer{other.data(), other.size(), stream, mr} { @@ -185,47 +192,6 @@ class device_buffer { other.set_stream(cuda_stream_view{}); } - /** - * @brief Copies the contents of `other` into this `device_buffer`. - * - * All operations on the data in this `device_buffer` on all streams must be - * complete before using this operator, otherwise behavior is undefined. - * - * If the existing capacity is large enough, and the memory resources are - * compatible, then this `device_buffer`'s existing memory will be reused and - * `other`s contents will simply be copied on `other.stream()`. I.e., if - * `capcity() > other.size()` and - * `memory_resource()->is_equal(*other.memory_resource())`. - * - * Otherwise, the existing memory will be deallocated using - * `memory_resource()` on `stream()` and new memory will be allocated using - * `other.memory_resource()` on `other.stream()`. - * - * @throws rmm::bad_alloc if allocation fails - * @throws rmm::cuda_error if the copy from `other` fails - * - * @param other The `device_buffer` to copy. - */ - device_buffer& operator=(device_buffer const& other) - { - if (&other != this) { - // If the current capacity is large enough and the resources are - // compatible, just reuse the existing memory - if ((capacity() > other.size()) and _mr->is_equal(*other._mr)) { - resize(other.size(), other.stream()); - copy(other.data(), other.size()); - } else { - // Otherwise, need to deallocate and allocate new memory - deallocate(); - set_stream(other.stream()); - _mr = other._mr; - allocate(other.size()); - copy(other.data(), other.size()); - } - } - return *this; - } - /** * @brief Move assignment operator moves the contents from `other`. 
* @@ -241,7 +207,7 @@ class device_buffer { device_buffer& operator=(device_buffer&& other) noexcept { if (&other != this) { - deallocate(); + deallocate_async(); _data = other._data; _size = other._size; @@ -266,7 +232,7 @@ class device_buffer { */ ~device_buffer() noexcept { - deallocate(); + deallocate_async(); _mr = nullptr; _stream = cuda_stream_view{}; } @@ -296,7 +262,7 @@ class device_buffer { * @param new_size The requested new size, in bytes * @param stream The stream to use for allocation and copy */ - void resize(std::size_t new_size, cuda_stream_view stream = cuda_stream_view{}) + void resize(std::size_t new_size, cuda_stream_view stream) { set_stream(stream); // If the requested size is smaller than the current capacity, just update @@ -307,7 +273,7 @@ class device_buffer { void* const new_data = _mr->allocate(new_size, this->stream()); RMM_CUDA_TRY( cudaMemcpyAsync(new_data, data(), size(), cudaMemcpyDefault, this->stream().value())); - deallocate(); + deallocate_async(); _data = new_data; _size = new_size; _capacity = new_size; @@ -327,7 +293,7 @@ class device_buffer { * * @param stream The stream on which the allocation and copy are performed */ - void shrink_to_fit(cuda_stream_view stream = cuda_stream_view{}) + void shrink_to_fit(cuda_stream_view stream) { set_stream(stream); if (size() != capacity()) { @@ -404,19 +370,19 @@ class device_buffer { ///< allocate/deallocate device memory /** - * @brief Allocates the specified amount of memory and updates the - * size/capacity accordingly. + * @brief Allocates the specified amount of memory and updates the size/capacity accordingly. + * + * Allocates on `stream()` using the memory resource passed to the constructor. * * If `bytes == 0`, sets `_data = nullptr`. * * @param bytes The amount of memory to allocate - * @param stream The stream on which to allocate */ - void allocate(std::size_t bytes) + void allocate_async(std::size_t bytes) { _size = bytes; _capacity = bytes; - _data = (bytes > 0) ? _mr->allocate(bytes, stream()) : nullptr; + _data = (bytes > 0) ? memory_resource()->allocate(bytes, stream()) : nullptr; } /** @@ -426,10 +392,11 @@ class device_buffer { * If the buffer doesn't hold any memory, i.e., `capacity() == 0`, doesn't * call the resource deallocation. * + * Deallocates on `stream()` using the memory resource passed to the constructor. */ - void deallocate() noexcept + void deallocate_async() noexcept { - if (capacity() > 0) { _mr->deallocate(data(), capacity(), stream()); } + if (capacity() > 0) { memory_resource()->deallocate(data(), capacity(), stream()); } _size = 0; _capacity = 0; _data = nullptr; @@ -447,7 +414,7 @@ class device_buffer { * @param source The pointer to copy from * @param bytes The number of bytes to copy */ - void copy(void const* source, std::size_t bytes) + void copy_async(void const* source, std::size_t bytes) { if (bytes > 0) { RMM_EXPECTS(nullptr != source, "Invalid copy from nullptr."); diff --git a/include/rmm/device_scalar.hpp b/include/rmm/device_scalar.hpp index c54fc7937..40a7b43bc 100644 --- a/include/rmm/device_scalar.hpp +++ b/include/rmm/device_scalar.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -314,7 +314,7 @@ class device_scalar { device_scalar &operator=(device_scalar &&) = delete; private: - rmm::device_buffer buffer{sizeof(T)}; + rmm::device_buffer buffer{sizeof(T), cuda_stream_default}; inline void _memcpy(void *dst, const void *src, cuda_stream_view stream) const { diff --git a/python/rmm/_lib/device_buffer.pxd b/python/rmm/_lib/device_buffer.pxd index 3b6b4face..635b1ed8a 100644 --- a/python/rmm/_lib/device_buffer.pxd +++ b/python/rmm/_lib/device_buffer.pxd @@ -23,14 +23,11 @@ from rmm._lib.memory_resource cimport DeviceMemoryResource cdef extern from "rmm/device_buffer.hpp" namespace "rmm" nogil: cdef cppclass device_buffer: device_buffer() - device_buffer(size_t size) except + device_buffer(size_t size, cuda_stream_view stream) except + - device_buffer(const void* source_data, size_t size) except + device_buffer(const void* source_data, size_t size, cuda_stream_view stream) except + - device_buffer(const device_buffer& other) except + - void resize(size_t new_size) except + - void shrink_to_fit() except + + void resize(size_t new_size, cuda_stream_view stream) except + + void shrink_to_fit(cuda_stream_view stream) except + void* data() size_t size() size_t capacity() @@ -60,7 +57,7 @@ cdef class DeviceBuffer: cpdef bytes tobytes(self, Stream stream=*) cdef size_t c_size(self) except * - cpdef void resize(self, size_t new_size) except * + cpdef void resize(self, size_t new_size, Stream stream=*) except * cpdef size_t capacity(self) except * cdef void* c_data(self) except * diff --git a/python/rmm/_lib/device_buffer.pyx b/python/rmm/_lib/device_buffer.pyx index 3401b4802..cbe0bdb33 100644 --- a/python/rmm/_lib/device_buffer.pyx +++ b/python/rmm/_lib/device_buffer.pyx @@ -284,8 +284,10 @@ cdef class DeviceBuffer: cdef size_t c_size(self) except *: return self.c_obj.get()[0].size() - cpdef void resize(self, size_t new_size) except *: - self.c_obj.get()[0].resize(new_size) + cpdef void resize(self, + size_t new_size, + Stream stream=DEFAULT_STREAM) except *: + self.c_obj.get()[0].resize(new_size, stream.view()) cpdef size_t capacity(self) except *: return self.c_obj.get()[0].capacity() diff --git a/python/rmm/_lib/tests/test_device_buffer.pyx b/python/rmm/_lib/tests/test_device_buffer.pyx index d346e8ed0..c4f5e5ae3 100644 --- a/python/rmm/_lib/tests/test_device_buffer.pyx +++ b/python/rmm/_lib/tests/test_device_buffer.pyx @@ -18,14 +18,17 @@ import numpy as np from libcpp.memory cimport make_unique from libcpp.utility cimport move +from rmm._lib.cuda_stream_view cimport cuda_stream_default from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer def test_release(): expect = DeviceBuffer.to_device(b'abc') cdef DeviceBuffer buf = DeviceBuffer.to_device(b'abc') + got = DeviceBuffer.c_from_unique_ptr( - make_unique[device_buffer](buf.c_release()) + make_unique[device_buffer](buf.c_release(), + cuda_stream_default.value()) ) np.testing.assert_equal(expect.copy_to_host(), got.copy_to_host()) diff --git a/tests/cuda_stream_tests.cpp b/tests/cuda_stream_tests.cpp index 59ac07f3d..55e3185fe 100644 --- a/tests/cuda_stream_tests.cpp +++ b/tests/cuda_stream_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -38,7 +38,7 @@ TEST_F(CudaStreamTest, Equality) EXPECT_NE(view_a, rmm::cuda_stream()); EXPECT_NE(stream_a, rmm::cuda_stream()); - rmm::device_buffer buff(0); + rmm::device_buffer buff{}; EXPECT_EQ(buff.stream(), view_default); } diff --git a/tests/device_buffer_tests.cu b/tests/device_buffer_tests.cu index 95ea23a93..448c9259d 100644 --- a/tests/device_buffer_tests.cu +++ b/tests/device_buffer_tests.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,8 +17,10 @@ #include #include +#include #include #include +#include #include #include #include @@ -51,7 +53,7 @@ TYPED_TEST_CASE(DeviceBufferTest, resources); TYPED_TEST(DeviceBufferTest, DefaultMemoryResource) { - rmm::device_buffer buff(this->size); + rmm::device_buffer buff(this->size, rmm::cuda_stream_view{}); EXPECT_NE(nullptr, buff.data()); EXPECT_EQ(this->size, buff.size()); EXPECT_EQ(this->size, buff.capacity()); @@ -97,32 +99,36 @@ TYPED_TEST(DeviceBufferTest, CopyFromRawDevicePointer) { void *device_memory{nullptr}; EXPECT_EQ(cudaSuccess, cudaMalloc(&device_memory, this->size)); - rmm::device_buffer buff(device_memory, this->size); + rmm::device_buffer buff(device_memory, this->size, rmm::cuda_stream_view{}); EXPECT_NE(nullptr, buff.data()); EXPECT_EQ(this->size, buff.size()); EXPECT_EQ(this->size, buff.capacity()); EXPECT_EQ(rmm::mr::get_current_device_resource(), buff.memory_resource()); EXPECT_EQ(rmm::cuda_stream_view{}, buff.stream()); + // TODO check for equality between the contents of the two allocations + buff.stream().synchronize(); EXPECT_EQ(cudaSuccess, cudaFree(device_memory)); } TYPED_TEST(DeviceBufferTest, CopyFromRawHostPointer) { std::vector host_data(this->size); - rmm::device_buffer buff(static_cast(host_data.data()), this->size); + rmm::device_buffer buff( + static_cast(host_data.data()), this->size, rmm::cuda_stream_view{}); EXPECT_NE(nullptr, buff.data()); EXPECT_EQ(this->size, buff.size()); EXPECT_EQ(this->size, buff.capacity()); EXPECT_EQ(rmm::mr::get_current_device_resource(), buff.memory_resource()); EXPECT_EQ(rmm::cuda_stream_view{}, buff.stream()); + buff.stream().synchronize(); // TODO check for equality between the contents of the two allocations } TYPED_TEST(DeviceBufferTest, CopyFromNullptr) { // can copy from a nullptr only if size == 0 - rmm::device_buffer buff(nullptr, 0); + rmm::device_buffer buff(nullptr, 0, rmm::cuda_stream_view{}); EXPECT_EQ(nullptr, buff.data()); EXPECT_EQ(0, buff.size()); EXPECT_EQ(0, buff.capacity()); @@ -133,7 +139,7 @@ TYPED_TEST(DeviceBufferTest, CopyFromNullptr) TYPED_TEST(DeviceBufferTest, CopyFromNullptrNonZero) { // can copy from a nullptr only if size == 0 - EXPECT_THROW(rmm::device_buffer buff(nullptr, 1), rmm::logic_error); + EXPECT_THROW(rmm::device_buffer buff(nullptr, 1, rmm::cuda_stream_view{}), rmm::logic_error); } TYPED_TEST(DeviceBufferTest, CopyConstructor) @@ -141,12 +147,12 @@ TYPED_TEST(DeviceBufferTest, CopyConstructor) rmm::device_buffer buff(this->size, rmm::cuda_stream_view{}, &this->mr); // Initialize buffer - thrust::sequence(thrust::device, + thrust::sequence(rmm::exec_policy(rmm::cuda_stream_default), static_cast(buff.data()), static_cast(buff.data()) + buff.size(), 0); - rmm::device_buffer buff_copy(buff); // uses default stream and MR + rmm::device_buffer buff_copy(buff, rmm::cuda_stream_default); // uses default MR EXPECT_NE(nullptr, 
buff_copy.data()); EXPECT_NE(buff.data(), buff_copy.data()); EXPECT_EQ(buff.size(), buff_copy.size()); @@ -155,7 +161,7 @@ TYPED_TEST(DeviceBufferTest, CopyConstructor) EXPECT_TRUE(buff_copy.memory_resource()->is_equal(*rmm::mr::get_current_device_resource())); EXPECT_EQ(buff_copy.stream(), rmm::cuda_stream_view{}); - EXPECT_TRUE(thrust::equal(thrust::device, + EXPECT_TRUE(thrust::equal(rmm::exec_policy(rmm::cuda_stream_default), static_cast(buff.data()), static_cast(buff.data()) + buff.size(), static_cast(buff_copy.data()))); @@ -166,25 +172,25 @@ TYPED_TEST(DeviceBufferTest, CopyConstructor) EXPECT_TRUE(buff_copy2.memory_resource()->is_equal(*buff.memory_resource())); EXPECT_EQ(buff_copy2.stream(), buff.stream()); - // EXPECT_TRUE( - // thrust::equal(thrust::device, static_cast(buff.data()), - // static_cast(buff.data()) + buff.size(), - // static_cast(buff_copy.data()))); + EXPECT_TRUE(thrust::equal(rmm::exec_policy(rmm::cuda_stream_default), + static_cast(buff.data()), + static_cast(buff.data()) + buff.size(), + static_cast(buff_copy.data()))); } TYPED_TEST(DeviceBufferTest, CopyCapacityLargerThanSize) { - rmm::device_buffer buff(this->size, rmm::cuda_stream_view{}, &this->mr); + rmm::device_buffer buff(this->size, rmm::cuda_stream_default, &this->mr); // Resizing smaller to make `size()` < `capacity()` auto new_size = this->size - 1; - buff.resize(new_size); + buff.resize(new_size, rmm::cuda_stream_default); - // Can't do this until RMM cmake is setup to build cuda files - // thrust::sequence(thrust::device, static_cast(buff.data()), - // static_cast(buffer.data()) + buff.size(), - // 0); - rmm::device_buffer buff_copy(buff); + thrust::sequence(rmm::exec_policy(rmm::cuda_stream_default), + static_cast(buff.data()), + static_cast(buff.data()) + buff.size(), + 0); + rmm::device_buffer buff_copy(buff, rmm::cuda_stream_default); EXPECT_NE(nullptr, buff_copy.data()); EXPECT_NE(buff.data(), buff_copy.data()); EXPECT_EQ(buff.size(), buff_copy.size()); @@ -195,19 +201,20 @@ TYPED_TEST(DeviceBufferTest, CopyCapacityLargerThanSize) EXPECT_TRUE(buff_copy.memory_resource()->is_equal(*rmm::mr::get_current_device_resource())); EXPECT_EQ(buff_copy.stream(), rmm::cuda_stream_view{}); - // EXPECT_TRUE( - // thrust::equal(thrust::device, static_cast(buff.data()), - // static_cast(buff.data()) + buff.size(), - // static_cast(buff_copy.data()))); + EXPECT_TRUE(thrust::equal(rmm::exec_policy(rmm::cuda_stream_default), + static_cast(buff.data()), + static_cast(buff.data()) + buff.size(), + static_cast(buff_copy.data()))); } TYPED_TEST(DeviceBufferTest, CopyConstructorExplicitMr) { - rmm::device_buffer buff(this->size, rmm::cuda_stream_view{}, &this->mr); - // Can't do this until RMM cmake is setup to build cuda files - // thrust::sequence(thrust::device, static_cast(buff.data()), - // static_cast(buffer.data()) + buff.size(), - // 0); + rmm::device_buffer buff(this->size, rmm::cuda_stream_default, &this->mr); + + thrust::sequence(rmm::exec_policy(rmm::cuda_stream_default), + static_cast(buff.data()), + static_cast(buff.data()) + buff.size(), + 0); rmm::device_buffer buff_copy(buff, this->stream, &this->mr); EXPECT_NE(nullptr, buff_copy.data()); EXPECT_NE(buff.data(), buff_copy.data()); @@ -217,24 +224,24 @@ TYPED_TEST(DeviceBufferTest, CopyConstructorExplicitMr) EXPECT_TRUE(buff.memory_resource()->is_equal(*buff_copy.memory_resource())); EXPECT_NE(buff.stream(), buff_copy.stream()); - // EXPECT_TRUE( - // thrust::equal(thrust::device, static_cast(buff.data()), - // static_cast(buff.data()) + buff.size(), - 
// static_cast(buff_copy.data()))); + EXPECT_TRUE(thrust::equal(rmm::exec_policy(buff_copy.stream()), + static_cast(buff.data()), + static_cast(buff.data()) + buff.size(), + static_cast(buff_copy.data()))); } TYPED_TEST(DeviceBufferTest, CopyCapacityLargerThanSizeExplicitMr) { - rmm::device_buffer buff(this->size, rmm::cuda_stream_view{}, &this->mr); + rmm::device_buffer buff(this->size, rmm::cuda_stream_default, &this->mr); // Resizing smaller to make `size()` < `capacity()` auto new_size = this->size - 1; - buff.resize(new_size); + buff.resize(new_size, rmm::cuda_stream_default); - // Can't do this until RMM cmake is setup to build cuda files - // thrust::sequence(thrust::device, static_cast(buff.data()), - // static_cast(buffer.data()) + buff.size(), - // 0); + thrust::sequence(rmm::exec_policy(rmm::cuda_stream_default), + static_cast(buff.data()), + static_cast(buff.data()) + buff.size(), + 0); rmm::device_buffer buff_copy(buff, this->stream, &this->mr); EXPECT_NE(nullptr, buff_copy.data()); EXPECT_NE(buff.data(), buff_copy.data()); @@ -247,80 +254,15 @@ TYPED_TEST(DeviceBufferTest, CopyCapacityLargerThanSizeExplicitMr) EXPECT_TRUE(buff.memory_resource()->is_equal(*buff_copy.memory_resource())); EXPECT_NE(buff.stream(), buff_copy.stream()); - // EXPECT_TRUE( - // thrust::equal(thrust::device, static_cast(buff.data()), - // static_cast(buff.data()) + buff.size(), - // static_cast(buff_copy.data()))); -} - -TYPED_TEST(DeviceBufferTest, CopyAssignmentToDefault) -{ - rmm::device_buffer const from(this->size, rmm::cuda_stream_view{}, &this->mr); - rmm::device_buffer to{}; - EXPECT_NO_THROW(to = from); - EXPECT_NE(nullptr, to.data()); - EXPECT_NE(nullptr, from.data()); - EXPECT_NE(from.data(), to.data()); - EXPECT_EQ(from.size(), to.size()); - EXPECT_EQ(from.capacity(), to.capacity()); - EXPECT_EQ(from.stream(), to.stream()); - EXPECT_EQ(from.memory_resource(), to.memory_resource()); - // TODO Check contents of memory -} - -TYPED_TEST(DeviceBufferTest, CopyAssignment) -{ - rmm::device_buffer from(this->size, rmm::cuda_stream_view{}, &this->mr); - rmm::device_buffer to(this->size - 1, rmm::cuda_stream_view{}, &this->mr); - EXPECT_NO_THROW(to = from); - EXPECT_NE(nullptr, to.data()); - EXPECT_NE(nullptr, from.data()); - EXPECT_NE(from.data(), to.data()); - EXPECT_EQ(from.size(), to.size()); - EXPECT_EQ(from.capacity(), to.capacity()); - EXPECT_EQ(from.stream(), to.stream()); - EXPECT_EQ(from.memory_resource(), to.memory_resource()); - // TODO Check contents of memory -} - -TYPED_TEST(DeviceBufferTest, CopyAssignmentCapacityLargerThanSize) -{ - rmm::device_buffer from(this->size, rmm::cuda_stream_view{}, &this->mr); - from.resize(from.size() - 1); - rmm::device_buffer to(42, rmm::cuda_stream_view{}, &this->mr); - EXPECT_NO_THROW(to = from); - EXPECT_NE(nullptr, to.data()); - EXPECT_NE(nullptr, from.data()); - EXPECT_NE(from.data(), to.data()); - EXPECT_EQ(from.size(), to.size()); - EXPECT_NE(from.capacity(), - to.capacity()); // copy doesn't copy the larger capacity - EXPECT_EQ(from.stream(), to.stream()); - EXPECT_EQ(from.memory_resource(), to.memory_resource()); - // TODO Check contents of memory -} - -TYPED_TEST(DeviceBufferTest, SelfCopyAssignment) -{ - rmm::device_buffer buff(this->size, rmm::cuda_stream_view{}, &this->mr); - auto p = buff.data(); - auto size = buff.size(); - auto capacity = buff.capacity(); - auto mr = buff.memory_resource(); - auto stream = buff.stream(); - - buff = buff; // self-assignment shouldn't modify the buffer - EXPECT_NE(nullptr, buff.data()); - EXPECT_EQ(p, 
buff.data()); - EXPECT_EQ(size, buff.size()); - EXPECT_EQ(capacity, buff.capacity()); - EXPECT_EQ(stream, buff.stream()); - EXPECT_EQ(mr, buff.memory_resource()); + EXPECT_TRUE(thrust::equal(rmm::exec_policy(buff_copy.stream()), + static_cast(buff.data()), + static_cast(buff.data()) + buff.size(), + static_cast(buff_copy.data()))); } TYPED_TEST(DeviceBufferTest, MoveConstructor) { - rmm::device_buffer buff(this->size, rmm::cuda_stream_view{}, &this->mr); + rmm::device_buffer buff(this->size, rmm::cuda_stream_default, &this->mr); auto p = buff.data(); auto size = buff.size(); auto capacity = buff.capacity(); @@ -340,7 +282,7 @@ TYPED_TEST(DeviceBufferTest, MoveConstructor) EXPECT_EQ(nullptr, buff.data()); EXPECT_EQ(0, buff.size()); EXPECT_EQ(0, buff.capacity()); - EXPECT_EQ(rmm::cuda_stream_view{}, buff.stream()); + EXPECT_EQ(rmm::cuda_stream_default, buff.stream()); EXPECT_NE(nullptr, buff.memory_resource()); } @@ -374,7 +316,7 @@ TYPED_TEST(DeviceBufferTest, MoveConstructorStream) TYPED_TEST(DeviceBufferTest, MoveAssignmentToDefault) { - rmm::device_buffer from(this->size, rmm::cuda_stream_view{}, &this->mr); + rmm::device_buffer from(this->size, rmm::cuda_stream_default, &this->mr); auto p = from.data(); auto size = from.size(); auto capacity = from.capacity(); @@ -396,20 +338,20 @@ TYPED_TEST(DeviceBufferTest, MoveAssignmentToDefault) EXPECT_EQ(nullptr, from.data()); EXPECT_EQ(0, from.size()); EXPECT_EQ(0, from.capacity()); - EXPECT_EQ(rmm::cuda_stream_view{}, from.stream()); + EXPECT_EQ(rmm::cuda_stream_default, from.stream()); EXPECT_NE(nullptr, from.memory_resource()); } TYPED_TEST(DeviceBufferTest, MoveAssignment) { - rmm::device_buffer from(this->size, rmm::cuda_stream_view{}, &this->mr); + rmm::device_buffer from(this->size, rmm::cuda_stream_default, &this->mr); auto p = from.data(); auto size = from.size(); auto capacity = from.capacity(); auto mr = from.memory_resource(); auto stream = from.stream(); - rmm::device_buffer to(this->size - 1, rmm::cuda_stream_view{}, &this->mr); + rmm::device_buffer to(this->size - 1, rmm::cuda_stream_default, &this->mr); EXPECT_NO_THROW(to = std::move(from)); // contents of `from` should be in `to` @@ -424,20 +366,20 @@ TYPED_TEST(DeviceBufferTest, MoveAssignment) EXPECT_EQ(nullptr, from.data()); EXPECT_EQ(0, from.size()); EXPECT_EQ(0, from.capacity()); - EXPECT_EQ(rmm::cuda_stream_view{}, from.stream()); + EXPECT_EQ(rmm::cuda_stream_default, from.stream()); EXPECT_NE(nullptr, from.memory_resource()); } TYPED_TEST(DeviceBufferTest, SelfMoveAssignment) { - rmm::device_buffer buff(this->size, rmm::cuda_stream_view{}, &this->mr); + rmm::device_buffer buff(this->size, rmm::cuda_stream_default, &this->mr); auto p = buff.data(); auto size = buff.size(); auto capacity = buff.capacity(); auto mr = buff.memory_resource(); auto stream = buff.stream(); - buff = buff; // self-assignment shouldn't modify the buffer + buff = std::move(buff); // self-move-assignment shouldn't modify the buffer EXPECT_NE(nullptr, buff.data()); EXPECT_EQ(p, buff.data()); EXPECT_EQ(size, buff.size()); @@ -448,31 +390,43 @@ TYPED_TEST(DeviceBufferTest, SelfMoveAssignment) TYPED_TEST(DeviceBufferTest, ResizeSmaller) { - rmm::device_buffer buff(this->size, rmm::cuda_stream_view{}, &this->mr); + rmm::device_buffer buff(this->size, rmm::cuda_stream_default, &this->mr); + + thrust::sequence(rmm::exec_policy(rmm::cuda_stream_default), + static_cast(buff.data()), + static_cast(buff.data()) + buff.size(), + 0); + auto old_data = buff.data(); + rmm::device_buffer old_content( + old_data, 
buff.size(), rmm::cuda_stream_default, &this->mr); // for comparison + auto new_size = this->size - 1; - buff.resize(new_size); + buff.resize(new_size, rmm::cuda_stream_default); EXPECT_EQ(new_size, buff.size()); EXPECT_EQ(this->size, buff.capacity()); // Capacity should be unchanged // Resizing smaller means the existing allocation should remain unchanged EXPECT_EQ(old_data, buff.data()); - EXPECT_NO_THROW(buff.shrink_to_fit()); + EXPECT_NO_THROW(buff.shrink_to_fit(rmm::cuda_stream_default)); EXPECT_NE(nullptr, buff.data()); // A reallocation should have occured EXPECT_NE(old_data, buff.data()); EXPECT_EQ(new_size, buff.size()); EXPECT_EQ(buff.capacity(), buff.size()); - // TODO Verify device memory contents are equal + EXPECT_TRUE(thrust::equal(rmm::exec_policy(rmm::cuda_stream_default), + static_cast(buff.data()), + static_cast(buff.data()) + buff.size(), + static_cast(old_content.data()))); } TYPED_TEST(DeviceBufferTest, ResizeBigger) { - rmm::device_buffer buff(this->size, rmm::cuda_stream_view{}, &this->mr); + rmm::device_buffer buff(this->size, rmm::cuda_stream_default, &this->mr); auto old_data = buff.data(); auto new_size = this->size + 1; - buff.resize(new_size); + buff.resize(new_size, rmm::cuda_stream_default); EXPECT_EQ(new_size, buff.size()); EXPECT_EQ(new_size, buff.capacity()); // Resizing bigger means the data should point to a new allocation
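For downstream code, the practical effect of this change is that every `device_buffer` operation that allocates, frees, or copies now names its stream explicitly. The sketch below is not part of the patch: `main()`, the variable names, and the chosen sizes are illustrative assumptions, while the constructors, `resize()`, `shrink_to_fit()`, `rmm::cuda_stream`, and `rmm::mr::get_current_device_resource()` are the APIs shown in the diff above. It maps old defaulted-stream call sites onto the new explicit-stream signatures:

```cpp
// Minimal sketch of calling code after this PR; not taken from the patch itself.
#include <rmm/cuda_stream.hpp>
#include <rmm/device_buffer.hpp>
#include <rmm/mr/device/per_device_resource.hpp>

#include <utility>
#include <vector>

int main()
{
  rmm::cuda_stream stream;  // owning stream; stream.view() yields a cuda_stream_view

  // Allocation now always names the stream explicitly (previously it defaulted).
  rmm::device_buffer buff(100, stream.view(), rmm::mr::get_current_device_resource());

  // Host-to-device copy construction also takes an explicit stream.
  std::vector<char> host(100, 0);
  rmm::device_buffer from_host(host.data(), host.size(), stream.view());

  // Copy construction without a stream no longer compiles; pass one explicitly.
  rmm::device_buffer buff_copy(buff, stream.view());

  // Copy assignment is deleted; move assignment is still allowed and frees any
  // previously held memory on the target's most recently used stream.
  rmm::device_buffer target(50, stream.view());
  target = std::move(buff_copy);

  // resize() and shrink_to_fit() now require a stream as well.
  target.resize(200, stream.view());
  target.shrink_to_fit(stream.view());

  stream.synchronize();  // wait for the stream-ordered work to complete
  return 0;
}
```

Requiring the stream at every call site keeps allocation, copy, and deallocation explicitly stream-ordered in the caller instead of silently falling back to a default stream.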