From fae33fa753177dd3fc7108eb751aca55b89185eb Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Tue, 28 Nov 2023 03:20:53 +0000 Subject: [PATCH 01/40] Add host_pinned_memory_resource and tests. --- .../rmm/mr/host_pinned_memory_resource.hpp | 243 ++++++++++++++++++ tests/mr/device/mr_test.hpp | 12 +- tests/mr/device/mr_tests.cpp | 2 + 3 files changed, 256 insertions(+), 1 deletion(-) create mode 100644 include/rmm/mr/host_pinned_memory_resource.hpp diff --git a/include/rmm/mr/host_pinned_memory_resource.hpp b/include/rmm/mr/host_pinned_memory_resource.hpp new file mode 100644 index 000000000..d12f74f9e --- /dev/null +++ b/include/rmm/mr/host_pinned_memory_resource.hpp @@ -0,0 +1,243 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include + +#include + +#include + +#include + +namespace rmm::mr { + +/** + * @brief Memory resource class for allocating pinned host memory. + * + * This class uses CUDA's `cudaHostAlloc` to allocate pinned host memory. It implements the + * `cuda::mr::memory_resource` and `cuda::mr::device_memory_resource` concepts, and + * the `cuda::mr::host_accessible` and `cuda::mr::device_accessible` properties. + */ +class pinned_host_memory_resource { + public: + // Disable clang-tidy complaining about the easily swappable size and alignment parameters + // of allocate and deallocate + // NOLINTBEGIN(bugprone-easily-swappable-parameters) + + /** + * @brief Allocates pinned host memory of size at least \p bytes bytes. + * + * @throws `rmm::out_of_memory` if the requested allocation could not be fulfilled due to to a + * CUDA out of memory error. + * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled due to any other + * reason. + * + * @param bytes The size, in bytes, of the allocation. + * @return Pointer to the newly allocated memory. + */ + static void* allocate(std::size_t bytes) + { + void* ptr{nullptr}; + RMM_CUDA_TRY_ALLOC(cudaHostAlloc(&ptr, bytes, cudaHostAllocDefault)); + return ptr; + } + /** + * @brief Allocates pinned host memory of size at least \p bytes bytes. + * + * @todo Alignment is not implemented yet. + * + * @throws `rmm::out_of_memory` if the requested allocation could not be fulfilled due to to a + * CUDA out of memory error. + * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled due to any other + * reason. + * + * @param bytes The size, in bytes, of the allocation. + * @param alignment Alignment in bytes. + * @return Pointer to the newly allocated memory. + */ + static void* allocate(std::size_t bytes, [[maybe_unused]] std::size_t alignment) + { + return allocate(bytes); + } + + /** + * @brief Deallocate memory pointed to by \p ptr of size \p bytes bytes. + * + * @throws Nothing. + * + * @param ptr Pointer to be deallocated. + * @param bytes Size of the allocation. 
+ */ + static void deallocate(void* ptr, [[maybe_unused]] std::size_t bytes) noexcept + { + RMM_ASSERT_CUDA_SUCCESS(cudaFreeHost(ptr)); + } + + /** + * @brief Deallocate memory pointed to by \p ptr of size \p bytes bytes and alignment \p + * alignment bytes. + * + * @todo Alignment is not implemented yet. + * + * @throws Nothing. + * + * @param ptr Pointer to be deallocated. + * @param bytes Size of the allocation. + * @param alignment Alignment in bytes. + */ + static void deallocate(void* ptr, std::size_t bytes, std::size_t) noexcept + { + return deallocate(ptr, bytes); + } + + /** + * @brief Allocates pinned host memory of size at least \p bytes bytes. + * + * @note Stream argument is ignored and behavior is identical to allocate. + * + * @throws `rmm::out_of_memory` if the requested allocation could not be fulfilled due to to a + * CUDA out of memory error. + * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled due to any other + * error. + * + * @param bytes The size, in bytes, of the allocation. + * @param stream CUDA stream on which to perform the allocation (ignored). + * @return Pointer to the newly allocated memory. + */ + static void* allocate_async(std::size_t bytes, [[maybe_unused]] cuda::stream_ref stream) + { + return allocate(bytes); + } + + /** + * @brief Allocates pinned host memory of size at least \p bytes bytes and alignment \p alignment. + * + * @note Stream argument is ignored and behavior is identical to allocate. + * + * @todo Alignment is not implemented yet. + * + * @throws `rmm::out_of_memory` if the requested allocation could not be fulfilled due to to a + * CUDA out of memory error. + * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled due to any other + * error. + * + * @param bytes The size, in bytes, of the allocation. + * @param alignment Alignment in bytes. + * @param stream CUDA stream on which to perform the allocation (ignored). + * @return Pointer to the newly allocated memory. + */ + static void* allocate_async(std::size_t bytes, + std::size_t alignment, + [[maybe_unused]] cuda::stream_ref stream) + { + return allocate(bytes, alignment); + } + + /** + * @brief Deallocate memory pointed to by \p ptr of size \p bytes bytes. + * + * @note Stream argument is ignored and behavior is identical to deallocate. + * + * @throws Nothing. + * + * @param ptr Pointer to be deallocated. + * @param bytes Size of the allocation. + * @param stream CUDA stream on which to perform the deallocation (ignored). + */ + static void deallocate_async(void* ptr, + std::size_t bytes, + [[maybe_unused]] cuda::stream_ref stream) noexcept + { + return deallocate(ptr, bytes); + } + + /** + * @brief Deallocate memory pointed to by \p ptr of size \p bytes bytes and alignment \p + * alignment bytes. + * + * @note Stream argument is ignored and behavior is identical to deallocate. + * + * @todo Alignment is not implemented yet. + * + * @throws Nothing. + * + * @param ptr Pointer to be deallocated. + * @param bytes Size of the allocation. + * @param alignment Alignment in bytes. + * @param stream CUDA stream on which to perform the deallocation (ignored). + */ + static void deallocate_async(void* ptr, + std::size_t bytes, + std::size_t alignment, + [[maybe_unused]] cuda::stream_ref stream) noexcept + { + return deallocate(ptr, bytes, alignment); + } + // NOLINTEND(bugprone-easily-swappable-parameters) + + /** + * @briefreturn returns true if the specified resource is the same type as this resource, else + * false. 
+ */ + bool operator==(const pinned_host_memory_resource&) const { return true; } + + /** + * @briefreturn returns true if the specified resource is not the same type as this resource, else + * false. + */ + bool operator!=(const pinned_host_memory_resource&) const { return false; } + + /** + * @brief Query whether the resource supports reporting free and available memory. + * + * @return false + */ + static bool supports_get_mem_info() { return false; } + + /** + * @brief Query the total amount of memory and free memory available for allocation by this + * resource. + * + * @throws nothing + * + * @return std::pair containing 0 for both total and free memory. + */ + [[nodiscard]] static std::pair get_mem_info(cuda::stream_ref) noexcept + { + return {0, 0}; + } + + /** + * @brief Enables the `cuda::mr::device_accessible` property + * + * This property declares that a `pinned_host_memory_resource` provides device accessible memory + */ + friend void get_property(pinned_host_memory_resource const&, cuda::mr::device_accessible) noexcept + { + } + + /** + * @brief Enables the `cuda::mr::host_accessible` property + * + * This property declares that a `pinned_host_memory_resource` provides host accessible memory + */ + friend void get_property(pinned_host_memory_resource const&, cuda::mr::host_accessible) noexcept + { + } +}; + +} // namespace rmm::mr diff --git a/tests/mr/device/mr_test.hpp b/tests/mr/device/mr_test.hpp index 03f880e72..fb9b9dd67 100644 --- a/tests/mr/device/mr_test.hpp +++ b/tests/mr/device/mr_test.hpp @@ -31,6 +31,7 @@ #include #include #include +#include #include @@ -52,7 +53,8 @@ inline bool is_device_memory(void* ptr) { cudaPointerAttributes attributes{}; if (cudaSuccess != cudaPointerGetAttributes(&attributes, ptr)) { return false; } - return (attributes.type == cudaMemoryTypeDevice) or (attributes.type == cudaMemoryTypeManaged); + return (attributes.type == cudaMemoryTypeDevice) or (attributes.type == cudaMemoryTypeManaged) or + ((attributes.type == cudaMemoryTypeHost) and (attributes.devicePointer != nullptr)); } enum size_in_bytes : size_t {}; @@ -245,6 +247,8 @@ struct mr_allocation_test : public mr_test {}; /// MR factory functions inline auto make_cuda() { return std::make_shared(); } +inline auto make_host_pinned() { return std::make_shared(); } + inline auto make_cuda_async() { if (rmm::detail::async_alloc::is_supported()) { @@ -260,6 +264,12 @@ inline auto make_pool() return rmm::mr::make_owning_wrapper(make_cuda()); } +inline auto make_host_pinned_pool() +{ + return rmm::mr::make_owning_wrapper( + make_host_pinned(), 2_GiB, 8_GiB); +} + inline auto make_arena() { return rmm::mr::make_owning_wrapper(make_cuda()); diff --git a/tests/mr/device/mr_tests.cpp b/tests/mr/device/mr_tests.cpp index f6141e90f..bf513adda 100644 --- a/tests/mr/device/mr_tests.cpp +++ b/tests/mr/device/mr_tests.cpp @@ -31,6 +31,7 @@ INSTANTIATE_TEST_SUITE_P(ResourceTests, #endif mr_factory{"Managed", &make_managed}, mr_factory{"Pool", &make_pool}, + mr_factory{"HostPinnedPool", &make_host_pinned_pool}, mr_factory{"Arena", &make_arena}, mr_factory{"Binning", &make_binning}, mr_factory{"Fixed_Size", &make_fixed_size}), @@ -45,6 +46,7 @@ INSTANTIATE_TEST_SUITE_P(ResourceAllocationTests, #endif mr_factory{"Managed", &make_managed}, mr_factory{"Pool", &make_pool}, + mr_factory{"HostPinnedPool", &make_host_pinned_pool}, mr_factory{"Arena", &make_arena}, mr_factory{"Binning", &make_binning}), [](auto const& info) { return info.param.name; }); From 15be572073857e820f1976ef96715e56dc6906c7 Mon Sep 17 
00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Tue, 28 Nov 2023 03:50:35 +0000 Subject: [PATCH 02/40] Add missing maybe_unused alignment parameter and fix briefreturn --- include/rmm/mr/host_pinned_memory_resource.hpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/include/rmm/mr/host_pinned_memory_resource.hpp b/include/rmm/mr/host_pinned_memory_resource.hpp index d12f74f9e..ba607e483 100644 --- a/include/rmm/mr/host_pinned_memory_resource.hpp +++ b/include/rmm/mr/host_pinned_memory_resource.hpp @@ -99,7 +99,9 @@ class pinned_host_memory_resource { * @param bytes Size of the allocation. * @param alignment Alignment in bytes. */ - static void deallocate(void* ptr, std::size_t bytes, std::size_t) noexcept + static void deallocate(void* ptr, + std::size_t bytes, + [[maybe_unused]] std::size_t alignment) noexcept { return deallocate(ptr, bytes); } @@ -190,14 +192,14 @@ class pinned_host_memory_resource { // NOLINTEND(bugprone-easily-swappable-parameters) /** - * @briefreturn returns true if the specified resource is the same type as this resource, else - * false. + * @briefreturn{true if the specified resource is the same type as this resource, otherwise + * false.} */ bool operator==(const pinned_host_memory_resource&) const { return true; } /** - * @briefreturn returns true if the specified resource is not the same type as this resource, else - * false. + * @briefreturn{true if the specified resource is not the same type as this resource, otherwise + * false.} */ bool operator!=(const pinned_host_memory_resource&) const { return false; } From 2b373720a4da43fc4b2e8cd6e4da79f2e232abbf Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Wed, 6 Dec 2023 02:24:38 +0000 Subject: [PATCH 03/40] Respond to review feedback: - Consolidate allocate/deallocate functions using default alignment argument. - Add missing includes. --- .../rmm/mr/host_pinned_memory_resource.hpp | 57 ++++++------------- 1 file changed, 17 insertions(+), 40 deletions(-) diff --git a/include/rmm/mr/host_pinned_memory_resource.hpp b/include/rmm/mr/host_pinned_memory_resource.hpp index ba607e483..46be47d9a 100644 --- a/include/rmm/mr/host_pinned_memory_resource.hpp +++ b/include/rmm/mr/host_pinned_memory_resource.hpp @@ -15,13 +15,16 @@ */ #pragma once +#include #include #include +#include #include #include +#include namespace rmm::mr { @@ -41,6 +44,8 @@ class pinned_host_memory_resource { /** * @brief Allocates pinned host memory of size at least \p bytes bytes. * + * @todo Alignment is not implemented yet. + * * @throws `rmm::out_of_memory` if the requested allocation could not be fulfilled due to to a * CUDA out of memory error. * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled due to any other @@ -49,61 +54,31 @@ class pinned_host_memory_resource { * @param bytes The size, in bytes, of the allocation. * @return Pointer to the newly allocated memory. */ - static void* allocate(std::size_t bytes) + static void* allocate( + std::size_t bytes, + [[maybe_unused]] std::size_t alignment = rmm::detail::RMM_DEFAULT_HOST_ALIGNMENT) { void* ptr{nullptr}; RMM_CUDA_TRY_ALLOC(cudaHostAlloc(&ptr, bytes, cudaHostAllocDefault)); return ptr; } - /** - * @brief Allocates pinned host memory of size at least \p bytes bytes. - * - * @todo Alignment is not implemented yet. - * - * @throws `rmm::out_of_memory` if the requested allocation could not be fulfilled due to to a - * CUDA out of memory error. 
- * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled due to any other - * reason. - * - * @param bytes The size, in bytes, of the allocation. - * @param alignment Alignment in bytes. - * @return Pointer to the newly allocated memory. - */ - static void* allocate(std::size_t bytes, [[maybe_unused]] std::size_t alignment) - { - return allocate(bytes); - } /** * @brief Deallocate memory pointed to by \p ptr of size \p bytes bytes. * - * @throws Nothing. - * - * @param ptr Pointer to be deallocated. - * @param bytes Size of the allocation. - */ - static void deallocate(void* ptr, [[maybe_unused]] std::size_t bytes) noexcept - { - RMM_ASSERT_CUDA_SUCCESS(cudaFreeHost(ptr)); - } - - /** - * @brief Deallocate memory pointed to by \p ptr of size \p bytes bytes and alignment \p - * alignment bytes. - * * @todo Alignment is not implemented yet. * * @throws Nothing. * * @param ptr Pointer to be deallocated. * @param bytes Size of the allocation. - * @param alignment Alignment in bytes. */ - static void deallocate(void* ptr, - std::size_t bytes, - [[maybe_unused]] std::size_t alignment) noexcept + static void deallocate( + void* ptr, + [[maybe_unused]] std::size_t bytes, + [[maybe_unused]] std::size_t alignment = rmm::detail::RMM_DEFAULT_HOST_ALIGNMENT) noexcept { - return deallocate(ptr, bytes); + RMM_ASSERT_CUDA_SUCCESS(cudaFreeHost(ptr)); } /** @@ -154,6 +129,8 @@ class pinned_host_memory_resource { * * @note Stream argument is ignored and behavior is identical to deallocate. * + * @todo Alignment is not implemented yet. + * * @throws Nothing. * * @param ptr Pointer to be deallocated. @@ -184,8 +161,8 @@ class pinned_host_memory_resource { */ static void deallocate_async(void* ptr, std::size_t bytes, - std::size_t alignment, - [[maybe_unused]] cuda::stream_ref stream) noexcept + std::size_t alignment = rmm::detail::RMM_DEFAULT_HOST_ALIGNMENT, + [[maybe_unused]] cuda::stream_ref stream = {}) noexcept { return deallocate(ptr, bytes, alignment); } From c43a8c18090a05bc2cb196dec70ddf0b2bf58f41 Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Tue, 19 Dec 2023 01:07:36 -0800 Subject: [PATCH 04/40] Add new util to get a fraction of available device mem, move available_device_memory utility. --- include/rmm/cuda_device.hpp | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/include/rmm/cuda_device.hpp b/include/rmm/cuda_device.hpp index 054bbb920..cf364128e 100644 --- a/include/rmm/cuda_device.hpp +++ b/include/rmm/cuda_device.hpp @@ -15,6 +15,7 @@ */ #pragma once +#include #include #include @@ -102,6 +103,34 @@ inline int get_num_cuda_devices() return num_dev; } +/** + * @brief Returns the available and total device memory in bytes for the current device + * + * @return The available and total device memory in bytes for the current device as a std::pair. + */ +inline std::pair<std::size_t, std::size_t> available_device_memory() +{ + std::size_t free{}; + std::size_t total{}; + RMM_CUDA_TRY(cudaMemGetInfo(&free, &total)); + return {free, total}; +} + +/** + * @brief Returns the specified fraction of free device memory on the current CUDA device, aligned + * to the nearest CUDA allocation size. + * + * @return std::size_t The recommended initial device memory pool size in bytes. + */ +inline std::size_t fraction_of_free_device_memory(double fraction = 1.
/ 2) +{ + auto const [free, total] = rmm::available_device_memory(); + + return rmm::detail::align_up( + std::min(free, static_cast(static_cast(total) * fraction)), + rmm::detail::CUDA_ALLOCATION_ALIGNMENT); +} + /** * @brief RAII class that sets the current CUDA device to the specified device on construction * and restores the previous device on destruction. From d238daa1a24bcc65d26120f6129ebc7b219d6264 Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Tue, 19 Dec 2023 01:11:12 -0800 Subject: [PATCH 05/40] Deprecate old pool_mr ctors (optional initial size) and add new ctors that require an initial pool size. --- .../rmm/mr/device/pool_memory_resource.hpp | 92 +++++++++++++------ 1 file changed, 63 insertions(+), 29 deletions(-) diff --git a/include/rmm/mr/device/pool_memory_resource.hpp b/include/rmm/mr/device/pool_memory_resource.hpp index 20b250524..4b2ffd417 100644 --- a/include/rmm/mr/device/pool_memory_resource.hpp +++ b/include/rmm/mr/device/pool_memory_resource.hpp @@ -110,6 +110,29 @@ class pool_memory_resource final friend class detail::stream_ordered_memory_resource, detail::coalescing_free_list>; + /** + * @brief [DEPRECATED] Construct a `pool_memory_resource` and allocate the initial device memory + * pool using `upstream_mr`. + * + * @throws rmm::logic_error if `upstream_mr == nullptr` + * @throws rmm::logic_error if `initial_pool_size` is neither the default nor aligned to a + * multiple of pool_memory_resource::allocation_alignment bytes. + * @throws rmm::logic_error if `maximum_pool_size` is neither the default nor aligned to a + * multiple of pool_memory_resource::allocation_alignment bytes. + * + * @param upstream_mr The memory_resource from which to allocate blocks for the pool. + * @param initial_pool_size Minimum size, in bytes, of the initial pool. Defaults to zero. + * @param maximum_pool_size Maximum size, in bytes, that the pool can grow to. Defaults to all + * of the available memory from the upstream resource. + */ + [[deprecated("Must specify initial_pool_size")]] // + explicit pool_memory_resource(Upstream* upstream_mr, + thrust::optional initial_pool_size = thrust::nullopt, + thrust::optional maximum_pool_size = thrust::nullopt) + : pool_memory_resource(upstream_mr, initial_pool_size.value_or(0), maximum_pool_size) + { + } + /** * @brief Construct a `pool_memory_resource` and allocate the initial device memory pool using * `upstream_mr`. @@ -121,21 +144,44 @@ class pool_memory_resource final * multiple of pool_memory_resource::allocation_alignment bytes. * * @param upstream_mr The memory_resource from which to allocate blocks for the pool. - * @param initial_pool_size Minimum size, in bytes, of the initial pool. Defaults to half of the - * available memory on the current device. + * @param initial_pool_size Minimum size, in bytes, of the initial pool. Defaults to zero. * @param maximum_pool_size Maximum size, in bytes, that the pool can grow to. Defaults to all - * of the available memory on the current device. + * of the available memory from the upstream resource. 
*/ - explicit pool_memory_resource(Upstream* upstream_mr, + template , int> = 0> + [[deprecated("Must specify initial_pool_size")]] // + explicit pool_memory_resource(Upstream2& upstream_mr, thrust::optional initial_pool_size = thrust::nullopt, thrust::optional maximum_pool_size = thrust::nullopt) + : pool_memory_resource(upstream_mr, initial_pool_size.value_or(0), maximum_pool_size) + { + } + + /** + * @brief Construct a `pool_memory_resource` and allocate the initial device memory pool using + * `upstream_mr`. + * + * @throws rmm::logic_error if `upstream_mr == nullptr` + * @throws rmm::logic_error if `initial_pool_size` is not aligned to a multiple of + * pool_memory_resource::allocation_alignment bytes. + * @throws rmm::logic_error if `maximum_pool_size` is neither the default nor aligned to a + * multiple of pool_memory_resource::allocation_alignment bytes. + * + * @param upstream_mr The memory_resource from which to allocate blocks for the pool. + * @param initial_pool_size Minimum size, in bytes, of the initial pool. + * @param maximum_pool_size Maximum size, in bytes, that the pool can grow to. Defaults to all + * of the available from the upstream resource. + */ + explicit pool_memory_resource(Upstream* upstream_mr, + std::size_t initial_pool_size, + thrust::optional maximum_pool_size = thrust::nullopt) : upstream_mr_{[upstream_mr]() { RMM_EXPECTS(nullptr != upstream_mr, "Unexpected null upstream pointer."); return upstream_mr; }()} { - RMM_EXPECTS(rmm::detail::is_aligned(initial_pool_size.value_or(0), - rmm::detail::CUDA_ALLOCATION_ALIGNMENT), + RMM_EXPECTS(rmm::detail::is_aligned(initial_pool_size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT), "Error, Initial pool size required to be a multiple of 256 bytes"); RMM_EXPECTS(rmm::detail::is_aligned(maximum_pool_size.value_or(0), rmm::detail::CUDA_ALLOCATION_ALIGNMENT), @@ -149,21 +195,20 @@ class pool_memory_resource final * `upstream_mr`. * * @throws rmm::logic_error if `upstream_mr == nullptr` - * @throws rmm::logic_error if `initial_pool_size` is neither the default nor aligned to a - * multiple of pool_memory_resource::allocation_alignment bytes. + * @throws rmm::logic_error if `initial_pool_size` is not aligned to a multiple of + * pool_memory_resource::allocation_alignment bytes. * @throws rmm::logic_error if `maximum_pool_size` is neither the default nor aligned to a * multiple of pool_memory_resource::allocation_alignment bytes. * * @param upstream_mr The memory_resource from which to allocate blocks for the pool. - * @param initial_pool_size Minimum size, in bytes, of the initial pool. Defaults to half of the - * available memory on the current device. + * @param initial_pool_size Minimum size, in bytes, of the initial pool. * @param maximum_pool_size Maximum size, in bytes, that the pool can grow to. Defaults to all - * of the available memory on the current device. + * of the available memory from the upstream resource. 
*/ template , int> = 0> explicit pool_memory_resource(Upstream2& upstream_mr, - thrust::optional initial_pool_size = thrust::nullopt, + std::size_t initial_pool_size, thrust::optional maximum_pool_size = thrust::nullopt) : pool_memory_resource(cuda::std::addressof(upstream_mr), initial_pool_size, maximum_pool_size) { @@ -286,28 +331,17 @@ class pool_memory_resource final * @param maximum_size The optional maximum size for the pool */ // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) - void initialize_pool(thrust::optional initial_size, - thrust::optional maximum_size) + void initialize_pool(std::size_t initial_size, thrust::optional maximum_size) { - auto const try_size = [&]() { - if (not initial_size.has_value()) { - auto const [free, total] = (get_upstream()->supports_get_mem_info()) - ? get_upstream()->get_mem_info(cuda_stream_legacy) - : rmm::detail::available_device_memory(); - return rmm::detail::align_up(std::min(free, total / 2), - rmm::detail::CUDA_ALLOCATION_ALIGNMENT); - } - return initial_size.value(); - }(); - current_pool_size_ = 0; // try_to_expand will set this if it succeeds maximum_pool_size_ = maximum_size; - RMM_EXPECTS(try_size <= maximum_pool_size_.value_or(std::numeric_limits::max()), - "Initial pool size exceeds the maximum pool size!"); + RMM_EXPECTS( + initial_size <= maximum_pool_size_.value_or(std::numeric_limits::max()), + "Initial pool size exceeds the maximum pool size!"); - if (try_size > 0) { - auto const block = try_to_expand(try_size, try_size, cuda_stream_legacy); + if (initial_size > 0) { + auto const block = try_to_expand(initial_size, initial_size, cuda_stream_legacy); this->insert_block(block, cuda_stream_legacy); } } From 3d65d4cf36745e4ea86bebd366a23cd43b41ba94 Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Tue, 19 Dec 2023 01:17:37 -0800 Subject: [PATCH 06/40] Update all tests and resources to use new pool ctors and util --- .../device_uvector/device_uvector_bench.cu | 9 ++++-- .../random_allocations/random_allocations.cpp | 5 +-- include/rmm/detail/cuda_util.hpp | 31 ------------------- .../mr/device/cuda_async_memory_resource.hpp | 4 +-- include/rmm/mr/device/detail/arena.hpp | 3 +- tests/mr/device/arena_mr_tests.cpp | 8 +++-- tests/mr/device/pool_mr_tests.cpp | 12 +++---- tests/mr/host/pinned_pool_mr_tests.cpp | 2 +- 8 files changed, 25 insertions(+), 49 deletions(-) delete mode 100644 include/rmm/detail/cuda_util.hpp diff --git a/benchmarks/device_uvector/device_uvector_bench.cu b/benchmarks/device_uvector/device_uvector_bench.cu index 454db81a5..95f6edefb 100644 --- a/benchmarks/device_uvector/device_uvector_bench.cu +++ b/benchmarks/device_uvector/device_uvector_bench.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ #include "../synchronization/synchronization.hpp" +#include #include #include #include @@ -38,7 +39,8 @@ void BM_UvectorSizeConstruction(benchmark::State& state) { rmm::mr::cuda_memory_resource cuda_mr{}; - rmm::mr::pool_memory_resource mr{&cuda_mr}; + rmm::mr::pool_memory_resource mr{ + &cuda_mr, rmm::fraction_of_free_device_memory(1. 
/ 2)}; rmm::mr::set_current_device_resource(&mr); for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores) @@ -59,7 +61,8 @@ BENCHMARK(BM_UvectorSizeConstruction) void BM_ThrustVectorSizeConstruction(benchmark::State& state) { rmm::mr::cuda_memory_resource cuda_mr{}; - rmm::mr::pool_memory_resource mr{&cuda_mr}; + rmm::mr::pool_memory_resource mr{ + &cuda_mr, rmm::fraction_of_free_device_memory(1. / 2)}; rmm::mr::set_current_device_resource(&mr); for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores) diff --git a/benchmarks/random_allocations/random_allocations.cpp b/benchmarks/random_allocations/random_allocations.cpp index 470442830..acc256365 100644 --- a/benchmarks/random_allocations/random_allocations.cpp +++ b/benchmarks/random_allocations/random_allocations.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ #include +#include #include #include #include @@ -170,7 +171,7 @@ inline auto make_pool() inline auto make_arena() { - auto free = rmm::detail::available_device_memory().first; + auto free = rmm::available_device_memory().first; constexpr auto reserve{64UL << 20}; // Leave some space for CUDA overhead. return rmm::mr::make_owning_wrapper(make_cuda(), free - reserve); } diff --git a/include/rmm/detail/cuda_util.hpp b/include/rmm/detail/cuda_util.hpp deleted file mode 100644 index 613b8d156..000000000 --- a/include/rmm/detail/cuda_util.hpp +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include - -namespace rmm::detail { - -/// Gets the available and total device memory in bytes for the current device -inline std::pair available_device_memory() -{ - std::size_t free{}; - std::size_t total{}; - RMM_CUDA_TRY(cudaMemGetInfo(&free, &total)); - return {free, total}; -} - -} // namespace rmm::detail diff --git a/include/rmm/mr/device/cuda_async_memory_resource.hpp b/include/rmm/mr/device/cuda_async_memory_resource.hpp index de31c7dc4..b33f5452b 100644 --- a/include/rmm/mr/device/cuda_async_memory_resource.hpp +++ b/include/rmm/mr/device/cuda_async_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -120,7 +120,7 @@ class cuda_async_memory_resource final : public device_memory_resource { pool_handle(), cudaMemPoolReuseAllowOpportunistic, &disabled)); } - auto const [free, total] = rmm::detail::available_device_memory(); + auto const [free, total] = rmm::available_device_memory(); // Need an l-value to take address to pass to cudaMemPoolSetAttribute uint64_t threshold = release_threshold.value_or(total); diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index d8da58493..762a704a8 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -16,6 +16,7 @@ #pragma once +#include #include #include #include @@ -692,7 +693,7 @@ class global_arena final { */ constexpr std::size_t default_size() const { - auto const [free, total] = rmm::detail::available_device_memory(); + auto const [free, total] = rmm::available_device_memory(); return free / 2; } diff --git a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp index 7525cac9f..0add2ac7c 100644 --- a/tests/mr/device/arena_mr_tests.cpp +++ b/tests/mr/device/arena_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,6 +15,8 @@ */ #include "../../byte_literals.hpp" + +#include #include #include #include @@ -487,7 +489,7 @@ TEST_F(ArenaTest, SizeSmallerThanSuperblockSize) // NOLINT TEST_F(ArenaTest, AllocateNinetyPercent) // NOLINT { EXPECT_NO_THROW([]() { // NOLINT(cppcoreguidelines-avoid-goto) - auto const free = rmm::detail::available_device_memory().first; + auto const free = rmm::available_device_memory().first; auto const ninety_percent = rmm::detail::align_up(static_cast(static_cast(free) * 0.9), rmm::detail::CUDA_ALLOCATION_ALIGNMENT); @@ -501,7 +503,7 @@ TEST_F(ArenaTest, SmallMediumLarge) // NOLINT arena_mr mr(rmm::mr::get_current_device_resource()); auto* small = mr.allocate(256); auto* medium = mr.allocate(64_MiB); - auto const free = rmm::detail::available_device_memory().first; + auto const free = rmm::available_device_memory().first; auto* large = mr.allocate(free / 3); mr.deallocate(small, 256); mr.deallocate(medium, 64_MiB); diff --git a/tests/mr/device/pool_mr_tests.cpp b/tests/mr/device/pool_mr_tests.cpp index 2f32889d0..6cbf91b2e 100644 --- a/tests/mr/device/pool_mr_tests.cpp +++ b/tests/mr/device/pool_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -39,7 +39,7 @@ using limiting_mr = rmm::mr::limiting_resource_adaptor(static_cast(free) * 0.9), @@ -85,7 +85,7 @@ TEST(PoolTest, TwoLargeBuffers) auto two_large = []() { auto const [free, total] = rmm::detail::available_device_memory(); (void)total; - pool_mr mr{rmm::mr::get_current_device_resource()}; + pool_mr mr{rmm::mr::get_current_device_resource(), rmm::fraction_of_free_device_memory(1. 
/ 2)}; auto* ptr1 = mr.allocate(free / 4); auto* ptr2 = mr.allocate(free / 4); mr.deallocate(ptr1, free / 4); @@ -158,8 +158,8 @@ TEST(PoolTest, NonAlignedPoolSize) TEST(PoolTest, UpstreamDoesntSupportMemInfo) { cuda_mr cuda; - pool_mr mr1(&cuda); - pool_mr mr2(&mr1); + pool_mr mr1(&cuda, 0); + pool_mr mr2(&mr1, 0); auto* ptr = mr2.allocate(1024); mr2.deallocate(ptr, 1024); } diff --git a/tests/mr/host/pinned_pool_mr_tests.cpp b/tests/mr/host/pinned_pool_mr_tests.cpp index dcdae37fa..5d07f35bb 100644 --- a/tests/mr/host/pinned_pool_mr_tests.cpp +++ b/tests/mr/host/pinned_pool_mr_tests.cpp @@ -33,7 +33,7 @@ using pool_mr = rmm::mr::pool_memory_resource; TEST(PinnedPoolTest, ThrowOnNullUpstream) { - auto construct_nullptr = []() { pool_mr mr{nullptr}; }; + auto construct_nullptr = []() { pool_mr mr{nullptr, 0}; }; EXPECT_THROW(construct_nullptr(), rmm::logic_error); } From 66d85b497bbf8880e20f55f84eb2216edccb7eb4 Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Wed, 20 Dec 2023 03:13:56 +0000 Subject: [PATCH 07/40] Rename fraction_of_free_device_memory to percent_of_free_device_memory --- benchmarks/device_uvector/device_uvector_bench.cu | 4 ++-- include/rmm/cuda_device.hpp | 12 ++++++++---- tests/mr/device/pool_mr_tests.cpp | 5 ++--- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/benchmarks/device_uvector/device_uvector_bench.cu b/benchmarks/device_uvector/device_uvector_bench.cu index 95f6edefb..0a0942b3f 100644 --- a/benchmarks/device_uvector/device_uvector_bench.cu +++ b/benchmarks/device_uvector/device_uvector_bench.cu @@ -40,7 +40,7 @@ void BM_UvectorSizeConstruction(benchmark::State& state) { rmm::mr::cuda_memory_resource cuda_mr{}; rmm::mr::pool_memory_resource mr{ - &cuda_mr, rmm::fraction_of_free_device_memory(1. / 2)}; + &cuda_mr, rmm::percent_of_free_device_memory(1. / 2)}; rmm::mr::set_current_device_resource(&mr); for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores) @@ -62,7 +62,7 @@ void BM_ThrustVectorSizeConstruction(benchmark::State& state) { rmm::mr::cuda_memory_resource cuda_mr{}; rmm::mr::pool_memory_resource mr{ - &cuda_mr, rmm::fraction_of_free_device_memory(1. / 2)}; + &cuda_mr, rmm::percent_of_free_device_memory(50)}; rmm::mr::set_current_device_resource(&mr); for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores) diff --git a/include/rmm/cuda_device.hpp b/include/rmm/cuda_device.hpp index cf364128e..73c49cd63 100644 --- a/include/rmm/cuda_device.hpp +++ b/include/rmm/cuda_device.hpp @@ -117,15 +117,19 @@ inline std::pair available_device_memory() } /** - * @brief Returns the specified fraction of free device memory on the current CUDA device, aligned - * to the nearest CUDA allocation size. + * @brief Returns the approximate specified percent of free device memory on the current CUDA + * device, aligned to the nearest CUDA allocation size. * - * @return std::size_t The recommended initial device memory pool size in bytes. + * @param percent The percent of free memory to return. Defaults to 50%. + * + * @return The recommended initial device memory pool size in bytes. */ -inline std::size_t fraction_of_free_device_memory(double fraction = 1. 
/ 2) +inline std::size_t percent_of_free_device_memory(int percent = 50) { auto const [free, total] = rmm::available_device_memory(); + double fraction = static_cast(percent) / 100; + return rmm::detail::align_up( std::min(free, static_cast(static_cast(total) * fraction)), rmm::detail::CUDA_ALLOCATION_ALIGNMENT); diff --git a/tests/mr/device/pool_mr_tests.cpp b/tests/mr/device/pool_mr_tests.cpp index 6cbf91b2e..91ef3a6c0 100644 --- a/tests/mr/device/pool_mr_tests.cpp +++ b/tests/mr/device/pool_mr_tests.cpp @@ -16,7 +16,6 @@ #include #include -#include #include #include #include @@ -83,9 +82,9 @@ TEST(PoolTest, AllocateNinetyPercent) TEST(PoolTest, TwoLargeBuffers) { auto two_large = []() { - auto const [free, total] = rmm::detail::available_device_memory(); + auto const [free, total] = rmm::available_device_memory(); (void)total; - pool_mr mr{rmm::mr::get_current_device_resource(), rmm::fraction_of_free_device_memory(1. / 2)}; + pool_mr mr{rmm::mr::get_current_device_resource(), rmm::percent_of_free_device_memory(50)}; auto* ptr1 = mr.allocate(free / 4); auto* ptr2 = mr.allocate(free / 4); mr.deallocate(ptr1, free / 4); From 265de9bce69993bfeacd7eb8529be0e250410c18 Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Wed, 20 Dec 2023 03:14:21 +0000 Subject: [PATCH 08/40] clang-tidy Ignore 50 and 100 magic numbers --- .clang-tidy | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 9b3f844c9..70a0bea16 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -62,8 +62,8 @@ CheckOptions: value: 'alignment' - key: cppcoreguidelines-avoid-magic-numbers.IgnorePowersOf2IntegerValues value: '1' - - key: readability-magic-numbers.IgnorePowersOf2IntegerValues - value: '1' + - key: cppcoreguidelines-avoid-magic-numbers.IgnoredIntegerValues + value: "0;1;2;3;4;50;100" - key: cppcoreguidelines-avoid-do-while.IgnoreMacros value: 'true' ... From 0be364b28747a9d935476f80a1e03cbc11b1596e Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Wed, 20 Dec 2023 04:01:19 +0000 Subject: [PATCH 09/40] Remove straggler includes of removed file. --- include/rmm/mr/device/cuda_async_view_memory_resource.hpp | 3 +-- include/rmm/mr/device/detail/arena.hpp | 1 - include/rmm/mr/device/pool_memory_resource.hpp | 1 - tests/mr/device/arena_mr_tests.cpp | 1 - tests/mr/host/pinned_pool_mr_tests.cpp | 1 - 5 files changed, 1 insertion(+), 6 deletions(-) diff --git a/include/rmm/mr/device/cuda_async_view_memory_resource.hpp b/include/rmm/mr/device/cuda_async_view_memory_resource.hpp index 825fcab1e..784fcf7d6 100644 --- a/include/rmm/mr/device/cuda_async_view_memory_resource.hpp +++ b/include/rmm/mr/device/cuda_async_view_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -17,7 +17,6 @@ #include #include -#include #include #include #include diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index 762a704a8..418aaa5bd 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/include/rmm/mr/device/pool_memory_resource.hpp b/include/rmm/mr/device/pool_memory_resource.hpp index 4b2ffd417..bd772cc83 100644 --- a/include/rmm/mr/device/pool_memory_resource.hpp +++ b/include/rmm/mr/device/pool_memory_resource.hpp @@ -17,7 +17,6 @@ #include #include -#include #include #include #include diff --git a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp index 0add2ac7c..066a3abbe 100644 --- a/tests/mr/device/arena_mr_tests.cpp +++ b/tests/mr/device/arena_mr_tests.cpp @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/tests/mr/host/pinned_pool_mr_tests.cpp b/tests/mr/host/pinned_pool_mr_tests.cpp index 5d07f35bb..2e798e7e9 100644 --- a/tests/mr/host/pinned_pool_mr_tests.cpp +++ b/tests/mr/host/pinned_pool_mr_tests.cpp @@ -16,7 +16,6 @@ #include #include -#include #include #include #include From 5d66f401a8dad52079e262c6b0c8cf44febd1c3b Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Wed, 20 Dec 2023 03:37:35 -0800 Subject: [PATCH 10/40] Another missed include. --- include/rmm/mr/device/cuda_async_memory_resource.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/include/rmm/mr/device/cuda_async_memory_resource.hpp b/include/rmm/mr/device/cuda_async_memory_resource.hpp index b33f5452b..ac126c4a6 100644 --- a/include/rmm/mr/device/cuda_async_memory_resource.hpp +++ b/include/rmm/mr/device/cuda_async_memory_resource.hpp @@ -17,7 +17,6 @@ #include #include -#include #include #include #include From fae5b736d6c11d6c70a0993ae5789f093f36806b Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Mon, 8 Jan 2024 17:22:28 -0800 Subject: [PATCH 11/40] Add detail::available_device_memory back as an alias of rmm::available_device_memory --- include/rmm/cuda_device.hpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/rmm/cuda_device.hpp b/include/rmm/cuda_device.hpp index 73c49cd63..f1976b4c7 100644 --- a/include/rmm/cuda_device.hpp +++ b/include/rmm/cuda_device.hpp @@ -116,6 +116,12 @@ inline std::pair available_device_memory() return {free, total}; } +// TODO: temporary alias for backward compatibility. Remove once dependent libraries like cuGraph +// and cuDF are fixed to not use the old `rmm::defail::available_device_memory` function. +namespace detail { +const auto available_device_memory = rmm::available_device_memory; +} + /** * @brief Returns the approximate specified percent of free device memory on the current CUDA * device, aligned to the nearest CUDA allocation size. 
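Taken together, patches 04 through 11 replace the pool's implicit default size (half of free device memory, computed inside `pool_memory_resource`) with an explicit `initial_pool_size` argument plus the public `rmm::percent_of_free_device_memory()` helper. A minimal sketch of the caller-side migration, using only APIs introduced in the patches above (illustrative only, not part of the series):

    #include <rmm/cuda_device.hpp>
    #include <rmm/mr/device/cuda_memory_resource.hpp>
    #include <rmm/mr/device/pool_memory_resource.hpp>

    int main()
    {
      rmm::mr::cuda_memory_resource cuda_mr{};

      // Deprecated: pool_memory_resource{&cuda_mr} silently sized the pool to
      // half of the free device memory.
      // New: pass the initial size explicitly. percent_of_free_device_memory(50)
      // reproduces the old default, aligned to the CUDA allocation size.
      rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> pool_mr{
        &cuda_mr, rmm::percent_of_free_device_memory(50)};

      void* ptr = pool_mr.allocate(1024);
      pool_mr.deallocate(ptr, 1024);
      return 0;
    }
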
From 2acf7591826d8f2a412bb43cb261b1ecf54d4c0b Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Mon, 8 Jan 2024 19:39:00 -0800 Subject: [PATCH 12/40] copyright --- benchmarks/device_uvector/device_uvector_bench.cu | 2 +- benchmarks/random_allocations/random_allocations.cpp | 2 +- include/rmm/cuda_device.hpp | 2 +- include/rmm/mr/device/cuda_async_memory_resource.hpp | 2 +- include/rmm/mr/device/cuda_async_view_memory_resource.hpp | 2 +- include/rmm/mr/device/detail/arena.hpp | 2 +- include/rmm/mr/device/pool_memory_resource.hpp | 2 +- tests/mr/device/arena_mr_tests.cpp | 2 +- tests/mr/device/pool_mr_tests.cpp | 2 +- tests/mr/host/pinned_pool_mr_tests.cpp | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) diff --git a/benchmarks/device_uvector/device_uvector_bench.cu b/benchmarks/device_uvector/device_uvector_bench.cu index 0a0942b3f..12256ef3a 100644 --- a/benchmarks/device_uvector/device_uvector_bench.cu +++ b/benchmarks/device_uvector/device_uvector_bench.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/benchmarks/random_allocations/random_allocations.cpp b/benchmarks/random_allocations/random_allocations.cpp index acc256365..ed8ef1074 100644 --- a/benchmarks/random_allocations/random_allocations.cpp +++ b/benchmarks/random_allocations/random_allocations.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/include/rmm/cuda_device.hpp b/include/rmm/cuda_device.hpp index f1976b4c7..fce931275 100644 --- a/include/rmm/cuda_device.hpp +++ b/include/rmm/cuda_device.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/include/rmm/mr/device/cuda_async_memory_resource.hpp b/include/rmm/mr/device/cuda_async_memory_resource.hpp index ac126c4a6..f8295c6f6 100644 --- a/include/rmm/mr/device/cuda_async_memory_resource.hpp +++ b/include/rmm/mr/device/cuda_async_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/include/rmm/mr/device/cuda_async_view_memory_resource.hpp b/include/rmm/mr/device/cuda_async_view_memory_resource.hpp index 784fcf7d6..562944669 100644 --- a/include/rmm/mr/device/cuda_async_view_memory_resource.hpp +++ b/include/rmm/mr/device/cuda_async_view_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index 418aaa5bd..a868db21e 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/include/rmm/mr/device/pool_memory_resource.hpp b/include/rmm/mr/device/pool_memory_resource.hpp index bd772cc83..6dc61a8a7 100644 --- a/include/rmm/mr/device/pool_memory_resource.hpp +++ b/include/rmm/mr/device/pool_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp index 066a3abbe..3168252ea 100644 --- a/tests/mr/device/arena_mr_tests.cpp +++ b/tests/mr/device/arena_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/tests/mr/device/pool_mr_tests.cpp b/tests/mr/device/pool_mr_tests.cpp index 91ef3a6c0..46e0ee988 100644 --- a/tests/mr/device/pool_mr_tests.cpp +++ b/tests/mr/device/pool_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/tests/mr/host/pinned_pool_mr_tests.cpp b/tests/mr/host/pinned_pool_mr_tests.cpp index 2e798e7e9..6efe08463 100644 --- a/tests/mr/host/pinned_pool_mr_tests.cpp +++ b/tests/mr/host/pinned_pool_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From b6edcd196d88d7415442ee8a7ec103e920239909 Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Tue, 9 Jan 2024 05:50:28 +0000 Subject: [PATCH 13/40] Rename file to match class and remove default alignment from some allocation/deallocate functions. 
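With the rename, the public header path matches the class name. A minimal usage sketch of the resource as of this patch (illustrative only; alignment arguments are accepted but not yet honored, per the @todo notes, and stream arguments are ignored):

    #include <rmm/mr/pinned_host_memory_resource.hpp>

    #include <cuda/stream_ref>
    #include <cuda_runtime_api.h>

    int main()
    {
      rmm::mr::pinned_host_memory_resource mr;

      // Page-locked host allocation backed by cudaHostAlloc; the memory is
      // both host-accessible and device-accessible.
      void* ptr = mr.allocate(4096);
      mr.deallocate(ptr, 4096);

      // The *_async overloads take a stream but ignore it, forwarding to the
      // synchronous paths above.
      cuda::stream_ref stream{cudaStream_t{}};  // legacy default stream
      void* ptr2 = mr.allocate_async(4096, stream);
      mr.deallocate_async(ptr2, 4096, stream);
      return 0;
    }
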
--- ...ed_memory_resource.hpp => pinned_host_memory_resource.hpp} | 4 ++-- tests/mr/device/mr_test.hpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) rename include/rmm/mr/{host_pinned_memory_resource.hpp => pinned_host_memory_resource.hpp} (98%) diff --git a/include/rmm/mr/host_pinned_memory_resource.hpp b/include/rmm/mr/pinned_host_memory_resource.hpp similarity index 98% rename from include/rmm/mr/host_pinned_memory_resource.hpp rename to include/rmm/mr/pinned_host_memory_resource.hpp index 46be47d9a..fadf241fc 100644 --- a/include/rmm/mr/host_pinned_memory_resource.hpp +++ b/include/rmm/mr/pinned_host_memory_resource.hpp @@ -161,8 +161,8 @@ class pinned_host_memory_resource { */ static void deallocate_async(void* ptr, std::size_t bytes, - std::size_t alignment = rmm::detail::RMM_DEFAULT_HOST_ALIGNMENT, - [[maybe_unused]] cuda::stream_ref stream = {}) noexcept + std::size_t alignment, + [[maybe_unused]] cuda::stream_ref stream) noexcept { return deallocate(ptr, bytes, alignment); } diff --git a/tests/mr/device/mr_test.hpp b/tests/mr/device/mr_test.hpp index fb9b9dd67..0cc774a64 100644 --- a/tests/mr/device/mr_test.hpp +++ b/tests/mr/device/mr_test.hpp @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include From 782ff55263bd9946b88df88fea30ec26e01a437c Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Tue, 9 Jan 2024 06:05:31 +0000 Subject: [PATCH 14/40] document (and deprecate) available_device_memory alias --- include/rmm/cuda_device.hpp | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/include/rmm/cuda_device.hpp b/include/rmm/cuda_device.hpp index fce931275..c7f94dc37 100644 --- a/include/rmm/cuda_device.hpp +++ b/include/rmm/cuda_device.hpp @@ -116,11 +116,19 @@ inline std::pair available_device_memory() return {free, total}; } -// TODO: temporary alias for backward compatibility. Remove once dependent libraries like cuGraph -// and cuDF are fixed to not use the old `rmm::defail::available_device_memory` function. namespace detail { + +/** + * @brief Returns the available and total device memory in bytes for the current device + * + * @deprecated Use rmm::available_device_memory instead. + * + * @return The available and total device memory in bytes for the current device as a std::pair. + */ +[[deprecated("Use `rmm::available_device_memory` instead.")]] // const auto available_device_memory = rmm::available_device_memory; -} + +} // namespace detail /** * @brief Returns the approximate specified percent of free device memory on the current CUDA From ce58ff5bf67ddbcbfab42d9ad1fea38081b31526 Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Tue, 9 Jan 2024 08:21:27 +0000 Subject: [PATCH 15/40] Add documentation for alignment params --- include/rmm/mr/pinned_host_memory_resource.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/rmm/mr/pinned_host_memory_resource.hpp b/include/rmm/mr/pinned_host_memory_resource.hpp index fadf241fc..d26481847 100644 --- a/include/rmm/mr/pinned_host_memory_resource.hpp +++ b/include/rmm/mr/pinned_host_memory_resource.hpp @@ -52,6 +52,8 @@ class pinned_host_memory_resource { * reason. * * @param bytes The size, in bytes, of the allocation. + * @param alignment Alignment in bytes. Default alignment is used if unspecified. + * * @return Pointer to the newly allocated memory. */ static void* allocate( @@ -72,6 +74,7 @@ class pinned_host_memory_resource { * * @param ptr Pointer to be deallocated. 
* @param bytes Size of the allocation. + * @param alignment Alignment in bytes. Default alignment is used if unspecified. */ static void deallocate( void* ptr, From 0b4c968f5c9d72238d3e9e8d23de698f2de11891 Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Tue, 9 Jan 2024 10:13:37 +0000 Subject: [PATCH 16/40] Respond to feedback from @wence- --- benchmarks/device_uvector/device_uvector_bench.cu | 2 +- include/rmm/cuda_device.hpp | 7 ++++--- include/rmm/mr/device/pool_memory_resource.hpp | 13 ++++++------- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/benchmarks/device_uvector/device_uvector_bench.cu b/benchmarks/device_uvector/device_uvector_bench.cu index 12256ef3a..8b7f9a5ba 100644 --- a/benchmarks/device_uvector/device_uvector_bench.cu +++ b/benchmarks/device_uvector/device_uvector_bench.cu @@ -40,7 +40,7 @@ void BM_UvectorSizeConstruction(benchmark::State& state) { rmm::mr::cuda_memory_resource cuda_mr{}; rmm::mr::pool_memory_resource mr{ - &cuda_mr, rmm::percent_of_free_device_memory(1. / 2)}; + &cuda_mr, rmm::percent_of_free_device_memory(50)}; rmm::mr::set_current_device_resource(&mr); for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores) diff --git a/include/rmm/cuda_device.hpp b/include/rmm/cuda_device.hpp index c7f94dc37..05028fe9f 100644 --- a/include/rmm/cuda_device.hpp +++ b/include/rmm/cuda_device.hpp @@ -131,8 +131,9 @@ const auto available_device_memory = rmm::available_device_memory; } // namespace detail /** - * @brief Returns the approximate specified percent of free device memory on the current CUDA - * device, aligned to the nearest CUDA allocation size. + * @brief Returns the approximate specified percent of total device memory on the current CUDA + * device or the total free device memory (whichever is smaller), aligned to the nearest CUDA + * allocation size. * * @param percent The percent of free memory to return. Defaults to 50%. * @@ -142,7 +143,7 @@ inline std::size_t percent_of_free_device_memory(int percent = 50) { auto const [free, total] = rmm::available_device_memory(); - double fraction = static_cast(percent) / 100; + double const fraction = static_cast(percent) / 100.0; return rmm::detail::align_up( std::min(free, static_cast(static_cast(total) * fraction)), diff --git a/include/rmm/mr/device/pool_memory_resource.hpp b/include/rmm/mr/device/pool_memory_resource.hpp index 6dc61a8a7..0e6f8ecbe 100644 --- a/include/rmm/mr/device/pool_memory_resource.hpp +++ b/include/rmm/mr/device/pool_memory_resource.hpp @@ -113,6 +113,8 @@ class pool_memory_resource final * @brief [DEPRECATED] Construct a `pool_memory_resource` and allocate the initial device memory * pool using `upstream_mr`. * + * @deprecated Use the constructor that takes an explicit initial pool size instead. + * * @throws rmm::logic_error if `upstream_mr == nullptr` * @throws rmm::logic_error if `initial_pool_size` is neither the default nor aligned to a * multiple of pool_memory_resource::allocation_alignment bytes. @@ -136,6 +138,8 @@ class pool_memory_resource final * @brief Construct a `pool_memory_resource` and allocate the initial device memory pool using * `upstream_mr`. * + * @deprecated Use the constructor that takes an explicit initial size instead. + * * @throws rmm::logic_error if `upstream_mr == nullptr` * @throws rmm::logic_error if `initial_pool_size` is neither the default nor aligned to a * multiple of pool_memory_resource::allocation_alignment bytes. 
@@ -320,16 +324,11 @@ class pool_memory_resource final /** * @brief Allocate initial memory for the pool * - * If initial_size is unset, then queries the upstream memory resource for available memory if - * upstream supports `get_mem_info`, or queries the device (using CUDA API) for available memory - * if not. Then attempts to initialize to half the available memory. - * - * If initial_size is set, then tries to initialize the pool to that size. - * * @param initial_size The optional initial size for the pool * @param maximum_size The optional maximum size for the pool + * + * @throws logic_error if @p initial_size is larger than @p maximum_size (if set). */ - // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) void initialize_pool(std::size_t initial_size, thrust::optional maximum_size) { current_pool_size_ = 0; // try_to_expand will set this if it succeeds From 4f9147846c706febb9b72060ead8720d9bce8972 Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Tue, 9 Jan 2024 13:11:57 +0000 Subject: [PATCH 17/40] Include doxygen deprecated output in docs --- python/docs/librmm_docs/deprecated.rst | 5 +++++ python/docs/librmm_docs/index.rst | 1 + 2 files changed, 6 insertions(+) create mode 100644 python/docs/librmm_docs/deprecated.rst diff --git a/python/docs/librmm_docs/deprecated.rst b/python/docs/librmm_docs/deprecated.rst new file mode 100644 index 000000000..b5ed90caa --- /dev/null +++ b/python/docs/librmm_docs/deprecated.rst @@ -0,0 +1,5 @@ +Deprecated functionality +======================== + +.. doxygenpage:: deprecated + :content-only: diff --git a/python/docs/librmm_docs/index.rst b/python/docs/librmm_docs/index.rst index 6afd94d2e..ba8034dcb 100644 --- a/python/docs/librmm_docs/index.rst +++ b/python/docs/librmm_docs/index.rst @@ -17,6 +17,7 @@ librmm Documentation cuda_streams errors logging + deprecated .. doxygennamespace:: rmm From f5818091c5a7090a6f4b1a2892c1908e0689c065 Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Tue, 9 Jan 2024 13:12:31 +0000 Subject: [PATCH 18/40] Minor docstring fixes --- include/rmm/cuda_device.hpp | 2 +- include/rmm/mr/device/pool_memory_resource.hpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/rmm/cuda_device.hpp b/include/rmm/cuda_device.hpp index 05028fe9f..ebf50d830 100644 --- a/include/rmm/cuda_device.hpp +++ b/include/rmm/cuda_device.hpp @@ -121,7 +121,7 @@ namespace detail { /** * @brief Returns the available and total device memory in bytes for the current device * - * @deprecated Use rmm::available_device_memory instead. + * @deprecated Use rmm::available_device_memory() instead. * * @return The available and total device memory in bytes for the current device as a std::pair. */ diff --git a/include/rmm/mr/device/pool_memory_resource.hpp b/include/rmm/mr/device/pool_memory_resource.hpp index 0e6f8ecbe..b30e70a65 100644 --- a/include/rmm/mr/device/pool_memory_resource.hpp +++ b/include/rmm/mr/device/pool_memory_resource.hpp @@ -110,7 +110,7 @@ class pool_memory_resource final detail::coalescing_free_list>; /** - * @brief [DEPRECATED] Construct a `pool_memory_resource` and allocate the initial device memory + * @brief Construct a `pool_memory_resource` and allocate the initial device memory * pool using `upstream_mr`. * * @deprecated Use the constructor that takes an explicit initial pool size instead. @@ -138,7 +138,7 @@ class pool_memory_resource final * @brief Construct a `pool_memory_resource` and allocate the initial device memory pool using * `upstream_mr`. 
* - * @deprecated Use the constructor that takes an explicit initial size instead. + * @deprecated Use the constructor that takes an explicit initial pool size instead. * * @throws rmm::logic_error if `upstream_mr == nullptr` * @throws rmm::logic_error if `initial_pool_size` is neither the default nor aligned to a From bafd70aa464f10e2b3e6745465f6b3198ba77e01 Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Wed, 10 Jan 2024 03:14:30 +0000 Subject: [PATCH 19/40] Don't use zero for default size in test. --- tests/mr/host/pinned_pool_mr_tests.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/mr/host/pinned_pool_mr_tests.cpp b/tests/mr/host/pinned_pool_mr_tests.cpp index 6efe08463..d10b85e72 100644 --- a/tests/mr/host/pinned_pool_mr_tests.cpp +++ b/tests/mr/host/pinned_pool_mr_tests.cpp @@ -32,7 +32,7 @@ using pool_mr = rmm::mr::pool_memory_resource<rmm::mr::pinned_memory_resource>; TEST(PinnedPoolTest, ThrowOnNullUpstream) { - auto construct_nullptr = []() { pool_mr mr{nullptr, 0}; }; + auto construct_nullptr = []() { pool_mr mr{nullptr, 1024}; }; EXPECT_THROW(construct_nullptr(), rmm::logic_error); } From a77d215bb62d402c50eb85de33e2e0df57538bfc Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Wed, 10 Jan 2024 03:14:45 +0000 Subject: [PATCH 20/40] Add non-detail alignment utilities --- include/rmm/aligned.hpp | 94 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 include/rmm/aligned.hpp diff --git a/include/rmm/aligned.hpp b/include/rmm/aligned.hpp new file mode 100644 index 000000000..bc3c2a623 --- /dev/null +++ b/include/rmm/aligned.hpp @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <cassert> +#include <cstddef> + +namespace rmm { + +/** + * @brief Default alignment used for host memory allocated by RMM. + * + */ +static constexpr std::size_t RMM_DEFAULT_HOST_ALIGNMENT{alignof(std::max_align_t)}; + +/** + * @brief Default alignment used for CUDA memory allocation. + * + */ +static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256}; + +/** + * @brief Returns whether or not `n` is a power of 2. + * + */ +constexpr bool is_pow2(std::size_t value) { return (0 == (value & (value - 1))); } + +/** + * @brief Returns whether or not `alignment` is a valid memory alignment.
+ * + */ +constexpr bool is_supported_alignment(std::size_t alignment) { return is_pow2(alignment); } + +/** + * @brief Align up to nearest multiple of specified power of 2 + * + * @param[in] v value to align + * @param[in] alignment amount, in bytes, must be a power of 2 + * + * @return Return the aligned value, as one would expect + */ +constexpr std::size_t align_up(std::size_t value, std::size_t alignment) noexcept +{ + assert(is_supported_alignment(alignment)); + return (value + (alignment - 1)) & ~(alignment - 1); +} + +/** + * @brief Align down to the nearest multiple of specified power of 2 + * + * @param[in] v value to align + * @param[in] alignment amount, in bytes, must be a power of 2 + * + * @return Return the aligned value, as one would expect + */ +constexpr std::size_t align_down(std::size_t value, std::size_t alignment) noexcept +{ + assert(is_supported_alignment(alignment)); + return value & ~(alignment - 1); +} + +/** + * @brief Checks whether a value is aligned to a multiple of a specified power of 2 + * + * @param[in] v value to check for alignment + * @param[in] alignment amount, in bytes, must be a power of 2 + * + * @return true if aligned + */ +constexpr bool is_aligned(std::size_t value, std::size_t alignment) noexcept +{ + assert(is_supported_alignment(alignment)); + return value == align_down(value, alignment); +} + +inline bool is_pointer_aligned(void* ptr, std::size_t alignment = CUDA_ALLOCATION_ALIGNMENT) +{ + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + return is_aligned(reinterpret_cast<std::ptrdiff_t>(ptr), alignment); +} + +} // namespace rmm From 07dffa391328a7f3724c221dd83384d93e8bef09 Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Wed, 10 Jan 2024 03:38:37 +0000 Subject: [PATCH 21/40] Duplicate (for now) alignment utilities in rmm:: namespace since outside libraries use them --- include/rmm/aligned.hpp | 8 +++++--- include/rmm/cuda_device.hpp | 6 +++--- include/rmm/detail/aligned.hpp | 8 ++++---- .../mr/device/aligned_resource_adaptor.hpp | 18 +++++++++--------- .../rmm/mr/device/arena_memory_resource.hpp | 7 ++++--- include/rmm/mr/device/detail/arena.hpp | 6 +++--- .../detail/stream_ordered_memory_resource.hpp | 8 ++++---- .../mr/device/fixed_size_memory_resource.hpp | 9 ++++----- .../mr/device/limiting_resource_adaptor.hpp | 10 +++++----- .../rmm/mr/device/pool_memory_resource.hpp | 19 +++++++++---------- include/rmm/mr/host/new_delete_resource.hpp | 12 ++++++------ .../rmm/mr/host/pinned_memory_resource.hpp | 10 +++++----- tests/mr/device/aligned_mr_tests.cpp | 7 ++++--- tests/mr/device/arena_mr_tests.cpp | 9 ++++----- tests/mr/device/mr_ref_test.hpp | 16 ++++++++-------- tests/mr/device/mr_test.hpp | 12 ++++++------ tests/mr/device/pool_mr_tests.cpp | 7 +++---- tests/mr/host/mr_ref_tests.cpp | 6 +++--- tests/mr/host/mr_tests.cpp | 6 +++--- 19 files changed, 92 insertions(+), 92 deletions(-) diff --git a/include/rmm/aligned.hpp b/include/rmm/aligned.hpp index bc3c2a623..ce762094f 100644 --- a/include/rmm/aligned.hpp +++ b/include/rmm/aligned.hpp @@ -14,6 +14,8 @@ * limitations under the License.
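Since the utilities added above are all `constexpr`, their semantics can be checked at compile time. A short illustration (mine, not from the patch):

```c++
#include <rmm/aligned.hpp>

static_assert(rmm::is_pow2(256));
static_assert(!rmm::is_pow2(384));                 // 384 = 256 + 128
static_assert(rmm::align_up(1000, 256) == 1024);   // next multiple of 256
static_assert(rmm::align_down(1000, 256) == 768);  // previous multiple of 256
static_assert(rmm::is_aligned(1024, 256));
```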
*/ +#pragma once + #include #include @@ -46,7 +48,7 @@ constexpr bool is_supported_alignment(std::size_t alignment) { return is_pow2(al /** * @brief Align up to nearest multiple of specified power of 2 * - * @param[in] v value to align + * @param[in] value value to align * @param[in] alignment amount, in bytes, must be a power of 2 * * @return Return the aligned value, as one would expect @@ -60,7 +62,7 @@ constexpr std::size_t align_up(std::size_t value, std::size_t alignment) noexcep /** * @brief Align down to the nearest multiple of specified power of 2 * - * @param[in] v value to align + * @param[in] value value to align * @param[in] alignment amount, in bytes, must be a power of 2 * * @return Return the aligned value, as one would expect @@ -74,7 +76,7 @@ constexpr std::size_t align_down(std::size_t value, std::size_t alignment) noexc /** * @brief Checks whether a value is aligned to a multiple of a specified power of 2 * - * @param[in] v value to check for alignment + * @param[in] value value to check for alignment * @param[in] alignment amount, in bytes, must be a power of 2 * * @return true if aligned diff --git a/include/rmm/cuda_device.hpp b/include/rmm/cuda_device.hpp index 05028fe9f..89fa1479f 100644 --- a/include/rmm/cuda_device.hpp +++ b/include/rmm/cuda_device.hpp @@ -15,7 +15,7 @@ */ #pragma once -#include +#include #include #include @@ -145,9 +145,9 @@ inline std::size_t percent_of_free_device_memory(int percent = 50) double const fraction = static_cast(percent) / 100.0; - return rmm::detail::align_up( + return rmm::align_up( std::min(free, static_cast(static_cast(total) * fraction)), - rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + rmm::CUDA_ALLOCATION_ALIGNMENT); } /** diff --git a/include/rmm/detail/aligned.hpp b/include/rmm/detail/aligned.hpp index 321be53b5..5b9c11474 100644 --- a/include/rmm/detail/aligned.hpp +++ b/include/rmm/detail/aligned.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -51,7 +51,7 @@ constexpr bool is_supported_alignment(std::size_t alignment) { return is_pow2(al /** * @brief Align up to nearest multiple of specified power of 2 * - * @param[in] v value to align + * @param[in] value value to align * @param[in] alignment amount, in bytes, must be a power of 2 * * @return Return the aligned value, as one would expect @@ -65,7 +65,7 @@ constexpr std::size_t align_up(std::size_t value, std::size_t alignment) noexcep /** * @brief Align down to the nearest multiple of specified power of 2 * - * @param[in] v value to align + * @param[in] value value to align * @param[in] alignment amount, in bytes, must be a power of 2 * * @return Return the aligned value, as one would expect @@ -79,7 +79,7 @@ constexpr std::size_t align_down(std::size_t value, std::size_t alignment) noexc /** * @brief Checks whether a value is aligned to a multiple of a specified power of 2 * - * @param[in] v value to check for alignment + * @param[in] value value to check for alignment * @param[in] alignment amount, in bytes, must be a power of 2 * * @return true if aligned diff --git a/include/rmm/mr/device/aligned_resource_adaptor.hpp b/include/rmm/mr/device/aligned_resource_adaptor.hpp index 05e9915cc..be7c3036c 100644 --- a/include/rmm/mr/device/aligned_resource_adaptor.hpp +++ b/include/rmm/mr/device/aligned_resource_adaptor.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,8 +15,8 @@ */ #pragma once +#include #include -#include #include #include @@ -65,12 +65,12 @@ class aligned_resource_adaptor final : public device_memory_resource { * are aligned. 
*/ explicit aligned_resource_adaptor(Upstream* upstream, - std::size_t alignment = rmm::detail::CUDA_ALLOCATION_ALIGNMENT, + std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT, std::size_t alignment_threshold = default_alignment_threshold) : upstream_{upstream}, alignment_{alignment}, alignment_threshold_{alignment_threshold} { RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); - RMM_EXPECTS(rmm::detail::is_supported_alignment(alignment), + RMM_EXPECTS(rmm::is_supported_alignment(alignment), "Allocation alignment is not a power of 2."); } @@ -127,14 +127,14 @@ class aligned_resource_adaptor final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - if (alignment_ == rmm::detail::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) { + if (alignment_ == rmm::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) { return upstream_->allocate(bytes, stream); } auto const size = upstream_allocation_size(bytes); void* pointer = upstream_->allocate(size, stream); // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) auto const address = reinterpret_cast(pointer); - auto const aligned_address = rmm::detail::align_up(address, alignment_); + auto const aligned_address = rmm::align_up(address, alignment_); // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast,performance-no-int-to-ptr) void* aligned_pointer = reinterpret_cast(aligned_address); if (pointer != aligned_pointer) { @@ -153,7 +153,7 @@ class aligned_resource_adaptor final : public device_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - if (alignment_ == rmm::detail::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) { + if (alignment_ == rmm::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) { upstream_->deallocate(ptr, bytes, stream); } else { { @@ -208,8 +208,8 @@ class aligned_resource_adaptor final : public device_memory_resource { */ std::size_t upstream_allocation_size(std::size_t bytes) const { - auto const aligned_size = rmm::detail::align_up(bytes, alignment_); - return aligned_size + alignment_ - rmm::detail::CUDA_ALLOCATION_ALIGNMENT; + auto const aligned_size = rmm::align_up(bytes, alignment_); + return aligned_size + alignment_ - rmm::CUDA_ALLOCATION_ALIGNMENT; } Upstream* upstream_; ///< The upstream resource used for satisfying allocation requests diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp index 929b8454f..1b821b440 100644 --- a/include/rmm/mr/device/arena_memory_resource.hpp +++ b/include/rmm/mr/device/arena_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
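For context, a hedged sketch of constructing the adaptor shown above once the constant moves to the public namespace; the upstream resource, alignment, and threshold values here are illustrative, not from the patch:

```c++
#include <rmm/mr/device/aligned_resource_adaptor.hpp>
#include <rmm/mr/device/cuda_memory_resource.hpp>

int main()
{
  rmm::mr::cuda_memory_resource upstream{};
  // Align allocations of 64 KiB or more to 4 KiB boundaries; smaller requests
  // pass through to the upstream resource with the default 256-byte alignment.
  rmm::mr::aligned_resource_adaptor<rmm::mr::cuda_memory_resource> aligned_mr{
    &upstream, 4096, 65536};
  void* ptr = aligned_mr.allocate(65536);
  aligned_mr.deallocate(ptr, 65536);
  return 0;
}
```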
@@ -15,6 +15,7 @@ */ #pragma once +#include #include #include #include @@ -145,7 +146,7 @@ class arena_memory_resource final : public device_memory_resource { #ifdef RMM_ARENA_USE_SIZE_CLASSES bytes = rmm::mr::detail::arena::align_to_size_class(bytes); #else - bytes = rmm::detail::align_up(bytes, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + bytes = rmm::align_up(bytes, rmm::CUDA_ALLOCATION_ALIGNMENT); #endif auto& arena = get_arena(stream); @@ -195,7 +196,7 @@ class arena_memory_resource final : public device_memory_resource { #ifdef RMM_ARENA_USE_SIZE_CLASSES bytes = rmm::mr::detail::arena::align_to_size_class(bytes); #else - bytes = rmm::detail::align_up(bytes, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + bytes = rmm::align_up(bytes, rmm::CUDA_ALLOCATION_ALIGNMENT); #endif auto& arena = get_arena(stream); diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index a868db21e..c7965ca34 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -16,9 +16,9 @@ #pragma once +#include #include #include -#include #include #include #include @@ -508,8 +508,8 @@ class global_arena final { : upstream_mr_{upstream_mr} { RMM_EXPECTS(nullptr != upstream_mr_, "Unexpected null upstream pointer."); - auto const size = rmm::detail::align_down(arena_size.value_or(default_size()), - rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + auto const size = + rmm::align_down(arena_size.value_or(default_size()), rmm::CUDA_ALLOCATION_ALIGNMENT); RMM_EXPECTS(size >= superblock::minimum_size, "Arena size smaller than minimum superblock size."); initialize(size); diff --git a/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp b/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp index a57bf1c6d..1d6829cb5 100644 --- a/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp +++ b/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,8 +15,8 @@ */ #pragma once +#include #include -#include #include #include #include @@ -207,7 +207,7 @@ class stream_ordered_memory_resource : public crtp, public device_ auto stream_event = get_event(stream); - size = rmm::detail::align_up(size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + size = rmm::align_up(size, rmm::CUDA_ALLOCATION_ALIGNMENT); RMM_EXPECTS(size <= this->underlying().get_maximum_allocation_size(), "Maximum allocation size exceeded", rmm::out_of_memory); @@ -241,7 +241,7 @@ class stream_ordered_memory_resource : public crtp, public device_ lock_guard lock(mtx_); auto stream_event = get_event(stream); - size = rmm::detail::align_up(size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + size = rmm::align_up(size, rmm::CUDA_ALLOCATION_ALIGNMENT); auto const block = this->underlying().free_block(ptr, size); // TODO: cudaEventRecord has significant overhead on deallocations. For the non-PTDS case diff --git a/include/rmm/mr/device/fixed_size_memory_resource.hpp b/include/rmm/mr/device/fixed_size_memory_resource.hpp index 01fb8a6bc..91cc95c53 100644 --- a/include/rmm/mr/device/fixed_size_memory_resource.hpp +++ b/include/rmm/mr/device/fixed_size_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,8 +15,8 @@ */ #pragma once +#include #include -#include #include #include #include @@ -77,7 +77,7 @@ class fixed_size_memory_resource std::size_t block_size = default_block_size, std::size_t blocks_to_preallocate = default_blocks_to_preallocate) : upstream_mr_{upstream_mr}, - block_size_{rmm::detail::align_up(block_size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT)}, + block_size_{rmm::align_up(block_size, rmm::CUDA_ALLOCATION_ALIGNMENT)}, upstream_chunk_size_{block_size * blocks_to_preallocate} { // allocate initial blocks and insert into free list @@ -207,8 +207,7 @@ class fixed_size_memory_resource { // Deallocating a fixed-size block just inserts it in the free list, which is // handled by the parent class - RMM_LOGGING_ASSERT(rmm::detail::align_up(size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT) <= - block_size_); + RMM_LOGGING_ASSERT(rmm::align_up(size, rmm::CUDA_ALLOCATION_ALIGNMENT) <= block_size_); return block_type{ptr}; } diff --git a/include/rmm/mr/device/limiting_resource_adaptor.hpp b/include/rmm/mr/device/limiting_resource_adaptor.hpp index 6573956d0..2123c3cac 100644 --- a/include/rmm/mr/device/limiting_resource_adaptor.hpp +++ b/include/rmm/mr/device/limiting_resource_adaptor.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,7 +15,7 @@ */ #pragma once -#include +#include #include #include @@ -54,7 +54,7 @@ class limiting_resource_adaptor final : public device_memory_resource { */ limiting_resource_adaptor(Upstream* upstream, std::size_t allocation_limit, - std::size_t alignment = rmm::detail::CUDA_ALLOCATION_ALIGNMENT) + std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT) : allocation_limit_{allocation_limit}, allocated_bytes_(0), alignment_(alignment), @@ -134,7 +134,7 @@ class limiting_resource_adaptor final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - auto const proposed_size = rmm::detail::align_up(bytes, alignment_); + auto const proposed_size = rmm::align_up(bytes, alignment_); auto const old = allocated_bytes_.fetch_add(proposed_size); if (old + proposed_size <= allocation_limit_) { try { @@ -158,7 +158,7 @@ class limiting_resource_adaptor final : public device_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - std::size_t allocated_size = rmm::detail::align_up(bytes, alignment_); + std::size_t allocated_size = rmm::align_up(bytes, alignment_); upstream_->deallocate(ptr, bytes, stream); allocated_bytes_ -= allocated_size; } diff --git a/include/rmm/mr/device/pool_memory_resource.hpp b/include/rmm/mr/device/pool_memory_resource.hpp index 0e6f8ecbe..964470695 100644 --- a/include/rmm/mr/device/pool_memory_resource.hpp +++ b/include/rmm/mr/device/pool_memory_resource.hpp @@ -15,8 +15,8 @@ */ #pragma once +#include #include -#include #include #include #include @@ -126,7 +126,7 @@ class pool_memory_resource final * @param maximum_pool_size Maximum size, in bytes, that the pool can grow to. Defaults to all * of the available memory from the upstream resource. 
*/ - [[deprecated("Must specify initial_pool_size")]] // + //[[deprecated("Must specify initial_pool_size")]] // explicit pool_memory_resource(Upstream* upstream_mr, thrust::optional initial_pool_size = thrust::nullopt, thrust::optional maximum_pool_size = thrust::nullopt) @@ -153,7 +153,7 @@ class pool_memory_resource final */ template , int> = 0> - [[deprecated("Must specify initial_pool_size")]] // + //[[deprecated("Must specify initial_pool_size")]] // explicit pool_memory_resource(Upstream2& upstream_mr, thrust::optional initial_pool_size = thrust::nullopt, thrust::optional maximum_pool_size = thrust::nullopt) @@ -184,10 +184,9 @@ class pool_memory_resource final return upstream_mr; }()} { - RMM_EXPECTS(rmm::detail::is_aligned(initial_pool_size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT), + RMM_EXPECTS(rmm::is_aligned(initial_pool_size, rmm::CUDA_ALLOCATION_ALIGNMENT), "Error, Initial pool size required to be a multiple of 256 bytes"); - RMM_EXPECTS(rmm::detail::is_aligned(maximum_pool_size.value_or(0), - rmm::detail::CUDA_ALLOCATION_ALIGNMENT), + RMM_EXPECTS(rmm::is_aligned(maximum_pool_size.value_or(0), rmm::CUDA_ALLOCATION_ALIGNMENT), "Error, Maximum pool size required to be a multiple of 256 bytes"); initialize_pool(initial_pool_size, maximum_pool_size); @@ -378,9 +377,9 @@ class pool_memory_resource final { if (maximum_pool_size_.has_value()) { auto const unaligned_remaining = maximum_pool_size_.value() - pool_size(); - using rmm::detail::align_up; - auto const remaining = align_up(unaligned_remaining, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); - auto const aligned_size = align_up(size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + using rmm::align_up; + auto const remaining = align_up(unaligned_remaining, rmm::CUDA_ALLOCATION_ALIGNMENT); + auto const aligned_size = align_up(size, rmm::CUDA_ALLOCATION_ALIGNMENT); return (aligned_size <= remaining) ? std::max(aligned_size, remaining / 2) : 0; } return std::max(size, pool_size()); @@ -448,7 +447,7 @@ class pool_memory_resource final RMM_LOGGING_ASSERT(iter != allocated_blocks_.end()); auto block = *iter; - RMM_LOGGING_ASSERT(block.size() == rmm::detail::align_up(size, allocation_alignment)); + RMM_LOGGING_ASSERT(block.size() == rmm::align_up(size, allocation_alignment)); allocated_blocks_.erase(iter); return block; diff --git a/include/rmm/mr/host/new_delete_resource.hpp b/include/rmm/mr/host/new_delete_resource.hpp index 044f74063..4bb272df3 100644 --- a/include/rmm/mr/host/new_delete_resource.hpp +++ b/include/rmm/mr/host/new_delete_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ #include +#include #include #include @@ -58,12 +59,11 @@ class new_delete_resource final : public host_memory_resource { * @return Pointer to the newly allocated memory */ void* do_allocate(std::size_t bytes, - std::size_t alignment = rmm::detail::RMM_DEFAULT_HOST_ALIGNMENT) override + std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) override { // If the requested alignment isn't supported, use default - alignment = (rmm::detail::is_supported_alignment(alignment)) - ? alignment - : rmm::detail::RMM_DEFAULT_HOST_ALIGNMENT; + alignment = + (rmm::is_supported_alignment(alignment)) ? 
alignment : rmm::RMM_DEFAULT_HOST_ALIGNMENT; return rmm::detail::aligned_allocate( bytes, alignment, [](std::size_t size) { return ::operator new(size); }); @@ -84,7 +84,7 @@ class new_delete_resource final : public host_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, - std::size_t alignment = rmm::detail::RMM_DEFAULT_HOST_ALIGNMENT) override + std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) override { rmm::detail::aligned_deallocate( ptr, bytes, alignment, [](void* ptr) { ::operator delete(ptr); }); diff --git a/include/rmm/mr/host/pinned_memory_resource.hpp b/include/rmm/mr/host/pinned_memory_resource.hpp index e49767faf..b5c273ef5 100644 --- a/include/rmm/mr/host/pinned_memory_resource.hpp +++ b/include/rmm/mr/host/pinned_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,6 +15,7 @@ */ #pragma once +#include #include #include #include @@ -114,7 +115,7 @@ class pinned_memory_resource final : public host_memory_resource { */ void deallocate_async(void* ptr, std::size_t bytes, std::size_t alignment, cuda_stream_view) { - do_deallocate(ptr, rmm::detail::align_up(bytes, alignment)); + do_deallocate(ptr, rmm::align_up(bytes, alignment)); } /** @@ -143,9 +144,8 @@ class pinned_memory_resource final : public host_memory_resource { if (0 == bytes) { return nullptr; } // If the requested alignment isn't supported, use default - alignment = (rmm::detail::is_supported_alignment(alignment)) - ? alignment - : rmm::detail::RMM_DEFAULT_HOST_ALIGNMENT; + alignment = + (rmm::is_supported_alignment(alignment)) ? alignment : rmm::RMM_DEFAULT_HOST_ALIGNMENT; return rmm::detail::aligned_allocate(bytes, alignment, [](std::size_t size) { void* ptr{nullptr}; diff --git a/tests/mr/device/aligned_mr_tests.cpp b/tests/mr/device/aligned_mr_tests.cpp index dfcdfa72f..5fbb4b8f1 100644 --- a/tests/mr/device/aligned_mr_tests.cpp +++ b/tests/mr/device/aligned_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
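The two host resources above share one fallback rule worth spelling out: an unsupported (non-power-of-2) alignment request silently degrades to the default host alignment instead of failing. A small sketch of the pattern, with an illustrative helper name:

```c++
#include <rmm/aligned.hpp>

#include <cstddef>

std::size_t choose_host_alignment(std::size_t requested)
{
  // For example, a request for 24-byte alignment is not a power of 2, so it
  // falls back to alignof(std::max_align_t) via RMM_DEFAULT_HOST_ALIGNMENT.
  return rmm::is_supported_alignment(requested) ? requested
                                                : rmm::RMM_DEFAULT_HOST_ALIGNMENT;
}
```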
@@ -15,7 +15,8 @@ */ #include "../../mock_resource.hpp" -#include + +#include #include #include #include @@ -223,7 +224,7 @@ TEST(AlignedTest, AlignRealPointer) auto const threshold{65536}; aligned_real mr{rmm::mr::get_current_device_resource(), alignment, threshold}; void* alloc = mr.allocate(threshold); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(alloc, alignment)); + EXPECT_TRUE(rmm::is_pointer_aligned(alloc, alignment)); mr.deallocate(alloc, threshold); } diff --git a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp index 3168252ea..1068e0cf0 100644 --- a/tests/mr/device/arena_mr_tests.cpp +++ b/tests/mr/device/arena_mr_tests.cpp @@ -16,9 +16,9 @@ #include "../../byte_literals.hpp" +#include #include #include -#include #include #include #include @@ -488,10 +488,9 @@ TEST_F(ArenaTest, SizeSmallerThanSuperblockSize) // NOLINT TEST_F(ArenaTest, AllocateNinetyPercent) // NOLINT { EXPECT_NO_THROW([]() { // NOLINT(cppcoreguidelines-avoid-goto) - auto const free = rmm::available_device_memory().first; - auto const ninety_percent = - rmm::detail::align_up(static_cast(static_cast(free) * 0.9), - rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + auto const free = rmm::available_device_memory().first; + auto const ninety_percent = rmm::align_up( + static_cast(static_cast(free) * 0.9), rmm::CUDA_ALLOCATION_ALIGNMENT); arena_mr mr(rmm::mr::get_current_device_resource(), ninety_percent); }()); } diff --git a/tests/mr/device/mr_ref_test.hpp b/tests/mr/device/mr_ref_test.hpp index 804c710a5..78f290f41 100644 --- a/tests/mr/device/mr_ref_test.hpp +++ b/tests/mr/device/mr_ref_test.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,9 +18,9 @@ #include "../../byte_literals.hpp" +#include #include #include -#include #include #include #include @@ -78,7 +78,7 @@ inline void test_allocate(resource_ref ref, std::size_t bytes) try { void* ptr = ref.allocate(bytes); EXPECT_NE(nullptr, ptr); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(ptr)); + EXPECT_TRUE(rmm::is_pointer_aligned(ptr)); EXPECT_TRUE(is_device_memory(ptr)); ref.deallocate(ptr, bytes); } catch (rmm::out_of_memory const& e) { @@ -94,7 +94,7 @@ inline void test_allocate_async(async_resource_ref ref, void* ptr = ref.allocate_async(bytes, stream); if (not stream.is_default()) { stream.synchronize(); } EXPECT_NE(nullptr, ptr); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(ptr)); + EXPECT_TRUE(rmm::is_pointer_aligned(ptr)); EXPECT_TRUE(is_device_memory(ptr)); ref.deallocate_async(ptr, bytes, stream); if (not stream.is_default()) { stream.synchronize(); } @@ -202,7 +202,7 @@ inline void test_random_allocations(resource_ref ref, alloc.size = distribution(generator); EXPECT_NO_THROW(alloc.ptr = ref.allocate(alloc.size)); EXPECT_NE(nullptr, alloc.ptr); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(alloc.ptr)); + EXPECT_TRUE(rmm::is_pointer_aligned(alloc.ptr)); }); std::for_each(allocations.begin(), allocations.end(), [&ref](allocation& alloc) { @@ -228,7 +228,7 @@ inline void test_random_async_allocations(async_resource_ref ref, EXPECT_NO_THROW(alloc.ptr = ref.allocate(alloc.size)); if (not stream.is_default()) { stream.synchronize(); } EXPECT_NE(nullptr, alloc.ptr); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(alloc.ptr)); + EXPECT_TRUE(rmm::is_pointer_aligned(alloc.ptr)); }); std::for_each(allocations.begin(), allocations.end(), [stream, &ref](allocation& alloc) { @@ -269,7 +269,7 @@ inline void test_mixed_random_allocation_free(resource_ref ref, EXPECT_NO_THROW(allocations.emplace_back(ref.allocate(size), size)); auto new_allocation = allocations.back(); EXPECT_NE(nullptr, new_allocation.ptr); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(new_allocation.ptr)); + EXPECT_TRUE(rmm::is_pointer_aligned(new_allocation.ptr)); } else { auto const index = static_cast(index_distribution(generator) % active_allocations); active_allocations--; @@ -316,7 +316,7 @@ inline void test_mixed_random_async_allocation_free(async_resource_ref ref, EXPECT_NO_THROW(allocations.emplace_back(ref.allocate_async(size, stream), size)); auto new_allocation = allocations.back(); EXPECT_NE(nullptr, new_allocation.ptr); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(new_allocation.ptr)); + EXPECT_TRUE(rmm::is_pointer_aligned(new_allocation.ptr)); } else { auto const index = static_cast(index_distribution(generator) % active_allocations); active_allocations--; diff --git a/tests/mr/device/mr_test.hpp b/tests/mr/device/mr_test.hpp index 03f880e72..11614b3d4 100644 --- a/tests/mr/device/mr_test.hpp +++ b/tests/mr/device/mr_test.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
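All of the test edits above follow the same mechanical rename; a minimal before-and-after sketch (the function itself is illustrative):

```c++
#include <rmm/aligned.hpp>  // replaces <rmm/detail/aligned.hpp>

bool pointer_is_usable(void* ptr)
{
  // was: rmm::detail::is_pointer_aligned(ptr, rmm::detail::CUDA_ALLOCATION_ALIGNMENT)
  return rmm::is_pointer_aligned(ptr, rmm::CUDA_ALLOCATION_ALIGNMENT);
}
```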
@@ -18,9 +18,9 @@ #include "../../byte_literals.hpp" +#include #include #include -#include #include #include #include @@ -74,7 +74,7 @@ inline void test_get_current_device_resource() EXPECT_NE(nullptr, rmm::mr::get_current_device_resource()); void* ptr = rmm::mr::get_current_device_resource()->allocate(1_MiB); EXPECT_NE(nullptr, ptr); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(ptr)); + EXPECT_TRUE(rmm::is_pointer_aligned(ptr)); EXPECT_TRUE(is_device_memory(ptr)); rmm::mr::get_current_device_resource()->deallocate(ptr, 1_MiB); } @@ -86,7 +86,7 @@ inline void test_allocate(rmm::mr::device_memory_resource* mr, void* ptr = mr->allocate(bytes); if (not stream.is_default()) { stream.synchronize(); } EXPECT_NE(nullptr, ptr); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(ptr)); + EXPECT_TRUE(rmm::is_pointer_aligned(ptr)); EXPECT_TRUE(is_device_memory(ptr)); mr->deallocate(ptr, bytes); if (not stream.is_default()) { stream.synchronize(); } @@ -154,7 +154,7 @@ inline void test_random_allocations(rmm::mr::device_memory_resource* mr, EXPECT_NO_THROW(alloc.ptr = mr->allocate(alloc.size, stream)); if (not stream.is_default()) { stream.synchronize(); } EXPECT_NE(nullptr, alloc.ptr); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(alloc.ptr)); + EXPECT_TRUE(rmm::is_pointer_aligned(alloc.ptr)); }); std::for_each(allocations.begin(), allocations.end(), [stream, mr](allocation& alloc) { @@ -196,7 +196,7 @@ inline void test_mixed_random_allocation_free(rmm::mr::device_memory_resource* m EXPECT_NO_THROW(allocations.emplace_back(mr->allocate(size, stream), size)); auto new_allocation = allocations.back(); EXPECT_NE(nullptr, new_allocation.ptr); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(new_allocation.ptr)); + EXPECT_TRUE(rmm::is_pointer_aligned(new_allocation.ptr)); } else { auto const index = static_cast(index_distribution(generator) % active_allocations); active_allocations--; diff --git a/tests/mr/device/pool_mr_tests.cpp b/tests/mr/device/pool_mr_tests.cpp index 46e0ee988..299eee65c 100644 --- a/tests/mr/device/pool_mr_tests.cpp +++ b/tests/mr/device/pool_mr_tests.cpp @@ -14,8 +14,8 @@ * limitations under the License. */ +#include #include -#include #include #include #include @@ -71,9 +71,8 @@ TEST(PoolTest, AllocateNinetyPercent) auto allocate_ninety = []() { auto const [free, total] = rmm::available_device_memory(); (void)total; - auto const ninety_percent_pool = - rmm::detail::align_up(static_cast(static_cast(free) * 0.9), - rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + auto const ninety_percent_pool = rmm::align_up( + static_cast(static_cast(free) * 0.9), rmm::CUDA_ALLOCATION_ALIGNMENT); pool_mr mr{rmm::mr::get_current_device_resource(), ninety_percent_pool}; }; EXPECT_NO_THROW(allocate_ninety()); diff --git a/tests/mr/host/mr_ref_tests.cpp b/tests/mr/host/mr_ref_tests.cpp index 6563eb635..416641f18 100644 --- a/tests/mr/host/mr_ref_tests.cpp +++ b/tests/mr/host/mr_ref_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,7 +16,7 @@ #include "../../byte_literals.hpp" -#include +#include #include #include #include @@ -35,7 +35,7 @@ namespace rmm::test { namespace { inline bool is_aligned(void* ptr, std::size_t alignment = alignof(std::max_align_t)) { - return rmm::detail::is_pointer_aligned(ptr, alignment); + return rmm::is_pointer_aligned(ptr, alignment); } // Returns true if a pointer points to a device memory or managed memory allocation. diff --git a/tests/mr/host/mr_tests.cpp b/tests/mr/host/mr_tests.cpp index 678d6aeb8..e0078c920 100644 --- a/tests/mr/host/mr_tests.cpp +++ b/tests/mr/host/mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,7 +16,7 @@ #include "../../byte_literals.hpp" -#include +#include #include #include #include @@ -35,7 +35,7 @@ namespace rmm::test { namespace { inline bool is_aligned(void* ptr, std::size_t alignment = alignof(std::max_align_t)) { - return rmm::detail::is_pointer_aligned(ptr, alignment); + return rmm::is_pointer_aligned(ptr, alignment); } // Returns true if a pointer points to a device memory or managed memory allocation. From 8afff2d74175e57d09e1dfff2c8cab329fb5ad8b Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Wed, 10 Jan 2024 03:39:44 +0000 Subject: [PATCH 22/40] Don't deprecate anything just yet (until cuDF/cuGraph updated) --- include/rmm/cuda_device.hpp | 2 +- include/rmm/mr/device/pool_memory_resource.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/rmm/cuda_device.hpp b/include/rmm/cuda_device.hpp index 89fa1479f..473601161 100644 --- a/include/rmm/cuda_device.hpp +++ b/include/rmm/cuda_device.hpp @@ -125,7 +125,7 @@ namespace detail { * * @return The available and total device memory in bytes for the current device as a std::pair. */ -[[deprecated("Use `rmm::available_device_memory` instead.")]] // +//[[deprecated("Use `rmm::available_device_memory` instead.")]] // const auto available_device_memory = rmm::available_device_memory; } // namespace detail diff --git a/include/rmm/mr/device/pool_memory_resource.hpp b/include/rmm/mr/device/pool_memory_resource.hpp index 964470695..500e8029b 100644 --- a/include/rmm/mr/device/pool_memory_resource.hpp +++ b/include/rmm/mr/device/pool_memory_resource.hpp @@ -110,7 +110,7 @@ class pool_memory_resource final detail::coalescing_free_list>; /** - * @brief [DEPRECATED] Construct a `pool_memory_resource` and allocate the initial device memory + * @brief Construct a `pool_memory_resource` and allocate the initial device memory * pool using `upstream_mr`. * * @deprecated Use the constructor that takes an explicit initial pool size instead. From 91752c81321838a875ce9ffd064785f9ecfefa17 Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Wed, 10 Jan 2024 05:37:03 +0000 Subject: [PATCH 23/40] Make percent_of_free_device_memory do what it says on the tin. 
--- include/rmm/cuda_device.hpp | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/include/rmm/cuda_device.hpp b/include/rmm/cuda_device.hpp index d782a1d1b..941e7926d 100644 --- a/include/rmm/cuda_device.hpp +++ b/include/rmm/cuda_device.hpp @@ -131,23 +131,18 @@ const auto available_device_memory = rmm::available_device_memory; } // namespace detail /** - * @brief Returns the approximate specified percent of total device memory on the current CUDA - * device or the total free device memory (whichever is smaller), aligned to the nearest CUDA - * allocation size. + * @brief Returns the approximate specified percent of available device memory on the current CUDA + * device, aligned (down) to the nearest CUDA allocation size. * - * @param percent The percent of free memory to return. Defaults to 50%. + * @param percent The percent of free memory to return. * * @return The recommended initial device memory pool size in bytes. */ -inline std::size_t percent_of_free_device_memory(int percent = 50) +inline std::size_t percent_of_free_device_memory(int percent) { - auto const [free, total] = rmm::available_device_memory(); - - double const fraction = static_cast<double>(percent) / 100.0; - - return rmm::align_up( - std::min(free, static_cast<std::size_t>(static_cast<double>(total) * fraction)), - rmm::CUDA_ALLOCATION_ALIGNMENT); + [[maybe_unused]] auto const [free, total] = rmm::available_device_memory(); + return rmm::align_down(static_cast<std::size_t>(static_cast<double>(free) * percent / 100.0), + rmm::CUDA_ALLOCATION_ALIGNMENT); } /** From baf429c99df88e9173995d2a39ae0d5573ae6755 Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Wed, 10 Jan 2024 05:46:40 +0000 Subject: [PATCH 24/40] Fix remaining uses of pool ctor in docs and code --- README.md | 10 +++++++--- .../multi_stream_allocations_bench.cu | 6 ++++-- benchmarks/random_allocations/random_allocations.cpp | 3 ++- benchmarks/replay/replay.cpp | 4 ++-- include/rmm/mr/device/device_memory_resource.hpp | 8 +++++--- include/rmm/mr/device/per_device_resource.hpp | 12 +++++++++++- tests/mr/device/mr_ref_test.hpp | 4 +++- tests/mr/device/mr_test.hpp | 4 +++- tests/mr/device/pool_mr_tests.cpp | 6 ++---- 9 files changed, 39 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index e033ef56f..a1b85d33c 100644 --- a/README.md +++ b/README.md @@ -332,7 +332,9 @@ Accessing and modifying the default resource is done through two functions: ```c++ rmm::mr::cuda_memory_resource cuda_mr; // Construct a resource that uses a coalescing best-fit pool allocator -rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> pool_mr{&cuda_mr}; +// With the pool initially half of available device memory +auto initial_size = rmm::percent_of_free_device_memory(50); +rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> pool_mr{&cuda_mr, initial_size}; rmm::mr::set_current_device_resource(&pool_mr); // Updates the current device resource pointer to `pool_mr` rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(); // Points to `pool_mr` ``` @@ -351,11 +353,13 @@ per-device resources. Here is an example loop that creates `unique_ptr`s to `poo objects for each device and sets them as the per-device resource for that device.
```c++ -std::vector> per_device_pools; +using pool_mr = rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource>; +std::vector<std::unique_ptr<pool_mr>> per_device_pools; for(int i = 0; i < N; ++i) { cudaSetDevice(i); // set device i before creating MR // Use a vector of unique_ptr to maintain the lifetime of the MRs - per_device_pools.push_back(std::make_unique()); + // Note: for brevity, omitting creation of upstream and computing initial_size + per_device_pools.push_back(std::make_unique<pool_mr>(upstream, initial_size)); // Set the per-device resource for device i set_per_device_resource(cuda_device_id{i}, &per_device_pools.back()); } diff --git a/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu b/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu index 5ed1b31f9..a853ac1c4 100644 --- a/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu +++ b/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "rmm/cuda_device.hpp" #include #include @@ -100,7 +101,8 @@ inline auto make_cuda_async() { return std::make_shared<rmm::mr::cuda_async_memory_resource>(); } inline auto make_pool() { - return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(make_cuda()); + return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>( + make_cuda(), rmm::percent_of_free_device_memory(50)); } inline auto make_arena() diff --git a/benchmarks/random_allocations/random_allocations.cpp b/benchmarks/random_allocations/random_allocations.cpp index ed8ef1074..2856cd323 100644 --- a/benchmarks/random_allocations/random_allocations.cpp +++ b/benchmarks/random_allocations/random_allocations.cpp @@ -166,7 +166,8 @@ inline auto make_cuda_async() { return std::make_shared<rmm::mr::cuda_async_memory_resource>(); } inline auto make_pool() { - return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(make_cuda()); + return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>( + make_cuda(), rmm::percent_of_free_device_memory(50)); } inline auto make_arena() diff --git a/benchmarks/replay/replay.cpp b/benchmarks/replay/replay.cpp index 320811875..253708ace 100644 --- a/benchmarks/replay/replay.cpp +++ b/benchmarks/replay/replay.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -61,7 +61,7 @@ inline auto make_pool(std::size_t simulated_size) return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>( make_simulated(simulated_size), simulated_size, simulated_size); } - return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(make_cuda()); + return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(make_cuda(), 0); } inline auto make_arena(std::size_t simulated_size) diff --git a/include/rmm/mr/device/device_memory_resource.hpp b/include/rmm/mr/device/device_memory_resource.hpp index 63e5f39a4..e3014b6c3 100644 --- a/include/rmm/mr/device/device_memory_resource.hpp +++ b/include/rmm/mr/device/device_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -78,10 +78,12 @@ namespace rmm::mr { * device.
* * @code{.cpp} - * std::vector> per_device_pools; + * using pool_mr = rmm::mr::pool_memory_resource; + * std::vector> per_device_pools; * for(int i = 0; i < N; ++i) { * cudaSetDevice(i); - * per_device_pools.push_back(std::make_unique()); + * // Note: for brevity, omitting creation of upstream and computing initial_size + * per_device_pools.push_back(std::make_unique(upstream, initial_size)); * set_per_device_resource(cuda_device_id{i}, &per_device_pools.back()); * } * @endcode diff --git a/include/rmm/mr/device/per_device_resource.hpp b/include/rmm/mr/device/per_device_resource.hpp index 139389f0c..a56a784a1 100644 --- a/include/rmm/mr/device/per_device_resource.hpp +++ b/include/rmm/mr/device/per_device_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -69,6 +69,16 @@ * set_per_device_resource(cuda_device_id{i}, &per_device_pools.back()); * } * @endcode + * @code{.cpp} + * using pool_mr = rmm::mr::pool_memory_resource; + * std::vector> per_device_pools; + * for(int i = 0; i < N; ++i) { + * cudaSetDevice(i); + * // Note: for brevity, omitting creation of upstream and computing initial_size + * per_device_pools.push_back(std::make_unique(upstream, initial_size)); + * set_per_device_resource(cuda_device_id{i}, &per_device_pools.back()); + * } + * @endcode */ namespace rmm::mr { diff --git a/tests/mr/device/mr_ref_test.hpp b/tests/mr/device/mr_ref_test.hpp index 78f290f41..eb84dffd0 100644 --- a/tests/mr/device/mr_ref_test.hpp +++ b/tests/mr/device/mr_ref_test.hpp @@ -17,6 +17,7 @@ #pragma once #include "../../byte_literals.hpp" +#include "rmm/cuda_device.hpp" #include #include @@ -379,7 +380,8 @@ inline auto make_managed() { return std::make_shared(make_cuda()); + return rmm::mr::make_owning_wrapper( + make_cuda(), rmm::percent_of_free_device_memory(50)); } inline auto make_arena() diff --git a/tests/mr/device/mr_test.hpp b/tests/mr/device/mr_test.hpp index 11614b3d4..64dabad35 100644 --- a/tests/mr/device/mr_test.hpp +++ b/tests/mr/device/mr_test.hpp @@ -17,6 +17,7 @@ #pragma once #include "../../byte_literals.hpp" +#include "rmm/cuda_device.hpp" #include #include @@ -257,7 +258,8 @@ inline auto make_managed() { return std::make_shared(make_cuda()); + return rmm::mr::make_owning_wrapper( + make_cuda(), rmm::percent_of_free_device_memory(50)); } inline auto make_arena() diff --git a/tests/mr/device/pool_mr_tests.cpp b/tests/mr/device/pool_mr_tests.cpp index 299eee65c..a2793386f 100644 --- a/tests/mr/device/pool_mr_tests.cpp +++ b/tests/mr/device/pool_mr_tests.cpp @@ -71,8 +71,7 @@ TEST(PoolTest, AllocateNinetyPercent) auto allocate_ninety = []() { auto const [free, total] = rmm::available_device_memory(); (void)total; - auto const ninety_percent_pool = rmm::align_up( - static_cast(static_cast(free) * 0.9), rmm::CUDA_ALLOCATION_ALIGNMENT); + auto const ninety_percent_pool = rmm::percent_of_free_device_memory(90); pool_mr mr{rmm::mr::get_current_device_resource(), ninety_percent_pool}; }; EXPECT_NO_THROW(allocate_ninety()); @@ -81,8 +80,7 @@ TEST(PoolTest, AllocateNinetyPercent) TEST(PoolTest, TwoLargeBuffers) { auto two_large = []() { - auto const [free, total] = rmm::available_device_memory(); - (void)total; + [[maybe_unused]] auto const [free, total] = rmm::available_device_memory(); pool_mr mr{rmm::mr::get_current_device_resource(), rmm::percent_of_free_device_memory(50)}; 
auto* ptr1 = mr.allocate(free / 4); auto* ptr2 = mr.allocate(free / 4); From c90e81cbcadf73ec7ceb8cb32e33048efef3ac1b Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Wed, 10 Jan 2024 19:59:41 +0000 Subject: [PATCH 25/40] Fix overflow in percent_of_free_device_memory --- include/rmm/cuda_device.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/rmm/cuda_device.hpp b/include/rmm/cuda_device.hpp index 941e7926d..565d86926 100644 --- a/include/rmm/cuda_device.hpp +++ b/include/rmm/cuda_device.hpp @@ -141,7 +141,8 @@ const auto available_device_memory = rmm::available_device_memory; inline std::size_t percent_of_free_device_memory(int percent) { [[maybe_unused]] auto const [free, total] = rmm::available_device_memory(); - return rmm::align_down(static_cast<std::size_t>(static_cast<double>(free) * percent / 100.0), + auto fraction = static_cast<double>(percent) / 100.0; + return rmm::align_down(static_cast<std::size_t>(static_cast<double>(free) * fraction), rmm::CUDA_ALLOCATION_ALIGNMENT); } From c2843be8939a8427059f90353a217c0fd147b768 Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Wed, 10 Jan 2024 20:00:21 +0000 Subject: [PATCH 26/40] Fix Cython to provide explicit initial size --- python/rmm/_lib/memory_resource.pyx | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/python/rmm/_lib/memory_resource.pyx b/python/rmm/_lib/memory_resource.pyx index ce7f45e19..690e2e338 100644 --- a/python/rmm/_lib/memory_resource.pyx +++ b/python/rmm/_lib/memory_resource.pyx @@ -120,12 +120,15 @@ cdef extern from "rmm/mr/device/cuda_async_memory_resource.hpp" \ win32 win32_kmt +cdef extern from "rmm/cuda_device.hpp" namespace "rmm" nogil: + size_t percent_of_free_device_memory(int percent) except + + cdef extern from "rmm/mr/device/pool_memory_resource.hpp" \ namespace "rmm::mr" nogil: cdef cppclass pool_memory_resource[Upstream](device_memory_resource): pool_memory_resource( Upstream* upstream_mr, - optional[size_t] initial_pool_size, + size_t initial_pool_size, optional[size_t] maximum_pool_size) except + size_t pool_size() @@ -369,12 +372,12 @@ cdef class PoolMemoryResource(UpstreamResourceAdaptor): initial_pool_size=None, maximum_pool_size=None ): - cdef optional[size_t] c_initial_pool_size + cdef size_t c_initial_pool_size cdef optional[size_t] c_maximum_pool_size c_initial_pool_size = ( - optional[size_t]() if + percent_of_free_device_memory(50) if initial_pool_size is None - else make_optional[size_t](initial_pool_size) + else initial_pool_size ) c_maximum_pool_size = ( optional[size_t]() if From 6e0aeaa6147959fe4221e8baf1192daeffbb744e Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Wed, 10 Jan 2024 20:30:20 +0000 Subject: [PATCH 27/40] Respond to review suggestions in aligned.hpp --- include/rmm/aligned.hpp | 5 +++-- include/rmm/detail/aligned.hpp | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/include/rmm/aligned.hpp b/include/rmm/aligned.hpp index ce762094f..83b84a321 100644 --- a/include/rmm/aligned.hpp +++ b/include/rmm/aligned.hpp @@ -18,6 +18,7 @@ #include <cassert> #include <cstddef> +#include <cstdint> namespace rmm { @@ -37,7 +38,7 @@ static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256}; /** * @brief Returns whether or not `n` is a power of 2.
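A note on the review fix in the next hunk: the old expression `(0 == (value & (value - 1)))` classifies zero as a power of 2, since `0 & (0 - 1)` is 0. Two compile-time checks (mine, not from the patch) that only hold with the guarded version:

```c++
#include <rmm/aligned.hpp>

static_assert(!rmm::is_pow2(0));  // the unguarded expression returned true here
static_assert(rmm::is_pow2(1) && rmm::is_pow2(2) && rmm::is_pow2(256));
```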
* */ -constexpr bool is_pow2(std::size_t value) { return (0 == (value & (value - 1))); } +constexpr bool is_pow2(std::size_t value) { return (value != 0U) && ((value & (value - 1)) == 0U); } /** * @brief Returns whether or not `alignment` is a valid memory alignment. @@ -90,7 +91,7 @@ constexpr bool is_aligned(std::size_t value, std::size_t alignment) noexcept inline bool is_pointer_aligned(void* ptr, std::size_t alignment = CUDA_ALLOCATION_ALIGNMENT) { // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) - return is_aligned(reinterpret_cast<std::ptrdiff_t>(ptr), alignment); + return is_aligned(reinterpret_cast<std::uintptr_t>(ptr), alignment); } } // namespace rmm diff --git a/include/rmm/detail/aligned.hpp b/include/rmm/detail/aligned.hpp index 5b9c11474..54d287bfb 100644 --- a/include/rmm/detail/aligned.hpp +++ b/include/rmm/detail/aligned.hpp @@ -40,7 +40,7 @@ static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256}; * @brief Returns whether or not `n` is a power of 2. * */ -constexpr bool is_pow2(std::size_t value) { return (0 == (value & (value - 1))); } +constexpr bool is_pow2(std::size_t value) { return (value != 0U) && ((value & (value - 1)) == 0U); } /** * @brief Returns whether or not `alignment` is a valid memory alignment. @@ -93,7 +93,7 @@ constexpr bool is_aligned(std::size_t value, std::size_t alignment) noexcept inline bool is_pointer_aligned(void* ptr, std::size_t alignment = CUDA_ALLOCATION_ALIGNMENT) { // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) - return rmm::detail::is_aligned(reinterpret_cast<std::ptrdiff_t>(ptr), alignment); + return rmm::detail::is_aligned(reinterpret_cast<std::uintptr_t>(ptr), alignment); } /** From 8afff2d74175e57d09e1dfff2c8cab329fb5ad8b Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Wed, 10 Jan 2024 20:34:31 +0000 Subject: [PATCH 28/40] Fix quoted auto includes --- .../multi_stream_allocations_bench.cu | 2 +- tests/container_multidevice_tests.cu | 4 ++-- tests/mr/device/failure_callback_mr_tests.cpp | 9 +++++---- tests/mr/device/mr_ref_test.hpp | 2 +- tests/mr/device/mr_test.hpp | 2 +- 5 files changed, 10 insertions(+), 9 deletions(-) diff --git a/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu b/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu index a853ac1c4..4943e507f 100644 --- a/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu +++ b/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu @@ -14,9 +14,9 @@ * limitations under the License. */ -#include "rmm/cuda_device.hpp" #include +#include <rmm/cuda_device.hpp> #include #include #include diff --git a/tests/container_multidevice_tests.cu b/tests/container_multidevice_tests.cu index 9de9ddf40..e58ba53a2 100644 --- a/tests/container_multidevice_tests.cu +++ b/tests/container_multidevice_tests.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,12 +15,12 @@ */ #include "device_check_resource_adaptor.hpp" -#include "rmm/mr/device/per_device_resource.hpp" #include #include #include #include +#include <rmm/mr/device/per_device_resource.hpp> #include diff --git a/tests/mr/device/failure_callback_mr_tests.cpp b/tests/mr/device/failure_callback_mr_tests.cpp index bb5484c69..79acd5c7e 100644 --- a/tests/mr/device/failure_callback_mr_tests.cpp +++ b/tests/mr/device/failure_callback_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,16 +15,17 @@ */ #include "../../byte_literals.hpp" -#include "rmm/cuda_stream_view.hpp" -#include "rmm/mr/device/device_memory_resource.hpp" -#include +#include #include #include +#include #include #include +#include + namespace rmm::test { namespace { diff --git a/tests/mr/device/mr_ref_test.hpp b/tests/mr/device/mr_ref_test.hpp index eb84dffd0..25ff76891 100644 --- a/tests/mr/device/mr_ref_test.hpp +++ b/tests/mr/device/mr_ref_test.hpp @@ -17,9 +17,9 @@ #pragma once #include "../../byte_literals.hpp" -#include "rmm/cuda_device.hpp" #include +#include #include #include #include diff --git a/tests/mr/device/mr_test.hpp b/tests/mr/device/mr_test.hpp index 64dabad35..ef4b4bc80 100644 --- a/tests/mr/device/mr_test.hpp +++ b/tests/mr/device/mr_test.hpp @@ -17,9 +17,9 @@ #pragma once #include "../../byte_literals.hpp" -#include "rmm/cuda_device.hpp" #include +#include #include #include #include From 014ac5b90a4cd422242c1554c466fb3ef408e90e Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Wed, 10 Jan 2024 22:46:45 +0000 Subject: [PATCH 29/40] missed file for detail changes --- include/rmm/mr/device/binning_memory_resource.hpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/include/rmm/mr/device/binning_memory_resource.hpp b/include/rmm/mr/device/binning_memory_resource.hpp index c2e1621a6..2a9975b18 100644 --- a/include/rmm/mr/device/binning_memory_resource.hpp +++ b/include/rmm/mr/device/binning_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,7 +15,7 @@ */ #pragma once -#include +#include #include #include @@ -138,8 +138,7 @@ class binning_memory_resource final : public device_memory_resource { */ void add_bin(std::size_t allocation_size, device_memory_resource* bin_resource = nullptr) { - allocation_size = - rmm::detail::align_up(allocation_size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + allocation_size = rmm::align_up(allocation_size, rmm::CUDA_ALLOCATION_ALIGNMENT); if (nullptr != bin_resource) { resource_bins_.insert({allocation_size, bin_resource}); From 909b733ff78c8b1b449d96a500c0d0a95a242089 Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Thu, 11 Jan 2024 00:15:54 +0000 Subject: [PATCH 30/40] Add utilities doxygen group --- include/doxygen_groups.h | 3 ++- include/rmm/aligned.hpp | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/include/doxygen_groups.h b/include/doxygen_groups.h index be5eaf17f..70ec0cd68 100644 --- a/include/doxygen_groups.h +++ b/include/doxygen_groups.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -41,4 +41,5 @@ * @defgroup errors Errors * @defgroup logging Logging * @defgroup thrust_integrations Thrust Integrations + * @defgroup utilities Utilities */ diff --git a/include/rmm/aligned.hpp b/include/rmm/aligned.hpp index 83b84a321..dad58a579 100644 --- a/include/rmm/aligned.hpp +++ b/include/rmm/aligned.hpp @@ -20,6 +20,11 @@ #include #include +/** + * @addtogroup utilities + * @{ + */ + namespace rmm { /** @@ -95,3 +100,5 @@ inline bool is_pointer_aligned(void* ptr, std::size_t alignment = CUDA_ALLOCATIO } } // namespace rmm + +/** @} */ // end of group From 0fc3fba7ab5524eb4b9f1dc71cd7dbfb3de09fbb Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Thu, 11 Jan 2024 00:56:19 +0000 Subject: [PATCH 31/40] Add utilities to sphinx docs --- python/docs/librmm_docs/index.rst | 1 + python/docs/librmm_docs/utilities.rst | 5 +++++ 2 files changed, 6 insertions(+) create mode 100644 python/docs/librmm_docs/utilities.rst diff --git a/python/docs/librmm_docs/index.rst b/python/docs/librmm_docs/index.rst index ba8034dcb..2b61deb9f 100644 --- a/python/docs/librmm_docs/index.rst +++ b/python/docs/librmm_docs/index.rst @@ -17,6 +17,7 @@ librmm Documentation cuda_streams errors logging + utilities deprecated diff --git a/python/docs/librmm_docs/utilities.rst b/python/docs/librmm_docs/utilities.rst new file mode 100644 index 000000000..25b455746 --- /dev/null +++ b/python/docs/librmm_docs/utilities.rst @@ -0,0 +1,5 @@ +Utilities +============ + +.. doxygengroup:: utilities + :members: From 27fe52cea5a256a58fd90c618cc51e0b12a78e49 Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Wed, 17 Jan 2024 00:08:08 +0000 Subject: [PATCH 32/40] Some cleanup of aligned_allocate/deallocate --- include/rmm/detail/aligned.hpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/include/rmm/detail/aligned.hpp b/include/rmm/detail/aligned.hpp index 54d287bfb..0805b8829 100644 --- a/include/rmm/detail/aligned.hpp +++ b/include/rmm/detail/aligned.hpp @@ -114,6 +114,7 @@ inline bool is_pointer_aligned(void* ptr, std::size_t alignment = CUDA_ALLOCATIO * from `alloc`. * * If `alignment` is not a power of 2, behavior is undefined. + * If `Alloc` does not allocate host-accessible memory, behavior is undefined. 
  *
  * @param bytes The desired size of the allocation
  * @param alignment Desired alignment of allocation
@@ -126,7 +127,7 @@ inline bool is_pointer_aligned(void* ptr, std::size_t alignment = CUDA_ALLOCATIO
 template <typename Alloc>
 void* aligned_allocate(std::size_t bytes, std::size_t alignment, Alloc alloc)
 {
-  assert(is_pow2(alignment));
+  assert(is_supported_alignment(alignment));
 
   // allocate memory for bytes, plus potential alignment correction,
   // plus store of the correction offset
@@ -168,9 +169,12 @@ void* aligned_allocate(std::size_t bytes, std::size_t alignment, Alloc alloc)
  */
 template <typename Dealloc>
 // NOLINTNEXTLINE(bugprone-easily-swappable-parameters)
-void aligned_deallocate(void* ptr, std::size_t bytes, std::size_t alignment, Dealloc dealloc)
+void aligned_deallocate(void* ptr,
+                        [[maybe_unused]] std::size_t bytes,
+                        [[maybe_unused]] std::size_t alignment,
+                        Dealloc dealloc) noexcept
 {
-  (void)alignment;
+  assert(is_supported_alignment(alignment));
 
   // Get offset from the location immediately prior to the aligned pointer
   // NOLINTNEXTLINE
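The scheme these helpers implement (over-allocate, round up to the requested alignment, and stash the offset back to the original pointer just below the returned address) is compact enough to sketch end to end. The following is illustrative only -- it is not code from this series -- and uses std::malloc/std::free as stand-ins for the alloc/dealloc callables; the real implementation lives in include/rmm/detail/aligned.hpp.

// Illustrative sketch of the over-allocate-and-stash-offset technique used by
// aligned_allocate / aligned_deallocate (not the actual rmm code).
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>

void* sketch_aligned_allocate(std::size_t bytes, std::size_t alignment)
{
  // Room for the payload, worst-case alignment slack, and the stored offset.
  std::size_t const padded = bytes + alignment + sizeof(std::ptrdiff_t);
  char* const original     = static_cast<char*>(std::malloc(padded));
  if (original == nullptr) { return nullptr; }

  // Align the first address past the offset slot (assumes pow2 alignment).
  auto const raw     = reinterpret_cast<std::uintptr_t>(original) + sizeof(std::ptrdiff_t);
  auto const aligned = (raw + alignment - 1) & ~(alignment - 1);

  // Stash the distance back to `original` immediately below the aligned pointer.
  auto const offset = static_cast<std::ptrdiff_t>(aligned - reinterpret_cast<std::uintptr_t>(original));
  std::memcpy(reinterpret_cast<void*>(aligned - sizeof(std::ptrdiff_t)), &offset, sizeof(offset));
  return reinterpret_cast<void*>(aligned);
}

void sketch_aligned_deallocate(void* ptr)
{
  // Read the stashed offset and free the original allocation.
  std::ptrdiff_t offset{};
  std::memcpy(&offset, static_cast<char*>(ptr) - sizeof(std::ptrdiff_t), sizeof(offset));
  std::free(static_cast<char*>(ptr) - offset);
}

int main()
{
  void* ptr = sketch_aligned_allocate(100, 1024);
  assert(reinterpret_cast<std::uintptr_t>(ptr) % 1024 == 0);
  sketch_aligned_deallocate(ptr);
  return 0;
}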
From da934ba811edc354a07da1f436d3d8cf3112b1b6 Mon Sep 17 00:00:00 2001
From: Mark Harris <783069+harrism@users.noreply.github.com>
Date: Wed, 17 Jan 2024 00:11:42 +0000
Subject: [PATCH 33/40] Implement aligned alloc/dealloc and fix tests.

---
 .../rmm/mr/pinned_host_memory_resource.hpp | 31 +++++++---------
 tests/mr/device/mr_ref_test.hpp            | 36 +++++++++++++------
 tests/mr/device/mr_test.hpp                | 29 +++++++++++----
 3 files changed, 61 insertions(+), 35 deletions(-)

diff --git a/include/rmm/mr/pinned_host_memory_resource.hpp b/include/rmm/mr/pinned_host_memory_resource.hpp
index d26481847..cc2843dce 100644
--- a/include/rmm/mr/pinned_host_memory_resource.hpp
+++ b/include/rmm/mr/pinned_host_memory_resource.hpp
@@ -44,8 +44,6 @@ class pinned_host_memory_resource {
   /**
    * @brief Allocates pinned host memory of size at least \p bytes bytes.
    *
-   * @todo Alignment is not implemented yet.
-   *
    * @throws `rmm::out_of_memory` if the requested allocation could not be fulfilled due to a
    * CUDA out of memory error.
    * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled due to any other
@@ -60,28 +58,31 @@ class pinned_host_memory_resource {
     std::size_t bytes,
     [[maybe_unused]] std::size_t alignment = rmm::detail::RMM_DEFAULT_HOST_ALIGNMENT)
   {
-    void* ptr{nullptr};
-    RMM_CUDA_TRY_ALLOC(cudaHostAlloc(&ptr, bytes, cudaHostAllocDefault));
-    return ptr;
+    // don't allocate anything if the user requested zero bytes
+    if (0 == bytes) { return nullptr; }
+
+    return rmm::detail::aligned_allocate(bytes, alignment, [](std::size_t size) {
+      void* ptr{nullptr};
+      RMM_CUDA_TRY_ALLOC(cudaHostAlloc(&ptr, size, cudaHostAllocDefault));
+      return ptr;
+    });
   }
 
   /**
    * @brief Deallocate memory pointed to by \p ptr of size \p bytes bytes.
    *
-   * @todo Alignment is not implemented yet.
-   *
    * @throws Nothing.
    *
    * @param ptr Pointer to be deallocated.
    * @param bytes Size of the allocation.
    * @param alignment Alignment in bytes. Default alignment is used if unspecified.
    */
-  static void deallocate(
-    void* ptr,
-    [[maybe_unused]] std::size_t bytes,
-    [[maybe_unused]] std::size_t alignment = rmm::detail::RMM_DEFAULT_HOST_ALIGNMENT) noexcept
+  static void deallocate(void* ptr,
+                         std::size_t bytes,
+                         std::size_t alignment = rmm::detail::RMM_DEFAULT_HOST_ALIGNMENT) noexcept
   {
-    RMM_ASSERT_CUDA_SUCCESS(cudaFreeHost(ptr));
+    rmm::detail::aligned_deallocate(
+      ptr, bytes, alignment, [](void* ptr) { RMM_ASSERT_CUDA_SUCCESS(cudaFreeHost(ptr)); });
   }
 
   /**
@@ -108,8 +109,6 @@ class pinned_host_memory_resource {
    *
    * @note Stream argument is ignored and behavior is identical to allocate.
    *
-   * @todo Alignment is not implemented yet.
-   *
    * @throws `rmm::out_of_memory` if the requested allocation could not be fulfilled due to a
    * CUDA out of memory error.
    * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled due to any other
@@ -132,8 +131,6 @@ class pinned_host_memory_resource {
    *
    * @note Stream argument is ignored and behavior is identical to deallocate.
    *
-   * @todo Alignment is not implemented yet.
-   *
    * @throws Nothing.
    *
    * @param ptr Pointer to be deallocated.
@@ -153,8 +150,6 @@ class pinned_host_memory_resource {
    *
    * @note Stream argument is ignored and behavior is identical to deallocate.
    *
-   * @todo Alignment is not implemented yet.
-   *
    * @throws Nothing.
    *
    * @param ptr Pointer to be deallocated.
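The test updates that follow hinge on how cudaPointerGetAttributes classifies pinned allocations: they report cudaMemoryTypeHost, yet carry a non-null devicePointer, so they are device *accessible* without being device memory. A standalone illustration (not part of this series):

// Illustrative sketch of the cudaPointerGetAttributes classification the
// updated test utilities rely on (not code from the patch).
#include <cuda_runtime_api.h>

#include <cstdio>

int main()
{
  void* pinned{nullptr};
  if (cudaHostAlloc(&pinned, 256, cudaHostAllocDefault) != cudaSuccess) { return 1; }

  cudaPointerAttributes attrs{};
  if (cudaPointerGetAttributes(&attrs, pinned) == cudaSuccess) {
    // Pinned allocations are cudaMemoryTypeHost but still expose a device
    // pointer, so the tests count them as "device accessible".
    std::printf("memory type: %d, device-accessible: %s\n",
                static_cast<int>(attrs.type),
                attrs.devicePointer != nullptr ? "yes" : "no");
  }
  return cudaFreeHost(pinned) == cudaSuccess ? 0 : 1;
}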
diff --git a/tests/mr/device/mr_ref_test.hpp b/tests/mr/device/mr_ref_test.hpp
index 25ff76891..386c37a20 100644
--- a/tests/mr/device/mr_ref_test.hpp
+++ b/tests/mr/device/mr_ref_test.hpp
@@ -54,11 +54,27 @@ namespace rmm::test {
  * @brief Returns if a pointer points to a device memory or managed memory
  * allocation.
  */
-inline bool is_device_memory(void* ptr)
+inline bool is_device_accessible_memory(void* ptr)
 {
   cudaPointerAttributes attributes{};
   if (cudaSuccess != cudaPointerGetAttributes(&attributes, ptr)) { return false; }
-  return (attributes.type == cudaMemoryTypeDevice) or (attributes.type == cudaMemoryTypeManaged);
+  return (attributes.type == cudaMemoryTypeDevice) or (attributes.type == cudaMemoryTypeManaged) or
+         ((attributes.type == cudaMemoryTypeHost) and (attributes.devicePointer != nullptr));
+}
+
+inline bool is_host_memory(void* ptr)
+{
+  cudaPointerAttributes attributes{};
+  if (cudaSuccess != cudaPointerGetAttributes(&attributes, ptr)) { return false; }
+  return attributes.type == cudaMemoryTypeHost;
+}
+
+inline bool is_properly_aligned(void* ptr)
+{
+  if (is_host_memory(ptr)) {
+    return rmm::detail::is_pointer_aligned(ptr, rmm::RMM_DEFAULT_HOST_ALIGNMENT);
+  }
+  return rmm::is_pointer_aligned(ptr, rmm::CUDA_ALLOCATION_ALIGNMENT);
 }
 
 enum size_in_bytes : size_t {};
@@ -79,8 +95,8 @@ inline void test_allocate(resource_ref ref, std::size_t bytes)
 try {
   void* ptr = ref.allocate(bytes);
   EXPECT_NE(nullptr, ptr);
-  EXPECT_TRUE(rmm::is_pointer_aligned(ptr));
-  EXPECT_TRUE(is_device_memory(ptr));
+  EXPECT_TRUE(is_properly_aligned(ptr));
+  EXPECT_TRUE(is_device_accessible_memory(ptr));
   ref.deallocate(ptr, bytes);
 } catch (rmm::out_of_memory const& e) {
   EXPECT_NE(std::string{e.what()}.find("out_of_memory"), std::string::npos);
@@ -95,8 +111,8 @@ inline void test_allocate_async(async_resource_ref ref,
   void* ptr = ref.allocate_async(bytes, stream);
   if (not stream.is_default()) { stream.synchronize(); }
   EXPECT_NE(nullptr, ptr);
-  EXPECT_TRUE(rmm::is_pointer_aligned(ptr));
-  EXPECT_TRUE(is_device_memory(ptr));
+  EXPECT_TRUE(is_properly_aligned(ptr));
+  EXPECT_TRUE(is_device_accessible_memory(ptr));
   ref.deallocate_async(ptr, bytes, stream);
   if (not stream.is_default()) { stream.synchronize(); }
 } catch (rmm::out_of_memory const& e) {
@@ -203,7 +219,7 @@ inline void test_random_allocations(resource_ref ref,
     alloc.size = distribution(generator);
     EXPECT_NO_THROW(alloc.ptr = ref.allocate(alloc.size));
     EXPECT_NE(nullptr, alloc.ptr);
-    EXPECT_TRUE(rmm::is_pointer_aligned(alloc.ptr));
+    EXPECT_TRUE(is_properly_aligned(alloc.ptr));
   });
 
   std::for_each(allocations.begin(), allocations.end(), [&ref](allocation& alloc) {
@@ -229,7 +245,7 @@ inline void test_random_async_allocations(async_resource_ref ref,
     EXPECT_NO_THROW(alloc.ptr = ref.allocate(alloc.size));
     if (not stream.is_default()) { stream.synchronize(); }
     EXPECT_NE(nullptr, alloc.ptr);
-    EXPECT_TRUE(rmm::is_pointer_aligned(alloc.ptr));
+    EXPECT_TRUE(is_properly_aligned(alloc.ptr));
   });
 
   std::for_each(allocations.begin(), allocations.end(), [stream, &ref](allocation& alloc) {
@@ -270,7 +286,7 @@ inline void test_mixed_random_allocation_free(resource_ref ref,
       EXPECT_NO_THROW(allocations.emplace_back(ref.allocate(size), size));
       auto new_allocation = allocations.back();
       EXPECT_NE(nullptr, new_allocation.ptr);
-      EXPECT_TRUE(rmm::is_pointer_aligned(new_allocation.ptr));
+      EXPECT_TRUE(is_properly_aligned(new_allocation.ptr));
     } else {
       auto const index = static_cast(index_distribution(generator) % active_allocations);
       active_allocations--;
@@ -317,7 +333,7 @@ inline void test_mixed_random_async_allocation_free(async_resource_ref ref,
       EXPECT_NO_THROW(allocations.emplace_back(ref.allocate_async(size, stream), size));
       auto new_allocation = allocations.back();
       EXPECT_NE(nullptr, new_allocation.ptr);
-      EXPECT_TRUE(rmm::is_pointer_aligned(new_allocation.ptr));
+      EXPECT_TRUE(is_properly_aligned(new_allocation.ptr));
     } else {
       auto const index = static_cast(index_distribution(generator) % active_allocations);
       active_allocations--;
diff --git a/tests/mr/device/mr_test.hpp b/tests/mr/device/mr_test.hpp
index ba63a6267..38acac9c8 100644
--- a/tests/mr/device/mr_test.hpp
+++ b/tests/mr/device/mr_test.hpp
@@ -50,7 +50,7 @@ namespace rmm::test {
  * @brief Returns if a pointer points to a device memory or managed memory
  * allocation.
*/ -inline bool is_device_memory(void* ptr) +inline bool is_device_accessible_memory(void* ptr) { cudaPointerAttributes attributes{}; if (cudaSuccess != cudaPointerGetAttributes(&attributes, ptr)) { return false; } @@ -58,6 +58,21 @@ inline bool is_device_memory(void* ptr) ((attributes.type == cudaMemoryTypeHost) and (attributes.devicePointer != nullptr)); } +inline bool is_host_memory(void* ptr) +{ + cudaPointerAttributes attributes{}; + if (cudaSuccess != cudaPointerGetAttributes(&attributes, ptr)) { return false; } + return attributes.type == cudaMemoryTypeHost; +} + +inline bool is_properly_aligned(void* ptr) +{ + if (is_host_memory(ptr)) { + return rmm::detail::is_pointer_aligned(ptr, rmm::RMM_DEFAULT_HOST_ALIGNMENT); + } + return rmm::is_pointer_aligned(ptr, rmm::CUDA_ALLOCATION_ALIGNMENT); +} + enum size_in_bytes : size_t {}; constexpr auto default_num_allocations{100}; @@ -77,8 +92,8 @@ inline void test_get_current_device_resource() EXPECT_NE(nullptr, rmm::mr::get_current_device_resource()); void* ptr = rmm::mr::get_current_device_resource()->allocate(1_MiB); EXPECT_NE(nullptr, ptr); - EXPECT_TRUE(rmm::is_pointer_aligned(ptr)); - EXPECT_TRUE(is_device_memory(ptr)); + EXPECT_TRUE(is_properly_aligned(ptr)); + EXPECT_TRUE(is_device_accessible_memory(ptr)); rmm::mr::get_current_device_resource()->deallocate(ptr, 1_MiB); } @@ -89,8 +104,8 @@ inline void test_allocate(rmm::mr::device_memory_resource* mr, void* ptr = mr->allocate(bytes); if (not stream.is_default()) { stream.synchronize(); } EXPECT_NE(nullptr, ptr); - EXPECT_TRUE(rmm::is_pointer_aligned(ptr)); - EXPECT_TRUE(is_device_memory(ptr)); + EXPECT_TRUE(is_properly_aligned(ptr)); + EXPECT_TRUE(is_device_accessible_memory(ptr)); mr->deallocate(ptr, bytes); if (not stream.is_default()) { stream.synchronize(); } } @@ -157,7 +172,7 @@ inline void test_random_allocations(rmm::mr::device_memory_resource* mr, EXPECT_NO_THROW(alloc.ptr = mr->allocate(alloc.size, stream)); if (not stream.is_default()) { stream.synchronize(); } EXPECT_NE(nullptr, alloc.ptr); - EXPECT_TRUE(rmm::is_pointer_aligned(alloc.ptr)); + EXPECT_TRUE(is_properly_aligned(alloc.ptr)); }); std::for_each(allocations.begin(), allocations.end(), [stream, mr](allocation& alloc) { @@ -199,7 +214,7 @@ inline void test_mixed_random_allocation_free(rmm::mr::device_memory_resource* m EXPECT_NO_THROW(allocations.emplace_back(mr->allocate(size, stream), size)); auto new_allocation = allocations.back(); EXPECT_NE(nullptr, new_allocation.ptr); - EXPECT_TRUE(rmm::is_pointer_aligned(new_allocation.ptr)); + EXPECT_TRUE(is_properly_aligned(new_allocation.ptr)); } else { auto const index = static_cast(index_distribution(generator) % active_allocations); active_allocations--; From 85286b024425e72ea26e98acba52b34883a8234f Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Wed, 17 Jan 2024 20:16:06 +0000 Subject: [PATCH 34/40] copyright year --- include/rmm/mr/pinned_host_memory_resource.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/rmm/mr/pinned_host_memory_resource.hpp b/include/rmm/mr/pinned_host_memory_resource.hpp index cc2843dce..b58625f05 100644 --- a/include/rmm/mr/pinned_host_memory_resource.hpp +++ b/include/rmm/mr/pinned_host_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
From f7b0ca531619e958489e8d841963d98a6bd85960 Mon Sep 17 00:00:00 2001
From: Mark Harris <783069+harrism@users.noreply.github.com>
Date: Thu, 18 Jan 2024 09:21:36 +0000
Subject: [PATCH 35/40] static_assert MR properties.

---
 include/rmm/mr/pinned_host_memory_resource.hpp | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/include/rmm/mr/pinned_host_memory_resource.hpp b/include/rmm/mr/pinned_host_memory_resource.hpp
index b58625f05..0fea14b9d 100644
--- a/include/rmm/mr/pinned_host_memory_resource.hpp
+++ b/include/rmm/mr/pinned_host_memory_resource.hpp
@@ -15,6 +15,7 @@
  */
 #pragma once
 
+#include <rmm/aligned.hpp>
 #include 
 #include 
@@ -54,9 +55,8 @@ class pinned_host_memory_resource {
   *
   * @return Pointer to the newly allocated memory.
   */
-  static void* allocate(
-    std::size_t bytes,
-    [[maybe_unused]] std::size_t alignment = rmm::detail::RMM_DEFAULT_HOST_ALIGNMENT)
+  static void* allocate(std::size_t bytes,
+                        [[maybe_unused]] std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT)
   {
     // don't allocate anything if the user requested zero bytes
     if (0 == bytes) { return nullptr; }
@@ -79,7 +79,7 @@ class pinned_host_memory_resource {
    */
   static void deallocate(void* ptr,
                          std::size_t bytes,
-                         std::size_t alignment = rmm::detail::RMM_DEFAULT_HOST_ALIGNMENT) noexcept
+                         std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) noexcept
   {
     rmm::detail::aligned_deallocate(
       ptr, bytes, alignment, [](void* ptr) { RMM_ASSERT_CUDA_SUCCESS(cudaFreeHost(ptr)); });
@@ -217,4 +217,7 @@ class pinned_host_memory_resource {
   }
 };
 
+static_assert(cuda::mr::async_resource_with<pinned_host_memory_resource,
+                                            cuda::mr::device_accessible,
+                                            cuda::mr::host_accessible>);
 }  // namespace rmm::mr
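What the new static_assert buys in practice: any API constrained on the cuda::mr concepts will now accept pinned_host_memory_resource at compile time. The sketch below is illustrative only; stage_buffer is a hypothetical function, and the concept names are assumed to come from libcu++'s <cuda/memory_resource> as used elsewhere in this series.

// Illustrative sketch (not from the patch): using the asserted properties.
#include <rmm/mr/pinned_host_memory_resource.hpp>

#include <cuda/memory_resource>
#include <cuda/stream_ref>
#include <cuda_runtime_api.h>

#include <cstddef>

template <class Resource>
void* stage_buffer(Resource& mr, std::size_t bytes, cuda::stream_ref stream)
{
  // Compile-time guarantee mirroring the static_assert added above: the
  // resource must be stream-ordered and its memory device accessible.
  static_assert(cuda::mr::async_resource_with<Resource, cuda::mr::device_accessible>,
                "resource must provide device-accessible, stream-ordered allocation");
  return mr.allocate_async(bytes, stream);
}

void example()
{
  rmm::mr::pinned_host_memory_resource mr{};
  cuda::stream_ref stream{cudaStream_t{}};  // default stream, illustration only
  void* ptr = stage_buffer(mr, 256, stream);
  mr.deallocate_async(ptr, 256, stream);
}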
From 52fc2f1af9cea3df9f8077a84ccad22a055e8fa8 Mon Sep 17 00:00:00 2001
From: Mark Harris <783069+harrism@users.noreply.github.com>
Date: Thu, 18 Jan 2024 09:24:06 +0000
Subject: [PATCH 36/40] I don't know how those deprecated calls snuck back in.

---
 tests/mr/device/mr_ref_test.hpp | 4 +---
 tests/mr/device/mr_test.hpp     | 4 +---
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/tests/mr/device/mr_ref_test.hpp b/tests/mr/device/mr_ref_test.hpp
index 386c37a20..b1af26934 100644
--- a/tests/mr/device/mr_ref_test.hpp
+++ b/tests/mr/device/mr_ref_test.hpp
@@ -71,9 +71,7 @@ inline bool is_host_memory(void* ptr)
 
 inline bool is_properly_aligned(void* ptr)
 {
-  if (is_host_memory(ptr)) {
-    return rmm::detail::is_pointer_aligned(ptr, rmm::RMM_DEFAULT_HOST_ALIGNMENT);
-  }
+  if (is_host_memory(ptr)) { return rmm::is_pointer_aligned(ptr, rmm::RMM_DEFAULT_HOST_ALIGNMENT); }
   return rmm::is_pointer_aligned(ptr, rmm::CUDA_ALLOCATION_ALIGNMENT);
 }
 
diff --git a/tests/mr/device/mr_test.hpp b/tests/mr/device/mr_test.hpp
index 38acac9c8..3c621df56 100644
--- a/tests/mr/device/mr_test.hpp
+++ b/tests/mr/device/mr_test.hpp
@@ -67,9 +67,7 @@ inline bool is_host_memory(void* ptr)
 
 inline bool is_properly_aligned(void* ptr)
 {
-  if (is_host_memory(ptr)) {
-    return rmm::detail::is_pointer_aligned(ptr, rmm::RMM_DEFAULT_HOST_ALIGNMENT);
-  }
+  if (is_host_memory(ptr)) { return rmm::is_pointer_aligned(ptr, rmm::RMM_DEFAULT_HOST_ALIGNMENT); }
   return rmm::is_pointer_aligned(ptr, rmm::CUDA_ALLOCATION_ALIGNMENT);
 }
 
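The branch in is_properly_aligned exists because host and device allocations come with different guarantees: rmm promises only RMM_DEFAULT_HOST_ALIGNMENT for host memory (assumed here to be alignof(std::max_align_t)), versus the 256-byte CUDA_ALLOCATION_ALIGNMENT for device memory. A standalone illustration, with hypothetical constants mirroring those values:

// Illustrative sketch (not from the patch) of the two alignment expectations.
// The constants below are stand-ins: 256 matches CUDA_ALLOCATION_ALIGNMENT in
// rmm/aligned.hpp; the host default is assumed to be alignof(std::max_align_t).
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdlib>

constexpr std::size_t kCudaAllocationAlignment{256};
constexpr std::size_t kDefaultHostAlignment{alignof(std::max_align_t)};  // usually 16

static_assert(kCudaAllocationAlignment % kDefaultHostAlignment == 0,
              "device alignment is the stricter of the two");

bool is_aligned_to(void* ptr, std::size_t alignment)
{
  return reinterpret_cast<std::uintptr_t>(ptr) % alignment == 0;
}

int main()
{
  void* host_ptr = std::malloc(100);
  // malloc only promises max_align_t alignment; demanding 256 bytes would be
  // too strong a check for host memory, hence the special case in the tests.
  assert(is_aligned_to(host_ptr, kDefaultHostAlignment));
  std::free(host_ptr);
  return 0;
}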
From 6162699e8729e07654bdddb1cfcaeb629445a4ef Mon Sep 17 00:00:00 2001
From: Mark Harris <783069+harrism@users.noreply.github.com>
Date: Thu, 18 Jan 2024 11:00:47 +0000
Subject: [PATCH 37/40] Rename aligned_[de]allocate to aligned_host_[de]allocate and clarify docs

---
 include/rmm/detail/aligned.hpp              | 53 +++++++++----------
 include/rmm/mr/host/new_delete_resource.hpp |  4 +-
 .../rmm/mr/host/pinned_memory_resource.hpp  |  4 +-
 .../rmm/mr/pinned_host_memory_resource.hpp  |  4 +-
 4 files changed, 31 insertions(+), 34 deletions(-)

diff --git a/include/rmm/detail/aligned.hpp b/include/rmm/detail/aligned.hpp
index 2dc2819f1..eb31658e9 100644
--- a/include/rmm/detail/aligned.hpp
+++ b/include/rmm/detail/aligned.hpp
@@ -108,21 +108,19 @@ namespace rmm::detail {
 }
 
 /**
- * @brief Allocates sufficient memory to satisfy the requested size `bytes` with
+ * @brief Allocates sufficient host-accessible memory to satisfy the requested size `bytes` with
  * alignment `alignment` using the unary callable `alloc` to allocate memory.
  *
- * Given a pointer `p` to an allocation of size `n` returned from the unary
- * callable `alloc`, the pointer `q` returned from `aligned_alloc` points to a
- * location within the `n` bytes with sufficient space for `bytes` that
- * satisfies `alignment`.
+ * Given a pointer `p` to an allocation of size `n` returned from the unary callable `alloc`, the
+ * pointer `q` returned from `aligned_host_allocate` points to a location within the `n` bytes with
+ * sufficient space for `bytes` that satisfies `alignment`.
  *
- * In order to retrieve the original allocation pointer `p`, the offset
- * between `p` and `q` is stored at `q - sizeof(std::ptrdiff_t)`.
+ * In order to retrieve the original allocation pointer `p`, the offset between `p` and `q` is
+ * stored at `q - sizeof(std::ptrdiff_t)`.
 *
- * Allocations returned from `aligned_allocate` *MUST* be freed by calling
- * `aligned_deallocate` with the same arguments for `bytes` and `alignment` with
- * a compatible unary `dealloc` callable capable of freeing the memory returned
- * from `alloc`.
+ * Allocations returned from `aligned_host_allocate` *MUST* be freed by calling
+ * `aligned_host_deallocate` with the same arguments for `bytes` and `alignment` with a compatible
+ * unary `dealloc` callable capable of freeing the memory returned from `alloc`.
 *
 * If `alignment` is not a power of 2, behavior is undefined.
 * If `Alloc` does not allocate host-accessible memory, behavior is undefined.
@@ -130,13 +128,13 @@
 * @param bytes The desired size of the allocation
 * @param alignment Desired alignment of allocation
 * @param alloc Unary callable given a size `n` will allocate at least `n` bytes
- * of host memory.
- * @tparam Alloc a unary callable type that allocates memory.
+ * of host-accessible memory.
+ * @tparam Alloc a unary callable type that allocates host-accessible memory.
 * @return void* Pointer into allocation of at least `bytes` with desired
 * `alignment`.
 */
 template <typename Alloc>
-void* aligned_allocate(std::size_t bytes, std::size_t alignment, Alloc alloc)
+void* aligned_host_allocate(std::size_t bytes, std::size_t alignment, Alloc alloc)
 {
   assert(rmm::is_supported_alignment(alignment));
 
@@ -164,26 +162,25 @@ void* aligned_host_allocate(std::size_t bytes, std::size_t alignment, Alloc alloc)
 }
 
 /**
- * @brief Frees an allocation returned from `aligned_allocate`.
+ * @brief Frees an allocation of host-accessible memory returned from `aligned_host_allocate`.
 *
- * Allocations returned from `aligned_allocate` *MUST* be freed by calling
- * `aligned_deallocate` with the same arguments for `bytes` and `alignment`
- * with a compatible unary `dealloc` callable capable of freeing the memory
- * returned from `alloc`.
+ * Allocations returned from `aligned_host_allocate` *MUST* be freed by calling
+ * `aligned_host_deallocate` with the same arguments for `bytes` and `alignment` with a compatible
+ * unary `dealloc` callable capable of freeing the memory returned from `alloc`.
 *
 * @param p The aligned pointer to deallocate
- * @param bytes The number of bytes requested from `aligned_allocate`
- * @param alignment The alignment required from `aligned_allocate`
- * @param dealloc A unary callable capable of freeing memory returned from
- * `alloc` in `aligned_allocate`.
- * @tparam Dealloc A unary callable type that deallocates memory.
+ * @param bytes The number of bytes requested from `aligned_host_allocate`
+ * @param alignment The alignment required from `aligned_host_allocate`
+ * @param dealloc A unary callable capable of freeing host-accessible memory returned from `alloc`
+ * in `aligned_host_allocate`.
+ * @tparam Dealloc A unary callable type that deallocates host-accessible memory.
 */
 template <typename Dealloc>
 // NOLINTNEXTLINE(bugprone-easily-swappable-parameters)
-void aligned_deallocate(void* ptr,
-                        [[maybe_unused]] std::size_t bytes,
-                        [[maybe_unused]] std::size_t alignment,
-                        Dealloc dealloc) noexcept
+void aligned_host_deallocate(void* ptr,
+                             [[maybe_unused]] std::size_t bytes,
+                             [[maybe_unused]] std::size_t alignment,
+                             Dealloc dealloc) noexcept
 {
   assert(rmm::is_supported_alignment(alignment));
 
diff --git a/include/rmm/mr/host/new_delete_resource.hpp b/include/rmm/mr/host/new_delete_resource.hpp
index 4bb272df3..ccb294d21 100644
--- a/include/rmm/mr/host/new_delete_resource.hpp
+++ b/include/rmm/mr/host/new_delete_resource.hpp
@@ -65,7 +65,7 @@ class new_delete_resource final : public host_memory_resource {
     alignment = (rmm::is_supported_alignment(alignment)) ?
alignment : rmm::RMM_DEFAULT_HOST_ALIGNMENT; - return rmm::detail::aligned_allocate( + return rmm::detail::aligned_host_allocate( bytes, alignment, [](std::size_t size) { return ::operator new(size); }); } @@ -86,7 +86,7 @@ class new_delete_resource final : public host_memory_resource { std::size_t bytes, std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) override { - rmm::detail::aligned_deallocate( + rmm::detail::aligned_host_deallocate( ptr, bytes, alignment, [](void* ptr) { ::operator delete(ptr); }); } }; diff --git a/include/rmm/mr/host/pinned_memory_resource.hpp b/include/rmm/mr/host/pinned_memory_resource.hpp index b5c273ef5..cb8524999 100644 --- a/include/rmm/mr/host/pinned_memory_resource.hpp +++ b/include/rmm/mr/host/pinned_memory_resource.hpp @@ -147,7 +147,7 @@ class pinned_memory_resource final : public host_memory_resource { alignment = (rmm::is_supported_alignment(alignment)) ? alignment : rmm::RMM_DEFAULT_HOST_ALIGNMENT; - return rmm::detail::aligned_allocate(bytes, alignment, [](std::size_t size) { + return rmm::detail::aligned_host_allocate(bytes, alignment, [](std::size_t size) { void* ptr{nullptr}; auto status = cudaMallocHost(&ptr, size); if (cudaSuccess != status) { throw std::bad_alloc{}; } @@ -173,7 +173,7 @@ class pinned_memory_resource final : public host_memory_resource { std::size_t alignment = alignof(std::max_align_t)) override { if (nullptr == ptr) { return; } - rmm::detail::aligned_deallocate( + rmm::detail::aligned_host_deallocate( ptr, bytes, alignment, [](void* ptr) { RMM_ASSERT_CUDA_SUCCESS(cudaFreeHost(ptr)); }); } }; diff --git a/include/rmm/mr/pinned_host_memory_resource.hpp b/include/rmm/mr/pinned_host_memory_resource.hpp index 0fea14b9d..e679352e8 100644 --- a/include/rmm/mr/pinned_host_memory_resource.hpp +++ b/include/rmm/mr/pinned_host_memory_resource.hpp @@ -61,7 +61,7 @@ class pinned_host_memory_resource { // don't allocate anything if the user requested zero bytes if (0 == bytes) { return nullptr; } - return rmm::detail::aligned_allocate(bytes, alignment, [](std::size_t size) { + return rmm::detail::aligned_host_allocate(bytes, alignment, [](std::size_t size) { void* ptr{nullptr}; RMM_CUDA_TRY_ALLOC(cudaHostAlloc(&ptr, size, cudaHostAllocDefault)); return ptr; @@ -81,7 +81,7 @@ class pinned_host_memory_resource { std::size_t bytes, std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) noexcept { - rmm::detail::aligned_deallocate( + rmm::detail::aligned_host_deallocate( ptr, bytes, alignment, [](void* ptr) { RMM_ASSERT_CUDA_SUCCESS(cudaFreeHost(ptr)); }); } From fa140ae0783068dc6a09d5f15f3d9fd264aee582 Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Thu, 18 Jan 2024 11:01:02 +0000 Subject: [PATCH 38/40] Fix docs per feedback --- include/rmm/aligned.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/rmm/aligned.hpp b/include/rmm/aligned.hpp index d6c230c98..6e9970ab8 100644 --- a/include/rmm/aligned.hpp +++ b/include/rmm/aligned.hpp @@ -45,7 +45,7 @@ static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256}; * * @param[in] value value to check. * - * @return True if the input a power of two with non-negative exponent, false otherwise. + * @return True if the input is a power of two with non-negative integer exponent, false otherwise. 
*/ [[nodiscard]] constexpr bool is_pow2(std::size_t value) noexcept { From aafa18afe987741746388d78b79742c90fcc5461 Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Thu, 18 Jan 2024 21:17:03 +0000 Subject: [PATCH 39/40] Factor out mr test utilities. --- tests/mr/device/mr_ref_test.hpp | 28 +----------------- tests/mr/device/mr_test.hpp | 28 +----------------- tests/mr/device/test_utils.hpp | 50 +++++++++++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 54 deletions(-) create mode 100644 tests/mr/device/test_utils.hpp diff --git a/tests/mr/device/mr_ref_test.hpp b/tests/mr/device/mr_ref_test.hpp index b1af26934..9826c10be 100644 --- a/tests/mr/device/mr_ref_test.hpp +++ b/tests/mr/device/mr_ref_test.hpp @@ -17,6 +17,7 @@ #pragma once #include "../../byte_literals.hpp" +#include "test_utils.hpp" #include #include @@ -35,8 +36,6 @@ #include -#include - #include #include @@ -50,31 +49,6 @@ using async_resource_ref = cuda::mr::async_resource_ref #include @@ -36,8 +37,6 @@ #include -#include - #include #include #include @@ -46,31 +45,6 @@ namespace rmm::test { -/** - * @brief Returns if a pointer points to a device memory or managed memory - * allocation. - */ -inline bool is_device_accessible_memory(void* ptr) -{ - cudaPointerAttributes attributes{}; - if (cudaSuccess != cudaPointerGetAttributes(&attributes, ptr)) { return false; } - return (attributes.type == cudaMemoryTypeDevice) or (attributes.type == cudaMemoryTypeManaged) or - ((attributes.type == cudaMemoryTypeHost) and (attributes.devicePointer != nullptr)); -} - -inline bool is_host_memory(void* ptr) -{ - cudaPointerAttributes attributes{}; - if (cudaSuccess != cudaPointerGetAttributes(&attributes, ptr)) { return false; } - return attributes.type == cudaMemoryTypeHost; -} - -inline bool is_properly_aligned(void* ptr) -{ - if (is_host_memory(ptr)) { return rmm::is_pointer_aligned(ptr, rmm::RMM_DEFAULT_HOST_ALIGNMENT); } - return rmm::is_pointer_aligned(ptr, rmm::CUDA_ALLOCATION_ALIGNMENT); -} - enum size_in_bytes : size_t {}; constexpr auto default_num_allocations{100}; diff --git a/tests/mr/device/test_utils.hpp b/tests/mr/device/test_utils.hpp new file mode 100644 index 000000000..932a72a7e --- /dev/null +++ b/tests/mr/device/test_utils.hpp @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include + +namespace rmm::test { + +/** + * @brief Returns if a pointer points to a device memory or managed memory + * allocation. 
+ */ +inline bool is_device_accessible_memory(void* ptr) +{ + cudaPointerAttributes attributes{}; + if (cudaSuccess != cudaPointerGetAttributes(&attributes, ptr)) { return false; } + return (attributes.type == cudaMemoryTypeDevice) or (attributes.type == cudaMemoryTypeManaged) or + ((attributes.type == cudaMemoryTypeHost) and (attributes.devicePointer != nullptr)); +} + +inline bool is_host_memory(void* ptr) +{ + cudaPointerAttributes attributes{}; + if (cudaSuccess != cudaPointerGetAttributes(&attributes, ptr)) { return false; } + return attributes.type == cudaMemoryTypeHost; +} + +inline bool is_properly_aligned(void* ptr) +{ + if (is_host_memory(ptr)) { return rmm::is_pointer_aligned(ptr, rmm::RMM_DEFAULT_HOST_ALIGNMENT); } + return rmm::is_pointer_aligned(ptr, rmm::CUDA_ALLOCATION_ALIGNMENT); +} + +} // namespace rmm::test From 92c8e236c5cb4f27d5e331acfa43e2bc17d438b1 Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Thu, 18 Jan 2024 21:17:19 +0000 Subject: [PATCH 40/40] Fix docstring for operator== --- include/rmm/mr/pinned_host_memory_resource.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/rmm/mr/pinned_host_memory_resource.hpp b/include/rmm/mr/pinned_host_memory_resource.hpp index e679352e8..c51af4182 100644 --- a/include/rmm/mr/pinned_host_memory_resource.hpp +++ b/include/rmm/mr/pinned_host_memory_resource.hpp @@ -167,8 +167,7 @@ class pinned_host_memory_resource { // NOLINTEND(bugprone-easily-swappable-parameters) /** - * @briefreturn{true if the specified resource is the same type as this resource, otherwise - * false.} + * @briefreturn{true if the specified resource is the same type as this resource.} */ bool operator==(const pinned_host_memory_resource&) const { return true; }
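
Taken together, the series leaves pinned_host_memory_resource with a complete allocate/deallocate surface, real alignment handling, and compile-time concept checks. A closing usage sketch (illustrative only; not a test from the series):

// Illustrative usage of the finished resource. It is stateless, so any two
// instances compare equal -- which is what the operator== fixed above documents.
#include <rmm/mr/pinned_host_memory_resource.hpp>

#include <cassert>
#include <cstring>

int main()
{
  rmm::mr::pinned_host_memory_resource mr_a{};
  rmm::mr::pinned_host_memory_resource mr_b{};
  assert(mr_a == mr_b);  // stateless resources of the same type are interchangeable

  // Pinned memory is host accessible, so the CPU can write to it directly.
  void* ptr = mr_a.allocate(1U << 20);  // 1 MiB, default host alignment
  std::memset(ptr, 0, 1U << 20);

  mr_b.deallocate(ptr, 1U << 20);  // equal resources may free each other's allocations
  return 0;
}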