From debe302290c8520265167f952a9978a2c3672eda Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 25 Aug 2021 13:26:14 +1000 Subject: [PATCH 01/72] Clean up: use std::size_t, include cstddef and aligned.hpp where missing --- include/rmm/cuda_stream_pool.hpp | 5 +-- include/rmm/detail/stack_trace.hpp | 3 +- include/rmm/device_buffer.hpp | 2 ++ include/rmm/device_uvector.hpp | 1 + include/rmm/exec_policy.hpp | 2 +- .../mr/device/aligned_resource_adaptor.hpp | 12 ++++--- .../rmm/mr/device/arena_memory_resource.hpp | 3 +- .../mr/device/cuda_async_memory_resource.hpp | 8 +++-- .../rmm/mr/device/cuda_memory_resource.hpp | 8 +++-- include/rmm/mr/device/detail/arena.hpp | 9 ++--- .../mr/device/detail/coalescing_free_list.hpp | 20 ++++++----- .../mr/device/detail/fixed_size_free_list.hpp | 5 +-- include/rmm/mr/device/detail/free_list.hpp | 4 +-- .../detail/stream_ordered_memory_resource.hpp | 33 ++++++++++--------- .../mr/device/fixed_size_memory_resource.hpp | 11 ++++--- .../mr/device/limiting_resource_adaptor.hpp | 8 +++-- .../mr/device/logging_resource_adaptor.hpp | 16 +++------ .../rmm/mr/device/managed_memory_resource.hpp | 8 +++-- include/rmm/mr/device/owning_wrapper.hpp | 4 +-- .../rmm/mr/device/polymorphic_allocator.hpp | 9 ++--- .../rmm/mr/device/pool_memory_resource.hpp | 25 +++++++------- .../mr/device/statistics_resource_adaptor.hpp | 6 ++-- .../device/thread_safe_resource_adaptor.hpp | 5 +-- .../mr/device/thrust_allocator_adaptor.hpp | 8 ++--- .../mr/device/tracking_resource_adaptor.hpp | 8 +++-- include/rmm/mr/host/new_delete_resource.hpp | 4 +-- .../rmm/mr/host/pinned_memory_resource.hpp | 3 +- 27 files changed, 130 insertions(+), 100 deletions(-) diff --git a/include/rmm/cuda_stream_pool.hpp b/include/rmm/cuda_stream_pool.hpp index 2e77f2047..27decc9fd 100644 --- a/include/rmm/cuda_stream_pool.hpp +++ b/include/rmm/cuda_stream_pool.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ #include #include +#include #include namespace rmm { @@ -84,7 +85,7 @@ class cuda_stream_pool { * * @return the number of streams in the pool */ - size_t get_pool_size() const noexcept { return streams_.size(); } + std::size_t get_pool_size() const noexcept { return streams_.size(); } private: std::vector streams_; diff --git a/include/rmm/detail/stack_trace.hpp b/include/rmm/detail/stack_trace.hpp index 358e95080..1e218fa53 100644 --- a/include/rmm/detail/stack_trace.hpp +++ b/include/rmm/detail/stack_trace.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -28,6 +28,7 @@ #include #include #include +#include #include #include #endif diff --git a/include/rmm/device_buffer.hpp b/include/rmm/device_buffer.hpp index ad8655180..f034b28fe 100644 --- a/include/rmm/device_buffer.hpp +++ b/include/rmm/device_buffer.hpp @@ -21,7 +21,9 @@ #include #include + #include +#include #include #include diff --git a/include/rmm/device_uvector.hpp b/include/rmm/device_uvector.hpp index 537801658..ca4cf6d30 100644 --- a/include/rmm/device_uvector.hpp +++ b/include/rmm/device_uvector.hpp @@ -23,6 +23,7 @@ #include #include +#include #include namespace rmm { diff --git a/include/rmm/exec_policy.hpp b/include/rmm/exec_policy.hpp index d984f0b47..98cd91cd4 100644 --- a/include/rmm/exec_policy.hpp +++ b/include/rmm/exec_policy.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/include/rmm/mr/device/aligned_resource_adaptor.hpp b/include/rmm/mr/device/aligned_resource_adaptor.hpp index adb668395..4e29b90b3 100644 --- a/include/rmm/mr/device/aligned_resource_adaptor.hpp +++ b/include/rmm/mr/device/aligned_resource_adaptor.hpp @@ -15,15 +15,16 @@ */ #pragma once -#include -#include -#include - #include #include #include #include +#include +#include +#include +#include + namespace rmm::mr { /** * @brief Resource that adapts `Upstream` memory resource to allocate memory in a specified @@ -195,7 +196,8 @@ class aligned_resource_adaptor final : public device_memory_resource { * @param stream Stream on which to get the mem info. * @return std::pair containing free_size and total_size of memory */ - [[nodiscard]] std::pair do_get_mem_info(cuda_stream_view stream) const override + [[nodiscard]] std::pair do_get_mem_info( + cuda_stream_view stream) const override { return upstream_->get_mem_info(stream); } diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp index 84bb9b129..28376142c 100644 --- a/include/rmm/mr/device/arena_memory_resource.hpp +++ b/include/rmm/mr/device/arena_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -21,6 +21,7 @@ #include +#include #include #include diff --git a/include/rmm/mr/device/cuda_async_memory_resource.hpp b/include/rmm/mr/device/cuda_async_memory_resource.hpp index cb7bd004f..9dab79893 100644 --- a/include/rmm/mr/device/cuda_async_memory_resource.hpp +++ b/include/rmm/mr/device/cuda_async_memory_resource.hpp @@ -15,16 +15,18 @@ */ #pragma once -#include #include #include #include #include #include +#include + #include -#include +#include +#include #if CUDART_VERSION >= 11020 // 11.2 introduced cudaMallocAsync #define RMM_CUDA_MALLOC_ASYNC_SUPPORT @@ -194,7 +196,7 @@ class cuda_async_memory_resource final : public device_memory_resource { * * @return std::pair contaiing free_size and total_size of memory */ - std::pair do_get_mem_info(rmm::cuda_stream_view) const override + std::pair do_get_mem_info(rmm::cuda_stream_view) const override { return std::make_pair(0, 0); } diff --git a/include/rmm/mr/device/cuda_memory_resource.hpp b/include/rmm/mr/device/cuda_memory_resource.hpp index f3ebf7e93..d419ce335 100644 --- a/include/rmm/mr/device/cuda_memory_resource.hpp +++ b/include/rmm/mr/device/cuda_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,11 +15,13 @@ */ #pragma once -#include "device_memory_resource.hpp" +#include #include #include +#include + namespace rmm { namespace mr { /** @@ -108,7 +110,7 @@ class cuda_memory_resource final : public device_memory_resource { * * @return std::pair contaiing free_size and total_size of memory */ - std::pair do_get_mem_info(cuda_stream_view) const override + std::pair do_get_mem_info(cuda_stream_view) const override { std::size_t free_size; std::size_t total_size; diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index 3d29b361e..7a449949c 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,6 +23,7 @@ #include #include +#include #include #include #include @@ -55,7 +56,7 @@ class block { * @param pointer The address for the beginning of the block. * @param size The size of the block. */ - block(char* pointer, size_t size) : pointer_(pointer), size_(size) {} + block(char* pointer, std::size_t size) : pointer_(pointer), size_(size) {} /** * @brief Construct a block given a void pointer and size. @@ -63,13 +64,13 @@ class block { * @param pointer The address for the beginning of the block. * @param size The size of the block. */ - block(void* pointer, size_t size) : pointer_(static_cast(pointer)), size_(size) {} + block(void* pointer, std::size_t size) : pointer_(static_cast(pointer)), size_(size) {} /// Returns the underlying pointer. void* pointer() const { return pointer_; } /// Returns the size of the block. - size_t size() const { return size_; } + std::size_t size() const { return size_; } /// Returns true if this block is valid (non-null), false otherwise. 
bool is_valid() const { return pointer_ != nullptr; } diff --git a/include/rmm/mr/device/detail/coalescing_free_list.hpp b/include/rmm/mr/device/detail/coalescing_free_list.hpp index 6050f4f6f..6d7aa9612 100644 --- a/include/rmm/mr/device/detail/coalescing_free_list.hpp +++ b/include/rmm/mr/device/detail/coalescing_free_list.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ #include #include +#include #include #include @@ -35,7 +36,10 @@ namespace detail { */ struct block : public block_base { block() = default; - block(char* ptr, size_t size, bool is_head) : block_base{ptr}, size_bytes{size}, head{is_head} {} + block(char* ptr, std::size_t size, bool is_head) + : block_base{ptr}, size_bytes{size}, head{is_head} + { + } /** * @brief Returns the pointer to the memory represented by this block. @@ -49,7 +53,7 @@ struct block : public block_base { * * @return the size in bytes of the memory represented by this block. */ - inline size_t size() const { return size_bytes; } + inline std::size_t size() const { return size_bytes; } /** * @brief Returns whether this block is the start of an allocation from an upstream allocator. @@ -104,7 +108,7 @@ struct block : public block_base { * @param sz The size in bytes to check for fit. * @return true if this block is at least `sz` bytes */ - inline bool fits(size_t sz) const noexcept { return size() >= sz; } + inline bool fits(std::size_t sz) const noexcept { return size() >= sz; } /** * @brief Is this block a better fit for `sz` bytes than block `b`? @@ -114,7 +118,7 @@ struct block : public block_base { * @return true If this block is a tighter fit for `sz` bytes than block `b`. * @return false If this block does not fit `sz` bytes or `b` is a tighter fit. */ - inline bool is_better_fit(size_t sz, block const& b) const noexcept + inline bool is_better_fit(std::size_t sz, block const& b) const noexcept { return fits(sz) && (size() < b.size() || b.size() < sz); } @@ -128,8 +132,8 @@ struct block : public block_base { } private: - size_t size_bytes{}; ///< Size in bytes - bool head{}; ///< Indicates whether ptr was allocated from the heap + std::size_t size_bytes{}; ///< Size in bytes + bool head{}; ///< Indicates whether ptr was allocated from the heap }; /// Print block on an ostream @@ -222,7 +226,7 @@ struct coalescing_free_list : free_list { * @param size The size in bytes of the desired block. * @return block A block large enough to store `size` bytes. */ - block_type get_block(size_t size) + block_type get_block(std::size_t size) { // find best fit block auto const iter = diff --git a/include/rmm/mr/device/detail/fixed_size_free_list.hpp b/include/rmm/mr/device/detail/fixed_size_free_list.hpp index 02a316c7d..f74ab1135 100644 --- a/include/rmm/mr/device/detail/fixed_size_free_list.hpp +++ b/include/rmm/mr/device/detail/fixed_size_free_list.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ #include +#include #include namespace rmm { @@ -63,7 +64,7 @@ struct fixed_size_free_list : free_list { * @param size The size in bytes of the desired block (unused). * @return block A block large enough to store `size` bytes. 
*/ - block_type get_block(size_t size) + block_type get_block(std::size_t size) { if (is_empty()) return block_type{}; diff --git a/include/rmm/mr/device/detail/free_list.hpp b/include/rmm/mr/device/detail/free_list.hpp index 45dbf8ad7..18b5ad493 100644 --- a/include/rmm/mr/device/detail/free_list.hpp +++ b/include/rmm/mr/device/detail/free_list.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -50,7 +50,7 @@ inline std::ostream& operator<<(std::ostream& out, const block_base& b) * * - `void insert(block_type const& b) // insert a block into the free list` * - `void insert(free_list&& other) // insert / merge another free list` - * - `block_type get_block(size_t size) // get a block of at least size bytes + * - `block_type get_block(std::size_t size) // get a block of at least size bytes * - `void print() // print the block` * * @tparam list_type the type of the internal list data structure. diff --git a/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp b/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp index b6123e772..bdef45546 100644 --- a/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp +++ b/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,14 +15,16 @@ */ #pragma once -#include +#include #include #include #include #include +#include #include +#include #include #include #include @@ -66,10 +68,10 @@ struct crtp { * Classes derived from stream_ordered_memory_resource must implement the following four methods, * documented separately: * - * 1. `size_t get_maximum_allocation_size() const` - * 2. `block_type expand_pool(size_t size, free_list& blocks, cuda_stream_view stream)` - * 3. `split_block allocate_from_block(block_type const& b, size_t size)` - * 4. `block_type free_block(void* p, size_t size) noexcept` + * 1. `std::size_t get_maximum_allocation_size() const` + * 2. `block_type expand_pool(std::size_t size, free_list& blocks, cuda_stream_view stream)` + * 3. `split_block allocate_from_block(block_type const& b, std::size_t size)` + * 4. `block_type free_block(void* p, std::size_t size) noexcept` */ template class stream_ordered_memory_resource : public crtp, public device_memory_resource { @@ -93,12 +95,13 @@ class stream_ordered_memory_resource : public crtp, public device_ * @brief Get the maximum size of a single allocation supported by this suballocator memory * resource * - * Default implementation is the maximum `size_t` value, but fixed-size allocators will have a - * lower limit. Override this function in derived classes as necessary. + * Default implementation is the maximum `std::size_t` value, but fixed-size allocators will have + * a lower limit. Override this function in derived classes as necessary. 
* - * @return size_t The maximum size of a single allocation supported by this memory resource + * @return std::size_t The maximum size of a single allocation supported by this memory resource */ - // size_t get_maximum_allocation_size() const { return std::numeric_limits::max(); } + // std::size_t get_maximum_allocation_size() const { return + // std::numeric_limits::max(); } /** * @brief Allocate space (typically from upstream) to supply the suballocation pool and return @@ -114,7 +117,7 @@ class stream_ordered_memory_resource : public crtp, public device_ * @param stream The stream on which the memory is to be used. * @return block_type a block of at least `size` bytes */ - // block_type expand_pool(size_t size, free_list& blocks, cuda_stream_view stream) + // block_type expand_pool(std::size_t size, free_list& blocks, cuda_stream_view stream) /// Struct representing a block that has been split for allocation struct split_block { @@ -134,7 +137,7 @@ class stream_ordered_memory_resource : public crtp, public device_ * @return A `split_block` comprising the allocated pointer and any unallocated remainder of the * input block. */ - // split_block allocate_from_block(block_type const& b, size_t size) + // split_block allocate_from_block(block_type const& b, std::size_t size) /** * @brief Finds, frees and returns the block associated with pointer `p`. @@ -144,7 +147,7 @@ class stream_ordered_memory_resource : public crtp, public device_ * @return The (now freed) block associated with `p`. The caller is expected to return the block * to the pool. */ - // block_type free_block(void* p, size_t size) noexcept + // block_type free_block(void* p, std::size_t size) noexcept /** * @brief Returns the block `b` (last used on stream `stream_event`) to the pool. @@ -307,7 +310,7 @@ class stream_ordered_memory_resource : public crtp, public device_ * @param stream_event The stream and associated event on which the allocation will be used. * @return block_type A block of memory of at least `size` bytes */ - block_type get_block(size_t size, stream_event_pair stream_event) + block_type get_block(std::size_t size, stream_event_pair stream_event) { // Try to find a satisfactory block in free list for the same stream (no sync required) auto iter = stream_free_blocks_.find(stream_event); @@ -352,7 +355,7 @@ class stream_ordered_memory_resource : public crtp, public device_ * @return A block with non-null pointer and size >= `size`, or a nullptr block if none is * available in `blocks`. 
*/ - block_type get_block_from_other_stream(size_t size, + block_type get_block_from_other_stream(std::size_t size, stream_event_pair stream_event, free_list& blocks, bool merge_first) diff --git a/include/rmm/mr/device/fixed_size_memory_resource.hpp b/include/rmm/mr/device/fixed_size_memory_resource.hpp index fe5f9707b..b744910a0 100644 --- a/include/rmm/mr/device/fixed_size_memory_resource.hpp +++ b/include/rmm/mr/device/fixed_size_memory_resource.hpp @@ -16,6 +16,7 @@ #pragma once #include +#include #include #include #include @@ -129,10 +130,10 @@ class fixed_size_memory_resource /** * @brief Get the (fixed) size of allocations supported by this memory resource * - * @return size_t The (fixed) maximum size of a single allocation supported by this memory + * @return std::size_t The (fixed) maximum size of a single allocation supported by this memory * resource */ - size_t get_maximum_allocation_size() const { return get_block_size(); } + std::size_t get_maximum_allocation_size() const { return get_block_size(); } /** * @brief Allocate a block from upstream to supply the suballocation pool. @@ -144,7 +145,7 @@ class fixed_size_memory_resource * @param stream The stream on which the memory is to be used. * @return block_type The allocated block */ - block_type expand_pool(size_t size, free_list& blocks, cuda_stream_view stream) + block_type expand_pool(std::size_t size, free_list& blocks, cuda_stream_view stream) { blocks.insert(std::move(blocks_from_upstream(stream))); return blocks.get_block(size); @@ -181,7 +182,7 @@ class fixed_size_memory_resource * @return A pair comprising the allocated pointer and any unallocated remainder of the input * block. */ - split_block allocate_from_block(block_type const& b, size_t size) + split_block allocate_from_block(block_type const& b, std::size_t size) { return split_block{b.pointer(), block_type{nullptr}}; } @@ -195,7 +196,7 @@ class fixed_size_memory_resource * @return The (now freed) block associated with `p`. The caller is expected to return the block * to the pool. */ - block_type free_block(void* p, size_t size) noexcept + block_type free_block(void* p, std::size_t size) noexcept { // Deallocating a fixed-size block just inserts it in the free list, which is // handled by the parent class diff --git a/include/rmm/mr/device/limiting_resource_adaptor.hpp b/include/rmm/mr/device/limiting_resource_adaptor.hpp index d2ce01a1a..5002962d5 100644 --- a/include/rmm/mr/device/limiting_resource_adaptor.hpp +++ b/include/rmm/mr/device/limiting_resource_adaptor.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,8 @@ #include #include +#include + namespace rmm { namespace mr { /** @@ -185,7 +187,7 @@ class limiting_resource_adaptor final : public device_memory_resource { * @param stream Stream on which to get the mem info. 
* @return std::pair contaiing free_size and total_size of memory */ - std::pair do_get_mem_info(cuda_stream_view stream) const override + std::pair do_get_mem_info(cuda_stream_view stream) const override { return {allocation_limit_ - allocated_bytes_, allocation_limit_}; } @@ -213,7 +215,7 @@ class limiting_resource_adaptor final : public device_memory_resource { */ template limiting_resource_adaptor make_limiting_adaptor(Upstream* upstream, - size_t allocation_limit) + std::size_t allocation_limit) { return limiting_resource_adaptor{upstream, allocation_limit}; } diff --git a/include/rmm/mr/device/logging_resource_adaptor.hpp b/include/rmm/mr/device/logging_resource_adaptor.hpp index bd5c38685..1148afa88 100644 --- a/include/rmm/mr/device/logging_resource_adaptor.hpp +++ b/include/rmm/mr/device/logging_resource_adaptor.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,22 +15,16 @@ */ #pragma once -#include -#include - #include #include +#include -// If using GCC, temporary workaround for older libcudacxx defining _LIBCPP_VERSION -// undefine it before including spdlog, due to fmtlib checking if it is defined -// TODO: remove once libcudacxx is on Github and RAPIDS depends on it -#ifdef __GNUG__ -#undef _LIBCPP_VERSION -#endif +#include #include #include #include +#include #include #include @@ -274,7 +268,7 @@ class logging_resource_adaptor final : public device_memory_resource { * @param stream Stream on which to get the mem info. * @return std::pair contaiing free_size and total_size of memory */ - std::pair do_get_mem_info(cuda_stream_view stream) const override + std::pair do_get_mem_info(cuda_stream_view stream) const override { return upstream_->get_mem_info(stream); } diff --git a/include/rmm/mr/device/managed_memory_resource.hpp b/include/rmm/mr/device/managed_memory_resource.hpp index d0ec75de8..ebce40bf5 100644 --- a/include/rmm/mr/device/managed_memory_resource.hpp +++ b/include/rmm/mr/device/managed_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,11 +15,13 @@ */ #pragma once -#include "device_memory_resource.hpp" +#include #include #include +#include + namespace rmm { namespace mr { /** @@ -113,7 +115,7 @@ class managed_memory_resource final : public device_memory_resource { * @param stream to execute on * @return std::pair contaiing free_size and total_size of memory */ - std::pair do_get_mem_info(cuda_stream_view stream) const override + std::pair do_get_mem_info(cuda_stream_view stream) const override { std::size_t free_size{}; std::size_t total_size{}; diff --git a/include/rmm/mr/device/owning_wrapper.hpp b/include/rmm/mr/device/owning_wrapper.hpp index cee32e0fe..6abe950b0 100644 --- a/include/rmm/mr/device/owning_wrapper.hpp +++ b/include/rmm/mr/device/owning_wrapper.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -15,7 +15,7 @@ */ #pragma once -#include "device_memory_resource.hpp" +#include #include #include diff --git a/include/rmm/mr/device/polymorphic_allocator.hpp b/include/rmm/mr/device/polymorphic_allocator.hpp index a43b3bd36..4f97cf568 100644 --- a/include/rmm/mr/device/polymorphic_allocator.hpp +++ b/include/rmm/mr/device/polymorphic_allocator.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,13 +16,14 @@ #pragma once -#include -#include - #include #include #include +#include +#include +#include + namespace rmm { namespace mr { diff --git a/include/rmm/mr/device/pool_memory_resource.hpp b/include/rmm/mr/device/pool_memory_resource.hpp index 8b09e5208..cfdebf786 100644 --- a/include/rmm/mr/device/pool_memory_resource.hpp +++ b/include/rmm/mr/device/pool_memory_resource.hpp @@ -31,7 +31,7 @@ #include #include -#include +#include #include #include #include @@ -141,11 +141,14 @@ class pool_memory_resource final * @brief Get the maximum size of allocations supported by this memory resource * * Note this does not depend on the memory size of the device. It simply returns the maximum - * value of `size_t` + * value of `std::size_t` * - * @return size_t The maximum size of a single allocation supported by this memory resource + * @return std::size_t The maximum size of a single allocation supported by this memory resource */ - size_t get_maximum_allocation_size() const { return std::numeric_limits::max(); } + std::size_t get_maximum_allocation_size() const + { + return std::numeric_limits::max(); + } /** * @brief Try to expand the pool by allocating a block of at least `min_size` bytes from @@ -247,7 +250,7 @@ class pool_memory_resource final * Returns 0 if the requested size cannot be satisfied. * * @param size The size of the minimum allocation immediately needed - * @return size_t The computed size to grow the pool. + * @return std::size_t The computed size to grow the pool. */ std::size_t size_to_grow(std::size_t size) const { @@ -268,7 +271,7 @@ class pool_memory_resource final * @param stream The stream on which the memory is to be used. * @return block_type The allocated block */ - thrust::optional block_from_upstream(size_t size, cuda_stream_view stream) + thrust::optional block_from_upstream(std::size_t size, cuda_stream_view stream) { RMM_LOG_DEBUG("[A][Stream {}][Upstream {}B]", fmt::ptr(stream.value()), size); @@ -294,7 +297,7 @@ class pool_memory_resource final * @return A pair comprising the allocated pointer and any unallocated remainder of the input * block. */ - split_block allocate_from_block(block_type const& b, size_t size) + split_block allocate_from_block(block_type const& b, std::size_t size) { block_type const alloc{b.pointer(), size, b.is_head()}; #ifdef RMM_POOL_TRACK_ALLOCATIONS @@ -315,7 +318,7 @@ class pool_memory_resource final * @return The (now freed) block associated with `p`. The caller is expected to return the block * to the pool. */ - block_type free_block(void* p, size_t size) noexcept + block_type free_block(void* p, std::size_t size) noexcept { #ifdef RMM_POOL_TRACK_ALLOCATIONS if (p == nullptr) return block_type{}; @@ -338,9 +341,9 @@ class pool_memory_resource final * * Includes allocated as well as free memory. * - * @return size_t The total size of the currently allocated pool. 
+ * @return std::size_t The total size of the currently allocated pool. */ - size_t pool_size() const noexcept { return current_pool_size_; } + std::size_t pool_size() const noexcept { return current_pool_size_; } /** * @brief Free all memory allocated from the upstream memory_resource. @@ -419,7 +422,7 @@ class pool_memory_resource final * @param stream to execute on * @return std::pair contaiing free_size and total_size of memory */ - std::pair do_get_mem_info(cuda_stream_view stream) const override + std::pair do_get_mem_info(cuda_stream_view stream) const override { std::size_t free_size{}; std::size_t total_size{}; diff --git a/include/rmm/mr/device/statistics_resource_adaptor.hpp b/include/rmm/mr/device/statistics_resource_adaptor.hpp index d71dc52d4..bcc0bf10b 100644 --- a/include/rmm/mr/device/statistics_resource_adaptor.hpp +++ b/include/rmm/mr/device/statistics_resource_adaptor.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,8 +15,10 @@ */ #pragma once -#include #include + +#include +#include #include namespace rmm { diff --git a/include/rmm/mr/device/thread_safe_resource_adaptor.hpp b/include/rmm/mr/device/thread_safe_resource_adaptor.hpp index b0b2f1273..2675a4df2 100644 --- a/include/rmm/mr/device/thread_safe_resource_adaptor.hpp +++ b/include/rmm/mr/device/thread_safe_resource_adaptor.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ #include #include +#include #include namespace rmm { @@ -142,7 +143,7 @@ class thread_safe_resource_adaptor final : public device_memory_resource { * @param stream Stream on which to get the mem info. * @return std::pair contaiing free_size and total_size of memory */ - std::pair do_get_mem_info(cuda_stream_view stream) const override + std::pair do_get_mem_info(cuda_stream_view stream) const override { lock_t lock(mtx); return upstream_->get_mem_info(stream); diff --git a/include/rmm/mr/device/thrust_allocator_adaptor.hpp b/include/rmm/mr/device/thrust_allocator_adaptor.hpp index e7acd00fd..d841304a6 100644 --- a/include/rmm/mr/device/thrust_allocator_adaptor.hpp +++ b/include/rmm/mr/device/thrust_allocator_adaptor.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,12 +16,12 @@ #pragma once -#include -#include - #include #include +#include +#include + namespace rmm { namespace mr { /** diff --git a/include/rmm/mr/device/tracking_resource_adaptor.hpp b/include/rmm/mr/device/tracking_resource_adaptor.hpp index 0e2ca9c6e..1a32a1c44 100644 --- a/include/rmm/mr/device/tracking_resource_adaptor.hpp +++ b/include/rmm/mr/device/tracking_resource_adaptor.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -15,11 +15,13 @@ */ #pragma once -#include -#include #include #include #include + +#include +#include +#include #include #include diff --git a/include/rmm/mr/host/new_delete_resource.hpp b/include/rmm/mr/host/new_delete_resource.hpp index aaf1d9c25..0f27cbf3c 100644 --- a/include/rmm/mr/host/new_delete_resource.hpp +++ b/include/rmm/mr/host/new_delete_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,7 +15,7 @@ */ #pragma once -#include "host_memory_resource.hpp" +#include #include diff --git a/include/rmm/mr/host/pinned_memory_resource.hpp b/include/rmm/mr/host/pinned_memory_resource.hpp index 5188644cd..d00a5cffe 100644 --- a/include/rmm/mr/host/pinned_memory_resource.hpp +++ b/include/rmm/mr/host/pinned_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,6 +15,7 @@ */ #pragma once +#include #include #include From 86e78590e673c3d6b440af43667d66bac82ea915 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 26 Aug 2021 08:53:02 +1000 Subject: [PATCH 02/72] Fix copyright. --- include/rmm/cuda_stream_pool.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/rmm/cuda_stream_pool.hpp b/include/rmm/cuda_stream_pool.hpp index 27decc9fd..c0ef1dbce 100644 --- a/include/rmm/cuda_stream_pool.hpp +++ b/include/rmm/cuda_stream_pool.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
From b9f7b42f844d0101531bd23428571c182f614d47 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 31 Aug 2021 09:18:44 +1000 Subject: [PATCH 03/72] More missed std::size_t --- .../multi_stream_allocations_bench.cu | 4 ++- .../random_allocations/random_allocations.cpp | 35 ++++++++++--------- tests/mr/device/mr_multithreaded_tests.cpp | 2 +- tests/mr/device/mr_test.hpp | 4 +-- 4 files changed, 24 insertions(+), 21 deletions(-) diff --git a/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu b/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu index 6c763fcb7..7d0a8a17a 100644 --- a/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu +++ b/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu @@ -31,6 +31,8 @@ #include +#include + __global__ void compute_bound_kernel(int64_t* out) { clock_t clock_begin = clock64(); @@ -56,7 +58,7 @@ static void run_prewarm(rmm::cuda_stream_pool& stream_pool, rmm::mr::device_memo } } -static void run_test(size_t num_kernels, +static void run_test(std::size_t num_kernels, rmm::cuda_stream_pool& stream_pool, rmm::mr::device_memory_resource* mr) { diff --git a/benchmarks/random_allocations/random_allocations.cpp b/benchmarks/random_allocations/random_allocations.cpp index 6d43b0677..a69b26b91 100644 --- a/benchmarks/random_allocations/random_allocations.cpp +++ b/benchmarks/random_allocations/random_allocations.cpp @@ -27,6 +27,7 @@ #include #include +#include #include #include #include @@ -61,8 +62,8 @@ allocation remove_at(allocation_vector& allocs, std::size_t index) template void random_allocation_free(rmm::mr::device_memory_resource& mr, SizeDistribution size_distribution, - size_t num_allocations, - size_t max_usage, // in MiB + std::size_t num_allocations, + std::size_t max_usage, // in MiB rmm::cuda_stream_view stream = {}) { std::default_random_engine generator; @@ -77,11 +78,11 @@ void random_allocation_free(rmm::mr::device_memory_resource& mr, std::size_t allocation_count{0}; allocation_vector allocations{}; - size_t allocation_size{0}; + std::size_t allocation_size{0}; for (std::size_t i = 0; i < num_allocations * 2; ++i) { bool do_alloc = true; - size_t size = static_cast(size_distribution(generator)); + auto size = static_cast(size_distribution(generator)); if (active_allocations > 0) { int chance = op_distribution(generator); @@ -113,7 +114,7 @@ void random_allocation_free(rmm::mr::device_memory_resource& mr, #endif } else { // dealloc, or alloc failed if (active_allocations > 0) { - size_t index = index_distribution(generator) % active_allocations; + std::size_t index = index_distribution(generator) % active_allocations; active_allocations--; allocation to_free = remove_at(allocations, index); mr.deallocate(to_free.p, to_free.size, stream); @@ -136,9 +137,9 @@ void random_allocation_free(rmm::mr::device_memory_resource& mr, } // namespace void uniform_random_allocations(rmm::mr::device_memory_resource& mr, - size_t num_allocations, - size_t max_allocation_size, // in MiB - size_t max_usage, + std::size_t num_allocations, + std::size_t max_allocation_size, // in MiB + std::size_t max_usage, rmm::cuda_stream_view stream = {}) { std::uniform_int_distribution size_distribution(1, max_allocation_size * size_mb); @@ -147,10 +148,10 @@ void uniform_random_allocations(rmm::mr::device_memory_resource& mr, // TODO figure out how to map a normal distribution to integers between 1 and max_allocation_size /*void normal_random_allocations(rmm::mr::device_memory_resource& mr, - size_t 
num_allocations = 1000, - size_t mean_allocation_size = 500, // in MiB - size_t stddev_allocation_size = 500, // in MiB - size_t max_usage = 8 << 20, + std::size_t num_allocations = 1000, + std::size_t mean_allocation_size = 500, // in MiB + std::size_t stddev_allocation_size = 500, // in MiB + std::size_t max_usage = 8 << 20, cuda_stream_view stream) { std::normal_distribution size_distribution(, max_allocation_size * size_mb); }*/ @@ -181,14 +182,14 @@ inline auto make_binning() using MRFactoryFunc = std::function()>; -constexpr size_t max_usage = 16000; +constexpr std::size_t max_usage = 16000; static void BM_RandomAllocations(benchmark::State& state, MRFactoryFunc factory) { auto mr = factory(); - size_t num_allocations = state.range(0); - size_t max_size = state.range(1); + std::size_t num_allocations = state.range(0); + std::size_t max_size = state.range(1); try { for (auto _ : state) @@ -252,8 +253,8 @@ void declare_benchmark(std::string name) } static void profile_random_allocations(MRFactoryFunc factory, - size_t num_allocations, - size_t max_size) + std::size_t num_allocations, + std::size_t max_size) { auto mr = factory(); diff --git a/tests/mr/device/mr_multithreaded_tests.cpp b/tests/mr/device/mr_multithreaded_tests.cpp index dbacaba71..233686f7e 100644 --- a/tests/mr/device/mr_multithreaded_tests.cpp +++ b/tests/mr/device/mr_multithreaded_tests.cpp @@ -175,7 +175,7 @@ void allocate_loop(rmm::mr::device_memory_resource* mr, std::uniform_int_distribution size_distribution(1, max_size); for (std::size_t i = 0; i < num_allocations; ++i) { - size_t size = size_distribution(generator); + std::size_t size = size_distribution(generator); void* ptr{}; EXPECT_NO_THROW(ptr = mr->allocate(size, stream)); { diff --git a/tests/mr/device/mr_test.hpp b/tests/mr/device/mr_test.hpp index 51a7e41ac..27da69fbe 100644 --- a/tests/mr/device/mr_test.hpp +++ b/tests/mr/device/mr_test.hpp @@ -179,7 +179,7 @@ inline void test_mixed_random_allocation_free(rmm::mr::device_memory_resource* m } if (do_alloc) { - size_t size = size_distribution(generator); + std::size_t size = size_distribution(generator); active_allocations++; allocation_count++; EXPECT_NO_THROW(allocations.emplace_back(mr->allocate(size, stream), size)); @@ -187,7 +187,7 @@ inline void test_mixed_random_allocation_free(rmm::mr::device_memory_resource* m EXPECT_NE(nullptr, new_allocation.p); EXPECT_TRUE(is_pointer_aligned(new_allocation.p)); } else { - size_t index = index_distribution(generator) % active_allocations; + std::size_t index = index_distribution(generator) % active_allocations; active_allocations--; allocation to_free = allocations[index]; allocations.erase(std::next(allocations.begin(), index)); From d7f1a32c6c41f2dcc1f1fd81eca74c0706a8ae18 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 31 Aug 2021 09:19:34 +1000 Subject: [PATCH 04/72] doc --- include/rmm/mr/device/device_memory_resource.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/rmm/mr/device/device_memory_resource.hpp b/include/rmm/mr/device/device_memory_resource.hpp index 4b5011d1d..d9817a933 100644 --- a/include/rmm/mr/device/device_memory_resource.hpp +++ b/include/rmm/mr/device/device_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -162,8 +162,8 @@ class device_memory_resource { * * @param stream the stream whose memory manager we want to retrieve * - * @returns a std::pair which contains free memory in bytes - * in .first and total amount of memory in .second + * @returns a pair containing the free memory in bytes in .first and total amount of memory in + * .second */ std::pair get_mem_info(cuda_stream_view stream) const { From 97f5571412f0de94552f2d880448fdd6828b6dde Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 31 Aug 2021 11:06:54 +1000 Subject: [PATCH 05/72] .clang-tidy and initial fixes --- .clang-tidy | 56 ++++++ .../random_allocations/random_allocations.cpp | 118 +++++++----- include/rmm/cuda_stream_view.hpp | 56 +++--- include/rmm/detail/aligned.hpp | 22 +-- include/rmm/detail/cuda_util.hpp | 9 +- include/rmm/detail/error.hpp | 11 +- include/rmm/detail/stack_trace.hpp | 31 ++-- include/rmm/device_uvector.hpp | 80 ++++---- include/rmm/logger.hpp | 6 +- include/rmm/mr/device/detail/arena.hpp | 172 +++++++++--------- 10 files changed, 318 insertions(+), 243 deletions(-) create mode 100644 .clang-tidy diff --git a/.clang-tidy b/.clang-tidy new file mode 100644 index 000000000..997fd9e6e --- /dev/null +++ b/.clang-tidy @@ -0,0 +1,56 @@ +--- +Checks: 'clang-diagnostic-*, + clang-analyzer-*, + cppcoreguidelines-*, + modernize-*, + bugprone-*, + performance-*, + readability-*, + llvm-*, + -modernize-use-trailing-return-type' +WarningsAsErrors: '' +HeaderFilterRegex: '' +AnalyzeTemporaryDtors: false +FormatStyle: none +CheckOptions: + - key: cert-dcl16-c.NewSuffixes + value: 'L;LL;LU;LLU' + - key: cert-oop54-cpp.WarnOnlyIfThisHasSuspiciousField + value: '0' + - key: cert-str34-c.DiagnoseSignedUnsignedCharComparisons + value: '0' + - key: cppcoreguidelines-explicit-virtual-functions.IgnoreDestructors + value: '1' + - key: cppcoreguidelines-non-private-member-variables-in-classes.IgnoreClassesWithAllMemberVariablesBeingPublic + value: '1' + - key: google-readability-braces-around-statements.ShortStatementLines + value: '1' + - key: google-readability-function-size.StatementThreshold + value: '800' + - key: google-readability-namespace-comments.ShortNamespaceLines + value: '10' + - key: google-readability-namespace-comments.SpacesBeforeComments + value: '2' + - key: llvm-else-after-return.WarnOnConditionVariables + value: '0' + - key: llvm-else-after-return.WarnOnUnfixable + value: '0' + - key: llvm-qualified-auto.AddConstToQualified + value: '0' + - key: modernize-loop-convert.MaxCopySize + value: '16' + - key: modernize-loop-convert.MinConfidence + value: reasonable + - key: modernize-loop-convert.NamingStyle + value: CamelCase + - key: modernize-pass-by-value.IncludeStyle + value: llvm + - key: modernize-replace-auto-ptr.IncludeStyle + value: llvm + - key: modernize-use-nullptr.NullMacros + value: 'NULL' + - key: readability-identifier-length.IgnoredParameterNames + value: 'mr|os' + - key: readability-identifier-length.IgnoredVariableNames + value: 'mr|_' +... 
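For reference, a minimal, hypothetical C++ sketch (not part of this patch; `grow_pool`, `pool_size`, and `example` are made-up names) of the kind of rewrite the checks configured in .clang-tidy above drive in the diffs that follow: short identifiers are lengthened per readability-identifier-length (with `mr` and `os` exempted via IgnoredParameterNames), single-statement branches gain braces, byte counts use std::size_t, and intentional exceptions are annotated with NOLINT, mirroring the suppressions added in random_allocations.cpp.

    #include <cstddef>
    #include <iostream>

    namespace example {

    // A mutable global kept for illustration; the NOLINT mirrors the suppression
    // style used in the benchmark sources below.
    std::size_t pool_size = 0;  // NOLINT(cppcoreguidelines-avoid-non-const-global-variables)

    // Before such a cleanup this might have been `void grow(int n, std::ostream& os)`
    // with an unbraced if: readability-identifier-length flags `n`, while `os` is
    // exempted by the configuration above.
    void grow_pool(std::size_t num_bytes, std::ostream& os)
    {
      if (num_bytes > 0) {  // braces added to satisfy readability-braces-around-statements
        pool_size += num_bytes;
      }
      os << "pool size is now " << pool_size << " bytes\n";
    }

    }  // namespace example

    int main()
    {
      example::grow_pool(256, std::cout);
      return 0;
    }
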
diff --git a/benchmarks/random_allocations/random_allocations.cpp b/benchmarks/random_allocations/random_allocations.cpp index a69b26b91..dab65b769 100644 --- a/benchmarks/random_allocations/random_allocations.cpp +++ b/benchmarks/random_allocations/random_allocations.cpp @@ -38,9 +38,9 @@ namespace { constexpr std::size_t size_mb{1 << 20}; struct allocation { - void* p{nullptr}; + void* ptr{nullptr}; std::size_t size{0}; - allocation(void* _p, std::size_t _size) : p{_p}, size{_size} {} + allocation(void* ptr, std::size_t size) : ptr{ptr}, size{size} {} allocation() = default; }; @@ -70,9 +70,10 @@ void random_allocation_free(rmm::mr::device_memory_resource& mr, max_usage *= size_mb; // convert to bytes - constexpr int allocation_probability = 73; // percent - std::uniform_int_distribution op_distribution(0, 99); - std::uniform_int_distribution index_distribution(0, num_allocations - 1); + constexpr int allocation_probability{73}; // percent + constexpr int max_op_chance{99}; + std::uniform_int_distribution op_distribution(0, max_op_chance); + std::uniform_int_distribution index_distribution(0, static_cast(num_allocations) - 1); int active_allocations{0}; std::size_t allocation_count{0}; @@ -117,7 +118,7 @@ void random_allocation_free(rmm::mr::device_memory_resource& mr, std::size_t index = index_distribution(generator) % active_allocations; active_allocations--; allocation to_free = remove_at(allocations, index); - mr.deallocate(to_free.p, to_free.size, stream); + mr.deallocate(to_free.ptr, to_free.size, stream); allocation_size -= to_free.size; #if VERBOSE @@ -136,11 +137,12 @@ void random_allocation_free(rmm::mr::device_memory_resource& mr, } } // namespace -void uniform_random_allocations(rmm::mr::device_memory_resource& mr, - std::size_t num_allocations, - std::size_t max_allocation_size, // in MiB - std::size_t max_usage, - rmm::cuda_stream_view stream = {}) +void uniform_random_allocations( + rmm::mr::device_memory_resource& mr, + std::size_t num_allocations, // NOLINT(bugprone-easily-swappable-parameters) + std::size_t max_allocation_size, // size in MiB + std::size_t max_usage, + rmm::cuda_stream_view stream = {}) { std::uniform_int_distribution size_distribution(1, max_allocation_size * size_mb); random_allocation_free(mr, size_distribution, num_allocations, max_usage, stream); @@ -176,7 +178,10 @@ inline auto make_binning() auto pool = make_pool(); // Add a binning_memory_resource with fixed-size bins of sizes 256, 512, 1024, 2048 and 4096KiB // Larger allocations will use the pool resource - auto mr = rmm::mr::make_owning_wrapper(pool, 18, 22); + constexpr auto min_bin_pow2{18}; + constexpr auto max_bin_pow2{22}; + auto mr = rmm::mr::make_owning_wrapper( + pool, min_bin_pow2, max_bin_pow2); return mr; } @@ -184,7 +189,7 @@ using MRFactoryFunc = std::function{1000, 10000, 100000}) - b->Args({num_allocations, size})->Unit(benchmark::kMillisecond); + for (int num_allocations : std::vector{1000, 10000, 100000}) { + bench->Args({num_allocations, size})->Unit(benchmark::kMillisecond); + } } -static void size_range(benchmark::internal::Benchmark* b, int num) +static void size_range(benchmark::internal::Benchmark* bench, int num) { - for (int max_size : std::vector{1, 4, 64, 256, 1024, 4096}) - b->Args({num, max_size})->Unit(benchmark::kMillisecond); + for (int max_size : std::vector{1, 4, 64, 256, 1024, 4096}) { + bench->Args({num, max_size})->Unit(benchmark::kMillisecond); + } } -static void num_size_range(benchmark::internal::Benchmark* b) +static void 
num_size_range(benchmark::internal::Benchmark* bench) { - for (int num_allocations : std::vector{1000, 10000, 100000}) - size_range(b, num_allocations); + for (int num_allocations : std::vector{1000, 10000, 100000}) { + size_range(bench, num_allocations); + } } -int num_allocations = -1; -int max_size = -1; +int num_allocations = -1; // NOLINT(cppcoreguidelines-avoid-non-const-global-variables) +int max_size = -1; // NOLINT(cppcoreguidelines-avoid-non-const-global-variables) -static void benchmark_range(benchmark::internal::Benchmark* b) +void benchmark_range(benchmark::internal::Benchmark* bench) { if (num_allocations > 0) { - if (max_size > 0) - b->Args({num_allocations, max_size})->Unit(benchmark::kMillisecond); - else - size_range(b, num_allocations); + if (max_size > 0) { + bench->Args({num_allocations, max_size})->Unit(benchmark::kMillisecond); + } else { + size_range(bench, num_allocations); + } } else { - if (max_size > 0) - num_range(b, max_size); - else - num_size_range(b); + if (max_size > 0) { + num_range(bench, max_size); + } else { + num_size_range(bench); + } } } -void declare_benchmark(std::string name) +void declare_benchmark(const std::string& name) { - if (name == "cuda") - BENCHMARK_CAPTURE(BM_RandomAllocations, cuda_mr, &make_cuda)->Apply(benchmark_range); - if (name == "cuda_async") - BENCHMARK_CAPTURE(BM_RandomAllocations, cuda_async_mr, &make_cuda_async) + if (name == "cuda") { + BENCHMARK_CAPTURE(BM_RandomAllocations, cuda_mr, &make_cuda) // NOLINT ->Apply(benchmark_range); - else if (name == "binning") - BENCHMARK_CAPTURE(BM_RandomAllocations, binning_mr, &make_binning)->Apply(benchmark_range); - else if (name == "pool") - BENCHMARK_CAPTURE(BM_RandomAllocations, pool_mr, &make_pool)->Apply(benchmark_range); - else if (name == "arena") - BENCHMARK_CAPTURE(BM_RandomAllocations, arena_mr, &make_arena)->Apply(benchmark_range); - else + } + if (name == "cuda_async") { + BENCHMARK_CAPTURE(BM_RandomAllocations, cuda_async_mr, &make_cuda_async) // NOLINT + ->Apply(benchmark_range); + } else if (name == "binning") { + BENCHMARK_CAPTURE(BM_RandomAllocations, binning_mr, &make_binning) // NOLINT + ->Apply(benchmark_range); + } else if (name == "pool") { + BENCHMARK_CAPTURE(BM_RandomAllocations, pool_mr, &make_pool) // NOLINT + ->Apply(benchmark_range); + } else if (name == "arena") { + BENCHMARK_CAPTURE(BM_RandomAllocations, arena_mr, &make_arena) // NOLINT + ->Apply(benchmark_range); + } else { std::cout << "Error: invalid memory_resource name: " << name << "\n"; + } } -static void profile_random_allocations(MRFactoryFunc factory, +static void profile_random_allocations(const MRFactoryFunc& factory, std::size_t num_allocations, std::size_t max_size) { @@ -320,11 +337,12 @@ int main(int argc, char** argv) declare_benchmark(mr_name); } else { #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT - std::array mrs{"pool", "binning", "arena", "cuda_async", "cuda"}; + std::vector mrs{"pool", "binning", "arena", "cuda_async", "cuda"}; #else - std::array mrs{"pool", "binning", "arena", "cuda"}; + std::vector mrs{"pool", "binning", "arena", "cuda"}; #endif - std::for_each(std::cbegin(mrs), std::cend(mrs), [](auto const& s) { declare_benchmark(s); }); + std::for_each( + std::cbegin(mrs), std::cend(mrs), [](auto const& mr) { declare_benchmark(mr); }); } ::benchmark::RunSpecifiedBenchmarks(); } diff --git a/include/rmm/cuda_stream_view.hpp b/include/rmm/cuda_stream_view.hpp index c80d4de2f..f913609f9 100644 --- a/include/rmm/cuda_stream_view.hpp +++ b/include/rmm/cuda_stream_view.hpp @@ -54,7 +54,7 @@ 
class cuda_stream_view { * * @return cudaStream_t The wrapped stream. */ - constexpr cudaStream_t value() const noexcept { return stream_; } + [[nodiscard]] constexpr cudaStream_t value() const noexcept { return stream_; } /** * @brief Implicit conversion to cudaStream_t. @@ -64,26 +64,12 @@ class cuda_stream_view { /** * @brief Return true if the wrapped stream is the CUDA per-thread default stream. */ - bool is_per_thread_default() const noexcept - { -#ifdef CUDA_API_PER_THREAD_DEFAULT_STREAM - return value() == cudaStreamPerThread || value() == 0; -#else - return value() == cudaStreamPerThread; -#endif - } + [[nodiscard]] inline bool is_per_thread_default() const noexcept; /** * @brief Return true if the wrapped stream is explicitly the CUDA legacy default stream. */ - bool is_default() const noexcept - { -#ifdef CUDA_API_PER_THREAD_DEFAULT_STREAM - return value() == cudaStreamLegacy; -#else - return value() == cudaStreamLegacy || value() == 0; -#endif - } + [[nodiscard]] inline bool is_default() const noexcept; /** * @brief Synchronize the viewed CUDA stream. @@ -105,7 +91,7 @@ class cuda_stream_view { } private: - cudaStream_t stream_{0}; + cudaStream_t stream_{}; }; /** @@ -116,12 +102,38 @@ static constexpr cuda_stream_view cuda_stream_default{}; /** * @brief Static cuda_stream_view of cudaStreamLegacy, for convenience */ -static cuda_stream_view cuda_stream_legacy{cudaStreamLegacy}; + +static const cuda_stream_view cuda_stream_legacy{ + cudaStreamLegacy // NOLINT(cppcoreguidelines-pro-type-cstyle-cast) +}; /** * @brief Static cuda_stream_view of cudaStreamPerThread, for convenience */ -static cuda_stream_view cuda_stream_per_thread{cudaStreamPerThread}; +static const cuda_stream_view cuda_stream_per_thread{ + cudaStreamPerThread // NOLINT(cppcoreguidelines-pro-type-cstyle-cast) +}; + +[[nodiscard]] inline bool cuda_stream_view::is_per_thread_default() const noexcept +{ +#ifdef CUDA_API_PER_THREAD_DEFAULT_STREAM + return value() == cuda_stream_per_thread || value() == nullptr; +#else + return value() == cuda_stream_per_thread; +#endif +} + +/** + * @brief Return true if the wrapped stream is explicitly the CUDA legacy default stream. + */ +[[nodiscard]] inline bool cuda_stream_view::is_default() const noexcept +{ +#ifdef CUDA_API_PER_THREAD_DEFAULT_STREAM + return value() == cuda_stream_legacy; +#else + return value() == cuda_stream_legacy || value() == nullptr; +#endif +} /** * @brief Equality comparison operator for streams @@ -151,9 +163,9 @@ inline bool operator!=(cuda_stream_view lhs, cuda_stream_view rhs) { return not( * @param sv The cuda_stream_view to output * @return std::ostream& The output ostream */ -inline std::ostream& operator<<(std::ostream& os, cuda_stream_view sv) +inline std::ostream& operator<<(std::ostream& os, cuda_stream_view stream) { - os << sv.value(); + os << stream.value(); return os; } diff --git a/include/rmm/detail/aligned.hpp b/include/rmm/detail/aligned.hpp index 17973d033..d4612844e 100644 --- a/include/rmm/detail/aligned.hpp +++ b/include/rmm/detail/aligned.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,8 +22,7 @@ #include #include -namespace rmm { -namespace detail { +namespace rmm::detail { /** * @brief Default alignment used for host memory allocated by RMM. 
@@ -41,7 +40,7 @@ static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256}; * @brief Returns whether or not `n` is a power of 2. * */ -constexpr bool is_pow2(std::size_t n) { return (0 == (n & (n - 1))); } +constexpr bool is_pow2(std::size_t value) { return (0 == (value & (value - 1))); } /** * @brief Returns whether or not `alignment` is a valid memory alignment. @@ -57,10 +56,10 @@ constexpr bool is_supported_alignment(std::size_t alignment) { return is_pow2(al * * @return Return the aligned value, as one would expect */ -constexpr std::size_t align_up(std::size_t v, std::size_t align_bytes) noexcept +constexpr std::size_t align_up(std::size_t value, std::size_t align_bytes) noexcept { assert(is_supported_alignment(align_bytes)); - return (v + (align_bytes - 1)) & ~(align_bytes - 1); + return (value + (align_bytes - 1)) & ~(align_bytes - 1); } /** @@ -71,10 +70,10 @@ constexpr std::size_t align_up(std::size_t v, std::size_t align_bytes) noexcept * * @return Return the aligned value, as one would expect */ -constexpr std::size_t align_down(std::size_t v, std::size_t align_bytes) noexcept +constexpr std::size_t align_down(std::size_t value, std::size_t align_bytes) noexcept { assert(is_supported_alignment(align_bytes)); - return v & ~(align_bytes - 1); + return value & ~(align_bytes - 1); } /** @@ -85,10 +84,10 @@ constexpr std::size_t align_down(std::size_t v, std::size_t align_bytes) noexcep * * @return true if aligned */ -constexpr bool is_aligned(std::size_t v, std::size_t align_bytes) noexcept +constexpr bool is_aligned(std::size_t value, std::size_t align_bytes) noexcept { assert(is_supported_alignment(align_bytes)); - return v == align_down(v, align_bytes); + return value == align_down(value, align_bytes); } /** @@ -171,5 +170,4 @@ void aligned_deallocate(void* p, std::size_t bytes, std::size_t alignment, Deall dealloc(original); } -} // namespace detail -} // namespace rmm +} // namespace rmm::detail diff --git a/include/rmm/detail/cuda_util.hpp b/include/rmm/detail/cuda_util.hpp index d60bb560f..613b8d156 100644 --- a/include/rmm/detail/cuda_util.hpp +++ b/include/rmm/detail/cuda_util.hpp @@ -17,16 +17,15 @@ #include -namespace rmm { -namespace detail { +namespace rmm::detail { /// Gets the available and total device memory in bytes for the current device inline std::pair available_device_memory() { - std::size_t free{}, total{}; + std::size_t free{}; + std::size_t total{}; RMM_CUDA_TRY(cudaMemGetInfo(&free, &total)); return {free, total}; } -} // namespace detail -} // namespace rmm +} // namespace rmm::detail diff --git a/include/rmm/detail/error.hpp b/include/rmm/detail/error.hpp index 057f67ba3..7c052ea8d 100644 --- a/include/rmm/detail/error.hpp +++ b/include/rmm/detail/error.hpp @@ -51,15 +51,10 @@ struct cuda_error : public std::runtime_error { */ class bad_alloc : public std::bad_alloc { public: - bad_alloc(const char* w) : std::bad_alloc{}, _what{std::string{std::bad_alloc::what()} + ": " + w} - { - } - - bad_alloc(std::string const& w) : bad_alloc(w.c_str()) {} - - virtual ~bad_alloc() = default; + bad_alloc(const char* msg) : _what{std::string{std::bad_alloc::what()} + ": " + msg} {} + bad_alloc(std::string const& msg) : bad_alloc(msg.c_str()) {} - virtual const char* what() const noexcept { return _what.c_str(); } + [[nodiscard]] const char* what() const noexcept override { return _what.c_str(); } private: std::string _what; diff --git a/include/rmm/detail/stack_trace.hpp b/include/rmm/detail/stack_trace.hpp index 1e218fa53..93d8fe555 100644 --- 
a/include/rmm/detail/stack_trace.hpp +++ b/include/rmm/detail/stack_trace.hpp @@ -33,9 +33,7 @@ #include #endif -namespace rmm { - -namespace detail { +namespace rmm::detail { /** * @brief stack_trace is a class that will capture a stack on instatiation for output later. @@ -52,36 +50,37 @@ class stack_trace { { #if defined(RMM_ENABLE_STACK_TRACES) const int MaxStackDepth = 64; - void* stack[MaxStackDepth]; - auto const depth = backtrace(stack, MaxStackDepth); - stack_ptrs.insert(stack_ptrs.end(), &stack[0], &stack[depth]); + std::array stack{}; + auto const depth = backtrace(stack.begin(), MaxStackDepth); + stack_ptrs.insert(stack_ptrs.end(), stack.begin(), &stack.at(depth)); #endif // RMM_ENABLE_STACK_TRACES } - friend std::ostream& operator<<(std::ostream& os, const stack_trace& st) + friend std::ostream& operator<<(std::ostream& os, const stack_trace& trace) { #if defined(RMM_ENABLE_STACK_TRACES) std::unique_ptr strings( - backtrace_symbols(st.stack_ptrs.data(), st.stack_ptrs.size()), &::free); + backtrace_symbols(trace.stack_ptrs.data(), static_cast(trace.stack_ptrs.size())), + &::free); - if (strings.get() == nullptr) { + if (strings == nullptr) { os << "But no stack trace could be found!" << std::endl; } else { // Iterate over the stack pointers converting to a string - for (std::size_t i = 0; i < st.stack_ptrs.size(); ++i) { + for (std::size_t i = 0; i < trace.stack_ptrs.size(); ++i) { // Leading index os << "#" << i << " in "; auto const str = [&] { Dl_info info; - if (dladdr(st.stack_ptrs[i], &info)) { + if (dladdr(trace.stack_ptrs[i], &info) != 0) { int status = -1; // Demangle the name. This can occasionally fail std::unique_ptr demangled( - abi::__cxa_demangle(info.dli_sname, nullptr, 0, &status), &::free); + abi::__cxa_demangle(info.dli_sname, nullptr, nullptr, &status), &::free); // If it fails, fallback to the dli_name. - if (status == 0 or info.dli_sname) { - auto name = status == 0 ? demangled.get() : info.dli_sname; + if (status == 0 or (info.dli_sname != nullptr)) { + auto const* name = status == 0 ? 
demangled.get() : info.dli_sname; return name + std::string(" from ") + info.dli_fname; } } @@ -103,6 +102,4 @@ class stack_trace { #endif // RMM_ENABLE_STACK_TRACES }; -} // namespace detail - -} // namespace rmm +} // namespace rmm::detail diff --git a/include/rmm/device_uvector.hpp b/include/rmm/device_uvector.hpp index ca4cf6d30..f7f55a910 100644 --- a/include/rmm/device_uvector.hpp +++ b/include/rmm/device_uvector.hpp @@ -84,9 +84,9 @@ class device_uvector { ~device_uvector() = default; RMM_EXEC_CHECK_DISABLE - device_uvector(device_uvector&&) = default; + device_uvector(device_uvector&&) noexcept = default; - device_uvector& operator=(device_uvector&&) = default; + device_uvector& operator=(device_uvector&&) noexcept = default; /** * @brief Copy ctor is deleted as it doesn't allow a stream argument @@ -203,24 +203,20 @@ class device_uvector { * @param v The value to copy to the specified element * @param s The stream on which to perform the copy */ - void set_element_async(std::size_t element_index, value_type const& v, cuda_stream_view s) + void set_element_async(std::size_t element_index, + value_type const& value, + cuda_stream_view stream) { RMM_EXPECTS( element_index < size(), rmm::out_of_range, "Attempt to access out of bounds element."); - if constexpr (std::is_fundamental::value) { - if constexpr (std::is_same::value) { - RMM_CUDA_TRY(cudaMemsetAsync(element_ptr(element_index), v, sizeof(v), s.value())); - } else { - if (v == value_type{0}) { - set_element_to_zero_async(element_index, s); - } else { - RMM_CUDA_TRY(cudaMemcpyAsync( - element_ptr(element_index), &v, sizeof(v), cudaMemcpyDefault, s.value())); - } - } - } else { + if constexpr (std::is_same::value) { RMM_CUDA_TRY( - cudaMemcpyAsync(element_ptr(element_index), &v, sizeof(v), cudaMemcpyDefault, s.value())); + cudaMemsetAsync(element_ptr(element_index), value, sizeof(value), stream.value())); + } else if (std::is_fundamental::value and value == value_type{0}) { + set_element_to_zero_async(element_index, stream); + } else { + RMM_CUDA_TRY(cudaMemcpyAsync( + element_ptr(element_index), &value, sizeof(value), cudaMemcpyDefault, stream.value())); } } @@ -250,11 +246,12 @@ class device_uvector { * @param element_index Index of the target element * @param s The stream on which to perform the copy */ - void set_element_to_zero_async(std::size_t element_index, cuda_stream_view s) + void set_element_to_zero_async(std::size_t element_index, cuda_stream_view stream) { RMM_EXPECTS( element_index < size(), rmm::out_of_range, "Attempt to access out of bounds element."); - RMM_CUDA_TRY(cudaMemsetAsync(element_ptr(element_index), 0, sizeof(value_type), s.value())); + RMM_CUDA_TRY( + cudaMemsetAsync(element_ptr(element_index), 0, sizeof(value_type), stream.value())); } /** @@ -283,13 +280,13 @@ class device_uvector { * @throws rmm::out_of_range exception if `element_index >= size()` * * @param element_index Index of the target element - * @param v The value to copy to the specified element - * @param s The stream on which to perform the copy + * @param value The value to copy to the specified element + * @param stream The stream on which to perform the copy */ - void set_element(std::size_t element_index, T const& v, cuda_stream_view s) + void set_element(std::size_t element_index, T const& value, cuda_stream_view stream) { - set_element_async(element_index, v, s); - s.synchronize_no_throw(); + set_element_async(element_index, value, stream); + stream.synchronize_no_throw(); } /** @@ -301,18 +298,18 @@ class device_uvector { * 
@throws rmm::out_of_range exception if `element_index >= size()` * * @param element_index Index of the desired element - * @param s The stream on which to perform the copy + * @param stream The stream on which to perform the copy * @return The value of the specified element */ - value_type element(std::size_t element_index, cuda_stream_view s) const + value_type element(std::size_t element_index, cuda_stream_view stream) const { RMM_EXPECTS( element_index < size(), rmm::out_of_range, "Attempt to access out of bounds element."); - value_type v; - RMM_CUDA_TRY( - cudaMemcpyAsync(&v, element_ptr(element_index), sizeof(v), cudaMemcpyDefault, s.value())); - s.synchronize(); - return v; + value_type value; + RMM_CUDA_TRY(cudaMemcpyAsync( + &value, element_ptr(element_index), sizeof(value), cudaMemcpyDefault, stream.value())); + stream.synchronize(); + return value; } /** @@ -323,10 +320,10 @@ class device_uvector { * * @throws rmm::out_of_range exception if the vector is empty. * - * @param s The stream on which to perform the copy + * @param stream The stream on which to perform the copy * @return The value of the first element */ - value_type front_element(cuda_stream_view s) const { return element(0, s); } + value_type front_element(cuda_stream_view stream) const { return element(0, stream); } /** * @brief Returns the last element. @@ -336,10 +333,10 @@ class device_uvector { * * @throws rmm::out_of_range exception if the vector is empty. * - * @param s The stream on which to perform the copy + * @param stream The stream on which to perform the copy * @return The value of the last element */ - value_type back_element(cuda_stream_view s) const { return element(size() - 1, s); } + value_type back_element(cuda_stream_view stream) const { return element(size() - 1, stream); } /** * @brief Resizes the vector to contain `new_size` elements. @@ -384,7 +381,10 @@ class device_uvector { * @return std::size_t The number of elements that can be stored without requiring a new * allocation. */ - std::size_t capacity() const noexcept { return bytes_to_elements(_storage.capacity()); } + [[nodiscard]] std::size_t capacity() const noexcept + { + return bytes_to_elements(_storage.capacity()); + } /** * @brief Returns pointer to underlying device storage. @@ -468,7 +468,7 @@ class device_uvector { * * @return The number of elements. */ - std::size_t size() const noexcept { return bytes_to_elements(_storage.size()); } + [[nodiscard]] std::size_t size() const noexcept { return bytes_to_elements(_storage.size()); } /** * @brief Returns true if the vector contains no elements, i.e., `size() == 0`. @@ -476,14 +476,14 @@ class device_uvector { * @return true The vector is empty * @return false The vector is not empty */ - bool is_empty() const noexcept { return size() == 0; } + [[nodiscard]] bool is_empty() const noexcept { return size() == 0; } /** * @brief Returns pointer to the resource used to allocate and deallocate the device storage. 
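As a quick reference for the element accessors renamed above, here is a minimal sketch of how they are used together. It assumes a working CUDA device and the current default memory resource; the size and values are arbitrary.

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>

int main()
{
  rmm::cuda_stream_view stream{};            // default stream
  rmm::device_uvector<int> vec(8, stream);   // 8 uninitialized device elements

  vec.set_element(0, 42, stream);            // copies the value and synchronizes

  int last{13};
  vec.set_element_async(vec.size() - 1, last, stream);  // `last` must outlive the async copy
  stream.synchronize();

  auto const front = vec.front_element(stream);  // synchronous device-to-host copy of element 0
  auto const back  = vec.back_element(stream);   // element size() - 1
  return (front == 42 && back == 13) ? 0 : 1;
}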
* * @return Pointer to underlying resource */ - mr::device_memory_resource* memory_resource() const noexcept + [[nodiscard]] mr::device_memory_resource* memory_resource() const noexcept { return _storage.memory_resource(); } @@ -491,12 +491,12 @@ class device_uvector { private: device_buffer _storage{}; ///< Device memory storage for vector elements - std::size_t constexpr elements_to_bytes(std::size_t num_elements) const noexcept + [[nodiscard]] std::size_t constexpr elements_to_bytes(std::size_t num_elements) const noexcept { return num_elements * sizeof(value_type); } - std::size_t constexpr bytes_to_elements(std::size_t num_bytes) const noexcept + [[nodiscard]] std::size_t constexpr bytes_to_elements(std::size_t num_bytes) const noexcept { return num_bytes / sizeof(value_type); } diff --git a/include/rmm/logger.hpp b/include/rmm/logger.hpp index 99a0f7b2e..d0ce63bac 100644 --- a/include/rmm/logger.hpp +++ b/include/rmm/logger.hpp @@ -42,7 +42,7 @@ namespace detail { */ inline std::string default_log_filename() { - auto filename = std::getenv("RMM_DEBUG_LOG_FILE"); + auto* filename = std::getenv("RMM_DEBUG_LOG_FILE"); return (filename == nullptr) ? std::string{"rmm_log.txt"} : std::string{filename}; } @@ -80,8 +80,8 @@ struct logger_wrapper { */ inline spdlog::logger& logger() { - static detail::logger_wrapper w{}; - return w.logger_; + static detail::logger_wrapper wrapped{}; + return wrapped.logger_; } // The default is INFO, but it should be used sparingly, so that by default a log file is only diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index 7a449949c..f99c6bf97 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -30,13 +30,10 @@ #include #include -namespace rmm { -namespace mr { -namespace detail { -namespace arena { +namespace rmm::mr::detail::arena { /// Minimum size of a superblock (256 KiB). -constexpr std::size_t minimum_superblock_size = 1u << 18u; +constexpr std::size_t minimum_superblock_size = 1U << 18U; /** * @brief Represents a chunk of memory that can be allocated and deallocated. @@ -67,16 +64,16 @@ class block { block(void* pointer, std::size_t size) : pointer_(static_cast(pointer)), size_(size) {} /// Returns the underlying pointer. - void* pointer() const { return pointer_; } + [[nodiscard]] void* pointer() const { return pointer_; } /// Returns the size of the block. - std::size_t size() const { return size_; } + [[nodiscard]] std::size_t size() const { return size_; } /// Returns true if this block is valid (non-null), false otherwise. - bool is_valid() const { return pointer_ != nullptr; } + [[nodiscard]] bool is_valid() const { return pointer_ != nullptr; } /// Returns true if this block is a superblock, false otherwise. - bool is_superblock() const { return size_ >= minimum_superblock_size; } + [[nodiscard]] bool is_superblock() const { return size_ >= minimum_superblock_size; } /** * @brief Verifies whether this block can be merged to the beginning of block b. @@ -85,7 +82,10 @@ class block { * @return true Returns true if this block's `pointer` + `size` == `b.ptr`, and `not b.is_head`, false otherwise. */ - bool is_contiguous_before(block const& b) const { return pointer_ + size_ == b.pointer_; } + [[nodiscard]] bool is_contiguous_before(block const& blk) const + { + return pointer_ + size_ == blk.pointer_; + } /** * @brief Is this block large enough to fit `sz` bytes? @@ -93,7 +93,7 @@ class block { * @param sz The size in bytes to check for fit. 
* @return true if this block is at least `sz` bytes. */ - bool fits(std::size_t sz) const { return size_ >= sz; } + [[nodiscard]] bool fits(std::size_t size) const { return size_ >= size; } /** * @brief Split this block into two by the given size. @@ -101,14 +101,11 @@ class block { * @param sz The size in bytes of the first block. * @return std::pair A pair of blocks split by sz. */ - std::pair split(std::size_t sz) const + [[nodiscard]] std::pair split(std::size_t size) const { RMM_LOGGING_ASSERT(size_ >= sz); - if (size_ > sz) { - return {{pointer_, sz}, {pointer_ + sz, size_ - sz}}; - } else { - return {*this, {}}; - } + if (size_ > size) { return {{pointer_, size}, {pointer_ + size, size_ - size}}; } + return {*this, {}}; } /** @@ -119,14 +116,14 @@ class block { * @param b block to merge. * @return block The merged block. */ - block merge(block const& b) const + [[nodiscard]] block merge(block const& blk) const { RMM_LOGGING_ASSERT(is_contiguous_before(b)); - return {pointer_, size_ + b.size_}; + return {pointer_, size_ + blk.size_}; } /// Used by std::set to compare blocks. - bool operator<(block const& b) const { return pointer_ < b.pointer_; } + bool operator<(block const& blk) const { return pointer_ < blk.pointer_; } private: char* pointer_{}; ///< Raw memory pointer. @@ -139,9 +136,9 @@ class block { * @param[in] v value to align * @return Return the aligned value */ -constexpr std::size_t align_up(std::size_t v) noexcept +constexpr std::size_t align_up(std::size_t value) noexcept { - return rmm::detail::align_up(v, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + return rmm::detail::align_up(value, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); } /** @@ -150,9 +147,9 @@ constexpr std::size_t align_up(std::size_t v) noexcept * @param[in] v value to align * @return Return the aligned value */ -constexpr std::size_t align_down(std::size_t v) noexcept +constexpr std::size_t align_down(std::size_t value) noexcept { - return rmm::detail::align_down(v, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + return rmm::detail::align_down(value, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); } /** @@ -172,24 +169,21 @@ constexpr std::size_t align_down(std::size_t v) noexcept inline block first_fit(std::set& free_blocks, std::size_t size) { auto const iter = std::find_if( - free_blocks.cbegin(), free_blocks.cend(), [size](auto const& b) { return b.fits(size); }); + free_blocks.cbegin(), free_blocks.cend(), [size](auto const& blk) { return blk.fits(size); }); - if (iter == free_blocks.cend()) { - return {}; - } else { - // Remove the block from the free_list. - auto const b = *iter; - auto const i = free_blocks.erase(iter); - - if (b.size() > size) { - // Split the block and put the remainder back. - auto const split = b.split(size); - free_blocks.insert(i, split.second); - return split.first; - } else { - return b; - } + if (iter == free_blocks.cend()) { return {}; } + + // Remove the block from the free_list. + auto const blk = *iter; + auto const next = free_blocks.erase(iter); + + if (blk.size() > size) { + // Split the block and put the remainder back. + auto const split = blk.split(size); + free_blocks.insert(next, split.second); + return split.first; } + return blk; } /** @@ -199,35 +193,35 @@ inline block first_fit(std::set& free_blocks, std::size_t size) * @param b The block to coalesce. * @return block The coalesced block. 
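To make the first-fit behaviour above concrete, here is a deliberately simplified, self-contained sketch. It models free blocks as (offset, size) pairs instead of the block class, and it ignores alignment and coalescing; the numbers are arbitrary.

#include <cstddef>
#include <set>
#include <utility>

using simple_block = std::pair<std::size_t, std::size_t>;  // (start offset, size), ordered by offset

// Return the first free block large enough for `size`, splitting off any remainder.
simple_block simple_first_fit(std::set<simple_block>& free_list, std::size_t size)
{
  for (auto iter = free_list.begin(); iter != free_list.end(); ++iter) {
    if (iter->second >= size) {
      auto const blk = *iter;
      free_list.erase(iter);
      if (blk.second > size) {
        free_list.emplace(blk.first + size, blk.second - size);  // put the remainder back
      }
      return {blk.first, size};
    }
  }
  return {0, 0};  // no block fits
}

int main()
{
  std::set<simple_block> free_list{{0, 256}, {512, 1024}};
  auto const alloc = simple_first_fit(free_list, 300);  // skips the 256-byte block, splits the 1024-byte one
  bool const ok = alloc.first == 512 && free_list.count({812, 724}) == 1;
  return ok ? 0 : 1;
}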
*/ -inline block coalesce_block(std::set& free_blocks, block const& b) +inline block coalesce_block(std::set& free_blocks, block const& blk) { - if (!b.is_valid()) return b; + if (!blk.is_valid()) { return blk; } // Find the right place (in ascending address order) to insert the block. - auto const next = free_blocks.lower_bound(b); + auto const next = free_blocks.lower_bound(blk); auto const previous = next == free_blocks.cbegin() ? next : std::prev(next); // Coalesce with neighboring blocks. - bool const merge_prev = previous->is_contiguous_before(b); - bool const merge_next = next != free_blocks.cend() && b.is_contiguous_before(*next); + bool const merge_prev = previous->is_contiguous_before(blk); + bool const merge_next = next != free_blocks.cend() && blk.is_contiguous_before(*next); block merged{}; if (merge_prev && merge_next) { - merged = previous->merge(b).merge(*next); + merged = previous->merge(blk).merge(*next); free_blocks.erase(previous); - auto const i = free_blocks.erase(next); - free_blocks.insert(i, merged); + auto const iter = free_blocks.erase(next); + free_blocks.insert(iter, merged); } else if (merge_prev) { - merged = previous->merge(b); - auto const i = free_blocks.erase(previous); - free_blocks.insert(i, merged); + merged = previous->merge(blk); + auto const iter = free_blocks.erase(previous); + free_blocks.insert(iter, merged); } else if (merge_next) { - merged = b.merge(*next); - auto const i = free_blocks.erase(next); - free_blocks.insert(i, merged); + merged = blk.merge(*next); + auto const iter = free_blocks.erase(next); + free_blocks.insert(iter, merged); } else { - free_blocks.emplace(b); - merged = b; + free_blocks.emplace(blk); + merged = blk; } return merged; } @@ -248,7 +242,7 @@ class global_arena final { /// The default maximum size for the global arena. static constexpr std::size_t default_maximum_size = std::numeric_limits::max(); /// Reserved memory that should not be allocated (64 MiB). - static constexpr std::size_t reserved_size = 1u << 26u; + static constexpr std::size_t reserved_size = 1U << 26U; /** * @brief Construct a global arena. @@ -275,7 +269,8 @@ class global_arena final { "Error, Maximum arena size required to be a multiple of 256 bytes"); if (initial_size == default_initial_size || maximum_size == default_maximum_size) { - std::size_t free{}, total{}; + std::size_t free{}; + std::size_t total{}; RMM_CUDA_TRY(cudaMemGetInfo(&free, &total)); if (initial_size == default_initial_size) { initial_size = align_up(std::min(free, total / 2)); @@ -292,6 +287,8 @@ class global_arena final { // Disable copy (and move) semantics. global_arena(const global_arena&) = delete; global_arena& operator=(const global_arena&) = delete; + global_arena(global_arena&&) = delete; + global_arena& operator=(global_arena&&) = delete; /** * @brief Destroy the global arena and deallocate all memory it allocated using the upstream @@ -300,8 +297,8 @@ class global_arena final { ~global_arena() { lock_guard lock(mtx_); - for (auto const& b : upstream_blocks_) { - upstream_mr_->deallocate(b.pointer(), b.size()); + for (auto const& blk : upstream_blocks_) { + upstream_mr_->deallocate(blk.pointer(), blk.size()); } } @@ -326,10 +323,10 @@ class global_arena final { * @param bytes The size in bytes of the allocation. This must be equal to the value of `bytes` * that was passed to the `allocate` call that returned `p`. 
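The merge conditions inside coalesce_block above can be illustrated with plain offsets. This is only an illustration: the span struct and the constants below are made up for the example, while the real code works on raw device pointers.

#include <cstddef>

struct span {
  std::size_t offset;
  std::size_t size;
};

// A block can be merged into the one that follows it when they are contiguous in address order.
constexpr bool contiguous_before(span lhs, span rhs) { return lhs.offset + lhs.size == rhs.offset; }
constexpr span merge(span lhs, span rhs) { return {lhs.offset, lhs.size + rhs.size}; }

int main()
{
  constexpr span prev{0, 256};
  constexpr span freed{256, 128};
  constexpr span next{384, 64};

  // Both neighbours are contiguous, so freeing the middle block yields one 448-byte block.
  static_assert(contiguous_before(prev, freed) && contiguous_before(freed, next));
  constexpr span merged = merge(merge(prev, freed), next);
  static_assert(merged.offset == 0 && merged.size == 448);
  return 0;
}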
*/ - void deallocate(block const& b) + void deallocate(block const& blk) { lock_guard lock(mtx_); - coalesce_block(free_blocks_, b); + coalesce_block(free_blocks_, blk); } /** @@ -340,8 +337,8 @@ class global_arena final { void deallocate(std::set const& free_blocks) { lock_guard lock(mtx_); - for (auto const& b : free_blocks) { - coalesce_block(free_blocks_, b); + for (auto const& blk : free_blocks) { + coalesce_block(free_blocks_, blk); } } @@ -357,8 +354,8 @@ class global_arena final { block get_block(std::size_t size) { // Find the first-fit free block. - auto const b = first_fit(free_blocks_, size); - if (b.is_valid()) return b; + auto const blk = first_fit(free_blocks_, size); + if (blk.is_valid()) { return blk; } // No existing larger blocks available, so grow the arena. auto const upstream_block = expand_arena(size_to_grow(size)); @@ -427,10 +424,13 @@ class arena { * @param global_arena The global arena from which to allocate superblocks. */ explicit arena(global_arena& global_arena) : global_arena_{global_arena} {} + ~arena() = default; // Disable copy (and move) semantics. arena(const arena&) = delete; arena& operator=(const arena&) = delete; + arena(arena&&) = delete; + arena& operator=(arena&&) = delete; /** * @brief Allocates memory of size at least `bytes`. @@ -443,11 +443,11 @@ class arena { void* allocate(std::size_t bytes) { lock_guard lock(mtx_); - auto const b = get_block(bytes); + auto const blk = get_block(bytes); #ifdef RMM_POOL_TRACK_ALLOCATIONS allocated_blocks_.emplace(b.pointer(), b); #endif - return b.pointer(); + return blk.pointer(); } /** @@ -459,19 +459,19 @@ class arena { * @param stream Stream on which to perform deallocation. * @return true if the allocation is found, false otherwise. */ - bool deallocate(void* p, std::size_t bytes, cuda_stream_view stream) + bool deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) { lock_guard lock(mtx_); #ifdef RMM_POOL_TRACK_ALLOCATIONS auto const b = free_block(p, bytes); #else - block const b{p, bytes}; + block const blk{ptr, bytes}; #endif - if (b.is_valid()) { - auto const merged = coalesce_block(free_blocks_, b); + if (blk.is_valid()) { + auto const merged = coalesce_block(free_blocks_, blk); shrink_arena(merged, stream); } - return b.is_valid(); + return blk.is_valid(); } #ifdef RMM_POOL_TRACK_ALLOCATIONS @@ -524,8 +524,8 @@ class arena { { if (size < minimum_superblock_size) { // Find the first-fit free block. - auto const b = first_fit(free_blocks_, size); - if (b.is_valid()) { return b; } + auto const blk = first_fit(free_blocks_, size); + if (blk.is_valid()) { return blk; } } // No existing larger blocks available, so grow the arena and obtain a superblock. @@ -575,15 +575,15 @@ class arena { * @param b The block that can be used to shrink the arena. * @param stream Stream on which to perform shrinking. */ - void shrink_arena(block const& b, cuda_stream_view stream) + void shrink_arena(block const& blk, cuda_stream_view stream) { // Don't shrink if b is not a superblock. - if (!b.is_superblock()) return; + if (!blk.is_superblock()) { return; } stream.synchronize_no_throw(); - global_arena_.deallocate(b); - free_blocks_.erase(b); + global_arena_.deallocate(blk); + free_blocks_.erase(blk); } /// The global arena to allocate superblocks from. @@ -609,11 +609,14 @@ class arena { template class arena_cleaner { public: - explicit arena_cleaner(std::shared_ptr> const& a) : arena_(a) {} + explicit arena_cleaner(std::shared_ptr> const& arena) : arena_(arena) {} // Disable copy (and move) semantics. 
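The cleaner introduced above stores a weak reference to the arena and acts on destruction only when the owning object is still alive. A generic sketch of that pattern follows; the resource type and its clean() member are stand-ins for illustration, not the arena API.

#include <memory>

struct resource {
  void clean() { cleaned = true; }
  bool cleaned{false};
};

class scoped_cleaner {
 public:
  explicit scoped_cleaner(std::shared_ptr<resource> const& res) : res_{res} {}
  scoped_cleaner(scoped_cleaner const&) = delete;
  scoped_cleaner& operator=(scoped_cleaner const&) = delete;
  scoped_cleaner(scoped_cleaner&&) = delete;
  scoped_cleaner& operator=(scoped_cleaner&&) = delete;
  ~scoped_cleaner()
  {
    // Only clean if the resource has not already been destroyed.
    if (!res_.expired()) { res_.lock()->clean(); }
  }

 private:
  std::weak_ptr<resource> res_;
};

int main()
{
  auto res = std::make_shared<resource>();
  {
    scoped_cleaner cleaner{res};
  }  // cleaner destroyed while `res` is still alive, so clean() runs
  return res->cleaned ? 0 : 1;
}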
- arena_cleaner(const arena_cleaner&) = delete; - arena_cleaner& operator=(const arena_cleaner&) = delete; + arena_cleaner() = delete; + arena_cleaner(arena_cleaner const&) = delete; + arena_cleaner& operator=(arena_cleaner const&) = delete; + arena_cleaner(arena_cleaner&&) = delete; + arena_cleaner& operator=(arena_cleaner&&) = delete; ~arena_cleaner() { @@ -628,7 +631,4 @@ class arena_cleaner { std::weak_ptr> arena_; }; -} // namespace arena -} // namespace detail -} // namespace mr -} // namespace rmm +} // namespace rmm::mr::detail::arena From d9b9ab49b7eec42c349ce4216864c25d857218f7 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 31 Aug 2021 11:16:22 +1000 Subject: [PATCH 06/72] Suppress cppcoreguidelines-macro-usage --- .clang-tidy | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.clang-tidy b/.clang-tidy index 997fd9e6e..3d19ae996 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -7,7 +7,8 @@ Checks: 'clang-diagnostic-*, performance-*, readability-*, llvm-*, - -modernize-use-trailing-return-type' + -modernize-use-trailing-return-type', + -cppcoreguidelines-macro-usage WarningsAsErrors: '' HeaderFilterRegex: '' AnalyzeTemporaryDtors: false From f65249b75b64cb915672e0a1c0abef41c9a555b8 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 31 Aug 2021 11:16:39 +1000 Subject: [PATCH 07/72] parameter name --- include/rmm/detail/aligned.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/rmm/detail/aligned.hpp b/include/rmm/detail/aligned.hpp index d4612844e..8b15d5463 100644 --- a/include/rmm/detail/aligned.hpp +++ b/include/rmm/detail/aligned.hpp @@ -159,14 +159,14 @@ void* aligned_allocate(std::size_t bytes, std::size_t alignment, Alloc alloc) * @tparam Dealloc A unary callable type that deallocates memory. */ template -void aligned_deallocate(void* p, std::size_t bytes, std::size_t alignment, Dealloc dealloc) +void aligned_deallocate(void* ptr, std::size_t bytes, std::size_t alignment, Dealloc dealloc) { (void)alignment; // Get offset from the location immediately prior to the aligned pointer - std::ptrdiff_t const offset = *(reinterpret_cast(p) - 1); + std::ptrdiff_t const offset = *(reinterpret_cast(ptr) - 1); - void* const original = static_cast(p) - offset; + void* const original = static_cast(ptr) - offset; dealloc(original); } From 401f2aed6331d73f553210f6954f8334dfe3d65e Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 2 Sep 2021 11:29:38 +1000 Subject: [PATCH 08/72] tidying --- .clang-tidy | 4 +- include/rmm/detail/error.hpp | 22 +++--- include/rmm/detail/stack_trace.hpp | 1 + tests/cuda_stream_pool_tests.cpp | 1 - tests/device_buffer_tests.cu | 107 +++++++++++++++-------------- 5 files changed, 69 insertions(+), 66 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 3d19ae996..a0bf9994a 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -7,8 +7,8 @@ Checks: 'clang-diagnostic-*, performance-*, readability-*, llvm-*, - -modernize-use-trailing-return-type', - -cppcoreguidelines-macro-usage + -modernize-use-trailing-return-type, + -cppcoreguidelines-macro-usage' WarningsAsErrors: '' HeaderFilterRegex: '' AnalyzeTemporaryDtors: false diff --git a/include/rmm/detail/error.hpp b/include/rmm/detail/error.hpp index 7c052ea8d..50ae70b2f 100644 --- a/include/rmm/detail/error.hpp +++ b/include/rmm/detail/error.hpp @@ -101,7 +101,7 @@ class out_of_range : public std::out_of_range { (__VA_ARGS__) #define GET_RMM_EXPECTS_MACRO(_1, _2, _3, NAME, ...) 
NAME #define RMM_EXPECTS_3(_condition, _exception_type, _reason) \ - (!!(_condition)) ? static_cast(0) : throw _exception_type \ + (!!(_condition)) ? static_cast(0) : throw(_exception_type) \ { \ "RMM failure at: " __FILE__ ":" RMM_STRINGIFY(__LINE__) ": " _reason \ } @@ -124,7 +124,7 @@ class out_of_range : public std::out_of_range { (__VA_ARGS__) #define GET_RMM_FAIL_MACRO(_1, _2, NAME, ...) NAME #define RMM_FAIL_2(_what, _exception_type) \ - throw _exception_type{"RMM failure at:" __FILE__ ":" RMM_STRINGIFY(__LINE__) ": " _what}; + throw(_exception_type){"RMM failure at:" __FILE__ ":" RMM_STRINGIFY(__LINE__) ": " _what}; #define RMM_FAIL_1(_what) RMM_FAIL_2(_what, rmm::logic_error) /** @@ -152,15 +152,15 @@ class out_of_range : public std::out_of_range { GET_RMM_CUDA_TRY_MACRO(__VA_ARGS__, RMM_CUDA_TRY_2, RMM_CUDA_TRY_1) \ (__VA_ARGS__) #define GET_RMM_CUDA_TRY_MACRO(_1, _2, NAME, ...) NAME -#define RMM_CUDA_TRY_2(_call, _exception_type) \ - do { \ - cudaError_t const error = (_call); \ - if (cudaSuccess != error) { \ - cudaGetLastError(); \ - throw _exception_type{std::string{"CUDA error at: "} + __FILE__ + ":" + \ - RMM_STRINGIFY(__LINE__) + ": " + cudaGetErrorName(error) + " " + \ - cudaGetErrorString(error)}; \ - } \ +#define RMM_CUDA_TRY_2(_call, _exception_type) \ + do { \ + cudaError_t const error = (_call); \ + if (cudaSuccess != error) { \ + cudaGetLastError(); \ + throw(_exception_type){std::string{"CUDA error at: "} + __FILE__ + ":" + \ + RMM_STRINGIFY(__LINE__) + ": " + cudaGetErrorName(error) + " " + \ + cudaGetErrorString(error)}; \ + } \ } while (0) #define RMM_CUDA_TRY_1(_call) RMM_CUDA_TRY_2(_call, rmm::cuda_error) diff --git a/include/rmm/detail/stack_trace.hpp b/include/rmm/detail/stack_trace.hpp index 93d8fe555..2b83aa7cf 100644 --- a/include/rmm/detail/stack_trace.hpp +++ b/include/rmm/detail/stack_trace.hpp @@ -28,6 +28,7 @@ #include #include #include + #include #include #include diff --git a/tests/cuda_stream_pool_tests.cpp b/tests/cuda_stream_pool_tests.cpp index 1e14e2abf..b1f61a5fb 100644 --- a/tests/cuda_stream_pool_tests.cpp +++ b/tests/cuda_stream_pool_tests.cpp @@ -37,7 +37,6 @@ TEST_F(CudaStreamPoolTest, Unequal) TEST_F(CudaStreamPoolTest, Nondefault) { auto const stream_a = this->pool.get_stream(); - auto const stream_b = this->pool.get_stream(); // pool streams are explicit, non-default streams EXPECT_FALSE(stream_a.is_default()); diff --git a/tests/device_buffer_tests.cu b/tests/device_buffer_tests.cu index fa36a2751..05ecef4c4 100644 --- a/tests/device_buffer_tests.cu +++ b/tests/device_buffer_tests.cu @@ -42,7 +42,10 @@ struct DeviceBufferTest : public ::testing::Test { DeviceBufferTest() { std::default_random_engine generator; - std::uniform_int_distribution distribution(1000, 100000); + + auto constexpr range_min{1000}; + auto constexpr range_max{100000}; + std::uniform_int_distribution distribution(range_min, range_max); size = distribution(generator); } }; @@ -263,16 +266,16 @@ TYPED_TEST(DeviceBufferTest, CopyCapacityLargerThanSizeExplicitMr) TYPED_TEST(DeviceBufferTest, MoveConstructor) { rmm::device_buffer buff(this->size, rmm::cuda_stream_default, &this->mr); - auto p = buff.data(); + auto* ptr = buff.data(); auto size = buff.size(); auto capacity = buff.capacity(); - auto mr = buff.memory_resource(); + auto* mr = buff.memory_resource(); auto stream = buff.stream(); // New buffer should have the same contents as the original rmm::device_buffer buff_new(std::move(buff)); EXPECT_NE(nullptr, buff_new.data()); - EXPECT_EQ(p, buff_new.data()); + 
EXPECT_EQ(ptr, buff_new.data()); EXPECT_EQ(size, buff_new.size()); EXPECT_EQ(capacity, buff_new.capacity()); EXPECT_EQ(stream, buff_new.stream()); @@ -290,17 +293,17 @@ TYPED_TEST(DeviceBufferTest, MoveConstructorStream) { rmm::device_buffer buff(this->size, this->stream, &this->mr); this->stream.synchronize(); - auto p = buff.data(); + auto* ptr = buff.data(); auto size = buff.size(); auto capacity = buff.capacity(); - auto mr = buff.memory_resource(); + auto* mr = buff.memory_resource(); auto stream = buff.stream(); // New buffer should have the same contents as the original rmm::device_buffer buff_new(std::move(buff)); this->stream.synchronize(); EXPECT_NE(nullptr, buff_new.data()); - EXPECT_EQ(p, buff_new.data()); + EXPECT_EQ(ptr, buff_new.data()); EXPECT_EQ(size, buff_new.size()); EXPECT_EQ(capacity, buff_new.capacity()); EXPECT_EQ(stream, buff_new.stream()); @@ -316,72 +319,72 @@ TYPED_TEST(DeviceBufferTest, MoveConstructorStream) TYPED_TEST(DeviceBufferTest, MoveAssignmentToDefault) { - rmm::device_buffer from(this->size, rmm::cuda_stream_default, &this->mr); - auto p = from.data(); - auto size = from.size(); - auto capacity = from.capacity(); - auto mr = from.memory_resource(); - auto stream = from.stream(); + rmm::device_buffer src(this->size, rmm::cuda_stream_default, &this->mr); + auto* ptr = src.data(); + auto size = src.size(); + auto capacity = src.capacity(); + auto* mr = src.memory_resource(); + auto stream = src.stream(); - rmm::device_buffer to; - EXPECT_NO_THROW(to = std::move(from)); + rmm::device_buffer dest; + dest = std::move(src); // contents of `from` should be in `to` - EXPECT_NE(nullptr, to.data()); - EXPECT_EQ(p, to.data()); - EXPECT_EQ(size, to.size()); - EXPECT_EQ(capacity, to.capacity()); - EXPECT_EQ(stream, to.stream()); - EXPECT_EQ(mr, to.memory_resource()); + EXPECT_NE(nullptr, dest.data()); + EXPECT_EQ(ptr, dest.data()); + EXPECT_EQ(size, dest.size()); + EXPECT_EQ(capacity, dest.capacity()); + EXPECT_EQ(stream, dest.stream()); + EXPECT_EQ(mr, dest.memory_resource()); // `from` should be empty - EXPECT_EQ(nullptr, from.data()); - EXPECT_EQ(0, from.size()); - EXPECT_EQ(0, from.capacity()); - EXPECT_EQ(rmm::cuda_stream_default, from.stream()); - EXPECT_NE(nullptr, from.memory_resource()); + EXPECT_EQ(nullptr, src.data()); + EXPECT_EQ(0, src.size()); + EXPECT_EQ(0, src.capacity()); + EXPECT_EQ(rmm::cuda_stream_default, src.stream()); + EXPECT_NE(nullptr, src.memory_resource()); } TYPED_TEST(DeviceBufferTest, MoveAssignment) { - rmm::device_buffer from(this->size, rmm::cuda_stream_default, &this->mr); - auto p = from.data(); - auto size = from.size(); - auto capacity = from.capacity(); - auto mr = from.memory_resource(); - auto stream = from.stream(); + rmm::device_buffer src(this->size, rmm::cuda_stream_default, &this->mr); + auto* ptr = src.data(); + auto size = src.size(); + auto capacity = src.capacity(); + auto* mr = src.memory_resource(); + auto stream = src.stream(); - rmm::device_buffer to(this->size - 1, rmm::cuda_stream_default, &this->mr); - EXPECT_NO_THROW(to = std::move(from)); + rmm::device_buffer dest(this->size - 1, rmm::cuda_stream_default, &this->mr); + dest = std::move(src); // contents of `from` should be in `to` - EXPECT_NE(nullptr, to.data()); - EXPECT_EQ(p, to.data()); - EXPECT_EQ(size, to.size()); - EXPECT_EQ(capacity, to.capacity()); - EXPECT_EQ(stream, to.stream()); - EXPECT_EQ(mr, to.memory_resource()); + EXPECT_NE(nullptr, dest.data()); + EXPECT_EQ(ptr, dest.data()); + EXPECT_EQ(size, dest.size()); + EXPECT_EQ(capacity, 
dest.capacity()); + EXPECT_EQ(stream, dest.stream()); + EXPECT_EQ(mr, dest.memory_resource()); // `from` should be empty - EXPECT_EQ(nullptr, from.data()); - EXPECT_EQ(0, from.size()); - EXPECT_EQ(0, from.capacity()); - EXPECT_EQ(rmm::cuda_stream_default, from.stream()); - EXPECT_NE(nullptr, from.memory_resource()); + EXPECT_EQ(nullptr, src.data()); + EXPECT_EQ(0, src.size()); + EXPECT_EQ(0, src.capacity()); + EXPECT_EQ(rmm::cuda_stream_default, src.stream()); + EXPECT_NE(nullptr, src.memory_resource()); } TYPED_TEST(DeviceBufferTest, SelfMoveAssignment) { rmm::device_buffer buff(this->size, rmm::cuda_stream_default, &this->mr); - auto p = buff.data(); + auto* ptr = buff.data(); auto size = buff.size(); auto capacity = buff.capacity(); - auto mr = buff.memory_resource(); + auto* mr = buff.memory_resource(); auto stream = buff.stream(); buff = std::move(buff); // self-move-assignment shouldn't modify the buffer EXPECT_NE(nullptr, buff.data()); - EXPECT_EQ(p, buff.data()); + EXPECT_EQ(ptr, buff.data()); EXPECT_EQ(size, buff.size()); EXPECT_EQ(capacity, buff.capacity()); EXPECT_EQ(stream, buff.stream()); @@ -397,7 +400,7 @@ TYPED_TEST(DeviceBufferTest, ResizeSmaller) static_cast(buff.data()) + buff.size(), 0); - auto old_data = buff.data(); + auto* old_data = buff.data(); rmm::device_buffer old_content( old_data, buff.size(), rmm::cuda_stream_default, &this->mr); // for comparison @@ -408,7 +411,7 @@ TYPED_TEST(DeviceBufferTest, ResizeSmaller) // Resizing smaller means the existing allocation should remain unchanged EXPECT_EQ(old_data, buff.data()); - EXPECT_NO_THROW(buff.shrink_to_fit(rmm::cuda_stream_default)); + buff.shrink_to_fit(rmm::cuda_stream_default); EXPECT_NE(nullptr, buff.data()); // A reallocation should have occured EXPECT_NE(old_data, buff.data()); @@ -424,8 +427,8 @@ TYPED_TEST(DeviceBufferTest, ResizeSmaller) TYPED_TEST(DeviceBufferTest, ResizeBigger) { rmm::device_buffer buff(this->size, rmm::cuda_stream_default, &this->mr); - auto old_data = buff.data(); - auto new_size = this->size + 1; + auto* old_data = buff.data(); + auto new_size = this->size + 1; buff.resize(new_size, rmm::cuda_stream_default); EXPECT_EQ(new_size, buff.size()); EXPECT_EQ(new_size, buff.capacity()); From 573dd3610be17efcb6a40250148790aa359814c3 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 2 Sep 2021 11:51:10 +1000 Subject: [PATCH 09/72] tidy some tests --- tests/cuda_stream_pool_tests.cpp | 11 +- tests/device_scalar_tests.cpp | 7 +- tests/device_uvector_tests.cpp | 186 ++++++++++++++++--------------- tests/logger_tests.cpp | 90 +++++++++------ 4 files changed, 160 insertions(+), 134 deletions(-) diff --git a/tests/cuda_stream_pool_tests.cpp b/tests/cuda_stream_pool_tests.cpp index b1f61a5fb..3f1919600 100644 --- a/tests/cuda_stream_pool_tests.cpp +++ b/tests/cuda_stream_pool_tests.cpp @@ -49,13 +49,14 @@ TEST_F(CudaStreamPoolTest, ValidStreams) auto const stream_b = this->pool.get_stream(); // Operations on the streams should work correctly and without throwing exceptions - auto v = rmm::device_uvector{100, stream_a}; - RMM_CUDA_TRY(cudaMemsetAsync(v.data(), 0xcc, 100, stream_a.value())); + auto constexpr vector_size{100}; + auto vec1 = rmm::device_uvector{vector_size, stream_a}; + RMM_CUDA_TRY(cudaMemsetAsync(vec1.data(), 0xcc, 100, stream_a.value())); stream_a.synchronize(); - auto v2 = rmm::device_uvector{v, stream_b}; - auto x = v2.front_element(stream_b); - EXPECT_EQ(x, 0xcc); + auto vec2 = rmm::device_uvector{vec1, stream_b}; + auto element = vec2.front_element(stream_b); + 
EXPECT_EQ(element, 0xcc); } TEST_F(CudaStreamPoolTest, PoolSize) { EXPECT_GE(this->pool.get_pool_size(), 1); } diff --git a/tests/device_scalar_tests.cpp b/tests/device_scalar_tests.cpp index e4c1a42ff..f58655951 100644 --- a/tests/device_scalar_tests.cpp +++ b/tests/device_scalar_tests.cpp @@ -22,6 +22,7 @@ #include #include + #include #include #include @@ -34,7 +35,7 @@ struct DeviceScalarTest : public ::testing::Test { rmm::mr::device_memory_resource* mr{rmm::mr::get_current_device_resource()}; std::default_random_engine generator{}; - DeviceScalarTest() { value = random_value(); } + DeviceScalarTest() : value{random_value()} {} template ::value, bool> = true> U random_value() @@ -56,7 +57,9 @@ struct DeviceScalarTest : public ::testing::Test { template ::value, bool> = true> U random_value() { - static std::normal_distribution distribution{100, 20}; + auto const mean{100}; + auto const stddev{20}; + static std::normal_distribution distribution(mean, stddev); return distribution(generator); } }; diff --git a/tests/device_uvector_tests.cpp b/tests/device_uvector_tests.cpp index db06f7c47..b3c06885b 100644 --- a/tests/device_uvector_tests.cpp +++ b/tests/device_uvector_tests.cpp @@ -23,7 +23,7 @@ template struct TypedUVectorTest : ::testing::Test { - rmm::cuda_stream_view stream() const noexcept { return rmm::cuda_stream_view{}; } + [[nodiscard]] rmm::cuda_stream_view stream() const noexcept { return rmm::cuda_stream_view{}; } }; using TestTypes = ::testing::Types; @@ -32,27 +32,29 @@ TYPED_TEST_CASE(TypedUVectorTest, TestTypes); TYPED_TEST(TypedUVectorTest, ZeroSizeConstructor) { - rmm::device_uvector uv(0, this->stream()); - EXPECT_EQ(uv.size(), 0); - EXPECT_EQ(uv.end(), uv.begin()); - EXPECT_TRUE(uv.is_empty()); + rmm::device_uvector vec(0, this->stream()); + EXPECT_EQ(vec.size(), 0); + EXPECT_EQ(vec.end(), vec.begin()); + EXPECT_TRUE(vec.is_empty()); } TYPED_TEST(TypedUVectorTest, NonZeroSizeConstructor) { - rmm::device_uvector uv(12345, this->stream()); - EXPECT_EQ(uv.size(), 12345); - EXPECT_NE(uv.data(), nullptr); - EXPECT_EQ(uv.end(), uv.begin() + uv.size()); - EXPECT_FALSE(uv.is_empty()); - EXPECT_NE(uv.element_ptr(0), nullptr); + auto const size{12345}; + rmm::device_uvector vec(size, this->stream()); + EXPECT_EQ(vec.size(), 12345); + EXPECT_NE(vec.data(), nullptr); + EXPECT_EQ(vec.end(), vec.begin() + vec.size()); + EXPECT_FALSE(vec.is_empty()); + EXPECT_NE(vec.element_ptr(0), nullptr); } TYPED_TEST(TypedUVectorTest, CopyConstructor) { - rmm::device_uvector uv(12345, this->stream()); - rmm::device_uvector uv_copy(uv, this->stream()); - EXPECT_EQ(uv_copy.size(), uv.size()); + auto const size{12345}; + rmm::device_uvector vec(size, this->stream()); + rmm::device_uvector uv_copy(vec, this->stream()); + EXPECT_EQ(uv_copy.size(), vec.size()); EXPECT_NE(uv_copy.data(), nullptr); EXPECT_EQ(uv_copy.end(), uv_copy.begin() + uv_copy.size()); EXPECT_FALSE(uv_copy.is_empty()); @@ -61,145 +63,145 @@ TYPED_TEST(TypedUVectorTest, CopyConstructor) TYPED_TEST(TypedUVectorTest, ResizeSmaller) { - auto original_size = 12345; - rmm::device_uvector uv(original_size, this->stream()); - auto original_data = uv.data(); - auto original_begin = uv.begin(); + auto const original_size{12345}; + rmm::device_uvector vec(original_size, this->stream()); + auto original_data = vec.data(); + auto original_begin = vec.begin(); - auto smaller_size = uv.size() - 1; - uv.resize(smaller_size, this->stream()); + auto smaller_size = vec.size() - 1; + vec.resize(smaller_size, this->stream()); - EXPECT_EQ(original_data, 
uv.data()); - EXPECT_EQ(original_begin, uv.begin()); - EXPECT_EQ(uv.size(), smaller_size); - EXPECT_EQ(uv.capacity(), original_size); + EXPECT_EQ(original_data, vec.data()); + EXPECT_EQ(original_begin, vec.begin()); + EXPECT_EQ(vec.size(), smaller_size); + EXPECT_EQ(vec.capacity(), original_size); // shrink_to_fit should force a new allocation - uv.shrink_to_fit(this->stream()); - EXPECT_EQ(uv.size(), smaller_size); - EXPECT_EQ(uv.capacity(), smaller_size); + vec.shrink_to_fit(this->stream()); + EXPECT_EQ(vec.size(), smaller_size); + EXPECT_EQ(vec.capacity(), smaller_size); } TYPED_TEST(TypedUVectorTest, ResizeLarger) { - auto original_size = 12345; - rmm::device_uvector uv(original_size, this->stream()); - auto original_data = uv.data(); - auto original_begin = uv.begin(); + auto const original_size{12345}; + rmm::device_uvector vec(original_size, this->stream()); + auto original_data = vec.data(); + auto original_begin = vec.begin(); - auto larger_size = uv.size() + 1; - uv.resize(larger_size, this->stream()); + auto larger_size = vec.size() + 1; + vec.resize(larger_size, this->stream()); - EXPECT_NE(uv.data(), original_data); - EXPECT_NE(uv.begin(), original_begin); - EXPECT_EQ(uv.size(), larger_size); - EXPECT_EQ(uv.capacity(), larger_size); + EXPECT_NE(vec.data(), original_data); + EXPECT_NE(vec.begin(), original_begin); + EXPECT_EQ(vec.size(), larger_size); + EXPECT_EQ(vec.capacity(), larger_size); - auto larger_data = uv.data(); - auto larger_begin = uv.begin(); + auto larger_data = vec.data(); + auto larger_begin = vec.begin(); // shrink_to_fit shouldn't have any effect - uv.shrink_to_fit(this->stream()); - EXPECT_EQ(uv.size(), larger_size); - EXPECT_EQ(uv.capacity(), larger_size); - EXPECT_EQ(uv.data(), larger_data); - EXPECT_EQ(uv.begin(), larger_begin); + vec.shrink_to_fit(this->stream()); + EXPECT_EQ(vec.size(), larger_size); + EXPECT_EQ(vec.capacity(), larger_size); + EXPECT_EQ(vec.data(), larger_data); + EXPECT_EQ(vec.begin(), larger_begin); } TYPED_TEST(TypedUVectorTest, ResizeToZero) { - auto original_size = 12345; - rmm::device_uvector uv(original_size, this->stream()); - uv.resize(0, this->stream()); + auto const original_size{12345}; + rmm::device_uvector vec(original_size, this->stream()); + vec.resize(0, this->stream()); - EXPECT_EQ(uv.size(), 0); - EXPECT_TRUE(uv.is_empty()); - EXPECT_EQ(uv.capacity(), original_size); + EXPECT_EQ(vec.size(), 0); + EXPECT_TRUE(vec.is_empty()); + EXPECT_EQ(vec.capacity(), original_size); - uv.shrink_to_fit(this->stream()); - EXPECT_EQ(uv.capacity(), 0); + vec.shrink_to_fit(this->stream()); + EXPECT_EQ(vec.capacity(), 0); } TYPED_TEST(TypedUVectorTest, Release) { - auto original_size = 12345; - rmm::device_uvector uv(original_size, this->stream()); + auto const original_size{12345}; + rmm::device_uvector vec(original_size, this->stream()); - auto original_data = uv.data(); + auto original_data = vec.data(); - rmm::device_buffer storage = uv.release(); + rmm::device_buffer storage = vec.release(); - EXPECT_EQ(uv.size(), 0); - EXPECT_EQ(uv.capacity(), 0); - EXPECT_TRUE(uv.is_empty()); + EXPECT_EQ(vec.size(), 0); + EXPECT_EQ(vec.capacity(), 0); + EXPECT_TRUE(vec.is_empty()); EXPECT_EQ(storage.data(), original_data); EXPECT_EQ(storage.size(), original_size * sizeof(TypeParam)); } TYPED_TEST(TypedUVectorTest, ElementPointer) { - auto size = 12345; - rmm::device_uvector uv(size, this->stream()); - for (std::size_t i = 0; i < uv.size(); ++i) { - EXPECT_NE(uv.element_ptr(i), nullptr); + auto const size{12345}; + rmm::device_uvector vec(size, 
this->stream()); + for (std::size_t i = 0; i < vec.size(); ++i) { + EXPECT_NE(vec.element_ptr(i), nullptr); } } TYPED_TEST(TypedUVectorTest, OOBSetElement) { - auto size = 12345; - rmm::device_uvector uv(size, this->stream()); - EXPECT_THROW(uv.set_element(uv.size() + 1, 42, this->stream()), rmm::out_of_range); + auto const size{12345}; + rmm::device_uvector vec(size, this->stream()); + EXPECT_THROW(vec.set_element(vec.size() + 1, 42, this->stream()), rmm::out_of_range); } TYPED_TEST(TypedUVectorTest, OOBGetElement) { - auto size = 12345; - rmm::device_uvector uv(size, this->stream()); - EXPECT_THROW(uv.element(uv.size() + 1, this->stream()), rmm::out_of_range); + auto const size{12345}; + rmm::device_uvector vec(size, this->stream()); + EXPECT_THROW(vec.element(vec.size() + 1, this->stream()), rmm::out_of_range); } TYPED_TEST(TypedUVectorTest, GetSetElement) { - auto size = 12345; - rmm::device_uvector uv(size, this->stream()); - for (std::size_t i = 0; i < uv.size(); ++i) { - uv.set_element(i, i, this->stream()); - EXPECT_EQ(static_cast(i), uv.element(i, this->stream())); + auto const size{12345}; + rmm::device_uvector vec(size, this->stream()); + for (std::size_t i = 0; i < vec.size(); ++i) { + vec.set_element(i, i, this->stream()); + EXPECT_EQ(static_cast(i), vec.element(i, this->stream())); } } TYPED_TEST(TypedUVectorTest, GetSetElementAsync) { - auto size = 12345; - rmm::device_uvector uv(size, this->stream()); - for (std::size_t i = 0; i < uv.size(); ++i) { + auto const size{12345}; + rmm::device_uvector vec(size, this->stream()); + for (std::size_t i = 0; i < vec.size(); ++i) { auto init = static_cast(i); - uv.set_element_async(i, init, this->stream()); - EXPECT_EQ(init, uv.element(i, this->stream())); + vec.set_element_async(i, init, this->stream()); + EXPECT_EQ(init, vec.element(i, this->stream())); } } TYPED_TEST(TypedUVectorTest, SetElementZeroAsync) { - auto size = 12345; - rmm::device_uvector uv(size, this->stream()); - for (std::size_t i = 0; i < uv.size(); ++i) { - uv.set_element_to_zero_async(i, this->stream()); - EXPECT_EQ(TypeParam{0}, uv.element(i, this->stream())); + auto const size{12345}; + rmm::device_uvector vec(size, this->stream()); + for (std::size_t i = 0; i < vec.size(); ++i) { + vec.set_element_to_zero_async(i, this->stream()); + EXPECT_EQ(TypeParam{0}, vec.element(i, this->stream())); } } TYPED_TEST(TypedUVectorTest, FrontBackElement) { - auto size = 12345; - rmm::device_uvector uv(size, this->stream()); + auto const size{12345}; + rmm::device_uvector vec(size, this->stream()); - auto first = TypeParam{42}; - auto last = TypeParam{13}; - uv.set_element(0, first, this->stream()); - uv.set_element(uv.size() - 1, last, this->stream()); + auto const first = TypeParam{42}; + auto const last = TypeParam{13}; + vec.set_element(0, first, this->stream()); + vec.set_element(vec.size() - 1, last, this->stream()); - EXPECT_EQ(first, uv.front_element(this->stream())); - EXPECT_EQ(last, uv.back_element(this->stream())); + EXPECT_EQ(first, vec.front_element(this->stream())); + EXPECT_EQ(last, vec.back_element(this->stream())); } diff --git a/tests/logger_tests.cpp b/tests/logger_tests.cpp index b343c7e35..1da32a137 100644 --- a/tests/logger_tests.cpp +++ b/tests/logger_tests.cpp @@ -26,7 +26,7 @@ class raii_restore_env { public: raii_restore_env(char const* name) : name_(name) { - auto const value_or_null = getenv(name); + auto* const value_or_null = getenv(name); if (value_or_null != nullptr) { value_ = value_or_null; is_set_ = true; @@ -42,6 +42,11 @@ class 
raii_restore_env { } } + raii_restore_env(raii_restore_env const&) = default; + raii_restore_env& operator=(raii_restore_env const&) = default; + raii_restore_env(raii_restore_env&&) = default; + raii_restore_env& operator=(raii_restore_env&&) = default; + private: std::string name_{}; std::string value_{}; @@ -88,19 +93,22 @@ TEST(Adaptor, FilenameConstructor) rmm::mr::cuda_memory_resource upstream; rmm::mr::logging_resource_adaptor log_mr{&upstream, filename}; - auto p0 = log_mr.allocate(100); - auto p1 = log_mr.allocate(42); - log_mr.deallocate(p0, 100); - log_mr.deallocate(p1, 42); + auto const size0{100}; + auto const size1{42}; + + auto* ptr0 = log_mr.allocate(size0); + auto* ptr1 = log_mr.allocate(size1); + log_mr.deallocate(ptr0, size0); + log_mr.deallocate(ptr1, size1); log_mr.flush(); using rmm::detail::action; using rmm::detail::event; - std::vector expected_events{{action::ALLOCATE, 100, p0}, - {action::ALLOCATE, 42, p1}, - {action::FREE, 100, p0}, - {action::FREE, 42, p1}}; + std::vector expected_events{{action::ALLOCATE, size0, ptr0}, + {action::ALLOCATE, size1, ptr1}, + {action::FREE, size0, ptr0}, + {action::FREE, size1, ptr1}}; expect_log_events(filename, expected_events); } @@ -117,19 +125,22 @@ TEST(Adaptor, MultiSinkConstructor) rmm::mr::logging_resource_adaptor log_mr{&upstream, {file_sink1, file_sink2}}; - auto p0 = log_mr.allocate(100); - auto p1 = log_mr.allocate(42); - log_mr.deallocate(p0, 100); - log_mr.deallocate(p1, 42); + auto const size0{100}; + auto const size1{42}; + + auto* ptr0 = log_mr.allocate(size0); + auto* ptr1 = log_mr.allocate(size1); + log_mr.deallocate(ptr0, size0); + log_mr.deallocate(ptr1, size1); log_mr.flush(); using rmm::detail::action; using rmm::detail::event; - std::vector expected_events{{action::ALLOCATE, 100, p0}, - {action::ALLOCATE, 42, p1}, - {action::FREE, 100, p0}, - {action::FREE, 42, p1}}; + std::vector expected_events{{action::ALLOCATE, size0, ptr0}, + {action::ALLOCATE, size1, ptr1}, + {action::FREE, size0, ptr0}, + {action::FREE, size1, ptr1}}; expect_log_events(filename1, expected_events); expect_log_events(filename2, expected_events); @@ -142,19 +153,22 @@ TEST(Adaptor, Factory) auto log_mr = rmm::mr::make_logging_adaptor(&upstream, filename); - auto p0 = log_mr.allocate(99); - log_mr.deallocate(p0, 99); - auto p1 = log_mr.allocate(42); - log_mr.deallocate(p1, 42); + auto const size0{99}; + auto const size1{42}; + + auto* ptr0 = log_mr.allocate(size0); + log_mr.deallocate(ptr0, size0); + auto* ptr1 = log_mr.allocate(size1); + log_mr.deallocate(ptr1, size1); log_mr.flush(); using rmm::detail::action; using rmm::detail::event; - std::vector expected_events{{action::ALLOCATE, 99, p0}, - {action::FREE, 99, p0}, - {action::ALLOCATE, 42, p1}, - {action::FREE, 42, p1}}; + std::vector expected_events{{action::ALLOCATE, size0, ptr0}, + {action::FREE, size0, ptr0}, + {action::ALLOCATE, size1, ptr1}, + {action::FREE, size1, ptr1}}; expect_log_events(filename, expected_events); } @@ -178,8 +192,10 @@ TEST(Adaptor, EnvironmentPath) // use log file location specified in environment variable RMM_LOG_FILE auto log_mr = rmm::mr::make_logging_adaptor(&upstream); - auto p = log_mr.allocate(100); - log_mr.deallocate(p, 100); + auto const size{100}; + + auto* ptr = log_mr.allocate(size); + log_mr.deallocate(ptr, size); log_mr.flush(); @@ -187,8 +203,8 @@ TEST(Adaptor, EnvironmentPath) using rmm::detail::event; std::vector expected_events{ - {action::ALLOCATE, 100, p}, - {action::FREE, 100, p}, + {action::ALLOCATE, size, ptr}, + {action::FREE, 
size, ptr}, }; expect_log_events(filename, expected_events); @@ -202,11 +218,13 @@ TEST(Adaptor, STDOUT) auto log_mr = rmm::mr::make_logging_adaptor(&upstream, std::cout); - auto p = log_mr.allocate(100); - log_mr.deallocate(p, 100); + auto const size{100}; + + auto* p = log_mr.allocate(size); + log_mr.deallocate(p, size); std::string output = testing::internal::GetCapturedStdout(); - std::string header = output.substr(0, output.find("\n")); + std::string header = output.substr(0, output.find('\n')); ASSERT_EQ(header, log_mr.header()); } @@ -218,10 +236,12 @@ TEST(Adaptor, STDERR) auto log_mr = rmm::mr::make_logging_adaptor(&upstream, std::cerr); - auto p = log_mr.allocate(100); - log_mr.deallocate(p, 100); + auto const size{100}; + + auto* p = log_mr.allocate(size); + log_mr.deallocate(p, size); std::string output = testing::internal::GetCapturedStderr(); - std::string header = output.substr(0, output.find("\n")); + std::string header = output.substr(0, output.find('\n')); ASSERT_EQ(header, log_mr.header()); } From be955033804934a34a9cba5383c85e42f1640f0b Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 2 Sep 2021 12:24:02 +1000 Subject: [PATCH 10/72] tidy cuda_stream.hpp --- include/rmm/cuda_stream.hpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/rmm/cuda_stream.hpp b/include/rmm/cuda_stream.hpp index 10d944c8f..6a3304e2c 100644 --- a/include/rmm/cuda_stream.hpp +++ b/include/rmm/cuda_stream.hpp @@ -57,13 +57,13 @@ class cuda_stream { */ cuda_stream() : stream_{[]() { - cudaStream_t* s = new cudaStream_t; + auto* s = new cudaStream_t; RMM_CUDA_TRY(cudaStreamCreate(s)); return s; }(), - [](cudaStream_t* s) { - RMM_ASSERT_CUDA_SUCCESS(cudaStreamDestroy(*s)); - delete s; + [](cudaStream_t* stream) { + RMM_ASSERT_CUDA_SUCCESS(cudaStreamDestroy(*stream)); + delete stream; }} { } @@ -74,14 +74,14 @@ class cuda_stream { * @return true If the owned stream has not been explicitly moved and is therefore non-null. * @return false If the owned stream has been explicitly moved and is therefore null. */ - bool is_valid() const { return stream_ != nullptr; } + [[nodiscard]] bool is_valid() const { return stream_ != nullptr; } /** * @brief Get the value of the wrapped CUDA stream. * * @return cudaStream_t The wrapped CUDA stream. */ - cudaStream_t value() const + [[nodiscard]] cudaStream_t value() const { RMM_LOGGING_ASSERT(is_valid()); return *stream_; @@ -97,7 +97,7 @@ class cuda_stream { * * @return rmm::cuda_stream_view The view of the CUDA stream */ - cuda_stream_view view() const { return cuda_stream_view{value()}; } + [[nodiscard]] cuda_stream_view view() const { return cuda_stream_view{value()}; } /** * @brief Implicit conversion to cuda_stream_view From 7c2653d54ce606ef96724096efc5e02386326e53 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 7 Sep 2021 08:37:54 +1000 Subject: [PATCH 11/72] Remove incorrect fix for warning --- include/rmm/detail/error.hpp | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/include/rmm/detail/error.hpp b/include/rmm/detail/error.hpp index 50ae70b2f..7c052ea8d 100644 --- a/include/rmm/detail/error.hpp +++ b/include/rmm/detail/error.hpp @@ -101,7 +101,7 @@ class out_of_range : public std::out_of_range { (__VA_ARGS__) #define GET_RMM_EXPECTS_MACRO(_1, _2, _3, NAME, ...) NAME #define RMM_EXPECTS_3(_condition, _exception_type, _reason) \ - (!!(_condition)) ? static_cast(0) : throw(_exception_type) \ + (!!(_condition)) ? 
static_cast(0) : throw _exception_type \ { \ "RMM failure at: " __FILE__ ":" RMM_STRINGIFY(__LINE__) ": " _reason \ } @@ -124,7 +124,7 @@ class out_of_range : public std::out_of_range { (__VA_ARGS__) #define GET_RMM_FAIL_MACRO(_1, _2, NAME, ...) NAME #define RMM_FAIL_2(_what, _exception_type) \ - throw(_exception_type){"RMM failure at:" __FILE__ ":" RMM_STRINGIFY(__LINE__) ": " _what}; + throw _exception_type{"RMM failure at:" __FILE__ ":" RMM_STRINGIFY(__LINE__) ": " _what}; #define RMM_FAIL_1(_what) RMM_FAIL_2(_what, rmm::logic_error) /** @@ -152,15 +152,15 @@ class out_of_range : public std::out_of_range { GET_RMM_CUDA_TRY_MACRO(__VA_ARGS__, RMM_CUDA_TRY_2, RMM_CUDA_TRY_1) \ (__VA_ARGS__) #define GET_RMM_CUDA_TRY_MACRO(_1, _2, NAME, ...) NAME -#define RMM_CUDA_TRY_2(_call, _exception_type) \ - do { \ - cudaError_t const error = (_call); \ - if (cudaSuccess != error) { \ - cudaGetLastError(); \ - throw(_exception_type){std::string{"CUDA error at: "} + __FILE__ + ":" + \ - RMM_STRINGIFY(__LINE__) + ": " + cudaGetErrorName(error) + " " + \ - cudaGetErrorString(error)}; \ - } \ +#define RMM_CUDA_TRY_2(_call, _exception_type) \ + do { \ + cudaError_t const error = (_call); \ + if (cudaSuccess != error) { \ + cudaGetLastError(); \ + throw _exception_type{std::string{"CUDA error at: "} + __FILE__ + ":" + \ + RMM_STRINGIFY(__LINE__) + ": " + cudaGetErrorName(error) + " " + \ + cudaGetErrorString(error)}; \ + } \ } while (0) #define RMM_CUDA_TRY_1(_call) RMM_CUDA_TRY_2(_call, rmm::cuda_error) From d108e80c3be02e0f531bf966983ac218aa1e24c6 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 7 Sep 2021 08:38:02 +1000 Subject: [PATCH 12/72] include order --- tests/device_buffer_tests.cu | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/device_buffer_tests.cu b/tests/device_buffer_tests.cu index 05ecef4c4..63841a67e 100644 --- a/tests/device_buffer_tests.cu +++ b/tests/device_buffer_tests.cu @@ -27,9 +27,11 @@ #include #include -#include #include #include + +#include + #include #include From d4ee0d4ce1cfd92e94b88193ee721e0ab44b1916 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 7 Sep 2021 08:38:30 +1000 Subject: [PATCH 13/72] tidying --- include/rmm/cuda_device.hpp | 6 +- tests/mr/device/aligned_mr_tests.cpp | 179 ++++++++++++++++++--------- 2 files changed, 125 insertions(+), 60 deletions(-) diff --git a/include/rmm/cuda_device.hpp b/include/rmm/cuda_device.hpp index cad929de7..ab225490e 100644 --- a/include/rmm/cuda_device.hpp +++ b/include/rmm/cuda_device.hpp @@ -33,10 +33,10 @@ struct cuda_device_id { * * @param id The device's integer identifier */ - explicit constexpr cuda_device_id(value_type id) noexcept : id_{id} {} + explicit constexpr cuda_device_id(value_type dev_id) noexcept : id_{dev_id} {} /// Returns the wrapped integer value - constexpr value_type value() const noexcept { return id_; } + [[nodiscard]] constexpr value_type value() const noexcept { return id_; } private: value_type id_; @@ -52,7 +52,7 @@ namespace detail { */ inline cuda_device_id current_device() { - int dev_id; + int dev_id{}; RMM_CUDA_TRY(cudaGetDevice(&dev_id)); return cuda_device_id{dev_id}; } diff --git a/tests/mr/device/aligned_mr_tests.cpp b/tests/mr/device/aligned_mr_tests.cpp index 3eafd624e..a39dcdbfd 100644 --- a/tests/mr/device/aligned_mr_tests.cpp +++ b/tests/mr/device/aligned_mr_tests.cpp @@ -49,7 +49,9 @@ TEST(AlignedTest, ThrowOnNullUpstream) TEST(AlignedTest, ThrowOnInvalidAllocationAlignment) { mock_resource mock; - auto construct_alignment = [](auto* r, std::size_t 
a) { aligned_mock mr{r, a}; }; + auto construct_alignment = [](auto* memres, std::size_t align) { + aligned_mock mr{memres, align}; + }; EXPECT_THROW(construct_alignment(&mock, 255), rmm::logic_error); EXPECT_NO_THROW(construct_alignment(&mock, 256)); EXPECT_THROW(construct_alignment(&mock, 768), rmm::logic_error); @@ -85,97 +87,160 @@ TEST(AlignedTest, DefaultAllocationAlignmentPassthrough) aligned_mock mr{&mock}; cuda_stream_view stream; - void* pointer = reinterpret_cast(123); + auto const unaligned_address{123}; + void* const pointer = reinterpret_cast(unaligned_address); // device_memory_resource aligns to 8. - EXPECT_CALL(mock, do_allocate(8, stream)).WillOnce(Return(pointer)); - EXPECT_CALL(mock, do_deallocate(pointer, 8, stream)).Times(1); - EXPECT_EQ(mr.allocate(5, stream), pointer); - mr.deallocate(pointer, 5, stream); + { + auto const size{8}; + EXPECT_CALL(mock, do_allocate(size, stream)).WillOnce(Return(pointer)); + EXPECT_CALL(mock, do_deallocate(pointer, size, stream)).Times(1); + } + + { + auto const size{5}; + EXPECT_EQ(mr.allocate(size, stream), pointer); + mr.deallocate(pointer, size, stream); + } } TEST(AlignedTest, BelowAlignmentThresholdPassthrough) { mock_resource mock; - aligned_mock mr{&mock, 4096, 65536}; + auto const alignment{4096}; + auto const threshold{65536}; + aligned_mock mr{&mock, alignment, threshold}; cuda_stream_view stream; - void* pointer = reinterpret_cast(123); + auto const unaligned_address1{123}; + void* const pointer = reinterpret_cast(unaligned_address1); // device_memory_resource aligns to 8. - EXPECT_CALL(mock, do_allocate(8, stream)).WillOnce(Return(pointer)); - EXPECT_CALL(mock, do_deallocate(pointer, 8, stream)).Times(1); - EXPECT_EQ(mr.allocate(3, stream), pointer); - mr.deallocate(pointer, 3, stream); - - void* pointer1 = reinterpret_cast(456); - EXPECT_CALL(mock, do_allocate(65528, stream)).WillOnce(Return(pointer1)); - EXPECT_CALL(mock, do_deallocate(pointer1, 65528, stream)).Times(1); - EXPECT_EQ(mr.allocate(65528, stream), pointer1); - mr.deallocate(pointer1, 65528, stream); + { + auto const size{8}; + EXPECT_CALL(mock, do_allocate(size, stream)).WillOnce(Return(pointer)); + EXPECT_CALL(mock, do_deallocate(pointer, size, stream)).Times(1); + } + + { + auto const size{3}; + EXPECT_EQ(mr.allocate(size, stream), pointer); + mr.deallocate(pointer, size, stream); + } + + { + auto const unaligned_address2{456}; + auto const size{65528}; + void* const pointer1 = reinterpret_cast(unaligned_address2); + EXPECT_CALL(mock, do_allocate(size, stream)).WillOnce(Return(pointer1)); + EXPECT_CALL(mock, do_deallocate(pointer1, size, stream)).Times(1); + EXPECT_EQ(mr.allocate(size, stream), pointer1); + mr.deallocate(pointer1, size, stream); + } } TEST(AlignedTest, UpstreamAddressAlreadyAligned) { mock_resource mock; - aligned_mock mr{&mock, 4096, 65536}; + auto const alignment{4096}; + auto const threshold{65536}; + aligned_mock mr{&mock, alignment, threshold}; cuda_stream_view stream; - void* pointer = reinterpret_cast(4096); - EXPECT_CALL(mock, do_allocate(69376, stream)).WillOnce(Return(pointer)); - EXPECT_CALL(mock, do_deallocate(pointer, 69376, stream)).Times(1); - - EXPECT_EQ(mr.allocate(65536, stream), pointer); - mr.deallocate(pointer, 65536, stream); + auto const aligned_address{4096}; + void* const pointer = reinterpret_cast(aligned_address); + + { + auto const size{69376}; + EXPECT_CALL(mock, do_allocate(size, stream)).WillOnce(Return(pointer)); + EXPECT_CALL(mock, do_deallocate(pointer, size, stream)).Times(1); + } + + { + auto const 
size{65536}; + EXPECT_EQ(mr.allocate(size, stream), pointer); + mr.deallocate(pointer, size, stream); + } } TEST(AlignedTest, AlignUpstreamAddress) { mock_resource mock; - aligned_mock mr{&mock, 4096, 65536}; + auto const alignment{4096}; + auto const threshold{65536}; + aligned_mock mr{&mock, alignment, threshold}; cuda_stream_view stream; - void* pointer = reinterpret_cast(256); - EXPECT_CALL(mock, do_allocate(69376, stream)).WillOnce(Return(pointer)); - EXPECT_CALL(mock, do_deallocate(pointer, 69376, stream)).Times(1); - - void* expected_pointer = reinterpret_cast(4096); - EXPECT_EQ(mr.allocate(65536, stream), expected_pointer); - mr.deallocate(expected_pointer, 65536, stream); + { + auto const address{256}; + void* const pointer = reinterpret_cast(address); + auto const size{69376}; + EXPECT_CALL(mock, do_allocate(size, stream)).WillOnce(Return(pointer)); + EXPECT_CALL(mock, do_deallocate(pointer, size, stream)).Times(1); + } + + { + auto const address{4096}; + void* const expected_pointer = reinterpret_cast(address); + auto const size{65536}; + EXPECT_EQ(mr.allocate(size, stream), expected_pointer); + mr.deallocate(expected_pointer, size, stream); + } } TEST(AlignedTest, AlignMultiple) { mock_resource mock; - aligned_mock mr{&mock, 4096, 65536}; + auto const alignment{4096}; + auto const threshold{65536}; + aligned_mock mr{&mock, alignment, threshold}; cuda_stream_view stream; - void* pointer = reinterpret_cast(256); - void* pointer1 = reinterpret_cast(131584); - void* pointer2 = reinterpret_cast(263168); - EXPECT_CALL(mock, do_allocate(69376, stream)).WillOnce(Return(pointer)); - EXPECT_CALL(mock, do_allocate(77568, stream)).WillOnce(Return(pointer1)); - EXPECT_CALL(mock, do_allocate(81664, stream)).WillOnce(Return(pointer2)); - EXPECT_CALL(mock, do_deallocate(pointer, 69376, stream)).Times(1); - EXPECT_CALL(mock, do_deallocate(pointer1, 77568, stream)).Times(1); - EXPECT_CALL(mock, do_deallocate(pointer2, 81664, stream)).Times(1); - - void* expected_pointer = reinterpret_cast(4096); - void* expected_pointer1 = reinterpret_cast(135168); - void* expected_pointer2 = reinterpret_cast(266240); - EXPECT_EQ(mr.allocate(65536, stream), expected_pointer); - EXPECT_EQ(mr.allocate(73728, stream), expected_pointer1); - EXPECT_EQ(mr.allocate(77800, stream), expected_pointer2); - mr.deallocate(expected_pointer1, 73728, stream); - mr.deallocate(expected_pointer, 65536, stream); - mr.deallocate(expected_pointer2, 77800, stream); + + { + auto const address1{256}; + auto const address2{131584}; + auto const address3{263168}; + void* const pointer1 = reinterpret_cast(address1); + void* const pointer2 = reinterpret_cast(address2); + void* const pointer3 = reinterpret_cast(address3); + auto const size1{69376}; + auto const size2{77568}; + auto const size3{81664}; + EXPECT_CALL(mock, do_allocate(size1, stream)).WillOnce(Return(pointer1)); + EXPECT_CALL(mock, do_allocate(size2, stream)).WillOnce(Return(pointer2)); + EXPECT_CALL(mock, do_allocate(size3, stream)).WillOnce(Return(pointer3)); + EXPECT_CALL(mock, do_deallocate(pointer1, size1, stream)).Times(1); + EXPECT_CALL(mock, do_deallocate(pointer2, size2, stream)).Times(1); + EXPECT_CALL(mock, do_deallocate(pointer3, size3, stream)).Times(1); + } + + { + auto const expected_address1{4096}; + auto const expected_address2{135168}; + auto const expected_address3{266240}; + void* const expected_pointer1 = reinterpret_cast(expected_address1); + void* const expected_pointer2 = reinterpret_cast(expected_address2); + void* const expected_pointer3 = 
reinterpret_cast(expected_address3); + auto const size1{65536}; + auto const size2{73728}; + auto const size3{77800}; + EXPECT_EQ(mr.allocate(size1, stream), expected_pointer1); + EXPECT_EQ(mr.allocate(size2, stream), expected_pointer2); + EXPECT_EQ(mr.allocate(size3, stream), expected_pointer3); + mr.deallocate(expected_pointer1, size1, stream); + mr.deallocate(expected_pointer2, size2, stream); + mr.deallocate(expected_pointer3, size3, stream); + } } TEST(AlignedTest, AlignRealPointer) { - aligned_real mr{rmm::mr::get_current_device_resource(), 4096, 65536}; - void* alloc = mr.allocate(65536); + auto const alignment{4096}; + auto const threshold{65536}; + aligned_real mr{rmm::mr::get_current_device_resource(), alignment, threshold}; + void* alloc = mr.allocate(threshold); auto const address = reinterpret_cast(alloc); - EXPECT_TRUE(address % 4096 == 0); - mr.deallocate(alloc, 65536); + EXPECT_TRUE(address % alignment == 0); + mr.deallocate(alloc, threshold); } } // namespace From 7aafe385e5adf59ddebd3dd7e13592ec006e8144 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 7 Sep 2021 10:46:07 +1000 Subject: [PATCH 14/72] Use temporary fork of gtest --- cmake/thirdparty/get_gtest.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/thirdparty/get_gtest.cmake b/cmake/thirdparty/get_gtest.cmake index 0b9287734..7dd02eb8b 100644 --- a/cmake/thirdparty/get_gtest.cmake +++ b/cmake/thirdparty/get_gtest.cmake @@ -23,8 +23,8 @@ function(find_and_configure_gtest VERSION) GTest ${VERSION} GLOBAL_TARGETS gmock gmock_main gtest gtest_main GTest::gmock GTest::gtest GTest::gtest_main CPM_ARGS - GIT_REPOSITORY https://github.com/google/googletest.git - GIT_TAG release-${VERSION} + GIT_REPOSITORY https://github.com/harrism/googletest.git + GIT_TAG fix-clang-tidy-nolint GIT_SHALLOW TRUE OPTIONS "INSTALL_GTEST OFF" # googletest >= 1.10.0 provides a cmake config file -- use it if it exists @@ -40,4 +40,4 @@ function(find_and_configure_gtest VERSION) endfunction() -find_and_configure_gtest(1.10.0) +find_and_configure_gtest(1.11.0) From b7b43007fd43cc07d50c3356503757681443e268 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 7 Sep 2021 10:46:46 +1000 Subject: [PATCH 15/72] function-cognitive-complexity.IgnoreMacros=1 --- .clang-tidy | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.clang-tidy b/.clang-tidy index a0bf9994a..b8fba1f09 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -54,4 +54,6 @@ CheckOptions: value: 'mr|os' - key: readability-identifier-length.IgnoredVariableNames value: 'mr|_' + - key: readability-function-cognitive-complexity.IgnoreMacros + value: '1' ... 
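The clang-tidy option added above, readability-function-cognitive-complexity.IgnoreMacros set to '1', stops the cognitive-complexity check from counting code expanded from macros, which matters for GoogleTest-heavy files where most of a test body is EXPECT_*/ASSERT_* expansions. The following is a minimal sketch of the kind of test this affects; the test and suite names and the values are illustrative, not taken from RMM.

#include <gtest/gtest.h>

#include <stdexcept>

namespace {

// Each EXPECT_* below expands to branching code. With
// readability-function-cognitive-complexity.IgnoreMacros set to '1',
// clang-tidy no longer charges those expansions against this function's
// cognitive complexity, so macro-heavy tests stop tripping the check.
TEST(IgnoreMacrosExample, MacroHeavyTest)
{
  auto const value{42};
  EXPECT_EQ(value, 42);
  EXPECT_NE(value, 0);
  EXPECT_THROW(throw std::logic_error{"example"}, std::logic_error);
  EXPECT_NO_THROW(static_cast<void>(value + 1));
}

}  // namespace

A per-site NOLINTNEXTLINE(readability-function-cognitive-complexity) comment is the narrower alternative; the temporary googletest fork pinned above (tag fix-clang-tidy-nolint) appears to exist so that NOLINT-style suppressions survive expansion of the gtest macros.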
From 6143ffa15287666fe3845898d24443e17384808a Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 7 Sep 2021 10:47:00 +1000 Subject: [PATCH 16/72] Tidy cuda_async_mr_tests --- tests/mr/device/cuda_async_mr_tests.cpp | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/tests/mr/device/cuda_async_mr_tests.cpp b/tests/mr/device/cuda_async_mr_tests.cpp index a43f68fab..4bf0c3d5b 100644 --- a/tests/mr/device/cuda_async_mr_tests.cpp +++ b/tests/mr/device/cuda_async_mr_tests.cpp @@ -19,8 +19,7 @@ #include -namespace rmm { -namespace test { +namespace rmm::test { namespace { using cuda_async_mr = rmm::mr::cuda_async_memory_resource; @@ -38,24 +37,24 @@ TEST(PoolTest, ThrowIfNotSupported) #if defined(RMM_CUDA_MALLOC_ASYNC_SUPPORT) TEST(PoolTest, ExplicitInitialPoolSize) { - cuda_async_mr mr{100}; - void* p; - EXPECT_NO_THROW(p = mr.allocate(100)); - EXPECT_NO_THROW(mr.deallocate(p, 100)); + const auto pool_init_size{100}; + cuda_async_mr mr{pool_init_size}; + void* ptr = mr.allocate(pool_init_size); + mr.deallocate(ptr, pool_init_size); RMM_CUDA_TRY(cudaDeviceSynchronize()); } TEST(PoolTest, ExplicitReleaseThreshold) { - cuda_async_mr mr{100, 1000}; - void* p; - EXPECT_NO_THROW(p = mr.allocate(100)); - EXPECT_NO_THROW(mr.deallocate(p, 100)); + const auto pool_init_size{100}; + const auto pool_release_threshold{1000}; + cuda_async_mr mr{pool_init_size, pool_release_threshold}; + void* ptr = mr.allocate(pool_init_size); + mr.deallocate(ptr, pool_init_size); RMM_CUDA_TRY(cudaDeviceSynchronize()); } #endif } // namespace -} // namespace test -} // namespace rmm +} // namespace rmm::test From 2419b1c8e20a6e80637c00f59d88d5b70d6c0cf0 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 7 Sep 2021 11:23:34 +1000 Subject: [PATCH 17/72] Factor out byte_literals for reuse --- tests/byte_literals.hpp | 35 +++++++++++++++++++++++++++++++++++ tests/mr/device/mr_test.hpp | 12 +++--------- 2 files changed, 38 insertions(+), 9 deletions(-) create mode 100644 tests/byte_literals.hpp diff --git a/tests/byte_literals.hpp b/tests/byte_literals.hpp new file mode 100644 index 000000000..b22e98a1c --- /dev/null +++ b/tests/byte_literals.hpp @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +namespace rmm::test { + +constexpr auto kilo{long{1} << 10}; +constexpr auto mega{long{1} << 20}; +constexpr auto giga{long{1} << 30}; +constexpr auto tera{long{1} << 40}; +constexpr auto peta{long{1} << 50}; + +// user-defined Byte literals +constexpr unsigned long long operator""_B(unsigned long long val) { return val; } +constexpr unsigned long long operator""_KiB(unsigned long long const val) { return kilo * val; } +constexpr unsigned long long operator""_MiB(unsigned long long const val) { return mega * val; } +constexpr unsigned long long operator""_GiB(unsigned long long const val) { return giga * val; } +constexpr unsigned long long operator""_TiB(unsigned long long const val) { return tera * val; } +constexpr unsigned long long operator""_PiB(unsigned long long const val) { return peta * val; } + +} // namespace rmm::test diff --git a/tests/mr/device/mr_test.hpp b/tests/mr/device/mr_test.hpp index 27da69fbe..72387d71d 100644 --- a/tests/mr/device/mr_test.hpp +++ b/tests/mr/device/mr_test.hpp @@ -16,7 +16,7 @@ #pragma once -#include +#include "../../byte_literals.hpp" #include #include @@ -31,6 +31,8 @@ #include #include +#include + #include #include @@ -61,14 +63,6 @@ inline bool is_device_memory(void* p) #endif } -// some useful allocation sizes -constexpr long operator""_B(unsigned long long const x) { return x; } -constexpr long operator""_KiB(unsigned long long const x) { return x * (long{1} << 10); } -constexpr long operator""_MiB(unsigned long long const x) { return x * (long{1} << 20); } -constexpr long operator""_GiB(unsigned long long const x) { return x * (long{1} << 30); } -constexpr long operator""_TiB(unsigned long long const x) { return x * (long{1} << 40); } -constexpr long operator""_PiB(unsigned long long const x) { return x * (long{1} << 50); } - struct allocation { void* p{nullptr}; std::size_t size{0}; From 507176a0aeca7b9d2379477f456f5152ec38fcda Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 7 Sep 2021 11:23:47 +1000 Subject: [PATCH 18/72] tidy limiting_mr_tests --- tests/mr/device/limiting_mr_tests.cpp | 84 ++++++++++++++++----------- 1 file changed, 50 insertions(+), 34 deletions(-) diff --git a/tests/mr/device/limiting_mr_tests.cpp b/tests/mr/device/limiting_mr_tests.cpp index 64ec688be..3bc643abc 100644 --- a/tests/mr/device/limiting_mr_tests.cpp +++ b/tests/mr/device/limiting_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,65 +14,81 @@ * limitations under the License. 
*/ +#include "../../byte_literals.hpp" + #include #include #include #include -#include "mr_test.hpp" -namespace rmm { -namespace test { +namespace rmm::test { namespace { + using Limiting_adaptor = rmm::mr::limiting_resource_adaptor; + TEST(LimitingTest, ThrowOnNullUpstream) { - auto construct_nullptr = []() { Limiting_adaptor mr{nullptr, 5_MiB}; }; + auto const max_size{5_MiB}; + auto construct_nullptr = []() { Limiting_adaptor mr{nullptr, max_size}; }; EXPECT_THROW(construct_nullptr(), rmm::logic_error); } TEST(LimitingTest, TooBig) { - Limiting_adaptor mr{rmm::mr::get_current_device_resource(), 1_MiB}; - EXPECT_THROW(mr.allocate(5_MiB), rmm::bad_alloc); + auto const max_size{5_MiB}; + Limiting_adaptor mr{rmm::mr::get_current_device_resource(), max_size}; + EXPECT_THROW(mr.allocate(max_size + 1), rmm::bad_alloc); } TEST(LimitingTest, UnderLimitDueToFrees) { - Limiting_adaptor mr{rmm::mr::get_current_device_resource(), 10_MiB}; - auto p1 = mr.allocate(4_MiB); - EXPECT_EQ(mr.get_allocated_bytes(), 4_MiB); - EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), 6_MiB); - auto p2 = mr.allocate(4_MiB); - EXPECT_EQ(mr.get_allocated_bytes(), 8_MiB); - EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), 2_MiB); - mr.deallocate(p1, 4_MiB); - EXPECT_EQ(mr.get_allocated_bytes(), 4_MiB); - EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), 6_MiB); + auto const max_size{10_MiB}; + Limiting_adaptor mr{rmm::mr::get_current_device_resource(), max_size}; + auto const size1{4_MiB}; + auto* ptr1 = mr.allocate(size1); + auto allocated_bytes = size1; + EXPECT_EQ(mr.get_allocated_bytes(), allocated_bytes); + EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), max_size - allocated_bytes); + auto* ptr2 = mr.allocate(size1); + allocated_bytes += size1; + EXPECT_EQ(mr.get_allocated_bytes(), allocated_bytes); + EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), max_size - allocated_bytes); + mr.deallocate(ptr1, size1); + allocated_bytes -= size1; + EXPECT_EQ(mr.get_allocated_bytes(), allocated_bytes); + EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), max_size - allocated_bytes); // note that we don't keep track of fragmentation or things like page size // so this should fill 100% of the memory even though it is probably over. 
- EXPECT_NO_THROW(mr.allocate(6_MiB)); - EXPECT_EQ(mr.get_allocated_bytes(), 10_MiB); + auto const size2{6_MiB}; + auto* ptr3 = mr.allocate(size2); + allocated_bytes += size2; + EXPECT_EQ(mr.get_allocated_bytes(), allocated_bytes); EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), 0); - mr.deallocate(p2, 4_MiB); + mr.deallocate(ptr2, size1); + mr.deallocate(ptr3, size2); } TEST(LimitingTest, OverLimit) { - Limiting_adaptor mr{rmm::mr::get_current_device_resource(), 10_MiB}; - auto p1 = mr.allocate(4_MiB); - EXPECT_EQ(mr.get_allocated_bytes(), 4_MiB); - EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), 6_MiB); - auto p2 = mr.allocate(4_MiB); - EXPECT_EQ(mr.get_allocated_bytes(), 8_MiB); - EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), 2_MiB); - EXPECT_THROW(mr.allocate(3_MiB), rmm::bad_alloc); - EXPECT_EQ(mr.get_allocated_bytes(), 8_MiB); - EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), 2_MiB); - mr.deallocate(p1, 4_MiB); - mr.deallocate(p2, 4_MiB); + auto const max_size{10_MiB}; + Limiting_adaptor mr{rmm::mr::get_current_device_resource(), max_size}; + auto const size1{4_MiB}; + auto* ptr1 = mr.allocate(size1); + auto allocated_bytes = size1; + EXPECT_EQ(mr.get_allocated_bytes(), allocated_bytes); + EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), max_size - allocated_bytes); + auto* ptr2 = mr.allocate(size1); + allocated_bytes += size1; + EXPECT_EQ(mr.get_allocated_bytes(), allocated_bytes); + EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), max_size - allocated_bytes); + auto const size2{3_MiB}; + EXPECT_THROW(mr.allocate(size2), rmm::bad_alloc); + EXPECT_EQ(mr.get_allocated_bytes(), allocated_bytes); + EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), max_size - allocated_bytes); + mr.deallocate(ptr1, 4_MiB); + mr.deallocate(ptr2, 4_MiB); } } // namespace -} // namespace test -} // namespace rmm +} // namespace rmm::test From 3820f1bbe53d632152723fb0ce247213ee318e2c Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 7 Sep 2021 13:02:00 +1000 Subject: [PATCH 19/72] tidy logger_tests --- tests/logger_tests.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/logger_tests.cpp b/tests/logger_tests.cpp index 1da32a137..498a96722 100644 --- a/tests/logger_tests.cpp +++ b/tests/logger_tests.cpp @@ -220,8 +220,8 @@ TEST(Adaptor, STDOUT) auto const size{100}; - auto* p = log_mr.allocate(size); - log_mr.deallocate(p, size); + auto* ptr = log_mr.allocate(size); + log_mr.deallocate(ptr, size); std::string output = testing::internal::GetCapturedStdout(); std::string header = output.substr(0, output.find('\n')); @@ -238,8 +238,8 @@ TEST(Adaptor, STDERR) auto const size{100}; - auto* p = log_mr.allocate(size); - log_mr.deallocate(p, size); + auto* ptr = log_mr.allocate(size); + log_mr.deallocate(ptr, size); std::string output = testing::internal::GetCapturedStderr(); std::string header = output.substr(0, output.find('\n')); From 2580056f8c16821cf09e6f872a0264209702cb24 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 7 Sep 2021 13:57:56 +1000 Subject: [PATCH 20/72] tidy mr_tests --- include/rmm/detail/aligned.hpp | 11 ++ tests/mr/device/mr_multithreaded_tests.cpp | 53 +++++---- tests/mr/device/mr_test.hpp | 128 +++++++++++---------- tests/mr/device/mr_tests.cpp | 35 +++--- 4 files changed, 124 insertions(+), 103 deletions(-) diff --git a/include/rmm/detail/aligned.hpp b/include/rmm/detail/aligned.hpp index 8b15d5463..c949ad035 100644 --- 
a/include/rmm/detail/aligned.hpp +++ b/include/rmm/detail/aligned.hpp @@ -90,6 +90,12 @@ constexpr bool is_aligned(std::size_t value, std::size_t align_bytes) noexcept return value == align_down(value, align_bytes); } +inline bool is_pointer_aligned(void* ptr, std::size_t alignment = CUDA_ALLOCATION_ALIGNMENT) +{ + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + return rmm::detail::is_aligned(reinterpret_cast(ptr), alignment); +} + /** * @brief Allocates sufficient memory to satisfy the requested size `bytes` with * alignment `alignment` using the unary callable `alloc` to allocate memory. @@ -129,6 +135,7 @@ void* aligned_allocate(std::size_t bytes, std::size_t alignment, Alloc alloc) char* const original = static_cast(alloc(padded_allocation_size)); // account for storage of offset immediately prior to the aligned pointer + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) void* aligned{original + sizeof(std::ptrdiff_t)}; // std::align modifies `aligned` to point to the first aligned location @@ -138,6 +145,7 @@ void* aligned_allocate(std::size_t bytes, std::size_t alignment, Alloc alloc) std::ptrdiff_t offset = static_cast(aligned) - original; // Store the offset immediately before the aligned pointer + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) *(static_cast(aligned) - 1) = offset; return aligned; @@ -159,13 +167,16 @@ void* aligned_allocate(std::size_t bytes, std::size_t alignment, Alloc alloc) * @tparam Dealloc A unary callable type that deallocates memory. */ template +// NOLINTNEXTLINE(bugprone-easily-swappable-parameters) void aligned_deallocate(void* ptr, std::size_t bytes, std::size_t alignment, Dealloc dealloc) { (void)alignment; // Get offset from the location immediately prior to the aligned pointer + // NOLINTNEXTLINE std::ptrdiff_t const offset = *(reinterpret_cast(ptr) - 1); + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) void* const original = static_cast(ptr) - offset; dealloc(original); diff --git a/tests/mr/device/mr_multithreaded_tests.cpp b/tests/mr/device/mr_multithreaded_tests.cpp index 233686f7e..a80af38fe 100644 --- a/tests/mr/device/mr_multithreaded_tests.cpp +++ b/tests/mr/device/mr_multithreaded_tests.cpp @@ -28,8 +28,7 @@ #include #include -namespace rmm { -namespace test { +namespace rmm::test { namespace { struct mr_test_mt : public mr_test { @@ -49,11 +48,13 @@ void spawn_n(std::size_t num_threads, Task task, Arguments&&... 
args) { std::vector threads; threads.reserve(num_threads); - for (std::size_t i = 0; i < num_threads; ++i) + for (std::size_t i = 0; i < num_threads; ++i) { threads.emplace_back(std::thread(task, std::forward(args)...)); + } - for (auto& t : threads) - t.join(); + for (auto& thread : threads) { + thread.join(); + } } template @@ -102,7 +103,7 @@ TEST_P(mr_test_mt, SetCurrentDeviceResource_mt) TEST_P(mr_test_mt, SetCurrentDeviceResourcePerThread_mt) { - int num_devices; + int num_devices{}; RMM_CUDA_TRY(cudaGetDeviceCount(&num_devices)); std::vector threads; @@ -111,7 +112,7 @@ TEST_P(mr_test_mt, SetCurrentDeviceResourcePerThread_mt) threads.emplace_back(std::thread{ [mr = this->mr.get()](auto dev_id) { RMM_CUDA_TRY(cudaSetDevice(dev_id)); - rmm::mr::device_memory_resource* old; + rmm::mr::device_memory_resource* old{}; EXPECT_NO_THROW(old = rmm::mr::set_current_device_resource(mr)); EXPECT_NE(nullptr, old); // initial resource for this device should be CUDA mr @@ -129,8 +130,9 @@ TEST_P(mr_test_mt, SetCurrentDeviceResourcePerThread_mt) i}); } - for (auto& t : threads) - t.join(); + for (auto& thread : threads) { + thread.join(); + } } TEST_P(mr_test_mt, AllocateDefaultStream) @@ -145,22 +147,31 @@ TEST_P(mr_test_mt, AllocateOnStream) TEST_P(mr_test_mt, RandomAllocationsDefaultStream) { - spawn(test_random_allocations, this->mr.get(), 100, 5_MiB, rmm::cuda_stream_view{}); + spawn(test_random_allocations, + this->mr.get(), + default_num_allocations, + default_max_size, + rmm::cuda_stream_view{}); } TEST_P(mr_test_mt, RandomAllocationsStream) { - spawn(test_random_allocations, this->mr.get(), 100, 5_MiB, this->stream.view()); + spawn(test_random_allocations, + this->mr.get(), + default_num_allocations, + default_max_size, + this->stream.view()); } TEST_P(mr_test_mt, MixedRandomAllocationFreeDefaultStream) { - spawn(test_mixed_random_allocation_free, this->mr.get(), 5_MiB, rmm::cuda_stream_view{}); + spawn( + test_mixed_random_allocation_free, this->mr.get(), default_max_size, rmm::cuda_stream_view{}); } TEST_P(mr_test_mt, MixedRandomAllocationFreeStream) { - spawn(test_mixed_random_allocation_free, this->mr.get(), 5_MiB, this->stream.view()); + spawn(test_mixed_random_allocation_free, this->mr.get(), default_max_size, this->stream.view()); } void allocate_loop(rmm::mr::device_memory_resource* mr, @@ -193,14 +204,11 @@ void deallocate_loop(rmm::mr::device_memory_resource* mr, { for (std::size_t i = 0; i < num_allocations;) { std::lock_guard lock(mtx); - if (allocations.empty()) - continue; - else { - i++; - allocation alloc = allocations.front(); - allocations.pop_front(); - EXPECT_NO_THROW(mr->deallocate(alloc.p, alloc.size, stream)); - } + if (allocations.empty()) { continue; } + i++; + allocation alloc = allocations.front(); + allocations.pop_front(); + EXPECT_NO_THROW(mr->deallocate(alloc.ptr, alloc.size, stream)); } } @@ -250,5 +258,4 @@ TEST_P(mr_test_mt, AllocFreeDifferentThreadsDifferentStream) } } // namespace -} // namespace test -} // namespace rmm +} // namespace rmm::test diff --git a/tests/mr/device/mr_test.hpp b/tests/mr/device/mr_test.hpp index 72387d71d..35ff7d544 100644 --- a/tests/mr/device/mr_test.hpp +++ b/tests/mr/device/mr_test.hpp @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -39,23 +40,18 @@ #include #include #include +#include -namespace rmm { -namespace test { - -inline bool is_pointer_aligned(void* p, std::size_t alignment = 256) -{ - return (0 == reinterpret_cast(p) % alignment); -} +namespace rmm::test { /** * @brief Returns if a 
pointer points to a device memory or managed memory * allocation. */ -inline bool is_device_memory(void* p) +inline bool is_device_memory(void* ptr) { cudaPointerAttributes attributes{}; - if (cudaSuccess != cudaPointerGetAttributes(&attributes, p)) { return false; } + if (cudaSuccess != cudaPointerGetAttributes(&attributes, ptr)) { return false; } #if CUDART_VERSION < 10000 // memoryType is deprecated in CUDA 10 return attributes.memoryType == cudaMemoryTypeDevice; #else @@ -63,10 +59,15 @@ inline bool is_device_memory(void* p) #endif } +enum size_in_bytes : size_t {}; + +constexpr auto default_num_allocations{100}; +constexpr size_in_bytes default_max_size{5_MiB}; + struct allocation { - void* p{nullptr}; + void* ptr{nullptr}; std::size_t size{0}; - allocation(void* _p, std::size_t _size) : p{_p}, size{_size} {} + allocation(void* ptr, std::size_t size) : ptr{ptr}, size{size} {} allocation() = default; }; @@ -75,36 +76,33 @@ struct allocation { inline void test_get_current_device_resource() { EXPECT_NE(nullptr, rmm::mr::get_current_device_resource()); - void* p{nullptr}; - EXPECT_NO_THROW(p = rmm::mr::get_current_device_resource()->allocate(1_MiB)); - EXPECT_NE(nullptr, p); - EXPECT_TRUE(is_pointer_aligned(p)); - EXPECT_TRUE(is_device_memory(p)); - EXPECT_NO_THROW(rmm::mr::get_current_device_resource()->deallocate(p, 1_MiB)); + void* ptr = rmm::mr::get_current_device_resource()->allocate(1_MiB); + EXPECT_NE(nullptr, ptr); + EXPECT_TRUE(rmm::detail::is_pointer_aligned(ptr)); + EXPECT_TRUE(is_device_memory(ptr)); + rmm::mr::get_current_device_resource()->deallocate(ptr, 1_MiB); } inline void test_allocate(rmm::mr::device_memory_resource* mr, std::size_t bytes, cuda_stream_view stream = {}) { - void* p{nullptr}; - EXPECT_NO_THROW(p = mr->allocate(bytes)); - if (not stream.is_default()) stream.synchronize(); - EXPECT_NE(nullptr, p); - EXPECT_TRUE(is_pointer_aligned(p)); - EXPECT_TRUE(is_device_memory(p)); - EXPECT_NO_THROW(mr->deallocate(p, bytes)); - if (not stream.is_default()) stream.synchronize(); + void* ptr = mr->allocate(bytes); + if (not stream.is_default()) { stream.synchronize(); } + EXPECT_NE(nullptr, ptr); + EXPECT_TRUE(rmm::detail::is_pointer_aligned(ptr)); + EXPECT_TRUE(is_device_memory(ptr)); + mr->deallocate(ptr, bytes); + if (not stream.is_default()) { stream.synchronize(); } } inline void test_various_allocations(rmm::mr::device_memory_resource* mr, cuda_stream_view stream) { // test allocating zero bytes on non-default stream { - void* p{nullptr}; - EXPECT_NO_THROW(p = mr->allocate(0, stream)); + void* ptr = mr->allocate(0, stream); stream.synchronize(); - EXPECT_NO_THROW(mr->deallocate(p, 0, stream)); + EXPECT_NO_THROW(mr->deallocate(ptr, 0, stream)); stream.synchronize(); } @@ -115,15 +113,15 @@ inline void test_various_allocations(rmm::mr::device_memory_resource* mr, cuda_s // should fail to allocate too much { - void* p{nullptr}; - EXPECT_THROW(p = mr->allocate(1_PiB, stream), rmm::bad_alloc); - EXPECT_EQ(nullptr, p); + void* ptr{nullptr}; + EXPECT_THROW(ptr = mr->allocate(1_PiB, stream), rmm::bad_alloc); + EXPECT_EQ(nullptr, ptr); } } inline void test_random_allocations(rmm::mr::device_memory_resource* mr, - std::size_t num_allocations = 100, - std::size_t max_size = 5_MiB, + std::size_t num_allocations = default_num_allocations, + size_in_bytes max_size = default_max_size, cuda_stream_view stream = {}) { std::vector allocations(num_allocations); @@ -131,24 +129,25 @@ inline void test_random_allocations(rmm::mr::device_memory_resource* mr, std::default_random_engine 
generator; std::uniform_int_distribution distribution(1, max_size); - // 100 allocations from [0,5MB) - std::for_each( - allocations.begin(), allocations.end(), [&generator, &distribution, stream, mr](allocation& a) { - a.size = distribution(generator); - EXPECT_NO_THROW(a.p = mr->allocate(a.size, stream)); - if (not stream.is_default()) stream.synchronize(); - EXPECT_NE(nullptr, a.p); - EXPECT_TRUE(is_pointer_aligned(a.p)); - }); - - std::for_each(allocations.begin(), allocations.end(), [stream, mr](allocation& a) { - EXPECT_NO_THROW(mr->deallocate(a.p, a.size, stream)); - if (not stream.is_default()) stream.synchronize(); + // num_allocations allocations from [0,max_size) + std::for_each(allocations.begin(), + allocations.end(), + [&generator, &distribution, stream, mr](allocation& alloc) { + alloc.size = distribution(generator); + EXPECT_NO_THROW(alloc.ptr = mr->allocate(alloc.size, stream)); + if (not stream.is_default()) { stream.synchronize(); } + EXPECT_NE(nullptr, alloc.ptr); + EXPECT_TRUE(rmm::detail::is_pointer_aligned(alloc.ptr)); + }); + + std::for_each(allocations.begin(), allocations.end(), [stream, mr](allocation& alloc) { + EXPECT_NO_THROW(mr->deallocate(alloc.ptr, alloc.size, stream)); + if (not stream.is_default()) { stream.synchronize(); } }); } inline void test_mixed_random_allocation_free(rmm::mr::device_memory_resource* mr, - std::size_t max_size = 5_MiB, + size_in_bytes max_size = default_max_size, cuda_stream_view stream = {}) { std::default_random_engine generator; @@ -156,8 +155,9 @@ inline void test_mixed_random_allocation_free(rmm::mr::device_memory_resource* m std::uniform_int_distribution size_distribution(1, max_size); - constexpr int allocation_probability = 53; // percent - std::uniform_int_distribution op_distribution(0, 99); + constexpr int allocation_probability{53}; // percent + constexpr int max_probability{99}; + std::uniform_int_distribution op_distribution(0, max_probability); std::uniform_int_distribution index_distribution(0, num_allocations - 1); std::size_t active_allocations{0}; @@ -178,14 +178,14 @@ inline void test_mixed_random_allocation_free(rmm::mr::device_memory_resource* m allocation_count++; EXPECT_NO_THROW(allocations.emplace_back(mr->allocate(size, stream), size)); auto new_allocation = allocations.back(); - EXPECT_NE(nullptr, new_allocation.p); - EXPECT_TRUE(is_pointer_aligned(new_allocation.p)); + EXPECT_NE(nullptr, new_allocation.ptr); + EXPECT_TRUE(rmm::detail::is_pointer_aligned(new_allocation.ptr)); } else { - std::size_t index = index_distribution(generator) % active_allocations; + auto const index = static_cast(index_distribution(generator) % active_allocations); active_allocations--; allocation to_free = allocations[index]; allocations.erase(std::next(allocations.begin(), index)); - EXPECT_NO_THROW(mr->deallocate(to_free.p, to_free.size, stream)); + EXPECT_NO_THROW(mr->deallocate(to_free.ptr, to_free.size, stream)); } } @@ -197,18 +197,21 @@ using MRFactoryFunc = std::function { void SetUp() override { - auto factory = GetParam().f; + auto factory = GetParam().factory; mr = factory(); } @@ -243,9 +246,12 @@ inline auto make_binning() auto pool = make_pool(); // Add a binning_memory_resource with fixed-size bins of sizes 256, 512, 1024, 2048 and 4096KiB // Larger allocations will use the pool resource - auto mr = rmm::mr::make_owning_wrapper(pool, 18, 22); + auto const bin_range_start{18}; + auto const bin_range_end{22}; + + auto mr = rmm::mr::make_owning_wrapper( + pool, bin_range_start, bin_range_end); return mr; } -} // 
namespace test -} // namespace rmm +} // namespace rmm::test diff --git a/tests/mr/device/mr_tests.cpp b/tests/mr/device/mr_tests.cpp index 5a8ac02b1..7dbe225b9 100644 --- a/tests/mr/device/mr_tests.cpp +++ b/tests/mr/device/mr_tests.cpp @@ -20,21 +20,20 @@ #include -namespace rmm { -namespace test { +namespace rmm::test { namespace { -INSTANTIATE_TEST_CASE_P(ResourceTests, - mr_test, - ::testing::Values(mr_factory{"CUDA", &make_cuda}, +INSTANTIATE_TEST_SUITE_P(ResourceTests, + mr_test, + ::testing::Values(mr_factory{"CUDA", &make_cuda}, #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT - mr_factory{"CUDA_Async", &make_cuda_async}, + mr_factory{"CUDA_Async", &make_cuda_async}, #endif - mr_factory{"Managed", &make_managed}, - mr_factory{"Pool", &make_pool}, - mr_factory{"Arena", &make_arena}, - mr_factory{"Binning", &make_binning}), - [](auto const& info) { return info.param.name; }); + mr_factory{"Managed", &make_managed}, + mr_factory{"Pool", &make_pool}, + mr_factory{"Arena", &make_arena}, + mr_factory{"Binning", &make_binning}), + [](auto const& info) { return info.param.name; }); TEST(DefaultTest, CurrentDeviceResourceIsCUDA) { @@ -46,8 +45,7 @@ TEST(DefaultTest, UseCurrentDeviceResource) { test_get_current_device_resource() TEST(DefaultTest, GetCurrentDeviceResource) { - rmm::mr::device_memory_resource* mr; - EXPECT_NO_THROW(mr = rmm::mr::get_current_device_resource()); + auto* mr = rmm::mr::get_current_device_resource(); EXPECT_NE(nullptr, mr); EXPECT_TRUE(mr->is_equal(rmm::mr::cuda_memory_resource{})); } @@ -84,17 +82,17 @@ TEST_P(mr_test, RandomAllocations) { test_random_allocations(this->mr.get()); } TEST_P(mr_test, RandomAllocationsStream) { - test_random_allocations(this->mr.get(), 100, 5_MiB, this->stream); + test_random_allocations(this->mr.get(), default_num_allocations, default_max_size, this->stream); } TEST_P(mr_test, MixedRandomAllocationFree) { - test_mixed_random_allocation_free(this->mr.get(), 5_MiB, cuda_stream_view{}); + test_mixed_random_allocation_free(this->mr.get(), default_max_size, cuda_stream_view{}); } TEST_P(mr_test, MixedRandomAllocationFreeStream) { - test_mixed_random_allocation_free(this->mr.get(), 5_MiB, this->stream); + test_mixed_random_allocation_free(this->mr.get(), default_max_size, this->stream); } TEST_P(mr_test, GetMemInfo) @@ -102,7 +100,7 @@ TEST_P(mr_test, GetMemInfo) if (this->mr->supports_get_mem_info()) { std::pair mem_info; EXPECT_NO_THROW(mem_info = this->mr->get_mem_info(rmm::cuda_stream_view{})); - std::size_t allocation_size = 16 * 256; + const auto allocation_size{16 * 256}; void* ptr{nullptr}; EXPECT_NO_THROW(ptr = this->mr->allocate(allocation_size)); EXPECT_NO_THROW(mem_info = this->mr->get_mem_info(rmm::cuda_stream_view{})); @@ -111,5 +109,4 @@ TEST_P(mr_test, GetMemInfo) } } } // namespace -} // namespace test -} // namespace rmm +} // namespace rmm::test From 633261a0c88830cc3bfd519f3fb9f2efe15f0363 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 7 Sep 2021 13:58:13 +1000 Subject: [PATCH 21/72] Fix device_scalar_test hang --- include/rmm/device_scalar.hpp | 4 ++-- tests/device_scalar_tests.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/rmm/device_scalar.hpp b/include/rmm/device_scalar.hpp index f44ba1c28..f48aba84b 100644 --- a/include/rmm/device_scalar.hpp +++ b/include/rmm/device_scalar.hpp @@ -47,9 +47,9 @@ class device_scalar { ~device_scalar() = default; RMM_EXEC_CHECK_DISABLE - device_scalar(device_scalar&&) = default; + device_scalar(device_scalar&&) noexcept = default; - device_scalar& 
operator=(device_scalar&&) = default; + device_scalar& operator=(device_scalar&&) noexcept = default; /** * @brief Copy ctor is deleted as it doesn't allow a stream argument diff --git a/tests/device_scalar_tests.cpp b/tests/device_scalar_tests.cpp index f58655951..65f963132 100644 --- a/tests/device_scalar_tests.cpp +++ b/tests/device_scalar_tests.cpp @@ -30,10 +30,10 @@ template struct DeviceScalarTest : public ::testing::Test { + std::default_random_engine generator{}; T value{}; rmm::cuda_stream stream{}; rmm::mr::device_memory_resource* mr{rmm::mr::get_current_device_resource()}; - std::default_random_engine generator{}; DeviceScalarTest() : value{random_value()} {} From 0390808482f2f98ba5f031000c2ba170f5b4ebf5 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 7 Sep 2021 13:58:22 +1000 Subject: [PATCH 22/72] Fix arena.hpp debug build --- include/rmm/mr/device/detail/arena.hpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index f99c6bf97..6795fb3bd 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -98,12 +98,12 @@ class block { /** * @brief Split this block into two by the given size. * - * @param sz The size in bytes of the first block. - * @return std::pair A pair of blocks split by sz. + * @param size The size in bytes of the first block. + * @return std::pair A pair of blocks split by size. */ [[nodiscard]] std::pair split(std::size_t size) const { - RMM_LOGGING_ASSERT(size_ >= sz); + RMM_LOGGING_ASSERT(size_ >= size); if (size_ > size) { return {{pointer_, size}, {pointer_ + size, size_ - size}}; } return {*this, {}}; } @@ -113,12 +113,12 @@ class block { * * `this->is_contiguous_before(b)` must be true. * - * @param b block to merge. + * @param blk block to merge. * @return block The merged block. 
*/ [[nodiscard]] block merge(block const& blk) const { - RMM_LOGGING_ASSERT(is_contiguous_before(b)); + RMM_LOGGING_ASSERT(is_contiguous_before(blk)); return {pointer_, size_ + blk.size_}; } From 973bc998acf97cdc7638c4ae39dfe6415f969bf2 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 7 Sep 2021 14:01:34 +1000 Subject: [PATCH 23/72] tidy device_scalar --- include/rmm/device_scalar.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/rmm/device_scalar.hpp b/include/rmm/device_scalar.hpp index f48aba84b..ff8461599 100644 --- a/include/rmm/device_scalar.hpp +++ b/include/rmm/device_scalar.hpp @@ -186,9 +186,9 @@ class device_scalar { * @param v The host value which will be copied to device * @param stream CUDA stream on which to perform the copy */ - void set_value_async(value_type const& v, cuda_stream_view s) + void set_value_async(value_type const& value, cuda_stream_view stream) { - _storage.set_element_async(0, v, s); + _storage.set_element_async(0, value, stream); } // Disallow passing literals to set_value to avoid race conditions where the memory holding the @@ -209,9 +209,9 @@ class device_scalar { * * @param stream CUDA stream on which to perform the copy */ - void set_value_to_zero_async(cuda_stream_view s) + void set_value_to_zero_async(cuda_stream_view stream) { - _storage.set_element_to_zero_async(value_type{0}, s); + _storage.set_element_to_zero_async(value_type{0}, stream); } /** From c2402b6508b53c57d2f93a62d69feb3e08da2aaa Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 7 Sep 2021 14:06:47 +1000 Subject: [PATCH 24/72] suppress pointer arith warnings --- include/rmm/mr/device/detail/arena.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index 6795fb3bd..29da10c28 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -84,6 +84,7 @@ class block { */ [[nodiscard]] bool is_contiguous_before(block const& blk) const { + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) return pointer_ + size_ == blk.pointer_; } @@ -104,6 +105,7 @@ class block { [[nodiscard]] std::pair split(std::size_t size) const { RMM_LOGGING_ASSERT(size_ >= size); + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) if (size_ > size) { return {{pointer_, size}, {pointer_ + size, size_ - size}}; } return {*this, {}}; } From 27fbde47c39df0f487855594407e5087156df5d0 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 7 Sep 2021 14:12:53 +1000 Subject: [PATCH 25/72] tidy device_buffer --- include/rmm/device_buffer.hpp | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/include/rmm/device_buffer.hpp b/include/rmm/device_buffer.hpp index f034b28fe..ee8e4e927 100644 --- a/include/rmm/device_buffer.hpp +++ b/include/rmm/device_buffer.hpp @@ -88,10 +88,7 @@ class device_buffer { // `__host__ __device__` specifiers to the defaulted constructor when it is called within the // context of both host and device functions. Specifically, the `cudf::type_dispatcher` is a host- // device function. This causes warnings/errors because this ctor invokes host-only functions. 
- device_buffer() - : _data{nullptr}, _size{}, _capacity{}, _stream{}, _mr{rmm::mr::get_current_device_resource()} - { - } + device_buffer() : _mr{rmm::mr::get_current_device_resource()} {} /** * @brief Constructs a new device buffer of `size` uninitialized bytes @@ -310,7 +307,7 @@ class device_buffer { /** * @brief Returns raw pointer to underlying device memory allocation */ - void const* data() const noexcept { return _data; } + [[nodiscard]] void const* data() const noexcept { return _data; } /** * @brief Returns raw pointer to underlying device memory allocation @@ -321,7 +318,7 @@ class device_buffer { * @brief Returns size in bytes that was requested for the device memory * allocation */ - std::size_t size() const noexcept { return _size; } + [[nodiscard]] std::size_t size() const noexcept { return _size; } /** * @brief Returns whether the size in bytes of the `device_buffer` is zero. @@ -330,19 +327,19 @@ class device_buffer { * if `capacity() > 0`. * */ - bool is_empty() const noexcept { return 0 == size(); } + [[nodiscard]] bool is_empty() const noexcept { return 0 == size(); } /** * @brief Returns actual size in bytes of device memory allocation. * * The invariant `size() <= capacity()` holds. */ - std::size_t capacity() const noexcept { return _capacity; } + [[nodiscard]] std::size_t capacity() const noexcept { return _capacity; } /** * @brief Returns stream most recently specified for allocation/deallocation */ - cuda_stream_view stream() const noexcept { return _stream; } + [[nodiscard]] cuda_stream_view stream() const noexcept { return _stream; } /** * @brief Sets the stream to be used for deallocation @@ -360,7 +357,7 @@ class device_buffer { * @brief Returns pointer to the memory resource used to allocate and * deallocate the device memory */ - mr::device_memory_resource* memory_resource() const noexcept { return _mr; } + [[nodiscard]] mr::device_memory_resource* memory_resource() const noexcept { return _mr; } private: void* _data{nullptr}; ///< Pointer to device memory allocation From c47674208170aec50a043634c15a46ee0e2a20dd Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 7 Sep 2021 14:26:06 +1000 Subject: [PATCH 26/72] Suppress owning-memory warnings --- include/rmm/cuda_stream.hpp | 8 ++++---- include/rmm/thrust_rmm_allocator.h | 7 ++++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/include/rmm/cuda_stream.hpp b/include/rmm/cuda_stream.hpp index 6a3304e2c..185cd049e 100644 --- a/include/rmm/cuda_stream.hpp +++ b/include/rmm/cuda_stream.hpp @@ -57,13 +57,13 @@ class cuda_stream { */ cuda_stream() : stream_{[]() { - auto* s = new cudaStream_t; - RMM_CUDA_TRY(cudaStreamCreate(s)); - return s; + auto* stream = new cudaStream_t; // NOLINT(cppcoreguidelines-owning-memory) + RMM_CUDA_TRY(cudaStreamCreate(stream)); + return stream; }(), [](cudaStream_t* stream) { RMM_ASSERT_CUDA_SUCCESS(cudaStreamDestroy(*stream)); - delete stream; + delete stream; // NOLINT(cppcoreguidelines-owning-memory) }} { } diff --git a/include/rmm/thrust_rmm_allocator.h b/include/rmm/thrust_rmm_allocator.h index 889faa3bd..894f402a1 100644 --- a/include/rmm/thrust_rmm_allocator.h +++ b/include/rmm/thrust_rmm_allocator.h @@ -38,12 +38,13 @@ using exec_policy_t = std::unique_ptr; * allocation. 
*/ [[deprecated("Use new exec_policy in rmm/exec_policy.hpp")]] inline exec_policy_t exec_policy( - cudaStream_t stream = 0) + cudaStream_t stream = nullptr) { + // NOLINTNEXTLINE(cppcoreguidelines-owning-memory) auto* alloc = new rmm::mr::thrust_allocator(cuda_stream_view{stream}); auto deleter = [alloc](par_t* pointer) { - delete alloc; - delete pointer; + delete alloc; // NOLINT(cppcoreguidelines-owning-memory) + delete pointer; // NOLINT(cppcoreguidelines-owning-memory) }; exec_policy_t policy{new par_t(*alloc), deleter}; From a1162b13a3b0ee4db96b5641fa62d9afc5ff12fb Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 7 Sep 2021 14:30:23 +1000 Subject: [PATCH 27/72] NOLINT macro parentheses --- include/rmm/detail/error.hpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/include/rmm/detail/error.hpp b/include/rmm/detail/error.hpp index 7c052ea8d..1f550f75e 100644 --- a/include/rmm/detail/error.hpp +++ b/include/rmm/detail/error.hpp @@ -100,10 +100,11 @@ class out_of_range : public std::out_of_range { GET_RMM_EXPECTS_MACRO(__VA_ARGS__, RMM_EXPECTS_3, RMM_EXPECTS_2) \ (__VA_ARGS__) #define GET_RMM_EXPECTS_MACRO(_1, _2, _3, NAME, ...) NAME -#define RMM_EXPECTS_3(_condition, _exception_type, _reason) \ - (!!(_condition)) ? static_cast(0) : throw _exception_type \ - { \ - "RMM failure at: " __FILE__ ":" RMM_STRINGIFY(__LINE__) ": " _reason \ +#define RMM_EXPECTS_3(_condition, _exception_type, _reason) \ + (!!(_condition)) ? static_cast(0) \ + : throw _exception_type /*NOLINT(bugprone-macro-parentheses)*/ \ + { \ + "RMM failure at: " __FILE__ ":" RMM_STRINGIFY(__LINE__) ": " _reason \ } #define RMM_EXPECTS_2(_condition, _reason) RMM_EXPECTS_3(_condition, rmm::logic_error, _reason) @@ -123,7 +124,8 @@ class out_of_range : public std::out_of_range { GET_RMM_FAIL_MACRO(__VA_ARGS__, RMM_FAIL_2, RMM_FAIL_1) \ (__VA_ARGS__) #define GET_RMM_FAIL_MACRO(_1, _2, NAME, ...) 
NAME -#define RMM_FAIL_2(_what, _exception_type) \ +#define RMM_FAIL_2(_what, _exception_type) \ + /*NOLINTNEXTLINE(bugprone-macro-parentheses)*/ \ throw _exception_type{"RMM failure at:" __FILE__ ":" RMM_STRINGIFY(__LINE__) ": " _what}; #define RMM_FAIL_1(_what) RMM_FAIL_2(_what, rmm::logic_error) @@ -157,6 +159,7 @@ class out_of_range : public std::out_of_range { cudaError_t const error = (_call); \ if (cudaSuccess != error) { \ cudaGetLastError(); \ + /*NOLINTNEXTLINE(bugprone-macro-parentheses)*/ \ throw _exception_type{std::string{"CUDA error at: "} + __FILE__ + ":" + \ RMM_STRINGIFY(__LINE__) + ": " + cudaGetErrorName(error) + " " + \ cudaGetErrorString(error)}; \ From 77fdc940f76714e871bd678a6a24fad3096f4b6a Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 7 Sep 2021 14:59:18 +1000 Subject: [PATCH 28/72] tidy free lists --- .../mr/device/detail/coalescing_free_list.hpp | 60 +++++++++---------- .../mr/device/detail/fixed_size_free_list.hpp | 27 ++++----- include/rmm/mr/device/detail/free_list.hpp | 28 ++++----- 3 files changed, 51 insertions(+), 64 deletions(-) diff --git a/include/rmm/mr/device/detail/coalescing_free_list.hpp b/include/rmm/mr/device/detail/coalescing_free_list.hpp index d0c0f399e..a32469c73 100644 --- a/include/rmm/mr/device/detail/coalescing_free_list.hpp +++ b/include/rmm/mr/device/detail/coalescing_free_list.hpp @@ -25,9 +25,7 @@ #include #include -namespace rmm { -namespace mr { -namespace detail { +namespace rmm::mr::detail { /** * @brief A simple block structure specifying the size and location of a block @@ -46,14 +44,14 @@ struct block : public block_base { * * @return the pointer to the memory represented by this block. */ - inline char* pointer() const { return static_cast(ptr); } + [[nodiscard]] inline char* pointer() const { return static_cast(ptr); } /** * @brief Returns the size of the memory represented by this block. * * @return the size in bytes of the memory represented by this block. */ - inline std::size_t size() const { return size_bytes; } + [[nodiscard]] inline std::size_t size() const { return size_bytes; } /** * @brief Returns whether this block is the start of an allocation from an upstream allocator. @@ -62,7 +60,7 @@ struct block : public block_base { * * @return true if this block is the start of an allocation from an upstream allocator. */ - inline bool is_head() const { return head; } + [[nodiscard]] inline bool is_head() const { return head; } /** * @brief Comparison operator to enable comparing blocks and storing in ordered containers. @@ -84,10 +82,10 @@ struct block : public block_base { * @param b block to merge * @return block The merged block */ - inline block merge(block const& b) const noexcept + [[nodiscard]] inline block merge(block const& blk) const noexcept { assert(is_contiguous_before(b)); - return block(pointer(), size() + b.size(), is_head()); + return {pointer(), size() + blk.size(), is_head()}; } /** @@ -97,9 +95,10 @@ struct block : public block_base { * @return true Returns true if this blocks's `ptr` + `size` == `b.ptr`, and `not b.is_head`, false otherwise. */ - inline bool is_contiguous_before(block const& b) const noexcept + [[nodiscard]] inline bool is_contiguous_before(block const& blk) const noexcept { - return (pointer() + size() == b.ptr) and not(b.is_head()); + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + return (pointer() + size() == blk.ptr) and not(blk.is_head()); } /** @@ -108,7 +107,7 @@ struct block : public block_base { * @param sz The size in bytes to check for fit. 
* @return true if this block is at least `sz` bytes */ - inline bool fits(std::size_t sz) const noexcept { return size() >= sz; } + [[nodiscard]] inline bool fits(std::size_t bytes) const noexcept { return size() >= bytes; } /** * @brief Is this block a better fit for `sz` bytes than block `b`? @@ -118,9 +117,9 @@ struct block : public block_base { * @return true If this block is a tighter fit for `sz` bytes than block `b`. * @return false If this block does not fit `sz` bytes or `b` is a tighter fit. */ - inline bool is_better_fit(std::size_t sz, block const& b) const noexcept + [[nodiscard]] inline bool is_better_fit(std::size_t bytes, block const& blk) const noexcept { - return fits(sz) && (size() < b.size() || b.size() < sz); + return fits(bytes) && (size() < blk.size() || blk.size() < bytes); } /** @@ -128,7 +127,7 @@ struct block : public block_base { */ inline void print() const { - std::cout << reinterpret_cast(pointer()) << " " << size() << " B\n"; + std::cout << fmt::format("{} {} B", fmt::ptr(pointer()), size()) << std::endl; } private: @@ -137,9 +136,9 @@ struct block : public block_base { }; /// Print block on an ostream -inline std::ostream& operator<<(std::ostream& out, const block& b) +inline std::ostream& operator<<(std::ostream& out, const block& blk) { - out << b.pointer() << " " << b.size() << " B\n"; + out << fmt::format("{} {} B\n", fmt::ptr(blk.pointer()), blk.size()); return out; } @@ -166,8 +165,8 @@ struct compare_blocks { * @tparam list_type the type of the internal list data structure. */ struct coalescing_free_list : free_list { - coalescing_free_list() = default; - ~coalescing_free_list() = default; + coalescing_free_list() = default; + ~coalescing_free_list() override = default; coalescing_free_list(coalescing_free_list const&) = delete; coalescing_free_list& operator=(coalescing_free_list const&) = delete; @@ -180,31 +179,32 @@ struct coalescing_free_list : free_list { * * @param b The block to insert. */ - void insert(block_type const& b) + void insert(block_type const& block) { if (is_empty()) { - free_list::insert(cend(), b); + free_list::insert(cend(), block); return; } // Find the right place (in ascending ptr order) to insert the block // Can't use binary_search because it's a linked list and will be quadratic - auto const next = std::find_if(begin(), end(), [b](block_type const& i) { return b < i; }); + auto const next = + std::find_if(begin(), end(), [block](block_type const& blk) { return block < blk; }); auto const previous = (next == cbegin()) ? 
next : std::prev(next); // Coalesce with neighboring blocks or insert the new block if it can't be coalesced - bool const merge_prev = previous->is_contiguous_before(b); - bool const merge_next = (next != cend()) && b.is_contiguous_before(*next); + bool const merge_prev = previous->is_contiguous_before(block); + bool const merge_next = (next != cend()) && block.is_contiguous_before(*next); if (merge_prev && merge_next) { - *previous = previous->merge(b).merge(*next); + *previous = previous->merge(block).merge(*next); erase(next); } else if (merge_prev) { - *previous = previous->merge(b); + *previous = previous->merge(block); } else if (merge_next) { - *next = b.merge(*next); + *next = block.merge(*next); } else { - free_list::insert(next, b); // cannot be coalesced, just insert + free_list::insert(next, block); // cannot be coalesced, just insert } } @@ -220,7 +220,7 @@ struct coalescing_free_list : free_list { { std::for_each(std::make_move_iterator(other.begin()), std::make_move_iterator(other.end()), - [this](block_type&& b) { this->insert(std::move(b)); }); + [this](block_type&& block) { this->insert(block); }); } /** @@ -259,6 +259,4 @@ struct coalescing_free_list : free_list { } }; // coalescing_free_list -} // namespace detail -} // namespace mr -} // namespace rmm +} // namespace rmm::mr::detail diff --git a/include/rmm/mr/device/detail/fixed_size_free_list.hpp b/include/rmm/mr/device/detail/fixed_size_free_list.hpp index a7794c9b2..4d2f7253f 100644 --- a/include/rmm/mr/device/detail/fixed_size_free_list.hpp +++ b/include/rmm/mr/device/detail/fixed_size_free_list.hpp @@ -21,13 +21,11 @@ #include #include -namespace rmm { -namespace mr { -namespace detail { +namespace rmm::mr::detail { struct fixed_size_free_list : free_list { - fixed_size_free_list() = default; - ~fixed_size_free_list() = default; + fixed_size_free_list() = default; + ~fixed_size_free_list() override = default; fixed_size_free_list(fixed_size_free_list const&) = delete; fixed_size_free_list& operator=(fixed_size_free_list const&) = delete; @@ -44,7 +42,7 @@ struct fixed_size_free_list : free_list { template fixed_size_free_list(InputIt first, InputIt last) { - std::for_each(first, last, [this](block_type const& b) { insert(b); }); + std::for_each(first, last, [this](block_type const& block) { insert(block); }); } /** @@ -53,7 +51,7 @@ struct fixed_size_free_list : free_list { * * @param b The block to insert. */ - void insert(block_type const& b) { push_back(b); } + void insert(block_type const& block) { push_back(block); } /** * @brief Splices blocks from range `[first, last)` onto the free_list. 
@@ -71,16 +69,11 @@ struct fixed_size_free_list : free_list { */ block_type get_block(std::size_t size) { - if (is_empty()) - return block_type{}; - else { - block_type b = *begin(); - pop_front(); - return b; - } + if (is_empty()) { return block_type{}; } + block_type block = *begin(); + pop_front(); + return block; } }; -} // namespace detail -} // namespace mr -} // namespace rmm +} // namespace rmm::mr::detail diff --git a/include/rmm/mr/device/detail/free_list.hpp b/include/rmm/mr/device/detail/free_list.hpp index e6f4effc0..6f302e7b9 100644 --- a/include/rmm/mr/device/detail/free_list.hpp +++ b/include/rmm/mr/device/detail/free_list.hpp @@ -20,25 +20,23 @@ #include #include -namespace rmm { -namespace mr { -namespace detail { +namespace rmm::mr::detail { struct block_base { void* ptr{}; ///< Raw memory pointer /// Returns the raw pointer for this block - inline void* pointer() const { return ptr; } + [[nodiscard]] inline void* pointer() const { return ptr; } /// Returns true if this block is valid (non-null), false otherwise - inline bool is_valid() const { return pointer() != nullptr; } + [[nodiscard]] inline bool is_valid() const { return pointer() != nullptr; } /// Prints the block to stdout inline void print() const { std::cout << pointer(); } }; /// Print block_base on an ostream -inline std::ostream& operator<<(std::ostream& out, const block_base& b) +inline std::ostream& operator<<(std::ostream& out, const block_base& block) { - out << b.pointer(); + out << block.pointer(); return out; } @@ -93,7 +91,7 @@ class free_list { * @return true If there are blocks in the free_list. * @return false If there are no blocks in the free_list. */ - bool is_empty() const noexcept { return blocks.empty(); } + [[nodiscard]] bool is_empty() const noexcept { return blocks.empty(); } /** * @brief Removes the block indicated by `iter` from the free list. @@ -114,8 +112,8 @@ class free_list { void print() const { std::cout << size() << std::endl; - for (auto const& b : blocks) { - std::cout << b << std::endl; + for (auto const& block : blocks) { + std::cout << block << std::endl; } } @@ -126,7 +124,7 @@ class free_list { * @param pos iterator before which the block will be inserted. pos may be the end() iterator. * @param b The block to insert. */ - void insert(const_iterator pos, block_type const& b) { blocks.insert(pos, b); } + void insert(const_iterator pos, block_type const& block) { blocks.insert(pos, block); } /** * @brief Inserts a list of blocks in the free list before the specified position @@ -144,14 +142,14 @@ class free_list { * * @param b The block to append. */ - void push_back(const block_type& b) { blocks.push_back(b); } + void push_back(const block_type& block) { blocks.push_back(block); } /** * @brief Appends the given block to the end of the free list. `b` is moved to the new element. * * @param b The block to append. */ - void push_back(block_type&& b) { blocks.push_back(std::move(b)); } + void push_back(block_type&& block) { blocks.push_back(std::move(block)); } /** * @brief Removes the first element of the free list. 
If there are no elements in the free list, @@ -165,6 +163,4 @@ class free_list { list_type blocks; // The internal container of blocks }; -} // namespace detail -} // namespace mr -} // namespace rmm +} // namespace rmm::mr::detail From c54c513a9a16b63dcdc9f19090be6027b6f4bdec Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 7 Sep 2021 15:27:09 +1000 Subject: [PATCH 29/72] tidy stream_ordered_memory_resource and free_lists --- .../mr/device/detail/coalescing_free_list.hpp | 29 +++-- .../mr/device/detail/fixed_size_free_list.hpp | 9 +- include/rmm/mr/device/detail/free_list.hpp | 8 +- .../detail/stream_ordered_memory_resource.hpp | 105 ++++++++++-------- 4 files changed, 79 insertions(+), 72 deletions(-) diff --git a/include/rmm/mr/device/detail/coalescing_free_list.hpp b/include/rmm/mr/device/detail/coalescing_free_list.hpp index a32469c73..bbdd98ec9 100644 --- a/include/rmm/mr/device/detail/coalescing_free_list.hpp +++ b/include/rmm/mr/device/detail/coalescing_free_list.hpp @@ -79,8 +79,8 @@ struct block : public block_base { * `this` must immediately precede `b` and both `this` and `b` must be from the same upstream * allocation. That is, `this->is_contiguous_before(b)`. Otherwise behavior is undefined. * - * @param b block to merge - * @return block The merged block + * @param blk block to merge + * @return The merged block */ [[nodiscard]] inline block merge(block const& blk) const noexcept { @@ -91,9 +91,9 @@ struct block : public block_base { /** * @brief Verifies whether this block can be merged to the beginning of block b. * - * @param b The block to check for contiguity. - * @return true Returns true if this blocks's `ptr` + `size` == `b.ptr`, and `not b.is_head`, - false otherwise. + * @param blk The block to check for contiguity. + * @return Returns true if this blocks's `ptr` + `size` == `b.ptr`, and `not b.is_head`, + false otherwise. */ [[nodiscard]] inline bool is_contiguous_before(block const& blk) const noexcept { @@ -104,18 +104,18 @@ struct block : public block_base { /** * @brief Is this block large enough to fit `sz` bytes? * - * @param sz The size in bytes to check for fit. - * @return true if this block is at least `sz` bytes + * @param bytes The size in bytes to check for fit. + * @return true if this block is at least `bytes` bytes */ [[nodiscard]] inline bool fits(std::size_t bytes) const noexcept { return size() >= bytes; } /** * @brief Is this block a better fit for `sz` bytes than block `b`? * - * @param sz The size in bytes to check for best fit. - * @param b The other block to check for fit. - * @return true If this block is a tighter fit for `sz` bytes than block `b`. - * @return false If this block does not fit `sz` bytes or `b` is a tighter fit. + * @param bytes The size in bytes to check for best fit. + * @param blk The other block to check for fit. + * @return true If this block is a tighter fit for `bytes` bytes than block `blk`. + * @return false If this block does not fit `bytes` bytes or `blk` is a tighter fit. */ [[nodiscard]] inline bool is_better_fit(std::size_t bytes, block const& blk) const noexcept { @@ -209,12 +209,11 @@ struct coalescing_free_list : free_list { } /** - * @brief Moves blocks from range `[first, last)` into the free_list in their correct order, + * @brief Moves blocks from free_list `other` into this free_list in their correct order, * coalescing them with their preceding and following blocks if they are contiguous. 
* * @tparam InputIt iterator type - * @param first The beginning of the range of blocks to insert - * @param last The end of the range of blocks to insert. + * @param other free_list of blocks to insert */ void insert(free_list&& other) { @@ -229,7 +228,7 @@ struct coalescing_free_list : free_list { * This is a "best fit" search. * * @param size The size in bytes of the desired block. - * @return block A block large enough to store `size` bytes. + * @return A block large enough to store `size` bytes. */ block_type get_block(std::size_t size) { diff --git a/include/rmm/mr/device/detail/fixed_size_free_list.hpp b/include/rmm/mr/device/detail/fixed_size_free_list.hpp index 4d2f7253f..1ca1656b0 100644 --- a/include/rmm/mr/device/detail/fixed_size_free_list.hpp +++ b/include/rmm/mr/device/detail/fixed_size_free_list.hpp @@ -49,15 +49,14 @@ struct fixed_size_free_list : free_list { * @brief Inserts a block into the `free_list` in the correct order, coalescing it with the * preceding and following blocks if either is contiguous. * - * @param b The block to insert. + * @param block The block to insert. */ void insert(block_type const& block) { push_back(block); } /** - * @brief Splices blocks from range `[first, last)` onto the free_list. + * @brief Inserts blocks from another free list into this free_list. * - * @param first The beginning of the range of blocks to insert - * @param last The end of the range of blocks to insert. + * @param other The free_list to insert into this free_list. */ void insert(free_list&& other) { splice(cend(), std::move(other)); } @@ -65,7 +64,7 @@ struct fixed_size_free_list : free_list { * @brief Returns the first block in the free list. * * @param size The size in bytes of the desired block (unused). - * @return block A block large enough to store `size` bytes. + * @return A block large enough to store `size` bytes. */ block_type get_block(std::size_t size) { diff --git a/include/rmm/mr/device/detail/free_list.hpp b/include/rmm/mr/device/detail/free_list.hpp index 6f302e7b9..f249c2f25 100644 --- a/include/rmm/mr/device/detail/free_list.hpp +++ b/include/rmm/mr/device/detail/free_list.hpp @@ -122,7 +122,7 @@ class free_list { * @brief Insert a block in the free list before the specified position * * @param pos iterator before which the block will be inserted. pos may be the end() iterator. - * @param b The block to insert. + * @param block The block to insert. */ void insert(const_iterator pos, block_type const& block) { blocks.insert(pos, block); } @@ -130,7 +130,7 @@ class free_list { * @brief Inserts a list of blocks in the free list before the specified position * * @param pos iterator before which the block will be inserted. pos may be the end() iterator. - * @param b The block to insert. + * @param other The free list to insert. */ void splice(const_iterator pos, free_list&& other) { @@ -140,14 +140,14 @@ class free_list { /** * @brief Appends the given block to the end of the free list. * - * @param b The block to append. + * @param block The block to append. */ void push_back(const block_type& block) { blocks.push_back(block); } /** * @brief Appends the given block to the end of the free list. `b` is moved to the new element. * - * @param b The block to append. + * @param block The block to append. 
*/ void push_back(block_type&& block) { blocks.push_back(std::move(block)); } diff --git a/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp b/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp index 8824e0ad9..f9106e17d 100644 --- a/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp +++ b/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp @@ -31,9 +31,7 @@ #include #include -namespace rmm { -namespace mr { -namespace detail { +namespace rmm::mr::detail { /** * @brief A CRTP helper function @@ -76,7 +74,7 @@ struct crtp { template class stream_ordered_memory_resource : public crtp, public device_memory_resource { public: - ~stream_ordered_memory_resource() { release(); } + ~stream_ordered_memory_resource() override { release(); } stream_ordered_memory_resource() = default; stream_ordered_memory_resource(stream_ordered_memory_resource const&) = delete; @@ -148,12 +146,12 @@ class stream_ordered_memory_resource : public crtp, public device_ /** * @brief Returns the block `b` (last used on stream `stream_event`) to the pool. * - * @param b The block to insert into the pool. + * @param block The block to insert into the pool. * @param stream The stream on which the memory was last used. */ - void insert_block(block_type const& b, cuda_stream_view stream) + void insert_block(block_type const& block, cuda_stream_view stream) { - stream_free_blocks_[get_event(stream)].insert(b); + stream_free_blocks_[get_event(stream)].insert(block); } void insert_blocks(free_list&& blocks, cuda_stream_view stream) @@ -164,9 +162,10 @@ class stream_ordered_memory_resource : public crtp, public device_ void print_free_blocks() const { std::cout << "stream free blocks: "; - for (auto& s : stream_free_blocks_) { - std::cout << "stream: " << s.first.stream << " event: " << s.first.event << " "; - s.second.print(); + for (auto& free_blocks : stream_free_blocks_) { + std::cout << "stream: " << free_blocks.first.stream << " event: " << free_blocks.first.event + << " "; + free_blocks.second.print(); std::cout << std::endl; } std::cout << std::endl; @@ -193,32 +192,34 @@ class stream_ordered_memory_resource : public crtp, public device_ * * @throws `std::bad_alloc` if the requested allocation could not be fulfilled * - * @param bytes The size in bytes of the allocation - * @param stream The stream to associate this allocation with + * @param size The size in bytes of the allocation + * @param stream The stream in which to order this allocation * @return void* Pointer to the newly allocated memory */ - virtual void* do_allocate(std::size_t bytes, cuda_stream_view stream) override + void* do_allocate(std::size_t size, cuda_stream_view stream) override { RMM_LOG_TRACE("[A][stream {:p}][{}B]", fmt::ptr(stream.value()), bytes); - if (bytes <= 0) return nullptr; + if (size <= 0) { return nullptr; } lock_guard lock(mtx_); auto stream_event = get_event(stream); - bytes = rmm::detail::align_up(bytes, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); - RMM_EXPECTS(bytes <= this->underlying().get_maximum_allocation_size(), + size = rmm::detail::align_up(size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + RMM_EXPECTS(size <= this->underlying().get_maximum_allocation_size(), rmm::bad_alloc, "Maximum allocation size exceeded"); - auto const b = this->underlying().get_block(bytes, stream_event); + auto const block = this->underlying().get_block(size, stream_event); - RMM_LOG_TRACE( - "[A][stream {:p}][{}B][{:p}]", fmt::ptr(stream_event.stream), bytes, fmt::ptr(b.pointer())); + 
RMM_LOG_TRACE("[A][stream {:p}][{}B][{:p}]", + fmt::ptr(stream_event.stream), + bytes, + fmt::ptr(block.pointer())); log_summary_trace(); - return b.pointer(); + return block.pointer(); } /** @@ -227,25 +228,27 @@ class stream_ordered_memory_resource : public crtp, public device_ * @throws nothing * * @param p Pointer to be deallocated + * @param size The size in bytes of the allocation to deallocate + * @param stream The stream in which to order this deallocation */ - virtual void do_deallocate(void* p, std::size_t bytes, cuda_stream_view stream) override + void do_deallocate(void* ptr, std::size_t size, cuda_stream_view stream) override { RMM_LOG_TRACE("[D][stream {:p}][{}B][{:p}]", fmt::ptr(stream.value()), bytes, p); - if (bytes <= 0 || p == nullptr) return; + if (size <= 0 || ptr == nullptr) { return; } lock_guard lock(mtx_); auto stream_event = get_event(stream); - bytes = rmm::detail::align_up(bytes, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); - auto const b = this->underlying().free_block(p, bytes); + size = rmm::detail::align_up(size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + auto const block = this->underlying().free_block(ptr, size); // TODO: cudaEventRecord has significant overhead on deallocations. For the non-PTDS case // we may be able to delay recording the event in some situations. But using events rather than // streams allows stealing from deleted streams. RMM_ASSERT_CUDA_SUCCESS(cudaEventRecord(stream_event.event, stream.value())); - stream_free_blocks_[stream_event].insert(b); + stream_free_blocks_[stream_event].insert(block); log_summary_trace(); } @@ -261,6 +264,11 @@ class stream_ordered_memory_resource : public crtp, public device_ } ~event_wrapper() { RMM_ASSERT_CUDA_SUCCESS(cudaEventDestroy(event)); } cudaEvent_t event{}; + + event_wrapper(event_wrapper const&) = delete; + event_wrapper& operator=(event_wrapper const&) = delete; + event_wrapper(event_wrapper&&) noexcept = delete; + event_wrapper& operator=(event_wrapper&&) = delete; }; /** @@ -287,13 +295,15 @@ class stream_ordered_memory_resource : public crtp, public device_ // user explicitly passes it, so it is used as the default location for the free list // at construction. For consistency, the same key is used for null stream free lists in non-PTDS // mode. - auto const stream_to_store = stream.is_default() ? cudaStreamLegacy : stream.value(); + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) + auto* const stream_to_store = stream.is_default() ? cudaStreamLegacy : stream.value(); auto const iter = stream_events_.find(stream_to_store); return (iter != stream_events_.end()) ? iter->second : [&]() { stream_event_pair stream_event{stream_to_store}; RMM_ASSERT_CUDA_SUCCESS( cudaEventCreateWithFlags(&stream_event.event, cudaEventDisableTiming)); + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) stream_events_[stream_to_store] = stream_event; return stream_event; }(); @@ -303,14 +313,14 @@ class stream_ordered_memory_resource : public crtp, public device_ * @brief Splits a block into an allocated block of `size` bytes and a remainder block, and * inserts the remainder into a free list. * - * @param b The block to split into allocated and remainder portions. + * @param block The block to split into allocated and remainder portions. * @param size The size of the block to allocate from `b`. * @param blocks The `free_list` in which to insert the remainder block. * @return The allocated block. 
*/ - block_type allocate_and_insert_remainder(block_type b, std::size_t size, free_list& blocks) + block_type allocate_and_insert_remainder(block_type block, std::size_t size, free_list& blocks) { - auto const [allocated, remainder] = this->underlying().allocate_from_block(b, size); + auto const [allocated, remainder] = this->underlying().allocate_from_block(block, size); if (remainder.is_valid()) { blocks.insert(remainder); } return allocated; } @@ -327,8 +337,8 @@ class stream_ordered_memory_resource : public crtp, public device_ // Try to find a satisfactory block in free list for the same stream (no sync required) auto iter = stream_free_blocks_.find(stream_event); if (iter != stream_free_blocks_.end()) { - block_type const b = iter->second.get_block(size); - if (b.is_valid()) { return allocate_and_insert_remainder(b, size, iter->second); } + block_type const block = iter->second.get_block(size); + if (block.is_valid()) { return allocate_and_insert_remainder(block, size, iter->second); } } free_list& blocks = @@ -336,23 +346,23 @@ class stream_ordered_memory_resource : public crtp, public device_ // Try to find an existing block in another stream { - block_type const b = get_block_from_other_stream(size, stream_event, blocks, false); - if (b.is_valid()) return b; + block_type const block = get_block_from_other_stream(size, stream_event, blocks, false); + if (block.is_valid()) { return block; } } // no large enough blocks available on other streams, so sync and merge until we find one { - block_type const b = get_block_from_other_stream(size, stream_event, blocks, true); - if (b.is_valid()) return b; + block_type const block = get_block_from_other_stream(size, stream_event, blocks, true); + if (block.is_valid()) { return block; } } log_summary_trace(); // no large enough blocks available after merging, so grow the pool - block_type const b = + block_type const block = this->underlying().expand_pool(size, blocks, cuda_stream_view{stream_event.stream}); - return allocate_and_insert_remainder(b, size, blocks); + return allocate_and_insert_remainder(block, size, blocks); } /** @@ -380,7 +390,7 @@ class stream_ordered_memory_resource : public crtp, public device_ if (other_event != stream_event.event) { free_list& other_blocks = it->second; - block_type const b = [&]() { + block_type const block = [&]() { if (merge_first) { merge_lists(stream_event, blocks, other_event, std::move(other_blocks)); @@ -391,27 +401,28 @@ class stream_ordered_memory_resource : public crtp, public device_ stream_free_blocks_.erase(it); - block_type const b = blocks.get_block(size); // get the best fit block in merged lists - if (b.is_valid()) { return allocate_and_insert_remainder(b, size, blocks); } + block_type const block = + blocks.get_block(size); // get the best fit block in merged lists + if (block.is_valid()) { return allocate_and_insert_remainder(block, size, blocks); } } else { - block_type const b = other_blocks.get_block(size); - if (b.is_valid()) { + block_type const block = other_blocks.get_block(size); + if (block.is_valid()) { // Since we found a block associated with a different stream, we have to insert a wait // on the stream's associated event into the allocating stream. RMM_CUDA_TRY(cudaStreamWaitEvent(stream_event.stream, other_event, 0)); - return allocate_and_insert_remainder(b, size, other_blocks); + return allocate_and_insert_remainder(block, size, other_blocks); } } return block_type{}; }(); - if (b.is_valid()) { + if (block.is_valid()) { RMM_LOG_DEBUG((merge_first) ? 
"[A][Stream {:p}][{}B][Found after merging stream {:p}]" : "[A][Stream {:p}][{}B][Taken from stream {:p}]", fmt::ptr(stream_event.stream), size, fmt::ptr(it->first.stream)); - return b; + return block; } } } @@ -486,6 +497,4 @@ class stream_ordered_memory_resource : public crtp, public device_ std::mutex mtx_; // mutex for thread-safe access }; // namespace detail -} // namespace detail -} // namespace mr -} // namespace rmm +} // namespace rmm::mr::detail From a4e0d9a510d6c639d1731fc432f7e0e3318d4cfd Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 09:52:12 +1000 Subject: [PATCH 30/72] tidy device_memory_resource and aligned_resource_adaptor --- include/rmm/detail/aligned.hpp | 32 +++++----- .../mr/device/aligned_resource_adaptor.hpp | 58 +++++++++---------- .../rmm/mr/device/device_memory_resource.hpp | 39 ++++++++----- 3 files changed, 70 insertions(+), 59 deletions(-) diff --git a/include/rmm/detail/aligned.hpp b/include/rmm/detail/aligned.hpp index c949ad035..6a8d70597 100644 --- a/include/rmm/detail/aligned.hpp +++ b/include/rmm/detail/aligned.hpp @@ -24,17 +24,19 @@ namespace rmm::detail { +enum alignment_type : std::size_t {}; + /** * @brief Default alignment used for host memory allocated by RMM. * */ -static constexpr std::size_t RMM_DEFAULT_HOST_ALIGNMENT{alignof(std::max_align_t)}; +static constexpr alignment_type RMM_DEFAULT_HOST_ALIGNMENT{alignof(std::max_align_t)}; /** * @brief Default alignment used for CUDA memory allocation. * */ -static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256}; +static constexpr alignment_type CUDA_ALLOCATION_ALIGNMENT{256}; /** * @brief Returns whether or not `n` is a power of 2. @@ -46,7 +48,7 @@ constexpr bool is_pow2(std::size_t value) { return (0 == (value & (value - 1))); * @brief Returns whether or not `alignment` is a valid memory alignment. 
* */ -constexpr bool is_supported_alignment(std::size_t alignment) { return is_pow2(alignment); } +constexpr bool is_supported_alignment(alignment_type alignment) { return is_pow2(alignment); } /** * @brief Align up to nearest multiple of specified power of 2 @@ -56,10 +58,10 @@ constexpr bool is_supported_alignment(std::size_t alignment) { return is_pow2(al * * @return Return the aligned value, as one would expect */ -constexpr std::size_t align_up(std::size_t value, std::size_t align_bytes) noexcept +constexpr std::size_t align_up(std::size_t value, alignment_type alignment) noexcept { - assert(is_supported_alignment(align_bytes)); - return (value + (align_bytes - 1)) & ~(align_bytes - 1); + assert(is_supported_alignment(alignment)); + return (value + (alignment - 1)) & ~(alignment - 1); } /** @@ -70,10 +72,10 @@ constexpr std::size_t align_up(std::size_t value, std::size_t align_bytes) noexc * * @return Return the aligned value, as one would expect */ -constexpr std::size_t align_down(std::size_t value, std::size_t align_bytes) noexcept +constexpr std::size_t align_down(std::size_t value, alignment_type alignment) noexcept { - assert(is_supported_alignment(align_bytes)); - return value & ~(align_bytes - 1); + assert(is_supported_alignment(alignment)); + return value & ~(alignment - 1); } /** @@ -84,13 +86,13 @@ constexpr std::size_t align_down(std::size_t value, std::size_t align_bytes) noe * * @return true if aligned */ -constexpr bool is_aligned(std::size_t value, std::size_t align_bytes) noexcept +constexpr bool is_aligned(std::size_t value, alignment_type alignment) noexcept { - assert(is_supported_alignment(align_bytes)); - return value == align_down(value, align_bytes); + assert(is_supported_alignment(alignment)); + return value == align_down(value, alignment); } -inline bool is_pointer_aligned(void* ptr, std::size_t alignment = CUDA_ALLOCATION_ALIGNMENT) +inline bool is_pointer_aligned(void* ptr, alignment_type alignment = CUDA_ALLOCATION_ALIGNMENT) { // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) return rmm::detail::is_aligned(reinterpret_cast(ptr), alignment); @@ -124,7 +126,7 @@ inline bool is_pointer_aligned(void* ptr, std::size_t alignment = CUDA_ALLOCATIO * `alignment`. 
*/ template -void* aligned_allocate(std::size_t bytes, std::size_t alignment, Alloc alloc) +void* aligned_allocate(std::size_t bytes, alignment_type alignment, Alloc alloc) { assert(is_pow2(alignment)); @@ -168,7 +170,7 @@ void* aligned_allocate(std::size_t bytes, std::size_t alignment, Alloc alloc) */ template // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) -void aligned_deallocate(void* ptr, std::size_t bytes, std::size_t alignment, Dealloc dealloc) +void aligned_deallocate(void* ptr, std::size_t bytes, alignment_type alignment, Dealloc dealloc) { (void)alignment; diff --git a/include/rmm/mr/device/aligned_resource_adaptor.hpp b/include/rmm/mr/device/aligned_resource_adaptor.hpp index 4e29b90b3..3d70596ff 100644 --- a/include/rmm/mr/device/aligned_resource_adaptor.hpp +++ b/include/rmm/mr/device/aligned_resource_adaptor.hpp @@ -61,8 +61,8 @@ class aligned_resource_adaptor final : public device_memory_resource { */ explicit aligned_resource_adaptor( Upstream* upstream, - std::size_t allocation_alignment = rmm::detail::CUDA_ALLOCATION_ALIGNMENT, - std::size_t alignment_threshold = default_alignment_threshold) + rmm::detail::alignment_type allocation_alignment = rmm::detail::CUDA_ALLOCATION_ALIGNMENT, + std::size_t alignment_threshold = default_alignment_threshold) : upstream_{upstream}, allocation_alignment_{allocation_alignment}, alignment_threshold_{alignment_threshold} @@ -124,18 +124,20 @@ class aligned_resource_adaptor final : public device_memory_resource { if (allocation_alignment_ == rmm::detail::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) { return upstream_->allocate(bytes, stream); - } else { - auto const size = upstream_allocation_size(bytes); - void* pointer = upstream_->allocate(size, stream); - auto const address = reinterpret_cast(pointer); - auto const aligned_address = rmm::detail::align_up(address, allocation_alignment_); - void* aligned_pointer = reinterpret_cast(aligned_address); - if (pointer != aligned_pointer) { - lock_guard lock(mtx_); - pointers_.emplace(aligned_pointer, pointer); - } - return aligned_pointer; } + auto const size = upstream_allocation_size(bytes); + void* pointer = upstream_->allocate(size, stream); + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + auto const address = reinterpret_cast(pointer); + auto const aligned_address = + rmm::detail::align_up(address, rmm::detail::alignment_type{allocation_alignment_}); + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast,performance-no-int-to-ptr) + void* aligned_pointer = reinterpret_cast(aligned_address); + if (pointer != aligned_pointer) { + lock_guard lock(mtx_); + pointers_.emplace(aligned_pointer, pointer); + } + return aligned_pointer; } /** @@ -147,21 +149,21 @@ class aligned_resource_adaptor final : public device_memory_resource { * @param bytes Size of the allocation * @param stream Stream on which to perform the deallocation */ - void do_deallocate(void* p, std::size_t bytes, cuda_stream_view stream) override + void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { if (allocation_alignment_ == rmm::detail::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) { - upstream_->deallocate(p, bytes, stream); + upstream_->deallocate(ptr, bytes, stream); } else { { lock_guard lock(mtx_); - auto const i = pointers_.find(p); - if (i != pointers_.end()) { - p = i->second; - pointers_.erase(i); + auto const iter = pointers_.find(ptr); + if (iter != pointers_.end()) { + ptr = iter->second; + pointers_.erase(iter); } } - 
upstream_->deallocate(p, upstream_allocation_size(bytes), stream); + upstream_->deallocate(ptr, upstream_allocation_size(bytes), stream); } } @@ -176,14 +178,11 @@ class aligned_resource_adaptor final : public device_memory_resource { */ [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override { - if (this == &other) - return true; - else { - auto cast = dynamic_cast const*>(&other); - return cast != nullptr && upstream_->is_equal(*cast->get_upstream()) && - allocation_alignment_ == cast->allocation_alignment_ && - alignment_threshold_ == cast->alignment_threshold_; - } + if (this == &other) { return true; } + auto cast = dynamic_cast const*>(&other); + return cast != nullptr && upstream_->is_equal(*cast->get_upstream()) && + allocation_alignment_ == cast->allocation_alignment_ && + alignment_threshold_ == cast->alignment_threshold_; } /** @@ -211,7 +210,8 @@ class aligned_resource_adaptor final : public device_memory_resource { */ std::size_t upstream_allocation_size(std::size_t bytes) const { - auto const aligned_size = rmm::detail::align_up(bytes, allocation_alignment_); + auto const aligned_size = + rmm::detail::align_up(bytes, rmm::detail::alignment_type{allocation_alignment_}); return aligned_size + allocation_alignment_ - rmm::detail::CUDA_ALLOCATION_ALIGNMENT; } diff --git a/include/rmm/mr/device/device_memory_resource.hpp b/include/rmm/mr/device/device_memory_resource.hpp index d9817a933..9200dfd00 100644 --- a/include/rmm/mr/device/device_memory_resource.hpp +++ b/include/rmm/mr/device/device_memory_resource.hpp @@ -21,9 +21,8 @@ #include #include -namespace rmm { +namespace rmm::mr { -namespace mr { /** * @brief Base class for all libcudf device memory allocation. * @@ -82,7 +81,11 @@ namespace mr { */ class device_memory_resource { public: - virtual ~device_memory_resource() = default; + virtual ~device_memory_resource() = default; + device_memory_resource(device_memory_resource const&) = default; + device_memory_resource& operator=(device_memory_resource const&) = default; + device_memory_resource(device_memory_resource&&) = default; + device_memory_resource& operator=(device_memory_resource&&) = default; /** * @brief Allocates memory of size at least \p bytes. @@ -101,7 +104,7 @@ class device_memory_resource { */ void* allocate(std::size_t bytes, cuda_stream_view stream = cuda_stream_view{}) { - return do_allocate(rmm::detail::align_up(bytes, 8), stream); + return do_allocate(rmm::detail::align_up(bytes, allocation_size_alignment), stream); } /** @@ -122,9 +125,9 @@ class device_memory_resource { * value of `bytes` that was passed to the `allocate` call that returned `p`. 
* @param stream Stream on which to perform deallocation */ - void deallocate(void* p, std::size_t bytes, cuda_stream_view stream = cuda_stream_view{}) + void deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream = cuda_stream_view{}) { - do_deallocate(p, rmm::detail::align_up(bytes, 8), stream); + do_deallocate(ptr, rmm::detail::align_up(bytes, allocation_size_alignment), stream); } /** @@ -140,7 +143,10 @@ class device_memory_resource { * @param other The other resource to compare to * @returns If the two resources are equivalent */ - bool is_equal(device_memory_resource const& other) const noexcept { return do_is_equal(other); } + [[nodiscard]] bool is_equal(device_memory_resource const& other) const noexcept + { + return do_is_equal(other); + } /** * @brief Query whether the resource supports use of non-null CUDA streams for @@ -148,14 +154,14 @@ class device_memory_resource { * * @returns bool true if the resource supports non-null CUDA streams. */ - virtual bool supports_streams() const noexcept = 0; + [[nodiscard]] virtual bool supports_streams() const noexcept = 0; /** * @brief Query whether the resource supports the get_mem_info API. * * @return bool true if the resource supports get_mem_info, false otherwise. */ - virtual bool supports_get_mem_info() const noexcept = 0; + [[nodiscard]] virtual bool supports_get_mem_info() const noexcept = 0; /** * @brief Queries the amount of free and total memory for the resource. @@ -165,12 +171,15 @@ class device_memory_resource { * @returns a pair containing the free memory in bytes in .first and total amount of memory in * .second */ - std::pair get_mem_info(cuda_stream_view stream) const + [[nodiscard]] std::pair get_mem_info(cuda_stream_view stream) const { return do_get_mem_info(stream); } private: + // All allocations are padded to a multiple of allocation_size_alignment bytes. + static constexpr auto allocation_size_alignment = rmm::detail::alignment_type{8}; + /** * @brief Allocates memory of size at least \p bytes. * @@ -196,7 +205,7 @@ class device_memory_resource { * value of `bytes` that was passed to the `allocate` call that returned `p`. * @param stream Stream on which to perform deallocation */ - virtual void do_deallocate(void* p, std::size_t bytes, cuda_stream_view stream) = 0; + virtual void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) = 0; /** * @brief Compare this resource to another. 
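Both allocate and deallocate above round the requested size up to a multiple of allocation_size_alignment (8 bytes) before calling the do_allocate / do_deallocate hooks, using the same power-of-two bit trick as rmm::detail::align_up. A standalone illustration of that arithmetic (plain C++, independent of RMM; only the formulas visible in detail/aligned.hpp above are used, and the helper names here are local to the example):

    #include <cstddef>

    // Same bit trick as rmm::detail::align_up / align_down; valid only when
    // `alignment` is a power of two, which is what is_supported_alignment checks.
    constexpr std::size_t round_up(std::size_t value, std::size_t alignment) noexcept
    {
      return (value + (alignment - 1)) & ~(alignment - 1);
    }

    constexpr std::size_t round_down(std::size_t value, std::size_t alignment) noexcept
    {
      return value & ~(alignment - 1);
    }

    // device_memory_resource pads allocation sizes to multiples of 8 bytes...
    static_assert(round_up(1, 8) == 8);
    static_assert(round_up(24, 8) == 24);
    static_assert(round_up(0, 8) == 0);
    // ...while CUDA allocations use CUDA_ALLOCATION_ALIGNMENT (256 bytes).
    static_assert(round_up(257, 256) == 512);
    static_assert(round_down(257, 256) == 256);

The same worst-case padding argument explains why aligned_resource_adaptor::upstream_allocation_size adds alignment - CUDA_ALLOCATION_ALIGNMENT on top of the aligned size: the upstream already guarantees 256-byte alignment, so that is the most extra space ever needed to reach the next alignment-byte boundary.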
@@ -212,7 +221,7 @@ class device_memory_resource { * @return true If the two resources are equivalent * @return false If the two resources are not equal */ - virtual bool do_is_equal(device_memory_resource const& other) const noexcept + [[nodiscard]] virtual bool do_is_equal(device_memory_resource const& other) const noexcept { return this == &other; } @@ -225,7 +234,7 @@ class device_memory_resource { * @param stream the stream being executed on * @return std::pair with available and free memory for resource */ - virtual std::pair do_get_mem_info(cuda_stream_view stream) const = 0; + [[nodiscard]] virtual std::pair do_get_mem_info( + cuda_stream_view stream) const = 0; }; -} // namespace mr -} // namespace rmm +} // namespace rmm::mr From a6a0cab4470b6f95e62f978fb24faaa14c73958a Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 10:16:40 +1000 Subject: [PATCH 31/72] Fix nodiscard compilation error --- tests/mr/device/mr_tests.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/mr/device/mr_tests.cpp b/tests/mr/device/mr_tests.cpp index 0e2b63dcc..f1248dbc0 100644 --- a/tests/mr/device/mr_tests.cpp +++ b/tests/mr/device/mr_tests.cpp @@ -109,14 +109,20 @@ TEST_P(mr_test, MixedRandomAllocationFreeStream) TEST_P(mr_test, GetMemInfo) { if (this->mr->supports_get_mem_info()) { - this->mr->get_mem_info(rmm::cuda_stream_view{}); const auto allocation_size{16 * 256}; + { + auto const [free, total] = this->mr->get_mem_info(rmm::cuda_stream_view{}); + EXPECT_TRUE(free >= allocation_size); + } + void* ptr{nullptr}; ptr = this->mr->allocate(allocation_size); + { auto const [free, total] = this->mr->get_mem_info(rmm::cuda_stream_view{}); EXPECT_TRUE(free >= allocation_size); } + this->mr->deallocate(ptr, allocation_size); } } From a12bb90c2c58cc4ce068b1e3f3982f3467227ce1 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 10:17:00 +1000 Subject: [PATCH 32/72] tidying more MRs --- include/rmm/detail/aligned.hpp | 3 +- .../mr/device/cuda_async_memory_resource.hpp | 50 +++++++++---------- .../rmm/mr/device/cuda_memory_resource.hpp | 33 ++++++------ .../rmm/mr/device/device_memory_resource.hpp | 1 + include/rmm/mr/host/new_delete_resource.hpp | 7 +-- tests/mr/device/cuda_async_mr_tests.cpp | 2 +- 6 files changed, 49 insertions(+), 47 deletions(-) diff --git a/include/rmm/detail/aligned.hpp b/include/rmm/detail/aligned.hpp index 6a8d70597..954bf082c 100644 --- a/include/rmm/detail/aligned.hpp +++ b/include/rmm/detail/aligned.hpp @@ -24,7 +24,8 @@ namespace rmm::detail { -enum alignment_type : std::size_t {}; +// enum alignment_type : std::size_t {}; +using alignment_type = std::size_t; /** * @brief Default alignment used for host memory allocated by RMM. diff --git a/include/rmm/mr/device/cuda_async_memory_resource.hpp b/include/rmm/mr/device/cuda_async_memory_resource.hpp index 57ec97ff8..19d52b16b 100644 --- a/include/rmm/mr/device/cuda_async_memory_resource.hpp +++ b/include/rmm/mr/device/cuda_async_memory_resource.hpp @@ -32,8 +32,7 @@ #define RMM_CUDA_MALLOC_ASYNC_SUPPORT #endif -namespace rmm { -namespace mr { +namespace rmm::mr { /** * @brief `device_memory_resource` derived class that uses `cudaMallocAsync`/`cudaFreeAsync` for @@ -41,6 +40,7 @@ namespace mr { */ class cuda_async_memory_resource final : public device_memory_resource { public: + enum release_threshold_size_type : std::size_t {}; /** * @brief Constructs a cuda_async_memory_resource with the optionally specified initial pool size * and release threshold. 
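A usage sketch for the constructor documented above, mirroring the ExplicitReleaseThreshold test that appears later in this series. The pool sizes are arbitrary illustration values, and construction throws if the CUDA runtime/driver does not support cudaMallocAsync (the cudaDevAttrMemoryPoolsSupported check in the hunk below):

    #include <rmm/cuda_stream_view.hpp>
    #include <rmm/mr/device/cuda_async_memory_resource.hpp>

    #include <cstddef>

    int main()
    {
      using async_mr = rmm::mr::cuda_async_memory_resource;

      // Prime the pool with 1 MiB and let it cache up to 8 MiB across synchronizations.
      // Both constructor arguments are optional.
      std::size_t const initial_pool_size{1U << 20U};
      auto const release_threshold = async_mr::release_threshold_size_type{1U << 23U};
      async_mr mr{initial_pool_size, release_threshold};

      // Allocations are stream-ordered; sizes are padded to a multiple of 8 bytes
      // by device_memory_resource::allocate before reaching do_allocate.
      void* ptr = mr.allocate(256, rmm::cuda_stream_default);
      mr.deallocate(ptr, 256, rmm::cuda_stream_default);
      return 0;
    }

The release_threshold_size_type strong type introduced in this patch keeps the two size arguments from being swapped accidentally, which is the same concern the bugprone-easily-swappable-parameters clang-tidy check addresses elsewhere in the series.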
@@ -55,16 +55,16 @@ class cuda_async_memory_resource final : public device_memory_resource { * @param release_threshold Optional release threshold size in bytes of the pool. If no value is * provided, the release threshold is set to the total amount of memory on the current device. */ - cuda_async_memory_resource(thrust::optional initial_pool_size = {}, - thrust::optional release_threshold = {}) + cuda_async_memory_resource(thrust::optional initial_pool_size = {}, + thrust::optional release_threshold = {}) { #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT // Check if cudaMallocAsync Memory pool supported auto const device = rmm::detail::current_device(); int cuda_pool_supported{}; - auto e = + auto result = cudaDeviceGetAttribute(&cuda_pool_supported, cudaDevAttrMemoryPoolsSupported, device.value()); - RMM_EXPECTS(e == cudaSuccess && cuda_pool_supported, + RMM_EXPECTS(result == cudaSuccess && cuda_pool_supported, "cudaMallocAsync not supported with this CUDA driver/runtime version"); // Construct explicit pool @@ -78,15 +78,15 @@ class cuda_async_memory_resource final : public device_memory_resource { auto const [free, total] = rmm::detail::available_device_memory(); // Need an l-value to take address to pass to cudaMemPoolSetAttribute - uint64_t threshold = release_threshold.value_or(total); + uint64_t threshold = release_threshold.value_or(release_threshold_size_type{total}); RMM_CUDA_TRY( cudaMemPoolSetAttribute(cuda_pool_handle_, cudaMemPoolAttrReleaseThreshold, &threshold)); // Allocate and immediately deallocate the initial_pool_size to prime the pool with the // specified size - auto const pool_size = initial_pool_size.value_or(free * 0.5); - auto p = do_allocate(pool_size, cuda_stream_default); - do_deallocate(p, pool_size, cuda_stream_default); + auto const pool_size = initial_pool_size.value_or(free / 2); + auto* ptr = do_allocate(pool_size, cuda_stream_default); + do_deallocate(ptr, pool_size, cuda_stream_default); #else RMM_FAIL( @@ -99,10 +99,10 @@ class cuda_async_memory_resource final : public device_memory_resource { * @brief Returns the underlying native handle to the CUDA pool * */ - cudaMemPool_t pool_handle() const noexcept { return cuda_pool_handle_; } + [[nodiscard]] cudaMemPool_t pool_handle() const noexcept { return cuda_pool_handle_; } #endif - ~cuda_async_memory_resource() + ~cuda_async_memory_resource() override { #if defined(RMM_CUDA_MALLOC_ASYNC_SUPPORT) RMM_ASSERT_CUDA_SUCCESS(cudaMemPoolDestroy(pool_handle())); @@ -119,18 +119,18 @@ class cuda_async_memory_resource final : public device_memory_resource { * * @returns bool true */ - bool supports_streams() const noexcept override { return true; } + [[nodiscard]] bool supports_streams() const noexcept override { return true; } /** * @brief Query whether the resource supports the get_mem_info API. 
* * @return true */ - bool supports_get_mem_info() const noexcept override { return false; } + [[nodiscard]] bool supports_get_mem_info() const noexcept override { return false; } private: #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT - cudaMemPool_t cuda_pool_handle_; + cudaMemPool_t cuda_pool_handle_{}; #endif /** @@ -145,17 +145,17 @@ class cuda_async_memory_resource final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override { - void* p{nullptr}; + void* ptr{nullptr}; #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT if (bytes > 0) { - RMM_CUDA_TRY(cudaMallocFromPoolAsync(&p, bytes, pool_handle(), stream.value()), + RMM_CUDA_TRY(cudaMallocFromPoolAsync(&ptr, bytes, pool_handle(), stream.value()), rmm::bad_alloc); } #else (void)bytes; (void)stream; #endif - return p; + return ptr; } /** @@ -165,12 +165,12 @@ class cuda_async_memory_resource final : public device_memory_resource { * * @param p Pointer to be deallocated */ - void do_deallocate(void* p, std::size_t, rmm::cuda_stream_view stream) override + void do_deallocate(void* ptr, std::size_t /*bytes*/, rmm::cuda_stream_view stream) override { #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT - if (p != nullptr) { RMM_ASSERT_CUDA_SUCCESS(cudaFreeAsync(p, stream.value())); } + if (ptr != nullptr) { RMM_ASSERT_CUDA_SUCCESS(cudaFreeAsync(ptr, stream.value())); } #else - (void)p; + (void)ptr; (void)stream; #endif } @@ -184,7 +184,7 @@ class cuda_async_memory_resource final : public device_memory_resource { * @return true If the two resources are equivalent * @return false If the two resources are not equal */ - bool do_is_equal(device_memory_resource const& other) const noexcept override + [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override { return dynamic_cast(&other) != nullptr; } @@ -196,11 +196,11 @@ class cuda_async_memory_resource final : public device_memory_resource { * * @return std::pair contaiing free_size and total_size of memory */ - std::pair do_get_mem_info(rmm::cuda_stream_view) const override + [[nodiscard]] std::pair do_get_mem_info( + rmm::cuda_stream_view /*stream*/) const override { return std::make_pair(0, 0); } }; -} // namespace mr -} // namespace rmm +} // namespace rmm::mr diff --git a/include/rmm/mr/device/cuda_memory_resource.hpp b/include/rmm/mr/device/cuda_memory_resource.hpp index d419ce335..59a729297 100644 --- a/include/rmm/mr/device/cuda_memory_resource.hpp +++ b/include/rmm/mr/device/cuda_memory_resource.hpp @@ -22,8 +22,7 @@ #include -namespace rmm { -namespace mr { +namespace rmm::mr { /** * @brief `device_memory_resource` derived class that uses cudaMalloc/Free for * allocation/deallocation. @@ -31,7 +30,7 @@ namespace mr { class cuda_memory_resource final : public device_memory_resource { public: cuda_memory_resource() = default; - ~cuda_memory_resource() = default; + ~cuda_memory_resource() override = default; cuda_memory_resource(cuda_memory_resource const&) = default; cuda_memory_resource(cuda_memory_resource&&) = default; cuda_memory_resource& operator=(cuda_memory_resource const&) = default; @@ -43,14 +42,14 @@ class cuda_memory_resource final : public device_memory_resource { * * @returns bool false */ - bool supports_streams() const noexcept override { return false; } + [[nodiscard]] bool supports_streams() const noexcept override { return false; } /** * @brief Query whether the resource supports the get_mem_info API. 
* * @return true */ - bool supports_get_mem_info() const noexcept override { return true; } + [[nodiscard]] bool supports_get_mem_info() const noexcept override { return true; } private: /** @@ -65,11 +64,11 @@ class cuda_memory_resource final : public device_memory_resource { * @param bytes The size, in bytes, of the allocation * @return void* Pointer to the newly allocated memory */ - void* do_allocate(std::size_t bytes, cuda_stream_view) override + void* do_allocate(std::size_t bytes, cuda_stream_view /*stream*/) override { - void* p{nullptr}; - RMM_CUDA_TRY(cudaMalloc(&p, bytes), rmm::bad_alloc); - return p; + void* ptr{nullptr}; + RMM_CUDA_TRY(cudaMalloc(&ptr, bytes), rmm::bad_alloc); + return ptr; } /** @@ -81,9 +80,9 @@ class cuda_memory_resource final : public device_memory_resource { * * @param p Pointer to be deallocated */ - void do_deallocate(void* p, std::size_t, cuda_stream_view) override + void do_deallocate(void* ptr, std::size_t /*bytes*/, cuda_stream_view /*stream*/) override { - RMM_ASSERT_CUDA_SUCCESS(cudaFree(p)); + RMM_ASSERT_CUDA_SUCCESS(cudaFree(ptr)); } /** @@ -98,7 +97,7 @@ class cuda_memory_resource final : public device_memory_resource { * @return true If the two resources are equivalent * @return false If the two resources are not equal */ - bool do_is_equal(device_memory_resource const& other) const noexcept override + [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override { return dynamic_cast(&other) != nullptr; } @@ -110,13 +109,13 @@ class cuda_memory_resource final : public device_memory_resource { * * @return std::pair contaiing free_size and total_size of memory */ - std::pair do_get_mem_info(cuda_stream_view) const override + [[nodiscard]] std::pair do_get_mem_info( + cuda_stream_view /*stream*/) const override { - std::size_t free_size; - std::size_t total_size; + std::size_t free_size{}; + std::size_t total_size{}; RMM_CUDA_TRY(cudaMemGetInfo(&free_size, &total_size)); return std::make_pair(free_size, total_size); } }; -} // namespace mr -} // namespace rmm +} // namespace rmm::mr diff --git a/include/rmm/mr/device/device_memory_resource.hpp b/include/rmm/mr/device/device_memory_resource.hpp index 9200dfd00..6105db207 100644 --- a/include/rmm/mr/device/device_memory_resource.hpp +++ b/include/rmm/mr/device/device_memory_resource.hpp @@ -81,6 +81,7 @@ namespace rmm::mr { */ class device_memory_resource { public: + device_memory_resource() = default; virtual ~device_memory_resource() = default; device_memory_resource(device_memory_resource const&) = default; device_memory_resource& operator=(device_memory_resource const&) = default; diff --git a/include/rmm/mr/host/new_delete_resource.hpp b/include/rmm/mr/host/new_delete_resource.hpp index 0f27cbf3c..6028e0ead 100644 --- a/include/rmm/mr/host/new_delete_resource.hpp +++ b/include/rmm/mr/host/new_delete_resource.hpp @@ -56,11 +56,12 @@ class new_delete_resource final : public host_memory_resource { std::size_t alignment = detail::RMM_DEFAULT_HOST_ALIGNMENT) override { // If the requested alignment isn't supported, use default - alignment = - (detail::is_supported_alignment(alignment)) ? alignment : detail::RMM_DEFAULT_HOST_ALIGNMENT; + auto align = (detail::is_supported_alignment(rmm::detail::alignment_type{alignment})) + ? 
rmm::detail::alignment_type{alignment} + : detail::RMM_DEFAULT_HOST_ALIGNMENT; return detail::aligned_allocate( - bytes, alignment, [](std::size_t size) { return ::operator new(size); }); + bytes, align, [](std::size_t size) { return ::operator new(size); }); } /**---------------------------------------------------------------------------* diff --git a/tests/mr/device/cuda_async_mr_tests.cpp b/tests/mr/device/cuda_async_mr_tests.cpp index 4bf0c3d5b..5a507162c 100644 --- a/tests/mr/device/cuda_async_mr_tests.cpp +++ b/tests/mr/device/cuda_async_mr_tests.cpp @@ -47,7 +47,7 @@ TEST(PoolTest, ExplicitInitialPoolSize) TEST(PoolTest, ExplicitReleaseThreshold) { const auto pool_init_size{100}; - const auto pool_release_threshold{1000}; + const auto pool_release_threshold = cuda_async_mr::release_threshold_size_type{1000}; cuda_async_mr mr{pool_init_size, pool_release_threshold}; void* ptr = mr.allocate(pool_init_size); mr.deallocate(ptr, pool_init_size); From 2b1b49d69a81db46f99691492cb8aeb574767da2 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 10:43:45 +1000 Subject: [PATCH 33/72] Remove `alignment_type` and ignore swappable parameters when one is called `alignment`. --- .clang-tidy | 2 ++ include/rmm/detail/aligned.hpp | 21 +++++------ .../mr/device/aligned_resource_adaptor.hpp | 36 ++++++++----------- .../rmm/mr/device/device_memory_resource.hpp | 2 +- include/rmm/mr/host/new_delete_resource.hpp | 19 +++++----- 5 files changed, 34 insertions(+), 46 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index b8fba1f09..b76743aeb 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -56,4 +56,6 @@ CheckOptions: value: 'mr|_' - key: readability-function-cognitive-complexity.IgnoreMacros value: '1' + - key: bugprone-easily-swappable-parameters.IgnoredParameterNames + value: 'alignment' ... diff --git a/include/rmm/detail/aligned.hpp b/include/rmm/detail/aligned.hpp index 954bf082c..321be53b5 100644 --- a/include/rmm/detail/aligned.hpp +++ b/include/rmm/detail/aligned.hpp @@ -24,20 +24,17 @@ namespace rmm::detail { -// enum alignment_type : std::size_t {}; -using alignment_type = std::size_t; - /** * @brief Default alignment used for host memory allocated by RMM. * */ -static constexpr alignment_type RMM_DEFAULT_HOST_ALIGNMENT{alignof(std::max_align_t)}; +static constexpr std::size_t RMM_DEFAULT_HOST_ALIGNMENT{alignof(std::max_align_t)}; /** * @brief Default alignment used for CUDA memory allocation. * */ -static constexpr alignment_type CUDA_ALLOCATION_ALIGNMENT{256}; +static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256}; /** * @brief Returns whether or not `n` is a power of 2. @@ -49,7 +46,7 @@ constexpr bool is_pow2(std::size_t value) { return (0 == (value & (value - 1))); * @brief Returns whether or not `alignment` is a valid memory alignment. 
* */ -constexpr bool is_supported_alignment(alignment_type alignment) { return is_pow2(alignment); } +constexpr bool is_supported_alignment(std::size_t alignment) { return is_pow2(alignment); } /** * @brief Align up to nearest multiple of specified power of 2 @@ -59,7 +56,7 @@ constexpr bool is_supported_alignment(alignment_type alignment) { return is_pow2 * * @return Return the aligned value, as one would expect */ -constexpr std::size_t align_up(std::size_t value, alignment_type alignment) noexcept +constexpr std::size_t align_up(std::size_t value, std::size_t alignment) noexcept { assert(is_supported_alignment(alignment)); return (value + (alignment - 1)) & ~(alignment - 1); @@ -73,7 +70,7 @@ constexpr std::size_t align_up(std::size_t value, alignment_type alignment) noex * * @return Return the aligned value, as one would expect */ -constexpr std::size_t align_down(std::size_t value, alignment_type alignment) noexcept +constexpr std::size_t align_down(std::size_t value, std::size_t alignment) noexcept { assert(is_supported_alignment(alignment)); return value & ~(alignment - 1); @@ -87,13 +84,13 @@ constexpr std::size_t align_down(std::size_t value, alignment_type alignment) no * * @return true if aligned */ -constexpr bool is_aligned(std::size_t value, alignment_type alignment) noexcept +constexpr bool is_aligned(std::size_t value, std::size_t alignment) noexcept { assert(is_supported_alignment(alignment)); return value == align_down(value, alignment); } -inline bool is_pointer_aligned(void* ptr, alignment_type alignment = CUDA_ALLOCATION_ALIGNMENT) +inline bool is_pointer_aligned(void* ptr, std::size_t alignment = CUDA_ALLOCATION_ALIGNMENT) { // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) return rmm::detail::is_aligned(reinterpret_cast(ptr), alignment); @@ -127,7 +124,7 @@ inline bool is_pointer_aligned(void* ptr, alignment_type alignment = CUDA_ALLOCA * `alignment`. */ template -void* aligned_allocate(std::size_t bytes, alignment_type alignment, Alloc alloc) +void* aligned_allocate(std::size_t bytes, std::size_t alignment, Alloc alloc) { assert(is_pow2(alignment)); @@ -171,7 +168,7 @@ void* aligned_allocate(std::size_t bytes, alignment_type alignment, Alloc alloc) */ template // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) -void aligned_deallocate(void* ptr, std::size_t bytes, alignment_type alignment, Dealloc dealloc) +void aligned_deallocate(void* ptr, std::size_t bytes, std::size_t alignment, Dealloc dealloc) { (void)alignment; diff --git a/include/rmm/mr/device/aligned_resource_adaptor.hpp b/include/rmm/mr/device/aligned_resource_adaptor.hpp index 3d70596ff..31b23a442 100644 --- a/include/rmm/mr/device/aligned_resource_adaptor.hpp +++ b/include/rmm/mr/device/aligned_resource_adaptor.hpp @@ -55,20 +55,17 @@ class aligned_resource_adaptor final : public device_memory_resource { * @throws `rmm::logic_error` if `allocation_alignment` is not a power of 2 * * @param upstream The resource used for allocating/deallocating device memory. - * @param allocation_alignment The size used for allocation alignment. + * @param alignment The size used for allocation alignment. * @param alignment_threshold Only allocations with a size larger than or equal to this threshold * are aligned. 
*/ - explicit aligned_resource_adaptor( - Upstream* upstream, - rmm::detail::alignment_type allocation_alignment = rmm::detail::CUDA_ALLOCATION_ALIGNMENT, - std::size_t alignment_threshold = default_alignment_threshold) - : upstream_{upstream}, - allocation_alignment_{allocation_alignment}, - alignment_threshold_{alignment_threshold} + explicit aligned_resource_adaptor(Upstream* upstream, + std::size_t alignment = rmm::detail::CUDA_ALLOCATION_ALIGNMENT, + std::size_t alignment_threshold = default_alignment_threshold) + : upstream_{upstream}, alignment_{alignment}, alignment_threshold_{alignment_threshold} { RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); - RMM_EXPECTS(rmm::detail::is_supported_alignment(allocation_alignment), + RMM_EXPECTS(rmm::detail::is_supported_alignment(alignment), "Allocation alignment is not a power of 2."); } @@ -121,16 +118,14 @@ class aligned_resource_adaptor final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - if (allocation_alignment_ == rmm::detail::CUDA_ALLOCATION_ALIGNMENT || - bytes < alignment_threshold_) { + if (alignment_ == rmm::detail::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) { return upstream_->allocate(bytes, stream); } auto const size = upstream_allocation_size(bytes); void* pointer = upstream_->allocate(size, stream); // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) - auto const address = reinterpret_cast(pointer); - auto const aligned_address = - rmm::detail::align_up(address, rmm::detail::alignment_type{allocation_alignment_}); + auto const address = reinterpret_cast(pointer); + auto const aligned_address = rmm::detail::align_up(address, alignment_); // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast,performance-no-int-to-ptr) void* aligned_pointer = reinterpret_cast(aligned_address); if (pointer != aligned_pointer) { @@ -151,8 +146,7 @@ class aligned_resource_adaptor final : public device_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - if (allocation_alignment_ == rmm::detail::CUDA_ALLOCATION_ALIGNMENT || - bytes < alignment_threshold_) { + if (alignment_ == rmm::detail::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) { upstream_->deallocate(ptr, bytes, stream); } else { { @@ -181,8 +175,7 @@ class aligned_resource_adaptor final : public device_memory_resource { if (this == &other) { return true; } auto cast = dynamic_cast const*>(&other); return cast != nullptr && upstream_->is_equal(*cast->get_upstream()) && - allocation_alignment_ == cast->allocation_alignment_ && - alignment_threshold_ == cast->alignment_threshold_; + alignment_ == cast->alignment_ && alignment_threshold_ == cast->alignment_threshold_; } /** @@ -210,14 +203,13 @@ class aligned_resource_adaptor final : public device_memory_resource { */ std::size_t upstream_allocation_size(std::size_t bytes) const { - auto const aligned_size = - rmm::detail::align_up(bytes, rmm::detail::alignment_type{allocation_alignment_}); - return aligned_size + allocation_alignment_ - rmm::detail::CUDA_ALLOCATION_ALIGNMENT; + auto const aligned_size = rmm::detail::align_up(bytes, alignment_); + return aligned_size + alignment_ - rmm::detail::CUDA_ALLOCATION_ALIGNMENT; } Upstream* upstream_; ///< The upstream resource used for satisfying allocation requests std::unordered_map pointers_; ///< Map of aligned pointers to upstream pointers. 
- std::size_t allocation_alignment_; ///< The size used for allocation alignment + std::size_t alignment_; ///< The size used for allocation alignment std::size_t alignment_threshold_; ///< The size above which allocations should be aligned mutable std::mutex mtx_; ///< Mutex for exclusive lock. }; diff --git a/include/rmm/mr/device/device_memory_resource.hpp b/include/rmm/mr/device/device_memory_resource.hpp index 6105db207..e0e97b86d 100644 --- a/include/rmm/mr/device/device_memory_resource.hpp +++ b/include/rmm/mr/device/device_memory_resource.hpp @@ -179,7 +179,7 @@ class device_memory_resource { private: // All allocations are padded to a multiple of allocation_size_alignment bytes. - static constexpr auto allocation_size_alignment = rmm::detail::alignment_type{8}; + static constexpr auto allocation_size_alignment = std::size_t{8}; /** * @brief Allocates memory of size at least \p bytes. diff --git a/include/rmm/mr/host/new_delete_resource.hpp b/include/rmm/mr/host/new_delete_resource.hpp index 6028e0ead..694450798 100644 --- a/include/rmm/mr/host/new_delete_resource.hpp +++ b/include/rmm/mr/host/new_delete_resource.hpp @@ -22,8 +22,7 @@ #include #include -namespace rmm { -namespace mr { +namespace rmm::mr { /**---------------------------------------------------------------------------* * @brief A `host_memory_resource` that uses the global `operator new` and @@ -32,7 +31,7 @@ namespace mr { class new_delete_resource final : public host_memory_resource { public: new_delete_resource() = default; - ~new_delete_resource() = default; + ~new_delete_resource() override = default; new_delete_resource(new_delete_resource const&) = default; new_delete_resource(new_delete_resource&&) = default; new_delete_resource& operator=(new_delete_resource const&) = default; @@ -56,12 +55,11 @@ class new_delete_resource final : public host_memory_resource { std::size_t alignment = detail::RMM_DEFAULT_HOST_ALIGNMENT) override { // If the requested alignment isn't supported, use default - auto align = (detail::is_supported_alignment(rmm::detail::alignment_type{alignment})) - ? rmm::detail::alignment_type{alignment} - : detail::RMM_DEFAULT_HOST_ALIGNMENT; + alignment = + (detail::is_supported_alignment(alignment)) ? alignment : detail::RMM_DEFAULT_HOST_ALIGNMENT; return detail::aligned_allocate( - bytes, align, [](std::size_t size) { return ::operator new(size); }); + bytes, alignment, [](std::size_t size) { return ::operator new(size); }); } /**---------------------------------------------------------------------------* @@ -82,12 +80,11 @@ class new_delete_resource final : public host_memory_resource { *`p`. 
* @param stream Stream on which to perform deallocation *---------------------------------------------------------------------------**/ - void do_deallocate(void* p, + void do_deallocate(void* ptr, std::size_t bytes, std::size_t alignment = detail::RMM_DEFAULT_HOST_ALIGNMENT) override { - detail::aligned_deallocate(p, bytes, alignment, [](void* p) { ::operator delete(p); }); + detail::aligned_deallocate(ptr, bytes, alignment, [](void* ptr) { ::operator delete(ptr); }); } }; -} // namespace mr -} // namespace rmm +} // namespace rmm::mr From b149037eb05f7ccf8f512644417be1d6ed6947a7 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 10:48:49 +1000 Subject: [PATCH 34/72] tidy arena_memory_resource --- .../rmm/mr/device/arena_memory_resource.hpp | 62 +++++++++---------- 1 file changed, 30 insertions(+), 32 deletions(-) diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp index 28376142c..eab0a7bc7 100644 --- a/include/rmm/mr/device/arena_memory_resource.hpp +++ b/include/rmm/mr/device/arena_memory_resource.hpp @@ -25,8 +25,7 @@ #include #include -namespace rmm { -namespace mr { +namespace rmm::mr { /** * @brief A suballocator that emphasizes fragmentation avoidance and scalable concurrency support. @@ -92,9 +91,12 @@ class arena_memory_resource final : public device_memory_resource { { } + ~arena_memory_resource() override = default; // Disable copy (and move) semantics. arena_memory_resource(arena_memory_resource const&) = delete; arena_memory_resource& operator=(arena_memory_resource const&) = delete; + arena_memory_resource(arena_memory_resource&&) = delete; + arena_memory_resource& operator=(arena_memory_resource&&) = delete; /** * @brief Queries whether the resource supports use of non-null CUDA streams for @@ -130,44 +132,44 @@ class arena_memory_resource final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - if (bytes <= 0) return nullptr; + if (bytes <= 0) { return nullptr; } bytes = detail::arena::align_up(bytes); return get_arena(stream).allocate(bytes); } /** - * @brief Deallocate memory pointed to by `p`. + * @brief Deallocate memory pointed to by `ptr`. * - * @param p Pointer to be deallocated. + * @param ptr Pointer to be deallocated. * @param bytes The size in bytes of the allocation. This must be equal to the * value of `bytes` that was passed to the `allocate` call that returned `p`. * @param stream Stream on which to perform deallocation. */ - void do_deallocate(void* p, std::size_t bytes, cuda_stream_view stream) override + void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - if (p == nullptr || bytes <= 0) return; + if (ptr == nullptr || bytes <= 0) { return; } bytes = detail::arena::align_up(bytes); #ifdef RMM_POOL_TRACK_ALLOCATIONS - if (!get_arena(stream).deallocate(p, bytes, stream)) { - deallocate_from_other_arena(p, bytes, stream); + if (!get_arena(stream).deallocate(ptr, bytes, stream)) { + deallocate_from_other_arena(ptr, bytes, stream); } #else - get_arena(stream).deallocate(p, bytes, stream); + get_arena(stream).deallocate(ptr, bytes, stream); #endif } #ifdef RMM_POOL_TRACK_ALLOCATIONS /** - * @brief Deallocate memory pointed to by `p` that was allocated in a different arena. + * @brief Deallocate memory pointed to by `ptr` that was allocated in a different arena. * - * @param p Pointer to be deallocated. + * @param ptr Pointer to be deallocated. * @param bytes The size in bytes of the allocation. 
This must be equal to the * value of `bytes` that was passed to the `allocate` call that returned `p`. * @param stream Stream on which to perform deallocation. */ - void deallocate_from_other_arena(void* p, std::size_t bytes, cuda_stream_view stream) + void deallocate_from_other_arena(void* ptr, std::size_t bytes, cuda_stream_view stream) { stream.synchronize_no_throw(); @@ -178,19 +180,19 @@ class arena_memory_resource final : public device_memory_resource { for (auto& kv : thread_arenas_) { // If the arena does not belong to the current thread, try to deallocate from it, and return // if successful. - if (kv.first != id && kv.second->deallocate(p, bytes)) return; + if (kv.first != id && kv.second->deallocate(ptr, bytes)) return; } } else { for (auto& kv : stream_arenas_) { // If the arena does not belong to the current stream, try to deallocate from it, and return // if successful. - if (stream != kv.first && kv.second.deallocate(p, bytes)) return; + if (stream != kv.first && kv.second.deallocate(ptr, bytes)) return; } } // The thread that originally allocated the block has terminated, deallocate directly in the // global arena. - global_arena_.deallocate({p, bytes}); + global_arena_.deallocate({ptr, bytes}); } #endif @@ -202,11 +204,8 @@ class arena_memory_resource final : public device_memory_resource { */ arena& get_arena(cuda_stream_view stream) { - if (use_per_thread_arena(stream)) { - return get_thread_arena(); - } else { - return get_stream_arena(stream); - } + if (use_per_thread_arena(stream)) { return get_thread_arena(); } + return get_stream_arena(stream); } /** @@ -216,18 +215,18 @@ class arena_memory_resource final : public device_memory_resource { */ arena& get_thread_arena() { - auto const id = std::this_thread::get_id(); + auto const thread_id = std::this_thread::get_id(); { read_lock lock(mtx_); - auto const it = thread_arenas_.find(id); - if (it != thread_arenas_.end()) { return *it->second; } + auto const iter = thread_arenas_.find(thread_id); + if (iter != thread_arenas_.end()) { return *iter->second; } } { write_lock lock(mtx_); - auto a = std::make_shared(global_arena_); - thread_arenas_.emplace(id, a); - thread_local detail::arena::arena_cleaner cleaner{a}; - return *a; + auto thread_arena = std::make_shared(global_arena_); + thread_arenas_.emplace(thread_id, thread_arena); + thread_local detail::arena::arena_cleaner cleaner{thread_arena}; + return *thread_arena; } } @@ -241,8 +240,8 @@ class arena_memory_resource final : public device_memory_resource { RMM_LOGGING_ASSERT(!use_per_thread_arena(stream)); { read_lock lock(mtx_); - auto const it = stream_arenas_.find(stream.value()); - if (it != stream_arenas_.end()) { return it->second; } + auto const iter = stream_arenas_.find(stream.value()); + if (iter != stream_arenas_.end()) { return iter->second; } } { write_lock lock(mtx_); @@ -285,5 +284,4 @@ class arena_memory_resource final : public device_memory_resource { mutable std::shared_timed_mutex mtx_; }; -} // namespace mr -} // namespace rmm +} // namespace rmm::mr From 2d410e33075d6b515999b46026ceb5f9509e408e Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 10:56:26 +1000 Subject: [PATCH 35/72] tidy binning_memory_resource --- .../rmm/mr/device/binning_memory_resource.hpp | 40 +++++++++---------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/include/rmm/mr/device/binning_memory_resource.hpp b/include/rmm/mr/device/binning_memory_resource.hpp index 7b0d9f48f..46a7e204d 100644 --- a/include/rmm/mr/device/binning_memory_resource.hpp 
+++ b/include/rmm/mr/device/binning_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,8 +27,7 @@ #include #include -namespace rmm { -namespace mr { +namespace rmm::mr { /** * @brief Allocates memory from upstream resources associated with bin sizes. @@ -77,15 +76,16 @@ class binning_memory_resource final : public device_memory_resource { return upstream_resource; }()} { - for (auto i = min_size_exponent; i <= max_size_exponent; i++) + for (auto i = min_size_exponent; i <= max_size_exponent; i++) { add_bin(1 << i); + } } /** * @brief Destroy the binning_memory_resource and free all memory allocated from the upstream * resource. */ - ~binning_memory_resource() = default; + ~binning_memory_resource() override = default; binning_memory_resource() = delete; binning_memory_resource(binning_memory_resource const&) = delete; @@ -99,14 +99,14 @@ class binning_memory_resource final : public device_memory_resource { * * @returns true */ - bool supports_streams() const noexcept override { return true; } + [[nodiscard]] bool supports_streams() const noexcept override { return true; } /** * @brief Query whether the resource supports the get_mem_info API. * * @return bool true if the resource supports get_mem_info, false otherwise. */ - bool supports_get_mem_info() const noexcept override { return false; } + [[nodiscard]] bool supports_get_mem_info() const noexcept override { return false; } /** * @brief Get the upstream memory_resource object. @@ -136,15 +136,13 @@ class binning_memory_resource final : public device_memory_resource { allocation_size = rmm::detail::align_up(allocation_size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); - if (nullptr != bin_resource) + if (nullptr != bin_resource) { resource_bins_.insert({allocation_size, bin_resource}); - else { - // If the bin already exists, do nothing. - if (resource_bins_.count(allocation_size) == 0) { - owned_bin_resources_.push_back( - std::make_unique>(upstream_mr_, allocation_size)); - resource_bins_.insert({allocation_size, owned_bin_resources_.back().get()}); - } + } else if (resource_bins_.count(allocation_size) == 0) { // do nothing if bin already exists + + owned_bin_resources_.push_back( + std::make_unique>(upstream_mr_, allocation_size)); + resource_bins_.insert({allocation_size, owned_bin_resources_.back().get()}); } } @@ -175,7 +173,7 @@ class binning_memory_resource final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - if (bytes <= 0) return nullptr; + if (bytes <= 0) { return nullptr; } return get_resource(bytes)->allocate(bytes, stream); } @@ -189,10 +187,10 @@ class binning_memory_resource final : public device_memory_resource { * value of `bytes` that was passed to the `allocate` call that returned `p`. 
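To make the exponent-based constructor above concrete, a small sketch (not part of the diff); bin bounds and sizes are arbitrary.

#include <rmm/mr/device/binning_memory_resource.hpp>
#include <rmm/mr/device/cuda_memory_resource.hpp>

void binning_example()
{
  rmm::mr::cuda_memory_resource upstream;
  // Creates fixed-size bins of 2^18 .. 2^22 bytes (256 KiB to 4 MiB); requests larger than the
  // largest bin fall through to the upstream resource.
  rmm::mr::binning_memory_resource<rmm::mr::cuda_memory_resource> mr{&upstream, 18, 22};

  void* ptr = mr.allocate(1 << 20);  // served from the 1 MiB bin
  mr.deallocate(ptr, 1 << 20);
}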
* @param stream Stream on which to perform deallocation */ - void do_deallocate(void* p, std::size_t bytes, cuda_stream_view stream) override + void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { auto res = get_resource(bytes); - if (res != nullptr) res->deallocate(p, bytes, stream); + if (res != nullptr) { res->deallocate(ptr, bytes, stream); } } /** @@ -203,7 +201,8 @@ class binning_memory_resource final : public device_memory_resource { * @param stream the stream being executed on * @return std::pair with available and free memory for resource */ - std::pair do_get_mem_info(cuda_stream_view stream) const override + [[nodiscard]] std::pair do_get_mem_info( + cuda_stream_view stream) const override { return std::make_pair(0, 0); } @@ -215,5 +214,4 @@ class binning_memory_resource final : public device_memory_resource { std::map resource_bins_; }; -} // namespace mr -} // namespace rmm +} // namespace rmm::mr From 1811fe3b82f7bbcd65319ae82fa449e4b46f1ba9 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 11:04:14 +1000 Subject: [PATCH 36/72] tidy fixed_size_mr --- .../mr/device/fixed_size_memory_resource.hpp | 61 ++++++++++--------- 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/include/rmm/mr/device/fixed_size_memory_resource.hpp b/include/rmm/mr/device/fixed_size_memory_resource.hpp index fb87691e8..6ff02bcc3 100644 --- a/include/rmm/mr/device/fixed_size_memory_resource.hpp +++ b/include/rmm/mr/device/fixed_size_memory_resource.hpp @@ -33,9 +33,7 @@ #include #include -namespace rmm { - -namespace mr { +namespace rmm::mr { /** * @brief A `device_memory_resource` which allocates memory blocks of a single fixed size. @@ -97,14 +95,14 @@ class fixed_size_memory_resource * * @returns true */ - bool supports_streams() const noexcept override { return true; } + [[nodiscard]] bool supports_streams() const noexcept override { return true; } /** * @brief Query whether the resource supports the get_mem_info API. * * @return bool true if the resource supports get_mem_info, false otherwise. */ - bool supports_get_mem_info() const noexcept override { return false; } + [[nodiscard]] bool supports_get_mem_info() const noexcept override { return false; } /** * @brief Get the upstream memory_resource object. @@ -118,7 +116,7 @@ class fixed_size_memory_resource * * @return std::size_t size in bytes of allocated blocks. */ - std::size_t get_block_size() const noexcept { return block_size_; } + [[nodiscard]] std::size_t get_block_size() const noexcept { return block_size_; } protected: using free_list = detail::fixed_size_free_list; @@ -133,7 +131,7 @@ class fixed_size_memory_resource * @return std::size_t The (fixed) maximum size of a single allocation supported by this memory * resource */ - std::size_t get_maximum_allocation_size() const { return get_block_size(); } + [[nodiscard]] std::size_t get_maximum_allocation_size() const { return get_block_size(); } /** * @brief Allocate a block from upstream to supply the suballocation pool. 
@@ -160,49 +158,53 @@ class fixed_size_memory_resource */ free_list blocks_from_upstream(cuda_stream_view stream) { - void* p = upstream_mr_->allocate(upstream_chunk_size_, stream); - block_type b{p}; - upstream_blocks_.push_back(b); + void* ptr = upstream_mr_->allocate(upstream_chunk_size_, stream); + block_type block{ptr}; + upstream_blocks_.push_back(block); auto num_blocks = upstream_chunk_size_ / block_size_; - auto g = [p, this](int i) { return block_type{static_cast(p) + i * block_size_}; }; - auto first = thrust::make_transform_iterator(thrust::make_counting_iterator(std::size_t{0}), g); + auto block_gen = [ptr, this](int index) { + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + return block_type{static_cast(ptr) + index * block_size_}; + }; + auto first = + thrust::make_transform_iterator(thrust::make_counting_iterator(std::size_t{0}), block_gen); return free_list(first, first + num_blocks); } /** - * @brief Splits block `b` if necessary to return a pointer to memory of `size` bytes. + * @brief Splits block if necessary to return a pointer to memory of `size` bytes. * * If the block is split, the remainder is returned to the pool. * - * @param b The block to allocate from. + * @param block The block to allocate from. * @param size The size in bytes of the requested allocation. * @param stream_event The stream and associated event on which the allocation will be used. * @return A pair comprising the allocated pointer and any unallocated remainder of the input * block. */ - split_block allocate_from_block(block_type const& b, std::size_t size) + split_block allocate_from_block(block_type const& block, std::size_t size) { - return {b, block_type{nullptr}}; + return {block, block_type{nullptr}}; } /** - * @brief Finds, frees and returns the block associated with pointer `p`. + * @brief Finds, frees and returns the block associated with pointer. * - * @param p The pointer to the memory to free. + * @param ptr The pointer to the memory to free. * @param size The size of the memory to free. Must be equal to the original allocation size. * @param stream The stream-event pair for the stream on which the memory was last used. * @return The (now freed) block associated with `p`. The caller is expected to return the block * to the pool. 
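As a concrete illustration of the fixed-block behavior documented in `free_block` below, a hypothetical usage sketch; it assumes the class's existing `(upstream, block_size)` constructor, which this hunk does not show.

#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/fixed_size_memory_resource.hpp>

void fixed_size_example()
{
  rmm::mr::cuda_memory_resource upstream;
  // Every block handed out by this resource is exactly 1 MiB; larger requests are rejected.
  rmm::mr::fixed_size_memory_resource<rmm::mr::cuda_memory_resource> mr{&upstream, 1 << 20};

  void* ptr = mr.allocate(256);  // still consumes one full 1 MiB block
  mr.deallocate(ptr, 256);
}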
*/ - block_type free_block(void* p, std::size_t size) noexcept + block_type free_block(void* ptr, std::size_t size) noexcept { // Deallocating a fixed-size block just inserts it in the free list, which is // handled by the parent class RMM_LOGGING_ASSERT(rmm::detail::align_up(size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT) <= block_size_); - return block_type{p}; + return block_type{ptr}; } /** @@ -213,7 +215,8 @@ class fixed_size_memory_resource * @param stream the stream being executed on * @return std::pair with available and free memory for resource */ - std::pair do_get_mem_info(cuda_stream_view stream) const override + [[nodiscard]] std::pair do_get_mem_info( + cuda_stream_view stream) const override { return std::make_pair(0, 0); } @@ -226,8 +229,9 @@ class fixed_size_memory_resource { lock_guard lock(this->get_mutex()); - for (auto b : upstream_blocks_) - upstream_mr_->deallocate(b.pointer(), upstream_chunk_size_); + for (auto block : upstream_blocks_) { + upstream_mr_->deallocate(block.pointer(), upstream_chunk_size_); + } upstream_blocks_.clear(); } @@ -235,15 +239,14 @@ class fixed_size_memory_resource { lock_guard lock(this->get_mutex()); - std::size_t free, total; - std::tie(free, total) = upstream_mr_->get_mem_info(0); + auto const [free, total] = upstream_mr_->get_mem_info(0); std::cout << "GPU free memory: " << free << " total: " << total << "\n"; std::cout << "upstream_blocks: " << upstream_blocks_.size() << "\n"; std::size_t upstream_total{0}; - for (auto h : upstream_blocks_) { - h.print(); + for (auto blocks : upstream_blocks_) { + blocks.print(); upstream_total += upstream_chunk_size_; } std::cout << "total upstream: " << upstream_total << " B\n"; @@ -265,6 +268,7 @@ class fixed_size_memory_resource : std::make_pair(block_size_, blocks.size() * block_size_); } + private: Upstream* upstream_mr_; // The resource from which to allocate new blocks std::size_t const block_size_; // size of blocks this MR allocates @@ -274,5 +278,4 @@ class fixed_size_memory_resource std::vector upstream_blocks_; }; -} // namespace mr -} // namespace rmm +} // namespace rmm::mr From d23681035934ef8d3815a840546dde00bd0b66e7 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 11:15:51 +1000 Subject: [PATCH 37/72] tidy limiting_resource_adaptor --- .../mr/device/limiting_resource_adaptor.hpp | 79 +++++++++---------- 1 file changed, 36 insertions(+), 43 deletions(-) diff --git a/include/rmm/mr/device/limiting_resource_adaptor.hpp b/include/rmm/mr/device/limiting_resource_adaptor.hpp index 5002962d5..810228715 100644 --- a/include/rmm/mr/device/limiting_resource_adaptor.hpp +++ b/include/rmm/mr/device/limiting_resource_adaptor.hpp @@ -21,8 +21,7 @@ #include -namespace rmm { -namespace mr { +namespace rmm::mr { /** * @brief Resource that uses `Upstream` to allocate memory and limits the total * allocations possible. @@ -47,24 +46,23 @@ class limiting_resource_adaptor final : public device_memory_resource { * @param upstream The resource used for allocating/deallocating device memory * @param allocation_limit Maximum memory allowed for this allocator. 
*/ - limiting_resource_adaptor( - Upstream* upstream, - std::size_t allocation_limit, - std::size_t allocation_alignment = rmm::detail::CUDA_ALLOCATION_ALIGNMENT) + limiting_resource_adaptor(Upstream* upstream, + std::size_t allocation_limit, + std::size_t alignment = rmm::detail::CUDA_ALLOCATION_ALIGNMENT) : allocation_limit_{allocation_limit}, allocated_bytes_(0), - allocation_alignment_(allocation_alignment), + alignment_(alignment), upstream_{upstream} { RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); } limiting_resource_adaptor() = delete; - ~limiting_resource_adaptor() = default; + ~limiting_resource_adaptor() override = default; limiting_resource_adaptor(limiting_resource_adaptor const&) = delete; - limiting_resource_adaptor(limiting_resource_adaptor&&) = default; limiting_resource_adaptor& operator=(limiting_resource_adaptor const&) = delete; - limiting_resource_adaptor& operator=(limiting_resource_adaptor&&) = default; + limiting_resource_adaptor(limiting_resource_adaptor&&) noexcept = default; + limiting_resource_adaptor& operator=(limiting_resource_adaptor&&) noexcept = default; /** * @brief Return pointer to the upstream resource. @@ -79,14 +77,17 @@ class limiting_resource_adaptor final : public device_memory_resource { * @return true The upstream resource supports streams * @return false The upstream resource does not support streams. */ - bool supports_streams() const noexcept override { return upstream_->supports_streams(); } + [[nodiscard]] bool supports_streams() const noexcept override + { + return upstream_->supports_streams(); + } /** * @brief Query whether the resource supports the get_mem_info API. * * @return bool true if the upstream resource supports get_mem_info, false otherwise. */ - bool supports_get_mem_info() const noexcept override + [[nodiscard]] bool supports_get_mem_info() const noexcept override { return upstream_->supports_get_mem_info(); } @@ -100,7 +101,7 @@ class limiting_resource_adaptor final : public device_memory_resource { * @return std::size_t number of bytes that have been allocated through this * allocator. 
*/ - std::size_t get_allocated_bytes() const { return allocated_bytes_; } + [[nodiscard]] std::size_t get_allocated_bytes() const { return allocated_bytes_; } /** * @brief Query the maximum number of bytes that this allocator is allowed @@ -109,7 +110,7 @@ class limiting_resource_adaptor final : public device_memory_resource { * * @return std::size_t max number of bytes allowed for this allocator */ - std::size_t get_allocation_limit() const { return allocation_limit_; } + [[nodiscard]] std::size_t get_allocation_limit() const { return allocation_limit_; } private: /** @@ -127,32 +128,30 @@ class limiting_resource_adaptor final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - void* p = nullptr; + std::size_t proposed_size = rmm::detail::align_up(bytes, alignment_); + RMM_EXPECTS(proposed_size + allocated_bytes_ <= allocation_limit_, + rmm::bad_alloc, + "Exceeded memory limit"); - std::size_t proposed_size = rmm::detail::align_up(bytes, allocation_alignment_); - if (proposed_size + allocated_bytes_ <= allocation_limit_) { - p = upstream_->allocate(bytes, stream); - allocated_bytes_ += proposed_size; - } else { - throw rmm::bad_alloc{"Exceeded memory limit"}; - } + auto* const ptr = upstream_->allocate(bytes, stream); + allocated_bytes_ += proposed_size; - return p; + return ptr; } /** - * @brief Free allocation of size `bytes` pointed to by `p` + * @brief Free allocation of size `bytes` pointed to by `ptr` * * @throws Nothing. * - * @param p Pointer to be deallocated + * @param ptr Pointer to be deallocated * @param bytes Size of the allocation * @param stream Stream on which to perform the deallocation */ - void do_deallocate(void* p, std::size_t bytes, cuda_stream_view stream) override + void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - std::size_t allocated_size = rmm::detail::align_up(bytes, allocation_alignment_); - upstream_->deallocate(p, bytes, stream); + std::size_t allocated_size = rmm::detail::align_up(bytes, alignment_); + upstream_->deallocate(ptr, bytes, stream); allocated_bytes_ -= allocated_size; } @@ -165,18 +164,12 @@ class limiting_resource_adaptor final : public device_memory_resource { * @return true If the two resources are equivalent * @return false If the two resources are not equal */ - bool do_is_equal(device_memory_resource const& other) const noexcept override + [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override { - if (this == &other) - return true; - else { - limiting_resource_adaptor const* cast = - dynamic_cast const*>(&other); - if (cast != nullptr) - return upstream_->is_equal(*cast->get_upstream()); - else - return upstream_->is_equal(other); - } + if (this == &other) { return true; } + auto const* cast = dynamic_cast const*>(&other); + if (cast != nullptr) { return upstream_->is_equal(*cast->get_upstream()); } + return upstream_->is_equal(other); } /** @@ -187,7 +180,8 @@ class limiting_resource_adaptor final : public device_memory_resource { * @param stream Stream on which to get the mem info. 
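A short sketch of the adaptor in use (not part of the diff); the limit and allocation sizes are arbitrary.

#include <cstddef>
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/limiting_resource_adaptor.hpp>

void limiting_example()
{
  rmm::mr::cuda_memory_resource upstream;
  // Cap the adaptor at 1 GiB; an allocation that would push the aligned total past the cap
  // throws rmm::bad_alloc instead of reaching the upstream resource.
  rmm::mr::limiting_resource_adaptor<rmm::mr::cuda_memory_resource> mr{&upstream,
                                                                       std::size_t{1} << 30};

  void* ptr = mr.allocate(1 << 20);
  std::size_t used = mr.get_allocated_bytes();  // 1 MiB, rounded up to the adaptor's alignment
  mr.deallocate(ptr, 1 << 20);
  (void)used;
}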
* @return std::pair contaiing free_size and total_size of memory */ - std::pair do_get_mem_info(cuda_stream_view stream) const override + [[nodiscard]] std::pair do_get_mem_info( + cuda_stream_view stream) const override { return {allocation_limit_ - allocated_bytes_, allocation_limit_}; } @@ -199,7 +193,7 @@ class limiting_resource_adaptor final : public device_memory_resource { std::atomic allocated_bytes_; // todo: should be some way to ask the upstream... - std::size_t allocation_alignment_; + std::size_t alignment_; Upstream* upstream_; ///< The upstream resource used for satisfying ///< allocation requests @@ -220,5 +214,4 @@ limiting_resource_adaptor make_limiting_adaptor(Upstream* upstream, return limiting_resource_adaptor{upstream, allocation_limit}; } -} // namespace mr -} // namespace rmm +} // namespace rmm::mr From 5c21ec4a11a441f6041b7bc644d1e51a5927eb79 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 11:20:35 +1000 Subject: [PATCH 38/72] tidy logging_resource_adaptr --- .../mr/device/logging_resource_adaptor.hpp | 61 +++++++++---------- 1 file changed, 30 insertions(+), 31 deletions(-) diff --git a/include/rmm/mr/device/logging_resource_adaptor.hpp b/include/rmm/mr/device/logging_resource_adaptor.hpp index 1148afa88..0bb707a6c 100644 --- a/include/rmm/mr/device/logging_resource_adaptor.hpp +++ b/include/rmm/mr/device/logging_resource_adaptor.hpp @@ -28,8 +28,7 @@ #include #include -namespace rmm { -namespace mr { +namespace rmm::mr { /** * @brief Resource that uses `Upstream` to allocate memory and logs information * about the requested allocation/deallocations. @@ -113,11 +112,11 @@ class logging_resource_adaptor final : public device_memory_resource { } logging_resource_adaptor() = delete; - ~logging_resource_adaptor() = default; + ~logging_resource_adaptor() override = default; logging_resource_adaptor(logging_resource_adaptor const&) = delete; - logging_resource_adaptor(logging_resource_adaptor&&) = default; logging_resource_adaptor& operator=(logging_resource_adaptor const&) = delete; - logging_resource_adaptor& operator=(logging_resource_adaptor&&) = default; + logging_resource_adaptor(logging_resource_adaptor&&) noexcept = default; + logging_resource_adaptor& operator=(logging_resource_adaptor&&) noexcept = default; /** * @brief Return pointer to the upstream resource. @@ -132,14 +131,17 @@ class logging_resource_adaptor final : public device_memory_resource { * @return true The upstream resource supports streams * @return false The upstream resource does not support streams. */ - bool supports_streams() const noexcept override { return upstream_->supports_streams(); } + [[nodiscard]] bool supports_streams() const noexcept override + { + return upstream_->supports_streams(); + } /** * @brief Query whether the resource supports the get_mem_info API. * * @return bool true if the upstream resource supports get_mem_info, false otherwise. 
*/ - bool supports_get_mem_info() const noexcept override + [[nodiscard]] bool supports_get_mem_info() const noexcept override { return upstream_->supports_get_mem_info(); } @@ -154,7 +156,10 @@ class logging_resource_adaptor final : public device_memory_resource { * * @return CSV formatted header string of column names */ - std::string header() const { return std::string{"Thread,Time,Action,Pointer,Size,Stream"}; } + [[nodiscard]] std::string header() const + { + return std::string{"Thread,Time,Action,Pointer,Size,Stream"}; + } private: // make_logging_adaptor needs access to private get_default_filename @@ -172,7 +177,7 @@ class logging_resource_adaptor final : public device_memory_resource { */ static std::string get_default_filename() { - auto filename = std::getenv("RMM_LOG_FILE"); + auto* filename = std::getenv("RMM_LOG_FILE"); RMM_EXPECTS(filename != nullptr, "RMM logging requested without an explicit file name, but RMM_LOG_FILE is unset"); return std::string{filename}; @@ -210,13 +215,13 @@ class logging_resource_adaptor final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - auto const p = upstream_->allocate(bytes, stream); - logger_->info("allocate,{},{},{}", p, bytes, fmt::ptr(stream.value())); - return p; + auto const ptr = upstream_->allocate(bytes, stream); + logger_->info("allocate,{},{},{}", ptr, bytes, fmt::ptr(stream.value())); + return ptr; } /** - * @brief Free allocation of size `bytes` pointed to by `p` and log the + * @brief Free allocation of size `bytes` pointed to by `ptr` and log the * deallocation. * * Every invocation of `logging_resource_adaptor::do_deallocate` will write @@ -227,14 +232,14 @@ class logging_resource_adaptor final : public device_memory_resource { * * @throws Nothing. * - * @param p Pointer to be deallocated + * @param ptr Pointer to be deallocated * @param bytes Size of the allocation * @param stream Stream on which to perform the deallocation */ - void do_deallocate(void* p, std::size_t bytes, cuda_stream_view stream) override + void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - logger_->info("free,{},{},{}", p, bytes, fmt::ptr(stream.value())); - upstream_->deallocate(p, bytes, stream); + logger_->info("free,{},{},{}", ptr, bytes, fmt::ptr(stream.value())); + upstream_->deallocate(ptr, bytes, stream); } /** @@ -246,18 +251,12 @@ class logging_resource_adaptor final : public device_memory_resource { * @return true If the two resources are equivalent * @return false If the two resources are not equal */ - bool do_is_equal(device_memory_resource const& other) const noexcept override + [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override { - if (this == &other) - return true; - else { - logging_resource_adaptor const* cast = - dynamic_cast const*>(&other); - if (cast != nullptr) - return upstream_->is_equal(*cast->get_upstream()); - else - return upstream_->is_equal(other); - } + if (this == &other) { return true; } + auto const* cast = dynamic_cast const*>(&other); + if (cast != nullptr) { return upstream_->is_equal(*cast->get_upstream()); } + return upstream_->is_equal(other); } /** @@ -268,7 +267,8 @@ class logging_resource_adaptor final : public device_memory_resource { * @param stream Stream on which to get the mem info. 
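A hypothetical sketch of wiring up the adaptor (not part of the diff); it assumes the existing filename overload of `make_logging_adaptor`, and the file name is an arbitrary example.

#include <string>
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/logging_resource_adaptor.hpp>

void logging_example()
{
  rmm::mr::cuda_memory_resource upstream;
  // Every allocate/free is appended as a CSV row: Thread,Time,Action,Pointer,Size,Stream.
  auto log_mr = rmm::mr::make_logging_adaptor(&upstream, std::string{"rmm_log.csv"});

  void* ptr = log_mr.allocate(2048);
  log_mr.deallocate(ptr, 2048);
}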
* @return std::pair contaiing free_size and total_size of memory */ - std::pair do_get_mem_info(cuda_stream_view stream) const override + [[nodiscard]] std::pair do_get_mem_info( + cuda_stream_view stream) const override { return upstream_->get_mem_info(stream); } @@ -313,5 +313,4 @@ logging_resource_adaptor make_logging_adaptor(Upstream* upstream, return logging_resource_adaptor{upstream, stream, auto_flush}; } -} // namespace mr -} // namespace rmm +} // namespace rmm::mr From afc9e7642252f7d162eee6f9ac0e4e612906cd67 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 11:22:55 +1000 Subject: [PATCH 39/72] tidy managed_mr --- .../rmm/mr/device/managed_memory_resource.hpp | 35 +++++++++---------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/include/rmm/mr/device/managed_memory_resource.hpp b/include/rmm/mr/device/managed_memory_resource.hpp index ebce40bf5..3ed44a528 100644 --- a/include/rmm/mr/device/managed_memory_resource.hpp +++ b/include/rmm/mr/device/managed_memory_resource.hpp @@ -22,8 +22,7 @@ #include -namespace rmm { -namespace mr { +namespace rmm::mr { /** * @brief `device_memory_resource` derived class that uses * cudaMallocManaged/Free for allocation/deallocation. @@ -31,7 +30,7 @@ namespace mr { class managed_memory_resource final : public device_memory_resource { public: managed_memory_resource() = default; - ~managed_memory_resource() = default; + ~managed_memory_resource() override = default; managed_memory_resource(managed_memory_resource const&) = default; managed_memory_resource(managed_memory_resource&&) = default; managed_memory_resource& operator=(managed_memory_resource const&) = default; @@ -43,18 +42,18 @@ class managed_memory_resource final : public device_memory_resource { * * @returns false */ - bool supports_streams() const noexcept override { return false; } + [[nodiscard]] bool supports_streams() const noexcept override { return false; } /** * @brief Query whether the resource supports the get_mem_info API. * * @return bool true if the resource supports get_mem_info, false otherwise. */ - bool supports_get_mem_info() const noexcept override { return true; } + [[nodiscard]] bool supports_get_mem_info() const noexcept override { return true; } private: /** - * @brief Allocates memory of size at least \p bytes using cudaMallocManaged. + * @brief Allocates memory of size at least `bytes` using cudaMallocManaged. * * The returned pointer has at least 256B alignment. * @@ -65,29 +64,29 @@ class managed_memory_resource final : public device_memory_resource { * @param bytes The size, in bytes, of the allocation * @return void* Pointer to the newly allocated memory */ - void* do_allocate(std::size_t bytes, cuda_stream_view) override + void* do_allocate(std::size_t bytes, cuda_stream_view /*stream*/) override { // FIXME: Unlike cudaMalloc, cudaMallocManaged will throw an error for 0 // size allocations. if (bytes == 0) { return nullptr; } - void* p{nullptr}; - RMM_CUDA_TRY(cudaMallocManaged(&p, bytes), rmm::bad_alloc); - return p; + void* ptr{nullptr}; + RMM_CUDA_TRY(cudaMallocManaged(&ptr, bytes), rmm::bad_alloc); + return ptr; } /** - * @brief Deallocate memory pointed to by \p p. + * @brief Deallocate memory pointed to by `ptr`. * * @note Stream argument is ignored. * * @throws Nothing. 
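The managed resource needs no configuration; a minimal sketch with arbitrary sizes.

#include <rmm/mr/device/managed_memory_resource.hpp>

void managed_example()
{
  rmm::mr::managed_memory_resource mr;
  // cudaMallocManaged memory is addressable from both host and device code.
  void* ptr = mr.allocate(512);
  mr.deallocate(ptr, 512);
}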
* - * @param p Pointer to be deallocated + * @param ptr Pointer to be deallocated */ - void do_deallocate(void* p, std::size_t, cuda_stream_view) override + void do_deallocate(void* ptr, std::size_t /*bytes*/, cuda_stream_view /*stream*/) override { - RMM_ASSERT_CUDA_SUCCESS(cudaFree(p)); + RMM_ASSERT_CUDA_SUCCESS(cudaFree(ptr)); } /** @@ -102,7 +101,7 @@ class managed_memory_resource final : public device_memory_resource { * @return true If the two resources are equivalent * @return false If the two resources are not equal */ - bool do_is_equal(device_memory_resource const& other) const noexcept override + [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override { return dynamic_cast(&other) != nullptr; } @@ -115,7 +114,8 @@ class managed_memory_resource final : public device_memory_resource { * @param stream to execute on * @return std::pair contaiing free_size and total_size of memory */ - std::pair do_get_mem_info(cuda_stream_view stream) const override + [[nodiscard]] std::pair do_get_mem_info( + cuda_stream_view stream) const override { std::size_t free_size{}; std::size_t total_size{}; @@ -124,5 +124,4 @@ class managed_memory_resource final : public device_memory_resource { } }; -} // namespace mr -} // namespace rmm +} // namespace rmm::mr From 86fcffd721298d43ae9abd5ea9733290ed0d289e Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 11:36:37 +1000 Subject: [PATCH 40/72] tidy owning_wrapper and per_device_resource --- include/rmm/mr/device/owning_wrapper.hpp | 54 ++++++++++--------- include/rmm/mr/device/per_device_resource.hpp | 24 ++++----- 2 files changed, 39 insertions(+), 39 deletions(-) diff --git a/include/rmm/mr/device/owning_wrapper.hpp b/include/rmm/mr/device/owning_wrapper.hpp index 6abe950b0..7bcc3b2fa 100644 --- a/include/rmm/mr/device/owning_wrapper.hpp +++ b/include/rmm/mr/device/owning_wrapper.hpp @@ -22,21 +22,23 @@ #include #include -namespace rmm { -namespace mr { +namespace rmm::mr { namespace detail { /// Converts a tuple into a parameter pack template -auto make_resource_impl(UpstreamTuple const& t, std::index_sequence, Args&&... args) +auto make_resource_impl(UpstreamTuple const& upstreams, + std::index_sequence /*indices*/, + Args&&... args) { - return std::make_unique(std::get(t).get()..., std::forward(args)...); + return std::make_unique(std::get(upstreams).get()..., + std::forward(args)...); } template -auto make_resource(std::tuple...> const& t, Args&&... args) +auto make_resource(std::tuple...> const& upstreams, Args&&... args) { return make_resource_impl( - t, std::index_sequence_for{}, std::forward(args)...); + upstreams, std::index_sequence_for{}, std::forward(args)...); } } // namespace detail @@ -128,14 +130,20 @@ class owning_wrapper : public device_memory_resource { /** * @copydoc rmm::mr::device_memory_resource::supports_streams() */ - bool supports_streams() const noexcept override { return wrapped().supports_streams(); } + [[nodiscard]] bool supports_streams() const noexcept override + { + return wrapped().supports_streams(); + } /** * @brief Query whether the resource supports the get_mem_info API. * * @return true if the upstream resource supports get_mem_info, false otherwise. 
*/ - bool supports_get_mem_info() const noexcept override { return wrapped().supports_get_mem_info(); } + [[nodiscard]] bool supports_get_mem_info() const noexcept override + { + return wrapped().supports_get_mem_info(); + } private: /** @@ -156,17 +164,17 @@ class owning_wrapper : public device_memory_resource { /** * @brief Returns an allocation to the wrapped resource. * - * `p` must have been returned from a prior call to `do_allocate(bytes)`. + * `ptr` must have been returned from a prior call to `do_allocate(bytes)`. * * @throws Nothing. * - * @param p Pointer to the allocation to free. + * @param ptr Pointer to the allocation to free. * @param bytes Size of the allocation * @param stream Stream on which to deallocate the memory */ - void do_deallocate(void* p, std::size_t bytes, cuda_stream_view stream) override + void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - wrapped().deallocate(p, bytes, stream); + wrapped().deallocate(ptr, bytes, stream); } /** @@ -180,18 +188,12 @@ class owning_wrapper : public device_memory_resource { * @return true If the two resources are equal * @return false If the two resources are not equal */ - bool do_is_equal(device_memory_resource const& other) const noexcept override + [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override { - if (this == &other) { - return true; - } else { - auto casted = dynamic_cast const*>(&other); - if (nullptr != casted) { - return wrapped().is_equal(casted->wrapped()); - } else { - return wrapped().is_equal(other); - } - } + if (this == &other) { return true; } + auto casted = dynamic_cast const*>(&other); + if (nullptr != casted) { return wrapped().is_equal(casted->wrapped()); } + return wrapped().is_equal(other); } /** @@ -202,7 +204,8 @@ class owning_wrapper : public device_memory_resource { * @param stream Stream on which to get the mem info. * @return std::pair contaiing free_size and total_size of memory */ - std::pair do_get_mem_info(cuda_stream_view stream) const override + [[nodiscard]] std::pair do_get_mem_info( + cuda_stream_view stream) const override { return wrapped().get_mem_info(stream); } @@ -272,5 +275,4 @@ auto make_owning_wrapper(std::shared_ptr upstream, Args&&... args) std::forward(args)...); } -} // namespace mr -} // namespace rmm +} // namespace rmm::mr diff --git a/include/rmm/mr/device/per_device_resource.hpp b/include/rmm/mr/device/per_device_resource.hpp index 0f10b7f53..4ddbd874a 100644 --- a/include/rmm/mr/device/per_device_resource.hpp +++ b/include/rmm/mr/device/per_device_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -71,9 +71,7 @@ * @endcode */ -namespace rmm { - -namespace mr { +namespace rmm::mr { namespace detail { @@ -126,13 +124,14 @@ RMM_EXPORT inline auto& get_map() * @param id The id of the target device * @return Pointer to the current `device_memory_resource` for device `id` */ -inline device_memory_resource* get_per_device_resource(cuda_device_id id) +inline device_memory_resource* get_per_device_resource(cuda_device_id device_id) { std::lock_guard lock{detail::map_lock()}; auto& map = detail::get_map(); // If a resource was never set for `id`, set to the initial resource - auto const found = map.find(id.value()); - return (found == map.end()) ? 
(map[id.value()] = detail::initial_resource()) : found->second; + auto const found = map.find(device_id.value()); + return (found == map.end()) ? (map[device_id.value()] = detail::initial_resource()) + : found->second; } /** @@ -162,15 +161,15 @@ inline device_memory_resource* get_per_device_resource(cuda_device_id id) * for `id` * @return Pointer to the previous memory resource for `id` */ -inline device_memory_resource* set_per_device_resource(cuda_device_id id, +inline device_memory_resource* set_per_device_resource(cuda_device_id device_id, device_memory_resource* new_mr) { std::lock_guard lock{detail::map_lock()}; auto& map = detail::get_map(); - auto const old_itr = map.find(id.value()); + auto const old_itr = map.find(device_id.value()); // If a resource didn't previously exist for `id`, return pointer to initial_resource - auto old_mr = (old_itr == map.end()) ? detail::initial_resource() : old_itr->second; - map[id.value()] = (new_mr == nullptr) ? detail::initial_resource() : new_mr; + auto* old_mr = (old_itr == map.end()) ? detail::initial_resource() : old_itr->second; + map[device_id.value()] = (new_mr == nullptr) ? detail::initial_resource() : new_mr; return old_mr; } @@ -228,5 +227,4 @@ inline device_memory_resource* set_current_device_resource(device_memory_resourc { return set_per_device_resource(rmm::detail::current_device(), new_mr); } -} // namespace mr -} // namespace rmm +} // namespace rmm::mr From b808afd64e2998f170083ab8e8d73de9b2613058 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 11:38:35 +1000 Subject: [PATCH 41/72] tidy polymorphic_allocator --- .../rmm/mr/device/polymorphic_allocator.hpp | 76 +++++++------------ 1 file changed, 28 insertions(+), 48 deletions(-) diff --git a/include/rmm/mr/device/polymorphic_allocator.hpp b/include/rmm/mr/device/polymorphic_allocator.hpp index 4f97cf568..643d1b6fb 100644 --- a/include/rmm/mr/device/polymorphic_allocator.hpp +++ b/include/rmm/mr/device/polymorphic_allocator.hpp @@ -24,8 +24,7 @@ #include #include -namespace rmm { -namespace mr { +namespace rmm::mr { /** * @brief A stream ordered Allocator using a `rmm::mr::device_memory_resource` to satisfy @@ -45,7 +44,6 @@ template class polymorphic_allocator { public: using value_type = T; - /** * @brief Construct a `polymorphic_allocator` using the return value of * `rmm::mr::get_current_device_resource()` as the underlying memory resource. @@ -53,15 +51,6 @@ class polymorphic_allocator { */ polymorphic_allocator() = default; - /** - * @brief Construct a `polymorphic_allocator` using `other.resource()` as the underlying memory - * resource. - * - * @param other The `polymorphic_resource` whose `resource()` will be used as the underlying - * resource of the new `polymorphic_allocator`. - */ - polymorphic_allocator(polymorphic_allocator const& other) = default; - /** * @brief Construct a `polymorphic_allocator` using the provided memory resource. * @@ -84,30 +73,30 @@ class polymorphic_allocator { } /** - * @brief Allocates storage for `n` objects of type `T` using the underlying memory resource. + * @brief Allocates storage for `num` objects of type `T` using the underlying memory resource. 
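A sketch of the per-device API above in use, assuming a pool over the stock CUDA resource; the resources are static so they outlive their registration as the device default.

#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>

void per_device_example()
{
  static rmm::mr::cuda_memory_resource upstream;
  static rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> pool{&upstream};

  // Make the pool the default resource for the active device; keep the previous one to restore.
  auto* old_mr = rmm::mr::set_current_device_resource(&pool);

  void* ptr = rmm::mr::get_current_device_resource()->allocate(1024);
  rmm::mr::get_current_device_resource()->deallocate(ptr, 1024);

  rmm::mr::set_current_device_resource(old_mr);
}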
* - * @param n The number of objects to allocate storage for + * @param num The number of objects to allocate storage for * @param stream The stream on which to perform the allocation * @return Pointer to the allocated storage */ - value_type* allocate(std::size_t n, cuda_stream_view stream) + value_type* allocate(std::size_t num, cuda_stream_view stream) { - return static_cast(resource()->allocate(n * sizeof(T), stream)); + return static_cast(resource()->allocate(num * sizeof(T), stream)); } /** - * @brief Deallocates storage pointed to by `p`. + * @brief Deallocates storage pointed to by `ptr`. * - * `p` must have been allocated from a `rmm::mr::device_memory_resource` `r` that compares equal + * `ptr` must have been allocated from a `rmm::mr::device_memory_resource` `r` that compares equal * to `*resource()` using `r.allocate(n * sizeof(T))`. * - * @param p Pointer to memory to deallocate - * @param n Number of objects originally allocated + * @param ptr Pointer to memory to deallocate + * @param num Number of objects originally allocated * @param stream Stream on which to perform the deallocation */ - void deallocate(value_type* p, std::size_t n, cuda_stream_view stream) + void deallocate(value_type* ptr, std::size_t num, cuda_stream_view stream) { - resource()->deallocate(p, n * sizeof(T), stream); + resource()->deallocate(ptr, num * sizeof(T), stream); } /** @@ -115,7 +104,7 @@ class polymorphic_allocator { * * @return Pointer to the underlying resource. */ - device_memory_resource* resource() const noexcept { return mr_; } + [[nodiscard]] device_memory_resource* resource() const noexcept { return mr_; } private: device_memory_resource* mr_{ @@ -169,22 +158,14 @@ class stream_allocator_adaptor { * @note: The `stream` must not be destroyed before the `stream_allocator_adaptor`, otherwise * behavior is undefined. * - * @param a The stream ordered allocator to use as the underlying allocator + * @param allocator The stream ordered allocator to use as the underlying allocator * @param stream The stream used with the underlying allocator */ - stream_allocator_adaptor(Allocator const& a, cuda_stream_view stream) : alloc_{a}, stream_{stream} + stream_allocator_adaptor(Allocator const& allocator, cuda_stream_view stream) + : alloc_{allocator}, stream_{stream} { } - /** - * @brief Construct a `stream_allocator_adaptor` using `other.underlying_allocator()` and - * `other.stream()` as the underlying allocator and stream. - * - * @param other The other `stream_allocator_adaptor` whose underlying allocator and stream will be - * copied - */ - stream_allocator_adaptor(stream_allocator_adaptor const& other) = default; - /** * @brief Construct a `stream_allocator_adaptor` using `other.underlying_allocator()` and * `other.stream()` as the underlying allocator and stream. @@ -211,30 +192,30 @@ class stream_allocator_adaptor { }; /** - * @brief Allocates storage for `n` objects of type `T` using the underlying allocator on + * @brief Allocates storage for `num` objects of type `T` using the underlying allocator on * `stream()`. * - * @param n The number of objects to allocate storage for + * @param num The number of objects to allocate storage for * @return Pointer to the allocated storage */ - value_type* allocate(std::size_t n) { return alloc_.allocate(n, stream()); } + value_type* allocate(std::size_t num) { return alloc_.allocate(num, stream()); } /** - * @brief Deallocates storage pointed to by `p` using the underlying allocator on `stream()`. 
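A brief sketch tying the two classes above together (not part of the diff); the element counts and the stream are arbitrary.

#include <rmm/cuda_stream.hpp>
#include <rmm/mr/device/polymorphic_allocator.hpp>

void polymorphic_example()
{
  rmm::cuda_stream stream;

  rmm::mr::polymorphic_allocator<int> alloc;  // defaults to get_current_device_resource()
  int* ptr = alloc.allocate(100, stream.view());
  alloc.deallocate(ptr, 100, stream.view());

  // Bind the stream once so the adaptor satisfies the classic stream-less Allocator interface.
  auto adapted = rmm::mr::make_stream_allocator_adaptor(alloc, stream.view());
  int* ptr2 = adapted.allocate(100);
  adapted.deallocate(ptr2, 100);
}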
+ * @brief Deallocates storage pointed to by `ptr` using the underlying allocator on `stream()`. * - * `p` must have been allocated from by an allocator `a` that compares equal to + * `ptr` must have been allocated from by an allocator `a` that compares equal to * `underlying_allocator()` using `a.allocate(n)`. * - * @param p Pointer to memory to deallocate - * @param n Number of objects originally allocated + * @param ptr Pointer to memory to deallocate + * @param num Number of objects originally allocated */ - void deallocate(value_type* p, std::size_t n) { alloc_.deallocate(p, n, stream()); } + void deallocate(value_type* ptr, std::size_t num) { alloc_.deallocate(ptr, num, stream()); } /** * @brief Returns the underlying stream on which calls to the underlying allocator are made. * */ - cuda_stream_view stream() const noexcept { return stream_; } + [[nodiscard]] cuda_stream_view stream() const noexcept { return stream_; } /** * @brief Returns the underlying stream-ordered allocator @@ -266,14 +247,13 @@ bool operator!=(stream_allocator_adaptor const& lhs, stream_allocator_adaptor * @tparam Allocator Type of the stream-ordered allocator * @param allocator The allocator to use as the underlying allocator of the * `stream_allocator_adaptor` - * @param s The stream on which the `stream_allocator_adaptor` will perform (de)allocations + * @param stream The stream on which the `stream_allocator_adaptor` will perform (de)allocations * @return A `stream_allocator_adaptor` wrapping `allocator` and `s` */ template -auto make_stream_allocator_adaptor(Allocator const& allocator, cuda_stream_view s) +auto make_stream_allocator_adaptor(Allocator const& allocator, cuda_stream_view stream) { - return stream_allocator_adaptor{allocator, s}; + return stream_allocator_adaptor{allocator, stream}; } -} // namespace mr -} // namespace rmm +} // namespace rmm::mr From 9543e3fd720246c5e3901ee97af8174430fa6a95 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 11:53:31 +1000 Subject: [PATCH 42/72] tidy pool_mr --- .../rmm/mr/device/pool_memory_resource.hpp | 118 +++++++++--------- 1 file changed, 59 insertions(+), 59 deletions(-) diff --git a/include/rmm/mr/device/pool_memory_resource.hpp b/include/rmm/mr/device/pool_memory_resource.hpp index 7a2a5b9c7..e446a9332 100644 --- a/include/rmm/mr/device/pool_memory_resource.hpp +++ b/include/rmm/mr/device/pool_memory_resource.hpp @@ -41,8 +41,7 @@ #include #include -namespace rmm { -namespace mr { +namespace rmm::mr { /** * @brief A coalescing best-fit suballocator which uses a pool of memory allocated from @@ -114,14 +113,14 @@ class pool_memory_resource final * * @returns bool true. */ - bool supports_streams() const noexcept override { return true; } + [[nodiscard]] bool supports_streams() const noexcept override { return true; } /** * @brief Query whether the resource supports the get_mem_info API. * * @return bool false */ - bool supports_get_mem_info() const noexcept override { return false; } + [[nodiscard]] bool supports_get_mem_info() const noexcept override { return false; } /** * @brief Get the upstream memory_resource object. 
@@ -145,7 +144,7 @@ class pool_memory_resource final * * @return std::size_t The maximum size of a single allocation supported by this memory resource */ - std::size_t get_maximum_allocation_size() const + [[nodiscard]] std::size_t get_maximum_allocation_size() const { return std::numeric_limits::max(); } @@ -168,12 +167,14 @@ class pool_memory_resource final block_type try_to_expand(std::size_t try_size, std::size_t min_size, cuda_stream_view stream) { while (try_size >= min_size) { - auto b = block_from_upstream(try_size, stream); - if (b.has_value()) { - current_pool_size_ += b.value().size(); - return b.value(); + auto block = block_from_upstream(try_size, stream); + if (block.has_value()) { + current_pool_size_ += block.value().size(); + return block.value(); + } + if (try_size == min_size) { + break; // only try `size` once } - if (try_size == min_size) break; // only try `size` once try_size = std::max(min_size, try_size / 2); } RMM_LOG_ERROR("[A][Stream {}][Upstream {}B][FAILURE maximum pool size exceeded]", @@ -199,15 +200,13 @@ class pool_memory_resource final { auto const try_size = [&]() { if (not initial_size.has_value()) { - std::size_t free{}, total{}; - std::tie(free, total) = (get_upstream()->supports_get_mem_info()) - ? get_upstream()->get_mem_info(cuda_stream_legacy) - : rmm::detail::available_device_memory(); + auto const [free, total] = (get_upstream()->supports_get_mem_info()) + ? get_upstream()->get_mem_info(cuda_stream_legacy) + : rmm::detail::available_device_memory(); return rmm::detail::align_up(std::min(free, total / 2), rmm::detail::CUDA_ALLOCATION_ALIGNMENT); - } else { - return initial_size.value(); } + return initial_size.value(); }(); current_pool_size_ = 0; // try_to_expand will set this if it succeeds @@ -217,8 +216,8 @@ class pool_memory_resource final "Initial pool size exceeds the maximum pool size!"); if (try_size > 0) { - auto const b = try_to_expand(try_size, try_size, cuda_stream_legacy); - this->insert_block(b, cuda_stream_legacy); + auto const block = try_to_expand(try_size, try_size, cuda_stream_legacy); + this->insert_block(block, cuda_stream_legacy); } } @@ -252,7 +251,7 @@ class pool_memory_resource final * @param size The size of the minimum allocation immediately needed * @return std::size_t The computed size to grow the pool. */ - std::size_t size_to_grow(std::size_t size) const + [[nodiscard]] std::size_t size_to_grow(std::size_t size) const { if (maximum_pool_size_.has_value()) { auto const unaligned_remaining = maximum_pool_size_.value() - pool_size(); @@ -260,8 +259,8 @@ class pool_memory_resource final rmm::detail::align_up(unaligned_remaining, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); auto const aligned_size = rmm::detail::align_up(size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); return (aligned_size <= remaining) ? 
std::max(aligned_size, remaining / 2) : 0; - } else - return std::max(size, pool_size()); + } + return std::max(size, pool_size()); }; /** @@ -275,64 +274,66 @@ class pool_memory_resource final { RMM_LOG_DEBUG("[A][Stream {}][Upstream {}B]", fmt::ptr(stream.value()), size); - if (size == 0) return {}; + if (size == 0) { return {}; } try { - void* p = upstream_mr_->allocate(size, stream); + void* ptr = upstream_mr_->allocate(size, stream); return thrust::optional{ - *upstream_blocks_.emplace(reinterpret_cast(p), size, true).first}; + *upstream_blocks_.emplace(static_cast(ptr), size, true).first}; } catch (std::exception const& e) { return thrust::nullopt; } } /** - * @brief Splits block `b` if necessary to return a pointer to memory of `size` bytes. + * @brief Splits `block` if necessary to return a pointer to memory of `size` bytes. * * If the block is split, the remainder is returned to the pool. * - * @param b The block to allocate from. + * @param block The block to allocate from. * @param size The size in bytes of the requested allocation. * @param stream_event The stream and associated event on which the allocation will be used. * @return A pair comprising the allocated pointer and any unallocated remainder of the input * block. */ - split_block allocate_from_block(block_type const& b, std::size_t size) + split_block allocate_from_block(block_type const& block, std::size_t size) { - block_type const alloc{b.pointer(), size, b.is_head()}; + block_type const alloc{block.pointer(), size, block.is_head()}; #ifdef RMM_POOL_TRACK_ALLOCATIONS allocated_blocks_.insert(alloc); #endif - auto rest = - (b.size() > size) ? block_type{b.pointer() + size, b.size() - size, false} : block_type{}; + auto rest = (block.size() > size) + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + ? block_type{block.pointer() + size, block.size() - size, false} + : block_type{}; return {alloc, rest}; } /** - * @brief Finds, frees and returns the block associated with pointer `p`. + * @brief Finds, frees and returns the block associated with pointer `ptr`. * - * @param p The pointer to the memory to free. + * @param ptr The pointer to the memory to free. * @param size The size of the memory to free. Must be equal to the original allocation size. * @param stream The stream-event pair for the stream on which the memory was last used. * @return The (now freed) block associated with `p`. The caller is expected to return the block * to the pool. 
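For reference, a hypothetical construction of the pool (not shown in these hunks); it assumes the existing constructor taking an optional initial size, here 1 GiB.

#include <cstddef>
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>

void pool_example()
{
  rmm::mr::cuda_memory_resource upstream;
  // Grab 1 GiB up front and suballocate from it; the pool grows on demand, halving the
  // requested expansion on failure as in try_to_expand above.
  rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> pool{&upstream,
                                                                    std::size_t{1} << 30};

  void* ptr = pool.allocate(1 << 20);
  pool.deallocate(ptr, 1 << 20);
}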
*/ - block_type free_block(void* p, std::size_t size) noexcept + block_type free_block(void* ptr, std::size_t size) noexcept { #ifdef RMM_POOL_TRACK_ALLOCATIONS - if (p == nullptr) return block_type{}; - auto const i = allocated_blocks_.find(static_cast(p)); - RMM_LOGGING_ASSERT(i != allocated_blocks_.end()); + if (ptr == nullptr) return block_type{}; + auto const iter = allocated_blocks_.find(static_cast(ptr)); + RMM_LOGGING_ASSERT(iter != allocated_blocks_.end()); - auto block = *i; + auto block = *iter; RMM_LOGGING_ASSERT(block.size() == rmm::detail::align_up(size, allocation_alignment)); - allocated_blocks_.erase(i); + allocated_blocks_.erase(iter); return block; #else - auto const i = upstream_blocks_.find(static_cast(p)); - return block_type{static_cast(p), size, (i != upstream_blocks_.end())}; + auto const iter = upstream_blocks_.find(static_cast(ptr)); + return block_type{static_cast(ptr), size, (iter != upstream_blocks_.end())}; #endif } @@ -343,7 +344,7 @@ class pool_memory_resource final * * @return std::size_t The total size of the currently allocated pool. */ - std::size_t pool_size() const noexcept { return current_pool_size_; } + [[nodiscard]] std::size_t pool_size() const noexcept { return current_pool_size_; } /** * @brief Free all memory allocated from the upstream memory_resource. @@ -353,8 +354,9 @@ class pool_memory_resource final { lock_guard lock(this->get_mutex()); - for (auto b : upstream_blocks_) - upstream_mr_->deallocate(b.pointer(), b.size()); + for (auto block : upstream_blocks_) { + upstream_mr_->deallocate(block.pointer(), block.size()); + } upstream_blocks_.clear(); #ifdef RMM_POOL_TRACK_ALLOCATIONS allocated_blocks_.clear(); @@ -373,23 +375,22 @@ class pool_memory_resource final { lock_guard lock(this->get_mutex()); - std::size_t free, total; - std::tie(free, total) = upstream_mr_->get_mem_info(0); + auto const [free, total] = upstream_mr_->get_mem_info(0); std::cout << "GPU free memory: " << free << " total: " << total << "\n"; std::cout << "upstream_blocks: " << upstream_blocks_.size() << "\n"; std::size_t upstream_total{0}; - for (auto h : upstream_blocks_) { - h.print(); - upstream_total += h.size(); + for (auto blocks : upstream_blocks_) { + blocks.print(); + upstream_total += blocks.size(); } std::cout << "total upstream: " << upstream_total << " B\n"; #ifdef RMM_POOL_TRACK_ALLOCATIONS std::cout << "allocated_blocks: " << allocated_blocks_.size() << "\n"; - for (auto b : allocated_blocks_) - b.print(); + for (auto block : allocated_blocks_) + block.print(); #endif this->print_free_blocks(); @@ -407,9 +408,9 @@ class pool_memory_resource final { std::size_t largest{}; std::size_t total{}; - std::for_each(blocks.cbegin(), blocks.cend(), [&largest, &total](auto const& b) { - total += b.size(); - largest = std::max(largest, b.size()); + std::for_each(blocks.cbegin(), blocks.cend(), [&largest, &total](auto const& block) { + total += block.size(); + largest = std::max(largest, block.size()); }); return {largest, total}; } @@ -422,14 +423,14 @@ class pool_memory_resource final * @param stream to execute on * @return std::pair contaiing free_size and total_size of memory */ - std::pair do_get_mem_info(cuda_stream_view stream) const override + [[nodiscard]] std::pair do_get_mem_info( + cuda_stream_view stream) const override { - std::size_t free_size{}; - std::size_t total_size{}; // TODO implement this - return std::make_pair(free_size, total_size); + return {0, 0}; } + private: Upstream* upstream_mr_; // The "heap" to allocate the pool from std::size_t 
current_pool_size_{}; thrust::optional maximum_pool_size_{}; @@ -442,5 +443,4 @@ class pool_memory_resource final std::set> upstream_blocks_; }; // namespace mr -} // namespace mr -} // namespace rmm +} // namespace rmm::mr From e8e1eae80f7e77f96dd4d1552d13fb7e0265d430 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 11:56:22 +1000 Subject: [PATCH 43/72] tidy statistics_resource_adaptor --- .../mr/device/statistics_resource_adaptor.hpp | 45 +++++++++---------- 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/include/rmm/mr/device/statistics_resource_adaptor.hpp b/include/rmm/mr/device/statistics_resource_adaptor.hpp index bcc0bf10b..298ac8bc1 100644 --- a/include/rmm/mr/device/statistics_resource_adaptor.hpp +++ b/include/rmm/mr/device/statistics_resource_adaptor.hpp @@ -21,8 +21,7 @@ #include #include -namespace rmm { -namespace mr { +namespace rmm::mr { /** * @brief Resource that uses `Upstream` to allocate memory and tracks statistics * on memory allocations. @@ -52,17 +51,17 @@ class statistics_resource_adaptor final : public device_memory_resource { int64_t peak{0}; // Max value of `value` int64_t total{0}; // Sum of all added values - counter& operator+=(int64_t x) + counter& operator+=(int64_t val) { - value += x; - total += x; + value += val; + total += val; peak = std::max(value, peak); return *this; } - counter& operator-=(int64_t x) + counter& operator-=(int64_t val) { - value -= x; + value -= val; return *this; } }; @@ -81,11 +80,11 @@ class statistics_resource_adaptor final : public device_memory_resource { } statistics_resource_adaptor() = delete; - virtual ~statistics_resource_adaptor() = default; + ~statistics_resource_adaptor() override = default; statistics_resource_adaptor(statistics_resource_adaptor const&) = delete; - statistics_resource_adaptor(statistics_resource_adaptor&&) = default; statistics_resource_adaptor& operator=(statistics_resource_adaptor const&) = delete; - statistics_resource_adaptor& operator=(statistics_resource_adaptor&&) = default; + statistics_resource_adaptor(statistics_resource_adaptor&&) noexcept = default; + statistics_resource_adaptor& operator=(statistics_resource_adaptor&&) noexcept = default; /** * @brief Return pointer to the upstream resource. @@ -156,7 +155,7 @@ class statistics_resource_adaptor final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - void* p = upstream_->allocate(bytes, stream); + void* ptr = upstream_->allocate(bytes, stream); // increment the stats { @@ -167,21 +166,21 @@ class statistics_resource_adaptor final : public device_memory_resource { allocations_ += 1; } - return p; + return ptr; } /** - * @brief Free allocation of size `bytes` pointed to by `p` + * @brief Free allocation of size `bytes` pointed to by `ptr` * * @throws Nothing. 
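A small sketch of the counters in action; `get_bytes_counter()` is the adaptor's existing accessor and is not part of this hunk.

#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/statistics_resource_adaptor.hpp>

void statistics_example()
{
  rmm::mr::cuda_memory_resource upstream;
  rmm::mr::statistics_resource_adaptor<rmm::mr::cuda_memory_resource> stats_mr{&upstream};

  void* ptr = stats_mr.allocate(4096);
  stats_mr.deallocate(ptr, 4096);

  // value is the currently outstanding amount, peak the high-water mark, total the lifetime sum.
  auto bytes = stats_mr.get_bytes_counter();  // value == 0, peak == 4096, total == 4096 here
  (void)bytes;
}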
* - * @param p Pointer to be deallocated + * @param ptr Pointer to be deallocated * @param bytes Size of the allocation * @param stream Stream on which to perform the deallocation */ - void do_deallocate(void* p, std::size_t bytes, cuda_stream_view stream) override + void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - upstream_->deallocate(p, bytes, stream); + upstream_->deallocate(ptr, bytes, stream); { write_lock_t lock(mtx_); @@ -203,13 +202,10 @@ class statistics_resource_adaptor final : public device_memory_resource { */ bool do_is_equal(device_memory_resource const& other) const noexcept override { - if (this == &other) - return true; - else { - auto cast = dynamic_cast const*>(&other); - return cast != nullptr ? upstream_->is_equal(*cast->get_upstream()) - : upstream_->is_equal(other); - } + if (this == &other) { return true; } + auto cast = dynamic_cast const*>(&other); + return cast != nullptr ? upstream_->is_equal(*cast->get_upstream()) + : upstream_->is_equal(other); } /** @@ -244,5 +240,4 @@ statistics_resource_adaptor make_statistics_adaptor(Upstream* upstream return statistics_resource_adaptor{upstream}; } -} // namespace mr -} // namespace rmm +} // namespace rmm::mr From b26b9aaa61d479f36b83f5387f0b540ccf88b6d4 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 12:00:37 +1000 Subject: [PATCH 44/72] tidy thread_safe_resource_adaptor --- .../device/thread_safe_resource_adaptor.hpp | 30 ++++++++----------- 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/include/rmm/mr/device/thread_safe_resource_adaptor.hpp b/include/rmm/mr/device/thread_safe_resource_adaptor.hpp index 2675a4df2..b1f898c5e 100644 --- a/include/rmm/mr/device/thread_safe_resource_adaptor.hpp +++ b/include/rmm/mr/device/thread_safe_resource_adaptor.hpp @@ -22,8 +22,7 @@ #include #include -namespace rmm { -namespace mr { +namespace rmm::mr { /** * @brief Resource that adapts `Upstream` memory resource adaptor to be thread safe. * @@ -54,7 +53,7 @@ class thread_safe_resource_adaptor final : public device_memory_resource { } thread_safe_resource_adaptor() = delete; - ~thread_safe_resource_adaptor() = default; + ~thread_safe_resource_adaptor() override = default; thread_safe_resource_adaptor(thread_safe_resource_adaptor const&) = delete; thread_safe_resource_adaptor(thread_safe_resource_adaptor&&) = delete; thread_safe_resource_adaptor& operator=(thread_safe_resource_adaptor const&) = delete; @@ -98,19 +97,18 @@ class thread_safe_resource_adaptor final : public device_memory_resource { } /** - * @brief Free allocation of size `bytes` pointed to to by `p` and log the - * deallocation. + * @brief Free allocation of size `bytes` pointed to to by `ptr`.s * * @throws Nothing. 
* - * @param p Pointer to be deallocated + * @param ptr Pointer to be deallocated * @param bytes Size of the allocation * @param stream Stream on which to perform the deallocation */ - void do_deallocate(void* p, std::size_t bytes, cuda_stream_view stream) override + void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { lock_t lock(mtx); - upstream_->deallocate(p, bytes, stream); + upstream_->deallocate(ptr, bytes, stream); } /** @@ -124,15 +122,12 @@ class thread_safe_resource_adaptor final : public device_memory_resource { */ bool do_is_equal(device_memory_resource const& other) const noexcept override { - if (this == &other) - return true; - else { - auto thread_safe_other = dynamic_cast const*>(&other); - if (thread_safe_other != nullptr) - return upstream_->is_equal(*thread_safe_other->get_upstream()); - else - return upstream_->is_equal(other); + if (this == &other) { return true; } + auto thread_safe_other = dynamic_cast const*>(&other); + if (thread_safe_other != nullptr) { + return upstream_->is_equal(*thread_safe_other->get_upstream()); } + return upstream_->is_equal(other); } /** @@ -153,5 +148,4 @@ class thread_safe_resource_adaptor final : public device_memory_resource { Upstream* upstream_; ///< The upstream resource used for satisfying allocation requests }; -} // namespace mr -} // namespace rmm +} // namespace rmm::mr From 7fb12d2cd6f8bd00b1031a108f7f6875834bb52c Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 12:02:29 +1000 Subject: [PATCH 45/72] tidy thrust_allocator_adaptor --- .../mr/device/thrust_allocator_adaptor.hpp | 24 +++++++++---------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/include/rmm/mr/device/thrust_allocator_adaptor.hpp b/include/rmm/mr/device/thrust_allocator_adaptor.hpp index d841304a6..56e910801 100644 --- a/include/rmm/mr/device/thrust_allocator_adaptor.hpp +++ b/include/rmm/mr/device/thrust_allocator_adaptor.hpp @@ -22,8 +22,7 @@ #include #include -namespace rmm { -namespace mr { +namespace rmm::mr { /** * @brief An `allocator` compatible with Thrust containers and algorithms using * a `device_memory_resource` for memory (de)allocation. @@ -91,39 +90,38 @@ class thrust_allocator : public thrust::device_malloc_allocator { /** * @brief Allocate objects of type `T` * - * @param n The number of elements of type `T` to allocate + * @param num The number of elements of type `T` to allocate * @return pointer Pointer to the newly allocated storage */ - pointer allocate(size_type n) + pointer allocate(size_type num) { - return thrust::device_pointer_cast(static_cast(_mr->allocate(n * sizeof(T), _stream))); + return thrust::device_pointer_cast(static_cast(_mr->allocate(num * sizeof(T), _stream))); } /** * @brief Deallocates objects of type `T` * - * @param p Pointer returned by a previous call to `allocate` - * @param n number of elements, *must* be equal to the argument passed to the + * @param ptr Pointer returned by a previous call to `allocate` + * @param num number of elements, *must* be equal to the argument passed to the * prior `allocate` call that produced `p` */ - void deallocate(pointer p, size_type n) + void deallocate(pointer ptr, size_type num) { - return _mr->deallocate(thrust::raw_pointer_cast(p), n * sizeof(T), _stream); + return _mr->deallocate(thrust::raw_pointer_cast(ptr), num * sizeof(T), _stream); } /** * @brief Returns the device memory resource used by this allocator. 
*/ - device_memory_resource* resource() const noexcept { return _mr; } + [[nodiscard]] device_memory_resource* resource() const noexcept { return _mr; } /** * @brief Returns the stream used by this allocator. */ - cuda_stream_view stream() const noexcept { return _stream; } + [[nodiscard]] cuda_stream_view stream() const noexcept { return _stream; } private: cuda_stream_view _stream{}; device_memory_resource* _mr{rmm::mr::get_current_device_resource()}; }; -} // namespace mr -} // namespace rmm +} // namespace rmm::mr From 2c28eeea0cc03a66046de0ebabb8cbf5008ddc0e Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 12:05:59 +1000 Subject: [PATCH 46/72] tidy tracking_resource_adaptor --- .../mr/device/tracking_resource_adaptor.hpp | 49 +++++++++---------- 1 file changed, 22 insertions(+), 27 deletions(-) diff --git a/include/rmm/mr/device/tracking_resource_adaptor.hpp b/include/rmm/mr/device/tracking_resource_adaptor.hpp index 1a32a1c44..fbcb44898 100644 --- a/include/rmm/mr/device/tracking_resource_adaptor.hpp +++ b/include/rmm/mr/device/tracking_resource_adaptor.hpp @@ -25,8 +25,7 @@ #include #include -namespace rmm { -namespace mr { +namespace rmm::mr { /** * @brief Resource that uses `Upstream` to allocate memory and tracks allocations. * @@ -83,11 +82,11 @@ class tracking_resource_adaptor final : public device_memory_resource { } tracking_resource_adaptor() = delete; - virtual ~tracking_resource_adaptor() = default; + ~tracking_resource_adaptor() override = default; tracking_resource_adaptor(tracking_resource_adaptor const&) = delete; - tracking_resource_adaptor(tracking_resource_adaptor&&) = default; tracking_resource_adaptor& operator=(tracking_resource_adaptor const&) = delete; - tracking_resource_adaptor& operator=(tracking_resource_adaptor&&) = default; + tracking_resource_adaptor(tracking_resource_adaptor&&) noexcept = default; + tracking_resource_adaptor& operator=(tracking_resource_adaptor&&) noexcept = default; /** * @brief Return pointer to the upstream resource. @@ -154,10 +153,10 @@ class tracking_resource_adaptor final : public device_memory_resource { std::ostringstream oss; if (!allocations_.empty()) { - for (auto const& al : allocations_) { - oss << al.first << ": " << al.second.allocation_size << " B"; - if (al.second.strace != nullptr) { - oss << " : callstack:" << std::endl << *al.second.strace; + for (auto const& alloc : allocations_) { + oss << alloc.first << ": " << alloc.second.allocation_size << " B"; + if (alloc.second.strace != nullptr) { + oss << " : callstack:" << std::endl << *alloc.second.strace; } oss << std::endl; } @@ -193,34 +192,34 @@ class tracking_resource_adaptor final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - void* p = upstream_->allocate(bytes, stream); + void* ptr = upstream_->allocate(bytes, stream); // track it. { write_lock_t lock(mtx_); - allocations_.emplace(p, allocation_info{bytes, capture_stacks_}); + allocations_.emplace(ptr, allocation_info{bytes, capture_stacks_}); } allocated_bytes_ += bytes; - return p; + return ptr; } /** - * @brief Free allocation of size `bytes` pointed to by `p` + * @brief Free allocation of size `bytes` pointed to by `ptr` * * @throws Nothing. 
* - * @param p Pointer to be deallocated + * @param ptr Pointer to be deallocated * @param bytes Size of the allocation * @param stream Stream on which to perform the deallocation */ - void do_deallocate(void* p, std::size_t bytes, cuda_stream_view stream) override + void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - upstream_->deallocate(p, bytes, stream); + upstream_->deallocate(ptr, bytes, stream); { write_lock_t lock(mtx_); - const auto found = allocations_.find(p); + const auto found = allocations_.find(ptr); // Ensure the allocation is found and the number of bytes match if (found == allocations_.end()) { @@ -229,7 +228,7 @@ class tracking_resource_adaptor final : public device_memory_resource { RMM_LOG_ERROR( "Deallocating a pointer that was not tracked. Ptr: {:p} [{}B], Current Num. Allocations: " "{}", - fmt::ptr(p), + fmt::ptr(ptr), bytes, this->allocations_.size()); } else { @@ -261,13 +260,10 @@ class tracking_resource_adaptor final : public device_memory_resource { */ bool do_is_equal(device_memory_resource const& other) const noexcept override { - if (this == &other) - return true; - else { - auto cast = dynamic_cast const*>(&other); - return cast != nullptr ? upstream_->is_equal(*cast->get_upstream()) - : upstream_->is_equal(other); - } + if (this == &other) { return true; } + auto cast = dynamic_cast const*>(&other); + return cast != nullptr ? upstream_->is_equal(*cast->get_upstream()) + : upstream_->is_equal(other); } /** @@ -303,5 +299,4 @@ tracking_resource_adaptor make_tracking_adaptor(Upstream* upstream) return tracking_resource_adaptor{upstream}; } -} // namespace mr -} // namespace rmm +} // namespace rmm::mr From c0c71a956de79b62e67dd4f20f26e59a5c09abcc Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 12:19:43 +1000 Subject: [PATCH 47/72] tidy host_mrs --- .../rmm/mr/device/device_memory_resource.hpp | 4 +- include/rmm/mr/host/host_memory_resource.hpp | 133 +++++++++--------- include/rmm/mr/host/new_delete_resource.hpp | 47 +++---- .../rmm/mr/host/pinned_memory_resource.hpp | 55 ++++---- 4 files changed, 113 insertions(+), 126 deletions(-) diff --git a/include/rmm/mr/device/device_memory_resource.hpp b/include/rmm/mr/device/device_memory_resource.hpp index e0e97b86d..52aa8c79f 100644 --- a/include/rmm/mr/device/device_memory_resource.hpp +++ b/include/rmm/mr/device/device_memory_resource.hpp @@ -85,8 +85,8 @@ class device_memory_resource { virtual ~device_memory_resource() = default; device_memory_resource(device_memory_resource const&) = default; device_memory_resource& operator=(device_memory_resource const&) = default; - device_memory_resource(device_memory_resource&&) = default; - device_memory_resource& operator=(device_memory_resource&&) = default; + device_memory_resource(device_memory_resource&&) noexcept = default; + device_memory_resource& operator=(device_memory_resource&&) noexcept = default; /** * @brief Allocates memory of size at least \p bytes. diff --git a/include/rmm/mr/host/host_memory_resource.hpp b/include/rmm/mr/host/host_memory_resource.hpp index b799b46da..4edffc860 100644 --- a/include/rmm/mr/host/host_memory_resource.hpp +++ b/include/rmm/mr/host/host_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
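[Editor's aside — illustrative only, not part of the patch: the hunks below rewrite the documentation of the host_memory_resource interface (allocate / deallocate / is_equal). As a quick orientation, here is a minimal usage sketch against that interface using the concrete new_delete_resource touched later in this patch; the size and alignment values are arbitrary.]

    // Minimal sketch of the host_memory_resource interface documented below.
    #include <rmm/mr/host/new_delete_resource.hpp>

    #include <cstddef>

    int main()
    {
      rmm::mr::new_delete_resource mr;  // a concrete host_memory_resource
      std::size_t const bytes{256};
      void* ptr = mr.allocate(bytes, alignof(std::max_align_t));
      // Deallocation must use the same size and alignment that were passed to allocate().
      mr.deallocate(ptr, bytes, alignof(std::max_align_t));
      return 0;
    }
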
@@ -18,9 +18,9 @@ #include #include -namespace rmm { -namespace mr { -/**---------------------------------------------------------------------------* +namespace rmm::mr { + +/** * @brief Base class for host memory allocation. * * This is based on `std::pmr::memory_resource`: @@ -43,124 +43,121 @@ namespace mr { * base class' `allocate` function may log every allocation, no matter what * derived class implementation is used. * - *---------------------------------------------------------------------------**/ + */ class host_memory_resource { public: - virtual ~host_memory_resource() = default; + host_memory_resource() = default; + virtual ~host_memory_resource() = default; + host_memory_resource(host_memory_resource const&) = default; + host_memory_resource& operator=(host_memory_resource const&) = default; + host_memory_resource(host_memory_resource&&) noexcept = default; + host_memory_resource& operator=(host_memory_resource&&) noexcept = default; - /**---------------------------------------------------------------------------* + /** * @brief Allocates memory on the host of size at least `bytes` bytes. * - * The returned storage is aligned to the specified `alignment` if supported, - * and to `alignof(std::max_align_t)` otherwise. + * The returned storage is aligned to the specified `alignment` if supported, and to + * `alignof(std::max_align_t)` otherwise. * - * @throws std::bad_alloc When the requested `bytes` and `alignment` cannot be - * allocated. + * @throws std::bad_alloc When the requested `bytes` and `alignment` cannot be allocated. * * @param bytes The size of the allocation * @param alignment Alignment of the allocation * @return void* Pointer to the newly allocated memory - *---------------------------------------------------------------------------**/ + */ void* allocate(std::size_t bytes, std::size_t alignment = alignof(std::max_align_t)) { return do_allocate(bytes, alignment); } - /**---------------------------------------------------------------------------* - * @brief Deallocate memory pointed to by `p`. + /** + * @brief Deallocate memory pointed to by `ptr`. * - * `p` must have been returned by a prior call to `allocate(bytes,alignment)` - * on a `host_memory_resource` that compares equal to `*this`, and the storage - * it points to must not yet have been deallocated, otherwise behavior is - * undefined. + * `ptr` must have been returned by a prior call to `allocate(bytes,alignment)` on a + * `host_memory_resource` that compares equal to `*this`, and the storage it points to must not + * yet have been deallocated, otherwise behavior is undefined. * * @throws Nothing. * - * @param p Pointer to be deallocated - * @param bytes The size in bytes of the allocation. This must be equal to the - * value of `bytes` that was passed to the `allocate` call that returned `p`. - * @param alignment Alignment of the allocation. This must be equal to the - *value of `alignment` that was passed to the `allocate` call that returned - *`p`. + * @param ptr Pointer to be deallocated + * @param bytes The size in bytes of the allocation. This must be equal to the value of `bytes` + * that was passed to the `allocate` call that returned `ptr`. + * @param alignment Alignment of the allocation. This must be equal to the value of `alignment` + * that was passed to the `allocate` call that returned `ptr`. 
* @param stream Stream on which to perform deallocation - *---------------------------------------------------------------------------**/ - void deallocate(void* p, std::size_t bytes, std::size_t alignment = alignof(std::max_align_t)) + */ + void deallocate(void* ptr, std::size_t bytes, std::size_t alignment = alignof(std::max_align_t)) { - do_deallocate(p, bytes, alignment); + do_deallocate(ptr, bytes, alignment); } - /**---------------------------------------------------------------------------* + /** * @brief Compare this resource to another. * - * Two `host_memory_resource`s compare equal if and only if memory allocated - * from one `host_memory_resource` can be deallocated from the other and vice - * versa. + * Two `host_memory_resource`s compare equal if and only if memory allocated from one + * `host_memory_resource` can be deallocated from the other and vice versa. * - * By default, simply checks if \p *this and \p other refer to the same - * object, i.e., does not check if they are two objects of the same class. + * By default, simply checks if \p *this and \p other refer to the same object, i.e., does not + * check if they are two objects of the same class. * * @param other The other resource to compare to - * @returns If the two resources are equivalent - *---------------------------------------------------------------------------**/ - bool is_equal(host_memory_resource const& other) const noexcept { return do_is_equal(other); } + * @returns true if the two resources are equivalent + */ + [[nodiscard]] bool is_equal(host_memory_resource const& other) const noexcept + { + return do_is_equal(other); + } private: - /**---------------------------------------------------------------------------* + /** * @brief Allocates memory on the host of size at least `bytes` bytes. * - * The returned storage is aligned to the specified `alignment` if supported, - * and to `alignof(std::max_align_t)` otherwise. + * The returned storage is aligned to the specified `alignment` if supported, and to + * `alignof(std::max_align_t)` otherwise. * - * @throws std::bad_alloc When the requested `bytes` and `alignment` cannot be - * allocated. + * @throws std::bad_alloc When the requested `bytes` and `alignment` cannot be allocated. * * @param bytes The size of the allocation * @param alignment Alignment of the allocation * @return void* Pointer to the newly allocated memory - *---------------------------------------------------------------------------**/ + */ virtual void* do_allocate(std::size_t bytes, std::size_t alignment = alignof(std::max_align_t)) = 0; - /**---------------------------------------------------------------------------* - * @brief Deallocate memory pointed to by `p`. + /** + * @brief Deallocate memory pointed to by `ptr`. * - * `p` must have been returned by a prior call to `allocate(bytes,alignment)` - * on a `host_memory_resource` that compares equal to `*this`, and the storage - * it points to must not yet have been deallocated, otherwise behavior is - * undefined. + * `ptr` must have been returned by a prior call to `allocate(bytes,alignment)` on a + * `host_memory_resource` that compares equal to `*this`, and the storage it points to must not + * yet have been deallocated, otherwise behavior is undefined. * * @throws Nothing. * - * @param p Pointer to be deallocated - * @param bytes The size in bytes of the allocation. This must be equal to the - * value of `bytes` that was passed to the `allocate` call that returned `p`. - * @param alignment Alignment of the allocation. 
This must be equal to the - *value of `alignment` that was passed to the `allocate` call that returned - *`p`. - * @param stream Stream on which to perform deallocation - *---------------------------------------------------------------------------**/ - virtual void do_deallocate(void* p, + * @param ptr Pointer to be deallocated + * @param bytes The size in bytes of the allocation. This must be equal to the value of `bytes` + * that was passed to the `allocate` call that returned `ptr`. + * @param alignment Alignment of the allocation. This must be equal to the value of `alignment` + * that was passed to the `allocate` call that returned `ptr`. + */ + virtual void do_deallocate(void* ptr, std::size_t bytes, std::size_t alignment = alignof(std::max_align_t)) = 0; - /**---------------------------------------------------------------------------* + /** * @brief Compare this resource to another. * - * Two host_memory_resources compare equal if and only if memory allocated - * from one host_memory_resource can be deallocated from the other and vice - * versa. + * Two host_memory_resources compare equal if and only if memory allocated from one + * host_memory_resource can be deallocated from the other and vice versa. * - * By default, simply checks if \p *this and \p other refer to the same - * object, i.e., does not check if they are two objects of the same class. + * By default, simply checks if `*this` and `other` refer to the same object, i.e., does not check + * whether they are two objects of the same class. * * @param other The other resource to compare to * @return true If the two resources are equivalent - * @return false If the two resources are not equal - *---------------------------------------------------------------------------**/ - virtual bool do_is_equal(host_memory_resource const& other) const noexcept + */ + [[nodiscard]] virtual bool do_is_equal(host_memory_resource const& other) const noexcept { return this == &other; } }; -} // namespace mr -} // namespace rmm +} // namespace rmm::mr diff --git a/include/rmm/mr/host/new_delete_resource.hpp b/include/rmm/mr/host/new_delete_resource.hpp index 694450798..e30a6f41c 100644 --- a/include/rmm/mr/host/new_delete_resource.hpp +++ b/include/rmm/mr/host/new_delete_resource.hpp @@ -24,10 +24,10 @@ namespace rmm::mr { -/**---------------------------------------------------------------------------* - * @brief A `host_memory_resource` that uses the global `operator new` and - * `operator delete` to allocate host memory. - *---------------------------------------------------------------------------**/ +/** + * @brief A `host_memory_resource` that uses the global `operator new` and `operator delete` to + * allocate host memory. + */ class new_delete_resource final : public host_memory_resource { public: new_delete_resource() = default; @@ -38,19 +38,18 @@ class new_delete_resource final : public host_memory_resource { new_delete_resource& operator=(new_delete_resource&&) = default; private: - /**---------------------------------------------------------------------------* + /** * @brief Allocates memory on the host of size at least `bytes` bytes. * - * The returned storage is aligned to the specified `alignment` if supported, - * and to `alignof(std::max_align_t)` otherwise. + * The returned storage is aligned to the specified `alignment` if supported, and to + * `alignof(std::max_align_t)` otherwise. * - * @throws std::bad_alloc When the requested `bytes` and `alignment` cannot be - * allocated. 
+ * @throws std::bad_alloc When the requested `bytes` and `alignment` cannot be allocated. * * @param bytes The size of the allocation * @param alignment Alignment of the allocation - * @return void* Pointer to the newly allocated memory - *---------------------------------------------------------------------------**/ + * @return Pointer to the newly allocated memory + */ void* do_allocate(std::size_t bytes, std::size_t alignment = detail::RMM_DEFAULT_HOST_ALIGNMENT) override { @@ -62,24 +61,21 @@ class new_delete_resource final : public host_memory_resource { bytes, alignment, [](std::size_t size) { return ::operator new(size); }); } - /**---------------------------------------------------------------------------* - * @brief Deallocate memory pointed to by `p`. + /** + * @brief Deallocate memory pointed to by `ptr`. * - * `p` must have been returned by a prior call to `allocate(bytes,alignment)` - * on a `host_memory_resource` that compares equal to `*this`, and the storage - * it points to must not yet have been deallocated, otherwise behavior is - * undefined. + * `ptr` must have been returned by a prior call to `allocate(bytes,alignment)` on a + * `host_memory_resource` that compares equal to `*this`, and the storage it points to must not + * yet have been deallocated, otherwise behavior is undefined. * * @throws Nothing. * - * @param p Pointer to be deallocated - * @param bytes The size in bytes of the allocation. This must be equal to the - * value of `bytes` that was passed to the `allocate` call that returned `p`. - * @param alignment Alignment of the allocation. This must be equal to the - *value of `alignment` that was passed to the `allocate` call that returned - *`p`. - * @param stream Stream on which to perform deallocation - *---------------------------------------------------------------------------**/ + * @param ptr Pointer to be deallocated + * @param bytes The size in bytes of the allocation. This must be equal to the value of `bytes` + * that was passed to the `allocate` call that returned `ptr`. + * @param alignment Alignment of the allocation. This must be equal to the value of `alignment` + * that was passed to the `allocate` call that returned `ptr`. + */ void do_deallocate(void* ptr, std::size_t bytes, std::size_t alignment = detail::RMM_DEFAULT_HOST_ALIGNMENT) override @@ -87,4 +83,5 @@ class new_delete_resource final : public host_memory_resource { detail::aligned_deallocate(ptr, bytes, alignment, [](void* ptr) { ::operator delete(ptr); }); } }; + } // namespace rmm::mr diff --git a/include/rmm/mr/host/pinned_memory_resource.hpp b/include/rmm/mr/host/pinned_memory_resource.hpp index d00a5cffe..42be3644f 100644 --- a/include/rmm/mr/host/pinned_memory_resource.hpp +++ b/include/rmm/mr/host/pinned_memory_resource.hpp @@ -22,38 +22,36 @@ #include #include -namespace rmm { -namespace mr { +namespace rmm::mr { -/**---------------------------------------------------------------------------* +/* * @brief A `host_memory_resource` that uses `cudaMallocHost` to allocate * pinned/page-locked host memory. 
* * See https://devblogs.nvidia.com/how-optimize-data-transfers-cuda-cc/ - *---------------------------------------------------------------------------**/ + */ class pinned_memory_resource final : public host_memory_resource { public: pinned_memory_resource() = default; - ~pinned_memory_resource() = default; + ~pinned_memory_resource() override = default; pinned_memory_resource(pinned_memory_resource const&) = default; pinned_memory_resource(pinned_memory_resource&&) = default; pinned_memory_resource& operator=(pinned_memory_resource const&) = default; pinned_memory_resource& operator=(pinned_memory_resource&&) = default; private: - /**---------------------------------------------------------------------------* + /** * @brief Allocates pinned memory on the host of size at least `bytes` bytes. * - * The returned storage is aligned to the specified `alignment` if supported, - * and to `alignof(std::max_align_t)` otherwise. + * The returned storage is aligned to the specified `alignment` if supported, and to + * `alignof(std::max_align_t)` otherwise. * - * @throws std::bad_alloc When the requested `bytes` and `alignment` cannot be - * allocated. + * @throws std::bad_alloc When the requested `bytes` and `alignment` cannot be allocated. * * @param bytes The size of the allocation * @param alignment Alignment of the allocation * @return void* Pointer to the newly allocated memory - *---------------------------------------------------------------------------**/ + */ void* do_allocate(std::size_t bytes, std::size_t alignment = alignof(std::max_align_t)) override { // don't allocate anything if the user requested zero bytes @@ -71,33 +69,28 @@ class pinned_memory_resource final : public host_memory_resource { }); } - /**---------------------------------------------------------------------------* - * @brief Deallocate memory pointed to by `p`. + /** + * @brief Deallocate memory pointed to by `ptr`. * - * `p` must have been returned by a prior call to `allocate(bytes,alignment)` - * on a `host_memory_resource` that compares equal to `*this`, and the storage - * it points to must not yet have been deallocated, otherwise behavior is - * undefined. + * `ptr` must have been returned by a prior call to `allocate(bytes,alignment)` on a + * `host_memory_resource` that compares equal to `*this`, and the storage it points to must not + * yet have been deallocated, otherwise behavior is undefined. * * @throws Nothing. * - * @param p Pointer to be deallocated - * @param bytes The size in bytes of the allocation. This must be equal to the - * value of `bytes` that was passed to the `allocate` call that returned `p`. - * @param alignment Alignment of the allocation. This must be equal to the - *value of `alignment` that was passed to the `allocate` call that returned - *`p`. - * @param stream Stream on which to perform deallocation - *---------------------------------------------------------------------------**/ - void do_deallocate(void* p, + * @param ptr Pointer to be deallocated + * @param bytes The size in bytes of the allocation. This must be equal to the value of `bytes` + * that was passed to the `allocate` call that returned `ptr`. + * @param alignment Alignment of the allocation. This must be equal to the value of `alignment` + * that was passed to the `allocate` call that returned `ptr`. 
+ */ + void do_deallocate(void* ptr, std::size_t bytes, std::size_t alignment = alignof(std::max_align_t)) override { - (void)alignment; - if (nullptr == p) { return; } + if (nullptr == ptr) { return; } detail::aligned_deallocate( - p, bytes, alignment, [](void* p) { RMM_ASSERT_CUDA_SUCCESS(cudaFreeHost(p)); }); + ptr, bytes, alignment, [](void* ptr) { RMM_ASSERT_CUDA_SUCCESS(cudaFreeHost(ptr)); }); } }; -} // namespace mr -} // namespace rmm +} // namespace rmm::mr From 111a2f434b08ae90bf7e536e7b89035c6f74b32e Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 12:31:45 +1000 Subject: [PATCH 48/72] tidy aligned_mr_tests --- tests/mr/device/aligned_mr_tests.cpp | 49 +++++++++++++--------------- 1 file changed, 22 insertions(+), 27 deletions(-) diff --git a/tests/mr/device/aligned_mr_tests.cpp b/tests/mr/device/aligned_mr_tests.cpp index a39dcdbfd..f1ed561f4 100644 --- a/tests/mr/device/aligned_mr_tests.cpp +++ b/tests/mr/device/aligned_mr_tests.cpp @@ -14,6 +14,7 @@ * limitations under the License. */ +#include #include #include #include @@ -40,6 +41,12 @@ class mock_resource : public rmm::mr::device_memory_resource { using aligned_mock = rmm::mr::aligned_resource_adaptor; using aligned_real = rmm::mr::aligned_resource_adaptor; +void* int_to_address(std::size_t val) +{ + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast, performance-no-int-to-ptr) + return reinterpret_cast(val); +} + TEST(AlignedTest, ThrowOnNullUpstream) { auto construct_nullptr = []() { aligned_mock mr{nullptr}; }; @@ -87,8 +94,8 @@ TEST(AlignedTest, DefaultAllocationAlignmentPassthrough) aligned_mock mr{&mock}; cuda_stream_view stream; - auto const unaligned_address{123}; - void* const pointer = reinterpret_cast(unaligned_address); + void* const pointer = int_to_address(123); + // device_memory_resource aligns to 8. { auto const size{8}; @@ -111,8 +118,7 @@ TEST(AlignedTest, BelowAlignmentThresholdPassthrough) aligned_mock mr{&mock, alignment, threshold}; cuda_stream_view stream; - auto const unaligned_address1{123}; - void* const pointer = reinterpret_cast(unaligned_address1); + void* const pointer = int_to_address(123); // device_memory_resource aligns to 8. 
{ auto const size{8}; @@ -127,9 +133,8 @@ TEST(AlignedTest, BelowAlignmentThresholdPassthrough) } { - auto const unaligned_address2{456}; auto const size{65528}; - void* const pointer1 = reinterpret_cast(unaligned_address2); + void* const pointer1 = int_to_address(456); EXPECT_CALL(mock, do_allocate(size, stream)).WillOnce(Return(pointer1)); EXPECT_CALL(mock, do_deallocate(pointer1, size, stream)).Times(1); EXPECT_EQ(mr.allocate(size, stream), pointer1); @@ -145,8 +150,7 @@ TEST(AlignedTest, UpstreamAddressAlreadyAligned) aligned_mock mr{&mock, alignment, threshold}; cuda_stream_view stream; - auto const aligned_address{4096}; - void* const pointer = reinterpret_cast(aligned_address); + void* const pointer = int_to_address(4096); { auto const size{69376}; @@ -170,16 +174,14 @@ TEST(AlignedTest, AlignUpstreamAddress) cuda_stream_view stream; { - auto const address{256}; - void* const pointer = reinterpret_cast(address); + void* const pointer = int_to_address(256); auto const size{69376}; EXPECT_CALL(mock, do_allocate(size, stream)).WillOnce(Return(pointer)); EXPECT_CALL(mock, do_deallocate(pointer, size, stream)).Times(1); } { - auto const address{4096}; - void* const expected_pointer = reinterpret_cast(address); + void* const expected_pointer = int_to_address(4096); auto const size{65536}; EXPECT_EQ(mr.allocate(size, stream), expected_pointer); mr.deallocate(expected_pointer, size, stream); @@ -196,12 +198,9 @@ TEST(AlignedTest, AlignMultiple) cuda_stream_view stream; { - auto const address1{256}; - auto const address2{131584}; - auto const address3{263168}; - void* const pointer1 = reinterpret_cast(address1); - void* const pointer2 = reinterpret_cast(address2); - void* const pointer3 = reinterpret_cast(address3); + void* const pointer1 = int_to_address(256); + void* const pointer2 = int_to_address(131584); + void* const pointer3 = int_to_address(263168); auto const size1{69376}; auto const size2{77568}; auto const size3{81664}; @@ -214,12 +213,9 @@ TEST(AlignedTest, AlignMultiple) } { - auto const expected_address1{4096}; - auto const expected_address2{135168}; - auto const expected_address3{266240}; - void* const expected_pointer1 = reinterpret_cast(expected_address1); - void* const expected_pointer2 = reinterpret_cast(expected_address2); - void* const expected_pointer3 = reinterpret_cast(expected_address3); + void* const expected_pointer1 = int_to_address(4096); + void* const expected_pointer2 = int_to_address(135168); + void* const expected_pointer3 = int_to_address(266240); auto const size1{65536}; auto const size2{73728}; auto const size3{77800}; @@ -237,9 +233,8 @@ TEST(AlignedTest, AlignRealPointer) auto const alignment{4096}; auto const threshold{65536}; aligned_real mr{rmm::mr::get_current_device_resource(), alignment, threshold}; - void* alloc = mr.allocate(threshold); - auto const address = reinterpret_cast(alloc); - EXPECT_TRUE(address % alignment == 0); + void* alloc = mr.allocate(threshold); + EXPECT_TRUE(rmm::detail::is_pointer_aligned(alloc, alignment)); mr.deallocate(alloc, threshold); } From 70b85c29bec915d1d220fb4f08f65d478c80f08a Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 13:10:42 +1000 Subject: [PATCH 49/72] tidy device MR tests --- tests/mr/device/mr_multithreaded_tests.cpp | 2 +- tests/mr/device/mr_test.hpp | 11 ++- .../mr/device/polymorphic_allocator_tests.cpp | 12 ++- tests/mr/device/pool_mr_tests.cpp | 28 +++--- tests/mr/device/statistics_mr_tests.cpp | 60 ++++++----- .../device/stream_allocator_adaptor_tests.cpp | 25 ++--- 
tests/mr/device/thrust_allocator_tests.cu | 20 ++-- tests/mr/device/tracking_mr_tests.cpp | 99 ++++++++++--------- 8 files changed, 139 insertions(+), 118 deletions(-) diff --git a/tests/mr/device/mr_multithreaded_tests.cpp b/tests/mr/device/mr_multithreaded_tests.cpp index a80af38fe..4bacb208f 100644 --- a/tests/mr/device/mr_multithreaded_tests.cpp +++ b/tests/mr/device/mr_multithreaded_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/tests/mr/device/mr_test.hpp b/tests/mr/device/mr_test.hpp index 5609168b8..4bef2b54e 100644 --- a/tests/mr/device/mr_test.hpp +++ b/tests/mr/device/mr_test.hpp @@ -100,13 +100,14 @@ inline void test_allocate(rmm::mr::device_memory_resource* mr, inline void concurrent_allocations_are_different(rmm::mr::device_memory_resource* mr, cuda_stream_view stream) { - void* p1 = mr->allocate(8_B, stream); - void* p2 = mr->allocate(8_B, stream); + const auto size{8_B}; + void* ptr1 = mr->allocate(size, stream); + void* ptr2 = mr->allocate(size, stream); - EXPECT_NE(p1, p2); + EXPECT_NE(ptr1, ptr2); - mr->deallocate(p1, 8_B, stream); - mr->deallocate(p2, 8_B, stream); + mr->deallocate(ptr1, size, stream); + mr->deallocate(ptr2, size, stream); } inline void test_various_allocations(rmm::mr::device_memory_resource* mr, cuda_stream_view stream) diff --git a/tests/mr/device/polymorphic_allocator_tests.cpp b/tests/mr/device/polymorphic_allocator_tests.cpp index 319d0ca63..4c8346730 100644 --- a/tests/mr/device/polymorphic_allocator_tests.cpp +++ b/tests/mr/device/polymorphic_allocator_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
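[Editor's aside — illustrative only, not part of the patch: the polymorphic_allocator test changes below exercise stream-ordered allocate / deallocate. A minimal sketch of that usage pattern, assuming (as the adjacent tests do) that a default-constructed allocator uses the current device resource:]

    #include <rmm/cuda_stream_view.hpp>
    #include <rmm/mr/device/polymorphic_allocator.hpp>

    void example()
    {
      rmm::cuda_stream_view stream{};               // default stream
      rmm::mr::polymorphic_allocator<int> alloc{};  // assumed to default to get_current_device_resource()
      int* ptr = alloc.allocate(1000, stream);      // stream-ordered allocation of 1000 ints
      alloc.deallocate(ptr, 1000, stream);          // size must match the allocate() call
    }
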
@@ -42,7 +42,8 @@ TEST_F(allocator_test, custom_resource) EXPECT_EQ(allocator.resource(), &mr); } -void test_conversion(rmm::mr::polymorphic_allocator) {} +void test_conversion(rmm::mr::polymorphic_allocator /*unused*/) {} + TEST_F(allocator_test, implicit_conversion) { rmm::mr::cuda_memory_resource mr; @@ -106,9 +107,10 @@ TEST_F(allocator_test, rebind) TEST_F(allocator_test, allocate_deallocate) { rmm::mr::polymorphic_allocator allocator{}; - auto p = allocator.allocate(1000, stream); - EXPECT_NE(p, nullptr); - EXPECT_NO_THROW(allocator.deallocate(p, 1000, stream)); + const auto size{1000}; + auto* ptr = allocator.allocate(size, stream); + EXPECT_NE(ptr, nullptr); + EXPECT_NO_THROW(allocator.deallocate(ptr, size, stream)); } } // namespace diff --git a/tests/mr/device/pool_mr_tests.cpp b/tests/mr/device/pool_mr_tests.cpp index 7f31412c8..9f2020785 100644 --- a/tests/mr/device/pool_mr_tests.cpp +++ b/tests/mr/device/pool_mr_tests.cpp @@ -26,8 +26,7 @@ #include -namespace rmm { -namespace test { +namespace rmm::test { namespace { using cuda_mr = rmm::mr::cuda_memory_resource; using pool_mr = rmm::mr::pool_memory_resource; @@ -44,7 +43,9 @@ TEST(PoolTest, ThrowMaxLessThanInitial) // Make sure first argument is enough larger than the second that alignment rounding doesn't // make them equal auto max_less_than_initial = []() { - pool_mr mr{rmm::mr::get_current_device_resource(), 1024, 256}; + const auto initial{1024}; + const auto maximum{256}; + pool_mr mr{rmm::mr::get_current_device_resource(), initial, maximum}; }; EXPECT_THROW(max_less_than_initial(), rmm::logic_error); } @@ -54,8 +55,9 @@ TEST(PoolTest, AllocateNinetyPercent) auto allocate_ninety = []() { auto const [free, total] = rmm::detail::available_device_memory(); (void)total; - auto const ninety_percent_pool = rmm::detail::align_up(static_cast(free * 0.9), - rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + auto const ninety_percent_pool = + rmm::detail::align_up(static_cast(static_cast(free) * 0.9), + rmm::detail::CUDA_ALLOCATION_ALIGNMENT); pool_mr mr{rmm::mr::get_current_device_resource(), ninety_percent_pool}; }; EXPECT_NO_THROW(allocate_ninety()); @@ -67,10 +69,10 @@ TEST(PoolTest, TwoLargeBuffers) auto const [free, total] = rmm::detail::available_device_memory(); (void)total; pool_mr mr{rmm::mr::get_current_device_resource()}; - auto p1 = mr.allocate(free / 4); - auto p2 = mr.allocate(free / 4); - mr.deallocate(p1, free / 4); - mr.deallocate(p2, free / 4); + auto* ptr1 = mr.allocate(free / 4); + auto* ptr2 = mr.allocate(free / 4); + mr.deallocate(ptr1, free / 4); + mr.deallocate(ptr2, free / 4); }; EXPECT_NO_THROW(two_large()); } @@ -78,7 +80,8 @@ TEST(PoolTest, TwoLargeBuffers) TEST(PoolTest, ForceGrowth) { cuda_mr cuda; - limiting_mr limiter{&cuda, 6000}; + auto const max_size{6000}; + limiting_mr limiter{&cuda, max_size}; pool_mr mr{&limiter, 0}; EXPECT_NO_THROW(mr.allocate(1000)); EXPECT_NO_THROW(mr.allocate(4000)); @@ -89,7 +92,7 @@ TEST(PoolTest, ForceGrowth) TEST(PoolTest, DeletedStream) { pool_mr mr{rmm::mr::get_current_device_resource(), 0}; - cudaStream_t stream; // we don't use rmm::cuda_stream here to make destruction more explicit + cudaStream_t stream{}; // we don't use rmm::cuda_stream here to make destruction more explicit const int size = 10000; EXPECT_EQ(cudaSuccess, cudaStreamCreate(&stream)); EXPECT_NO_THROW(rmm::device_buffer buff(size, cuda_stream_view{stream}, &mr)); @@ -124,5 +127,4 @@ TEST(PoolTest, NonAlignedPoolSize) } } // namespace -} // namespace test -} // namespace rmm +} // namespace rmm::test diff 
--git a/tests/mr/device/statistics_mr_tests.cpp b/tests/mr/device/statistics_mr_tests.cpp index 83464bbe2..59c356b1e 100644 --- a/tests/mr/device/statistics_mr_tests.cpp +++ b/tests/mr/device/statistics_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,20 +14,24 @@ * limitations under the License. */ +#include "../../byte_literals.hpp" + #include #include #include #include -#include "mr_test.hpp" #include -namespace rmm { -namespace test { +namespace rmm::test { namespace { using statistics_adaptor = rmm::mr::statistics_resource_adaptor; +constexpr auto num_allocations{10}; +constexpr auto num_more_allocations{5}; +constexpr auto ten_MiB{10_MiB}; + TEST(StatisticsTest, ThrowOnNullUpstream) { auto construct_nullptr = []() { statistics_adaptor mr{nullptr}; }; @@ -51,11 +55,13 @@ TEST(StatisticsTest, AllFreed) { statistics_adaptor mr{rmm::mr::get_current_device_resource()}; std::vector allocations; - for (int i = 0; i < 10; ++i) { - allocations.push_back(mr.allocate(10_MiB)); + + allocations.reserve(num_allocations); + for (int i = 0; i < num_allocations; ++i) { + allocations.push_back(mr.allocate(ten_MiB)); } - for (auto p : allocations) { - mr.deallocate(p, 10_MiB); + for (auto* alloc : allocations) { + mr.deallocate(alloc, ten_MiB); } // Counter values should be 0 @@ -67,12 +73,13 @@ TEST(StatisticsTest, PeakAllocations) { statistics_adaptor mr{rmm::mr::get_current_device_resource()}; std::vector allocations; - for (std::size_t i = 0; i < 10; ++i) { - allocations.push_back(mr.allocate(10_MiB)); + + for (std::size_t i = 0; i < num_allocations; ++i) { + allocations.push_back(mr.allocate(ten_MiB)); } // Delete every other allocation for (auto&& it = allocations.begin(); it != allocations.end(); ++it) { - mr.deallocate(*it, 10_MiB); + mr.deallocate(*it, ten_MiB); it = allocations.erase(it); } @@ -92,13 +99,13 @@ TEST(StatisticsTest, PeakAllocations) EXPECT_EQ(current_alloc_counts.total, 10); // Add 10 more to increase the peak - for (std::size_t i = 0; i < 10; ++i) { - allocations.push_back(mr.allocate(10_MiB)); + for (std::size_t i = 0; i < num_allocations; ++i) { + allocations.push_back(mr.allocate(ten_MiB)); } // Deallocate all remaining - for (std::size_t i = 0; i < allocations.size(); ++i) { - mr.deallocate(allocations[i], 10_MiB); + for (auto& allocation : allocations) { + mr.deallocate(allocation, ten_MiB); } allocations.clear(); @@ -124,9 +131,9 @@ TEST(StatisticsTest, MultiTracking) rmm::mr::set_current_device_resource(&mr); std::vector> allocations; - for (std::size_t i = 0; i < 10; ++i) { + for (std::size_t i = 0; i < num_allocations; ++i) { allocations.emplace_back( - std::make_shared(10_MiB, rmm::cuda_stream_default)); + std::make_shared(ten_MiB, rmm::cuda_stream_default)); } EXPECT_EQ(mr.get_allocations_counter().value, 10); @@ -134,9 +141,9 @@ TEST(StatisticsTest, MultiTracking) statistics_adaptor inner_mr{rmm::mr::get_current_device_resource()}; rmm::mr::set_current_device_resource(&inner_mr); - for (std::size_t i = 0; i < 5; ++i) { + for (std::size_t i = 0; i < num_more_allocations; ++i) { allocations.emplace_back( - std::make_shared(10_MiB, rmm::cuda_stream_default)); + std::make_shared(ten_MiB, rmm::cuda_stream_default)); } // Check the allocated bytes for both MRs @@ -174,8 +181,8 @@ TEST(StatisticsTest, NegativeInnerTracking) // memory pointer 
statistics_adaptor mr{rmm::mr::get_current_device_resource()}; std::vector allocations; - for (std::size_t i = 0; i < 10; ++i) { - allocations.push_back(mr.allocate(10_MiB)); + for (std::size_t i = 0; i < num_allocations; ++i) { + allocations.push_back(mr.allocate(ten_MiB)); } EXPECT_EQ(mr.get_allocations_counter().value, 10); @@ -183,8 +190,8 @@ TEST(StatisticsTest, NegativeInnerTracking) statistics_adaptor inner_mr{&mr}; // Add more allocations - for (std::size_t i = 0; i < 5; ++i) { - allocations.push_back(inner_mr.allocate(10_MiB)); + for (std::size_t i = 0; i < num_more_allocations; ++i) { + allocations.push_back(inner_mr.allocate(ten_MiB)); } // Check the outstanding allocations @@ -199,8 +206,8 @@ TEST(StatisticsTest, NegativeInnerTracking) EXPECT_EQ(inner_mr.get_allocations_counter().value, 5); // Deallocate all allocations using the inner_mr - for (std::size_t i = 0; i < allocations.size(); ++i) { - inner_mr.deallocate(allocations[i], 10_MiB); + for (auto& allocation : allocations) { + inner_mr.deallocate(allocation, ten_MiB); } allocations.clear(); @@ -227,5 +234,4 @@ TEST(StatisticsTest, NegativeInnerTracking) } } // namespace -} // namespace test -} // namespace rmm +} // namespace rmm::test diff --git a/tests/mr/device/stream_allocator_adaptor_tests.cpp b/tests/mr/device/stream_allocator_adaptor_tests.cpp index 616b899f1..669fca5f5 100644 --- a/tests/mr/device/stream_allocator_adaptor_tests.cpp +++ b/tests/mr/device/stream_allocator_adaptor_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,15 +14,16 @@ * limitations under the License. */ -#include - -#include #include #include #include #include #include +#include + +#include + namespace { struct allocator_test : public ::testing::Test { @@ -34,7 +35,7 @@ TEST_F(allocator_test, factory) { using Adaptor = rmm::mr::stream_allocator_adaptor; auto adapted = rmm::mr::make_stream_allocator_adaptor(allocator, stream); - static_assert((std::is_same::value), ""); + static_assert((std::is_same::value)); EXPECT_EQ(adapted.underlying_allocator(), allocator); EXPECT_EQ(adapted.stream(), stream); } @@ -97,21 +98,21 @@ TEST_F(allocator_test, rebind) { auto adapted = rmm::mr::make_stream_allocator_adaptor(allocator, stream); using Rebound = std::allocator_traits::rebind_alloc; - static_assert((std::is_same::value_type, double>::value), ""); + static_assert((std::is_same::value_type, double>::value)); static_assert( std::is_same>>::value, - ""); + rmm::mr::stream_allocator_adaptor>>::value); - Rebound r{adapted}; + Rebound rebound{adapted}; } TEST_F(allocator_test, allocate_deallocate) { auto adapted = rmm::mr::make_stream_allocator_adaptor(allocator, stream); - auto p = adapted.allocate(1000); - EXPECT_NE(p, nullptr); - EXPECT_NO_THROW(adapted.deallocate(p, 1000)); + auto const size{1000}; + auto* ptr = adapted.allocate(size); + EXPECT_NE(ptr, nullptr); + EXPECT_NO_THROW(adapted.deallocate(ptr, size)); } } // namespace diff --git a/tests/mr/device/thrust_allocator_tests.cu b/tests/mr/device/thrust_allocator_tests.cu index 0092f21ba..eabdfe143 100644 --- a/tests/mr/device/thrust_allocator_tests.cu +++ b/tests/mr/device/thrust_allocator_tests.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,13 +14,14 @@ * limitations under the License. */ -#include +#include "mr_test.hpp" + #include #include -#include "mr_test.hpp" -namespace rmm { -namespace test { +#include + +namespace rmm::test { namespace { struct allocator_test : public mr_test { @@ -28,8 +29,9 @@ struct allocator_test : public mr_test { TEST_P(allocator_test, first) { - rmm::device_vector ints(100, 1); - EXPECT_EQ(100, thrust::reduce(ints.begin(), ints.end())); + auto const num_ints{100}; + rmm::device_vector ints(num_ints, 1); + EXPECT_EQ(num_ints, thrust::reduce(ints.begin(), ints.end())); } INSTANTIATE_TEST_CASE_P(ThrustAllocatorTests, @@ -39,6 +41,6 @@ INSTANTIATE_TEST_CASE_P(ThrustAllocatorTests, mr_factory{"Pool", &make_pool}, mr_factory{"Binning", &make_binning}), [](auto const& info) { return info.param.name; }); + } // namespace -} // namespace test -} // namespace rmm +} // namespace rmm::test diff --git a/tests/mr/device/tracking_mr_tests.cpp b/tests/mr/device/tracking_mr_tests.cpp index 8dc666f0c..5926cbf62 100644 --- a/tests/mr/device/tracking_mr_tests.cpp +++ b/tests/mr/device/tracking_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,20 +14,24 @@ * limitations under the License. */ +#include "../../byte_literals.hpp" + #include #include #include #include -#include "mr_test.hpp" #include -namespace rmm { -namespace test { +namespace rmm::test { namespace { using tracking_adaptor = rmm::mr::tracking_resource_adaptor; +constexpr auto num_allocations{10}; +constexpr auto num_more_allocations{5}; +constexpr auto ten_MiB{10_MiB}; + TEST(TrackingTest, ThrowOnNullUpstream) { auto construct_nullptr = []() { tracking_adaptor mr{nullptr}; }; @@ -45,11 +49,12 @@ TEST(TrackingTest, AllFreed) { tracking_adaptor mr{rmm::mr::get_current_device_resource()}; std::vector allocations; - for (int i = 0; i < 10; ++i) { - allocations.push_back(mr.allocate(10_MiB)); + allocations.reserve(num_allocations); + for (int i = 0; i < num_allocations; ++i) { + allocations.push_back(mr.allocate(ten_MiB)); } - for (auto p : allocations) { - mr.deallocate(p, 10_MiB); + for (auto* alloc : allocations) { + mr.deallocate(alloc, ten_MiB); } EXPECT_EQ(mr.get_outstanding_allocations().size(), 0); EXPECT_EQ(mr.get_allocated_bytes(), 0); @@ -59,16 +64,17 @@ TEST(TrackingTest, AllocationsLeftWithStacks) { tracking_adaptor mr{rmm::mr::get_current_device_resource(), true}; std::vector allocations; - for (int i = 0; i < 10; ++i) { - allocations.push_back(mr.allocate(10_MiB)); + allocations.reserve(num_allocations); + for (int i = 0; i < num_allocations; ++i) { + allocations.push_back(mr.allocate(ten_MiB)); } - for (int i = 0; i < 10; i += 2) { - mr.deallocate(allocations[i], 10_MiB); + for (int i = 0; i < num_allocations; i += 2) { + mr.deallocate(allocations[i], ten_MiB); } - EXPECT_EQ(mr.get_outstanding_allocations().size(), 5); - EXPECT_EQ(mr.get_allocated_bytes(), 50_MiB); + EXPECT_EQ(mr.get_outstanding_allocations().size(), num_allocations / 2); + EXPECT_EQ(mr.get_allocated_bytes(), ten_MiB * (num_allocations / 2)); auto const& outstanding_allocations = mr.get_outstanding_allocations(); - EXPECT_EQ(outstanding_allocations.size(), 5); + EXPECT_EQ(outstanding_allocations.size(), num_allocations 
/ 2); EXPECT_NE(outstanding_allocations.begin()->second.strace, nullptr); } @@ -76,16 +82,18 @@ TEST(TrackingTest, AllocationsLeftWithoutStacks) { tracking_adaptor mr{rmm::mr::get_current_device_resource()}; std::vector allocations; - for (int i = 0; i < 10; ++i) { - allocations.push_back(mr.allocate(10_MiB)); + allocations.reserve(num_allocations); + for (int i = 0; i < num_allocations; ++i) { + allocations.push_back(mr.allocate(ten_MiB)); } - for (int i = 0; i < 10; i += 2) { - mr.deallocate(allocations[i], 10_MiB); + + for (int i = 0; i < num_allocations; i += 2) { + mr.deallocate(allocations[i], ten_MiB); } - EXPECT_EQ(mr.get_outstanding_allocations().size(), 5); - EXPECT_EQ(mr.get_allocated_bytes(), 50_MiB); + EXPECT_EQ(mr.get_outstanding_allocations().size(), num_allocations / 2); + EXPECT_EQ(mr.get_allocated_bytes(), ten_MiB * (num_allocations / 2)); auto const& outstanding_allocations = mr.get_outstanding_allocations(); - EXPECT_EQ(outstanding_allocations.size(), 5); + EXPECT_EQ(outstanding_allocations.size(), num_allocations / 2); EXPECT_EQ(outstanding_allocations.begin()->second.strace, nullptr); } @@ -95,27 +103,27 @@ TEST(TrackingTest, MultiTracking) rmm::mr::set_current_device_resource(&mr); std::vector> allocations; - for (std::size_t i = 0; i < 10; ++i) { + for (std::size_t i = 0; i < num_allocations; ++i) { allocations.emplace_back( - std::make_shared(10_MiB, rmm::cuda_stream_default)); + std::make_shared(ten_MiB, rmm::cuda_stream_default)); } - EXPECT_EQ(mr.get_outstanding_allocations().size(), 10); + EXPECT_EQ(mr.get_outstanding_allocations().size(), num_allocations); tracking_adaptor inner_mr{rmm::mr::get_current_device_resource()}; rmm::mr::set_current_device_resource(&inner_mr); - for (std::size_t i = 0; i < 5; ++i) { + for (std::size_t i = 0; i < num_more_allocations; ++i) { allocations.emplace_back( - std::make_shared(10_MiB, rmm::cuda_stream_default)); + std::make_shared(ten_MiB, rmm::cuda_stream_default)); } // Check the allocated bytes for both MRs - EXPECT_EQ(mr.get_outstanding_allocations().size(), 15); - EXPECT_EQ(inner_mr.get_outstanding_allocations().size(), 5); + EXPECT_EQ(mr.get_outstanding_allocations().size(), num_allocations + num_more_allocations); + EXPECT_EQ(inner_mr.get_outstanding_allocations().size(), num_more_allocations); - EXPECT_EQ(mr.get_allocated_bytes(), 150_MiB); - EXPECT_EQ(inner_mr.get_allocated_bytes(), 50_MiB); + EXPECT_EQ(mr.get_allocated_bytes(), ten_MiB * (num_allocations + num_more_allocations)); + EXPECT_EQ(inner_mr.get_allocated_bytes(), ten_MiB * num_more_allocations); EXPECT_GT(mr.get_outstanding_allocations_str().size(), 0); @@ -140,26 +148,26 @@ TEST(TrackingTest, NegativeInnerTracking) // memory pointer tracking_adaptor mr{rmm::mr::get_current_device_resource()}; std::vector allocations; - for (std::size_t i = 0; i < 10; ++i) { - allocations.push_back(mr.allocate(10_MiB)); + for (std::size_t i = 0; i < num_allocations; ++i) { + allocations.push_back(mr.allocate(ten_MiB)); } - EXPECT_EQ(mr.get_outstanding_allocations().size(), 10); + EXPECT_EQ(mr.get_outstanding_allocations().size(), num_allocations); tracking_adaptor inner_mr{&mr}; // Add more allocations - for (std::size_t i = 0; i < 5; ++i) { - allocations.push_back(inner_mr.allocate(10_MiB)); + for (std::size_t i = 0; i < num_more_allocations; ++i) { + allocations.push_back(inner_mr.allocate(ten_MiB)); } // Check the outstanding allocations - EXPECT_EQ(mr.get_outstanding_allocations().size(), 15); - EXPECT_EQ(inner_mr.get_outstanding_allocations().size(), 5); + 
EXPECT_EQ(mr.get_outstanding_allocations().size(), num_allocations + num_more_allocations); + EXPECT_EQ(inner_mr.get_outstanding_allocations().size(), num_more_allocations); // Deallocate all allocations using the inner_mr - for (std::size_t i = 0; i < allocations.size(); ++i) { - inner_mr.deallocate(allocations[i], 10_MiB); + for (auto& allocation : allocations) { + inner_mr.deallocate(allocation, ten_MiB); } allocations.clear(); @@ -172,13 +180,13 @@ TEST(TrackingTest, DeallocWrongBytes) { tracking_adaptor mr{rmm::mr::get_current_device_resource()}; std::vector allocations; - for (std::size_t i = 0; i < 10; ++i) { - allocations.push_back(mr.allocate(10_MiB)); + for (std::size_t i = 0; i < num_allocations; ++i) { + allocations.push_back(mr.allocate(ten_MiB)); } // When deallocating, pass the wrong bytes to deallocate - for (std::size_t i = 0; i < allocations.size(); ++i) { - mr.deallocate(allocations[i], 5_MiB); + for (auto& allocation : allocations) { + mr.deallocate(allocation, ten_MiB / 2); } allocations.clear(); @@ -190,5 +198,4 @@ TEST(TrackingTest, DeallocWrongBytes) } } // namespace -} // namespace test -} // namespace rmm +} // namespace rmm::test From 7ac76e6c79c8e8a418bb391f7c774e60ddd8ce6f Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 13:28:35 +1000 Subject: [PATCH 50/72] tidy host mr tests --- tests/mr/host/mr_tests.cpp | 151 ++++++++++++++++++------------------- 1 file changed, 72 insertions(+), 79 deletions(-) diff --git a/tests/mr/host/mr_tests.cpp b/tests/mr/host/mr_tests.cpp index 442a70ca0..24f52a88a 100644 --- a/tests/mr/host/mr_tests.cpp +++ b/tests/mr/host/mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,36 +14,33 @@ * limitations under the License. */ -#include +#include "../../byte_literals.hpp" +#include #include #include #include #include + +#include + #include #include #include +namespace rmm::test { namespace { -inline bool is_aligned(void* p, std::size_t alignment = alignof(std::max_align_t)) -{ - return (0 == reinterpret_cast(p) % alignment); -} - -inline void expect_aligned(void* p, std::size_t alignment) +inline bool is_aligned(void* ptr, std::size_t alignment = alignof(std::max_align_t)) { - EXPECT_EQ(0, reinterpret_cast(p) % alignment); + return rmm::detail::is_pointer_aligned(ptr, alignment); } -/**---------------------------------------------------------------------------* - * @brief Returns if a pointer points to a device memory or managed memory - * allocation. - *---------------------------------------------------------------------------**/ -inline bool is_device_memory(void* p) +// Returns true if a pointer points to a device memory or managed memory allocation. +inline bool is_device_memory(void* ptr) { cudaPointerAttributes attributes{}; - if (cudaSuccess != cudaPointerGetAttributes(&attributes, p)) { return false; } + if (cudaSuccess != cudaPointerGetAttributes(&attributes, ptr)) { return false; } #if CUDART_VERSION < 10000 // memoryType is deprecated in CUDA 10 return attributes.memoryType == cudaMemoryTypeDevice; #else @@ -54,24 +51,23 @@ inline bool is_device_memory(void* p) /** * @brief Returns if a pointer `p` points to pinned host memory. 
*/ -inline bool is_pinned_memory(void* p) +inline bool is_pinned_memory(void* ptr) { cudaPointerAttributes attributes{}; - if (cudaSuccess != cudaPointerGetAttributes(&attributes, p)) { return false; } + if (cudaSuccess != cudaPointerGetAttributes(&attributes, ptr)) { return false; } return attributes.type == cudaMemoryTypeHost; } -static constexpr std::size_t size_word{4}; -static constexpr std::size_t size_kb{std::size_t{1} << 10}; -static constexpr std::size_t size_mb{std::size_t{1} << 20}; -static constexpr std::size_t size_gb{std::size_t{1} << 30}; -static constexpr std::size_t size_tb{std::size_t{1} << 40}; -static constexpr std::size_t size_pb{std::size_t{1} << 50}; +constexpr std::size_t size_word{4_B}; +constexpr std::size_t size_kb{1_KiB}; +constexpr std::size_t size_mb{1_MiB}; +constexpr std::size_t size_gb{1_GiB}; +constexpr std::size_t size_pb{1_PiB}; struct allocation { - void* p{nullptr}; + void* ptr{nullptr}; std::size_t size{0}; - allocation(void* _p, std::size_t _size) : p{_p}, size{_size} {} + allocation(void* ptr, std::size_t size) : ptr{ptr}, size{size} {} allocation() = default; }; } // namespace @@ -81,7 +77,6 @@ struct MRTest : public ::testing::Test { std::unique_ptr mr; MRTest() : mr{new MemoryResourceType} {} - ~MRTest() = default; }; using resources = ::testing::Types; @@ -92,56 +87,56 @@ TYPED_TEST(MRTest, SelfEquality) { EXPECT_TRUE(this->mr->is_equal(*this->mr)); } TYPED_TEST(MRTest, AllocateZeroBytes) { - void* p{nullptr}; - EXPECT_NO_THROW(p = this->mr->allocate(0)); - EXPECT_NO_THROW(this->mr->deallocate(p, 0)); + void* ptr{nullptr}; + EXPECT_NO_THROW(ptr = this->mr->allocate(0)); + EXPECT_NO_THROW(this->mr->deallocate(ptr, 0)); } TYPED_TEST(MRTest, AllocateWord) { - void* p{nullptr}; - EXPECT_NO_THROW(p = this->mr->allocate(size_word)); - EXPECT_NE(nullptr, p); - EXPECT_TRUE(is_aligned(p)); - EXPECT_FALSE(is_device_memory(p)); - EXPECT_NO_THROW(this->mr->deallocate(p, size_word)); + void* ptr{nullptr}; + EXPECT_NO_THROW(ptr = this->mr->allocate(size_word)); + EXPECT_NE(nullptr, ptr); + EXPECT_TRUE(is_aligned(ptr)); + EXPECT_FALSE(is_device_memory(ptr)); + EXPECT_NO_THROW(this->mr->deallocate(ptr, size_word)); } TYPED_TEST(MRTest, AllocateKB) { - void* p{nullptr}; - EXPECT_NO_THROW(p = this->mr->allocate(size_kb)); - EXPECT_NE(nullptr, p); - EXPECT_TRUE(is_aligned(p)); - EXPECT_FALSE(is_device_memory(p)); - EXPECT_NO_THROW(this->mr->deallocate(p, size_kb)); + void* ptr{nullptr}; + EXPECT_NO_THROW(ptr = this->mr->allocate(size_kb)); + EXPECT_NE(nullptr, ptr); + EXPECT_TRUE(is_aligned(ptr)); + EXPECT_FALSE(is_device_memory(ptr)); + EXPECT_NO_THROW(this->mr->deallocate(ptr, size_kb)); } TYPED_TEST(MRTest, AllocateMB) { - void* p{nullptr}; - EXPECT_NO_THROW(p = this->mr->allocate(size_mb)); - EXPECT_NE(nullptr, p); - EXPECT_TRUE(is_aligned(p)); - EXPECT_FALSE(is_device_memory(p)); - EXPECT_NO_THROW(this->mr->deallocate(p, size_mb)); + void* ptr{nullptr}; + EXPECT_NO_THROW(ptr = this->mr->allocate(size_mb)); + EXPECT_NE(nullptr, ptr); + EXPECT_TRUE(is_aligned(ptr)); + EXPECT_FALSE(is_device_memory(ptr)); + EXPECT_NO_THROW(this->mr->deallocate(ptr, size_mb)); } TYPED_TEST(MRTest, AllocateGB) { - void* p{nullptr}; - EXPECT_NO_THROW(p = this->mr->allocate(size_gb)); - EXPECT_NE(nullptr, p); - EXPECT_TRUE(is_aligned(p)); - EXPECT_FALSE(is_device_memory(p)); - EXPECT_NO_THROW(this->mr->deallocate(p, size_gb)); + void* ptr{nullptr}; + EXPECT_NO_THROW(ptr = this->mr->allocate(size_gb)); + EXPECT_NE(nullptr, ptr); + EXPECT_TRUE(is_aligned(ptr)); + 
EXPECT_FALSE(is_device_memory(ptr)); + EXPECT_NO_THROW(this->mr->deallocate(ptr, size_gb)); } TYPED_TEST(MRTest, AllocateTooMuch) { - void* p{nullptr}; - EXPECT_THROW(p = this->mr->allocate(size_pb), std::bad_alloc); - EXPECT_EQ(nullptr, p); + void* ptr{nullptr}; + EXPECT_THROW(ptr = this->mr->allocate(size_pb), std::bad_alloc); + EXPECT_EQ(nullptr, ptr); } TYPED_TEST(MRTest, RandomAllocations) @@ -156,16 +151,16 @@ TYPED_TEST(MRTest, RandomAllocations) // 100 allocations from [0,5MB) std::for_each( - allocations.begin(), allocations.end(), [&generator, &distribution, this](allocation& a) { - a.size = distribution(generator); - EXPECT_NO_THROW(a.p = this->mr->allocate(a.size)); - EXPECT_NE(nullptr, a.p); - EXPECT_TRUE(is_aligned(a.p)); + allocations.begin(), allocations.end(), [&generator, &distribution, this](allocation& alloc) { + alloc.size = distribution(generator); + EXPECT_NO_THROW(alloc.ptr = this->mr->allocate(alloc.size)); + EXPECT_NE(nullptr, alloc.ptr); + EXPECT_TRUE(is_aligned(alloc.ptr)); }); std::for_each( - allocations.begin(), allocations.end(), [generator, distribution, this](allocation& a) { - EXPECT_NO_THROW(this->mr->deallocate(a.p, a.size)); + allocations.begin(), allocations.end(), [generator, distribution, this](allocation& alloc) { + EXPECT_NO_THROW(this->mr->deallocate(alloc.ptr, alloc.size)); }); } @@ -189,30 +184,27 @@ TYPED_TEST(MRTest, MixedRandomAllocationFree) std::size_t allocation_size = size_distribution(generator); EXPECT_NO_THROW(allocations.emplace_back(this->mr->allocate(allocation_size), allocation_size)); auto new_allocation = allocations.back(); - EXPECT_NE(nullptr, new_allocation.p); - EXPECT_TRUE(is_aligned(new_allocation.p)); + EXPECT_NE(nullptr, new_allocation.ptr); + EXPECT_TRUE(is_aligned(new_allocation.ptr)); bool const free_front{free_distribution(generator) == free_distribution.max()}; if (free_front) { auto front = allocations.front(); - EXPECT_NO_THROW(this->mr->deallocate(front.p, front.size)); + EXPECT_NO_THROW(this->mr->deallocate(front.ptr, front.size)); allocations.pop_front(); } } // free any remaining allocations - for (auto a : allocations) { - EXPECT_NO_THROW(this->mr->deallocate(a.p, a.size)); + for (auto alloc : allocations) { + EXPECT_NO_THROW(this->mr->deallocate(alloc.ptr, alloc.size)); allocations.pop_front(); } } -static constexpr std::size_t MinTestedSize = 32; -static constexpr std::size_t MaxTestedSize = 8 * 1024; -static constexpr std::size_t TestedSizeStep = 1; -static constexpr std::size_t MinTestedAlignment = 16; -static constexpr std::size_t MaxTestedAlignment = 4 * 1024; -static constexpr std::size_t TestedAlignmentMultiplier = 2; +static constexpr std::size_t MinTestedAlignment{16}; +static constexpr std::size_t MaxTestedAlignment{4096}; +static constexpr std::size_t TestedAlignmentMultiplier{2}; static constexpr std::size_t NUM_TRIALS{100}; TYPED_TEST(MRTest, AlignmentTest) @@ -248,7 +240,7 @@ TYPED_TEST(MRTest, UnsupportedAlignmentTest) // alignment of `alignof(std::max_align_t)` auto const bad_alignment = alignment + 1; EXPECT_NO_THROW(ptr = this->mr->allocate(allocation_size, bad_alignment)); - expect_aligned(ptr, alignof(std::max_align_t)); + EXPECT_TRUE(is_aligned(ptr, alignof(std::max_align_t))); EXPECT_NO_THROW(this->mr->deallocate(ptr, allocation_size, bad_alignment)); } } @@ -257,8 +249,9 @@ TYPED_TEST(MRTest, UnsupportedAlignmentTest) TEST(PinnedResource, isPinned) { rmm::mr::pinned_memory_resource mr; - void* p{nullptr}; - EXPECT_NO_THROW(p = mr.allocate(100)); - EXPECT_TRUE(is_pinned_memory(p)); - 
EXPECT_NO_THROW(mr.deallocate(p, 100)); + void* ptr{nullptr}; + EXPECT_NO_THROW(ptr = mr.allocate(100)); + EXPECT_TRUE(is_pinned_memory(ptr)); + EXPECT_NO_THROW(mr.deallocate(ptr, 100)); } +} // namespace rmm::test From 9dcbae6a653f821af22b41e1e1d69df30774c6da Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 13:30:06 +1000 Subject: [PATCH 51/72] copyright --- tests/cuda_stream_pool_tests.cpp | 2 +- tests/device_uvector_tests.cpp | 2 +- tests/logger_tests.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/cuda_stream_pool_tests.cpp b/tests/cuda_stream_pool_tests.cpp index 3f1919600..4fddb2da6 100644 --- a/tests/cuda_stream_pool_tests.cpp +++ b/tests/cuda_stream_pool_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/tests/device_uvector_tests.cpp b/tests/device_uvector_tests.cpp index b3c06885b..09aa7e527 100644 --- a/tests/device_uvector_tests.cpp +++ b/tests/device_uvector_tests.cpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/tests/logger_tests.cpp b/tests/logger_tests.cpp index 498a96722..e0663e84b 100644 --- a/tests/logger_tests.cpp +++ b/tests/logger_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 1cfa4933715f58bf5a595ce6aec3c5dda200bf7d Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 16:25:44 +1000 Subject: [PATCH 52/72] tidy benchmarks and more --- .../cuda_stream_pool_bench.cpp | 22 ++-- .../device_uvector/device_uvector_bench.cu | 30 +++--- .../multi_stream_allocations_bench.cu | 29 ++--- .../random_allocations/random_allocations.cpp | 8 +- benchmarks/replay/replay.cpp | 102 ++++++++++-------- .../synchronization/synchronization.cpp | 5 +- .../synchronization/synchronization.hpp | 14 ++- benchmarks/utilities/log_parser.hpp | 86 +++++++++------ .../utilities/simulated_memory_resource.hpp | 43 ++++---- tests/cuda_stream_tests.cpp | 1 + tests/device_buffer_tests.cu | 31 ++++-- tests/mr/device/mr_multithreaded_tests.cpp | 59 +++++----- tests/mr/host/mr_tests.cpp | 7 +- 13 files changed, 248 insertions(+), 189 deletions(-) diff --git a/benchmarks/cuda_stream_pool/cuda_stream_pool_bench.cpp b/benchmarks/cuda_stream_pool/cuda_stream_pool_bench.cpp index a536077f9..6710ffe50 100644 --- a/benchmarks/cuda_stream_pool/cuda_stream_pool_bench.cpp +++ b/benchmarks/cuda_stream_pool/cuda_stream_pool_bench.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,36 +14,36 @@ * limitations under the License. 
*/ -#include - #include #include #include +#include + #include static void BM_StreamPoolGetStream(benchmark::State& state) { rmm::cuda_stream_pool stream_pool{}; - for (auto _ : state) { - auto s = stream_pool.get_stream(); - cudaStreamQuery(s.value()); + for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores) + auto stream = stream_pool.get_stream(); + cudaStreamQuery(stream.value()); } - state.SetItemsProcessed(state.iterations()); + state.SetItemsProcessed(static_cast(state.iterations())); } BENCHMARK(BM_StreamPoolGetStream)->Unit(benchmark::kMicrosecond); static void BM_CudaStreamClass(benchmark::State& state) { - for (auto _ : state) { - auto s = rmm::cuda_stream{}; - cudaStreamQuery(s.view().value()); + for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores) + auto stream = rmm::cuda_stream{}; + cudaStreamQuery(stream.view().value()); } - state.SetItemsProcessed(state.iterations()); + state.SetItemsProcessed(static_cast(state.iterations())); } BENCHMARK(BM_CudaStreamClass)->Unit(benchmark::kMicrosecond); diff --git a/benchmarks/device_uvector/device_uvector_bench.cu b/benchmarks/device_uvector/device_uvector_bench.cu index 01d81c55d..6665ccaa8 100644 --- a/benchmarks/device_uvector/device_uvector_bench.cu +++ b/benchmarks/device_uvector/device_uvector_bench.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,33 +14,39 @@ * limitations under the License. */ -#include - -#include #include #include #include #include #include +#include + +#include + static void BM_UvectorSizeConstruction(benchmark::State& state) { rmm::mr::cuda_memory_resource cuda_mr{}; rmm::mr::pool_memory_resource mr{&cuda_mr}; rmm::mr::set_current_device_resource(&mr); - for (auto _ : state) { + for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores) rmm::device_uvector vec(state.range(0), rmm::cuda_stream_view{}); cudaDeviceSynchronize(); } - state.SetItemsProcessed(state.iterations()); + state.SetItemsProcessed(static_cast(state.iterations())); rmm::mr::set_current_device_resource(nullptr); } + +const auto range_multiplier{10}; +const auto range_start{10'000}; +const auto range_end{1'000'000'000}; + BENCHMARK(BM_UvectorSizeConstruction) - ->RangeMultiplier(10) - ->Range(10'000, 1'000'000'000) + ->RangeMultiplier(range_multiplier) + ->Range(range_start, range_end) ->Unit(benchmark::kMicrosecond); static void BM_ThrustVectorSizeConstruction(benchmark::State& state) @@ -49,19 +55,19 @@ static void BM_ThrustVectorSizeConstruction(benchmark::State& state) rmm::mr::pool_memory_resource mr{&cuda_mr}; rmm::mr::set_current_device_resource(&mr); - for (auto _ : state) { + for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores) rmm::device_vector vec(state.range(0)); cudaDeviceSynchronize(); } - state.SetItemsProcessed(state.iterations()); + state.SetItemsProcessed(static_cast(state.iterations())); rmm::mr::set_current_device_resource(nullptr); } BENCHMARK(BM_ThrustVectorSizeConstruction) - ->RangeMultiplier(10) - ->Range(10'000, 1'000'000'000) + ->RangeMultiplier(range_multiplier) + ->Range(range_start, range_end) ->Unit(benchmark::kMicrosecond); BENCHMARK_MAIN(); diff --git a/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu b/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu index 7d0a8a17a..9b6210a2d 100644 --- 
a/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu +++ b/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu @@ -16,8 +16,6 @@ #include -#include - #include #include #include @@ -31,15 +29,18 @@ #include +#include + #include __global__ void compute_bound_kernel(int64_t* out) { clock_t clock_begin = clock64(); clock_t clock_current = clock_begin; + auto const million{1'000'000}; - if (threadIdx.x == 0) { - while (clock_current - clock_begin < 1000000) { + if (threadIdx.x == 0) { // NOLINT(readability-static-accessed-through-instance) + while (clock_current - clock_begin < million) { clock_current = clock64(); } } @@ -69,7 +70,7 @@ static void run_test(std::size_t num_kernels, } } -static void BM_MultiStreamAllocations(benchmark::State& state, MRFactoryFunc factory) +static void BM_MultiStreamAllocations(benchmark::State& state, MRFactoryFunc const& factory) { auto mr = factory(); @@ -77,18 +78,18 @@ static void BM_MultiStreamAllocations(benchmark::State& state, MRFactoryFunc fac auto num_streams = state.range(0); auto num_kernels = state.range(1); - auto do_prewarm = state.range(2); + bool do_prewarm = state.range(2) != 0; auto stream_pool = rmm::cuda_stream_pool(num_streams); if (do_prewarm) { run_prewarm(stream_pool, mr.get()); } - for (auto _ : state) { + for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores) run_test(num_kernels, stream_pool, mr.get()); cudaDeviceSynchronize(); } - state.SetItemsProcessed(state.iterations() * num_kernels); + state.SetItemsProcessed(static_cast(state.iterations() * num_kernels)); rmm::mr::set_current_device_resource(nullptr); } @@ -124,7 +125,7 @@ static void benchmark_range(benchmark::internal::Benchmark* b) ->Unit(benchmark::kMicrosecond); } -MRFactoryFunc get_mr_factory(std::string resource_name) +MRFactoryFunc get_mr_factory(std::string const& resource_name) { if (resource_name == "cuda") { return &make_cuda; } #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT @@ -139,7 +140,7 @@ MRFactoryFunc get_mr_factory(std::string resource_name) RMM_FAIL(); } -void declare_benchmark(std::string name) +void declare_benchmark(std::string const& name) { if (name == "cuda") { BENCHMARK_CAPTURE(BM_MultiStreamAllocations, cuda, &make_cuda) // @@ -176,7 +177,7 @@ void declare_benchmark(std::string name) std::cout << "Error: invalid memory_resource name: " << name << std::endl; } -void run_profile(std::string resource_name, int kernel_count, int stream_count, bool prewarm) +void run_profile(std::string const& resource_name, int kernel_count, int stream_count, bool prewarm) { auto mr_factory = get_mr_factory(resource_name); auto mr = mr_factory(); @@ -228,7 +229,11 @@ int main(int argc, char** argv) auto num_kernels = args["kernels"].as(); auto num_streams = args["streams"].as(); auto prewarm = args["warm"].as(); - run_profile(resource_name, num_kernels, num_streams, prewarm); + try { + run_profile(resource_name, num_kernels, num_streams, prewarm); + } catch (std::exception const& e) { + std::cout << "Exception caught: " << e.what() << std::endl; + } } else { auto resource_names = std::vector(); diff --git a/benchmarks/random_allocations/random_allocations.cpp b/benchmarks/random_allocations/random_allocations.cpp index b82a10c89..65abd56b5 100644 --- a/benchmarks/random_allocations/random_allocations.cpp +++ b/benchmarks/random_allocations/random_allocations.cpp @@ -197,7 +197,7 @@ static void BM_RandomAllocations(benchmark::State& state, MRFactoryFunc const& f std::size_t max_size = state.range(1); try { - for (auto _ : state) { 
+ for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores) uniform_random_allocations(*mr, num_allocations, max_size, max_usage); } } catch (std::exception const& e) { @@ -321,7 +321,11 @@ int main(int argc, char** argv) std::cout << "Profiling " << resource << " with " << num_allocations << " allocations of max " << max_size << "B\n"; - profile_random_allocations(funcs.at(resource), num_allocations, max_size); + try { + profile_random_allocations(funcs.at(resource), num_allocations, max_size); + } catch (std::exception const& e) { + std::cout << "Exception caught: " << e.what() << std::endl; + } std::cout << "Finished\n"; } else { diff --git a/benchmarks/replay/replay.cpp b/benchmarks/replay/replay.cpp index 6fbd5f2ab..a1355cdb2 100644 --- a/benchmarks/replay/replay.cpp +++ b/benchmarks/replay/replay.cpp @@ -14,7 +14,6 @@ * limitations under the License. */ -#include #include #include #include @@ -37,6 +36,7 @@ #include +#include #include #include #include @@ -45,7 +45,7 @@ #include /// MR factory functions -std::shared_ptr make_cuda(std::size_t = 0) +std::shared_ptr make_cuda(std::size_t /*unused*/ = 0) { return std::make_shared(); } @@ -75,7 +75,9 @@ inline auto make_binning(std::size_t simulated_size) { auto pool = make_pool(simulated_size); auto mr = rmm::mr::make_owning_wrapper(pool); - for (std::size_t i = 18; i <= 22; i++) { + const auto min_size_exp{18}; + const auto max_size_exp{22}; + for (std::size_t i = min_size_exp; i <= max_size_exp; i++) { mr->wrapped().add_bin(1 << i); } return mr; @@ -89,8 +91,8 @@ using MRFactoryFunc = std::function> const& events) : factory_{std::move(factory)}, simulated_size_{simulated_size}, - mr_{}, events_{events}, - allocation_map{events.size()}, - event_index{0} + allocation_map{events.size()} { } @@ -144,12 +144,14 @@ struct replay_benchmark { simulated_size_{other.simulated_size_}, mr_{std::move(other.mr_)}, events_{other.events_}, - allocation_map{events_.size()}, - event_index{0} + allocation_map{std::move(other.allocation_map)} { } + ~replay_benchmark() = default; replay_benchmark(replay_benchmark const&) = delete; + replay_benchmark& operator=(replay_benchmark const&) = delete; + replay_benchmark& operator=(replay_benchmark&& other) noexcept = delete; /// Add an allocation to the map (NOT thread safe) void set_allocation(uintptr_t ptr, allocation alloc) { allocation_map.insert({ptr, alloc}); } @@ -159,9 +161,9 @@ struct replay_benchmark { { auto iter = allocation_map.find(ptr); if (iter != allocation_map.end()) { - allocation a = iter->second; + allocation alloc = iter->second; allocation_map.erase(iter); - return a; + return alloc; } return allocation{}; } @@ -187,11 +189,12 @@ struct replay_benchmark { auto alloc = ptr_alloc.second; num_leaked++; total_leaked += alloc.size; - mr_->deallocate(alloc.p, alloc.size); + mr_->deallocate(alloc.ptr, alloc.size); } - if (num_leaked > 0) + if (num_leaked > 0) { std::cout << "LOG shows leak of " << num_leaked << " allocations of " << total_leaked << " total bytes\n"; + } allocation_map.clear(); mr_.reset(); } @@ -204,20 +207,20 @@ struct replay_benchmark { auto const& my_events = events_.at(state.thread_index); - for (auto _ : state) { - std::for_each(my_events.begin(), my_events.end(), [&state, this](auto e) { + for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores) + std::for_each(my_events.begin(), my_events.end(), [this](auto event) { // ensure correct ordering between threads std::unique_lock lock{event_mutex}; - if (event_index != e.index) { - cv.wait(lock, [&]() { 
return event_index == e.index; }); + if (event_index != event.index) { + cv.wait(lock, [&]() { return event_index == event.index; }); } - if (rmm::detail::action::ALLOCATE == e.act) { - auto p = mr_->allocate(e.size); - set_allocation(e.pointer, allocation{p, e.size}); + if (rmm::detail::action::ALLOCATE == event.act) { + auto ptr = mr_->allocate(event.size); + set_allocation(event.pointer, allocation{ptr, event.size}); } else { - auto a = remove_allocation(e.pointer); - mr_->deallocate(a.p, e.size); + auto alloc = remove_allocation(event.pointer); + mr_->deallocate(alloc.p, event.size); } event_index++; @@ -242,11 +245,11 @@ std::vector> parse_per_thread_events(std::string RMM_EXPECTS(std::all_of(all_events.begin(), all_events.end(), - [](auto const& e) { - cudaStream_t cs; - memcpy(&cs, &e.stream, sizeof(cudaStream_t)); - auto s = rmm::cuda_stream_view{cs}; - return s.is_default() or s.is_per_thread_default(); + [](auto const& event) { + cudaStream_t custream; + memcpy(&custream, &event.stream, sizeof(cudaStream_t)); + auto stream = rmm::cuda_stream_view{custream}; + return stream.is_default() or stream.is_per_thread_default(); }), "Non-default streams not currently supported."); @@ -294,28 +297,29 @@ void declare_benchmark(std::string const& name, std::vector> const& per_thread_events, std::size_t num_threads) { - if (name == "cuda") + if (name == "cuda") { benchmark::RegisterBenchmark("CUDA Resource", replay_benchmark(&make_cuda, simulated_size, per_thread_events)) ->Unit(benchmark::kMillisecond) - ->Threads(num_threads); - else if (name == "binning") + ->Threads(static_cast(num_threads)); + } else if (name == "binning") { benchmark::RegisterBenchmark("Binning Resource", replay_benchmark(&make_binning, simulated_size, per_thread_events)) ->Unit(benchmark::kMillisecond) - ->Threads(num_threads); - else if (name == "pool") + ->Threads(static_cast(num_threads)); + } else if (name == "pool") { benchmark::RegisterBenchmark("Pool Resource", replay_benchmark(&make_pool, simulated_size, per_thread_events)) ->Unit(benchmark::kMillisecond) - ->Threads(num_threads); - else if (name == "arena") + ->Threads(static_cast(num_threads)); + } else if (name == "arena") { benchmark::RegisterBenchmark("Arena Resource", replay_benchmark(&make_arena, simulated_size, per_thread_events)) ->Unit(benchmark::kMillisecond) - ->Threads(num_threads); - else + ->Threads(static_cast(num_threads)); + } else { std::cout << "Error: invalid memory_resource name: " << name << "\n"; + } } // Usage: REPLAY_BENCHMARK -f "path/to/log/file" @@ -355,14 +359,22 @@ int main(int argc, char** argv) auto filename = args["file"].as(); - auto per_thread_events = parse_per_thread_events(filename); + auto per_thread_events = [filename]() { + try { + auto events = parse_per_thread_events(filename); + return events; + } catch (std::exception const& e) { + std::cout << "Failed to parse events: " << e.what() << std::endl; + return std::vector>{}; + } + }(); #ifdef CUDA_API_PER_THREAD_DEFAULT_STREAM std::cout << "Using CUDA per-thread default stream.\n"; #endif auto const simulated_size = - static_cast(args["size"].as() * static_cast(1u << 30u)); + static_cast(args["size"].as() * static_cast(1U << 30U)); if (simulated_size != 0 && args["resource"].as() != "cuda") { std::cout << "Simulating GPU with memory size of " << simulated_size << " bytes.\n"; } @@ -375,11 +387,11 @@ int main(int argc, char** argv) [](std::size_t accum, auto const& events) { return accum + events.size(); }) << std::endl; - for (std::size_t t = 0; t < 
per_thread_events.size(); ++t) { - std::cout << "Thread " << t << ": " << per_thread_events[t].size() << " events\n"; + for (std::size_t thread = 0; thread < per_thread_events.size(); ++thread) { + std::cout << "Thread " << thread << ": " << per_thread_events[thread].size() << " events\n"; if (args["verbose"].as()) { - for (auto const& e : per_thread_events[t]) { - std::cout << e << std::endl; + for (auto const& event : per_thread_events[thread]) { + std::cout << event << std::endl; } } } @@ -396,8 +408,8 @@ int main(int argc, char** argv) std::array mrs{"pool", "arena", "binning", "cuda"}; std::for_each(std::cbegin(mrs), std::cend(mrs), - [&simulated_size, &per_thread_events, &num_threads](auto const& s) { - declare_benchmark(s, simulated_size, per_thread_events, num_threads); + [&simulated_size, &per_thread_events, &num_threads](auto const& mr) { + declare_benchmark(mr, simulated_size, per_thread_events, num_threads); }); } diff --git a/benchmarks/synchronization/synchronization.cpp b/benchmarks/synchronization/synchronization.cpp index 5db8c4a3e..9e048b285 100644 --- a/benchmarks/synchronization/synchronization.cpp +++ b/benchmarks/synchronization/synchronization.cpp @@ -59,9 +59,10 @@ cuda_event_timer::~cuda_event_timer() RMM_CUDA_ASSERT_OK(cudaEventRecord(stop, stream.value())); RMM_CUDA_ASSERT_OK(cudaEventSynchronize(stop)); - float milliseconds = 0.0f; + float milliseconds = 0.0F; RMM_CUDA_ASSERT_OK(cudaEventElapsedTime(&milliseconds, start, stop)); - p_state->SetIterationTime(milliseconds / (1000.0f)); + const auto to_milliseconds{1.0F / 1000}; + p_state->SetIterationTime(milliseconds * to_milliseconds); RMM_CUDA_ASSERT_OK(cudaEventDestroy(start)); RMM_CUDA_ASSERT_OK(cudaEventDestroy(stop)); } diff --git a/benchmarks/synchronization/synchronization.hpp b/benchmarks/synchronization/synchronization.hpp index 6c2298575..b0007d9b2 100644 --- a/benchmarks/synchronization/synchronization.hpp +++ b/benchmarks/synchronization/synchronization.hpp @@ -89,9 +89,15 @@ class cuda_event_timer { // will be set to the value given by `cudaEventElapsedTime`. ~cuda_event_timer(); + // disable copy and move + cuda_event_timer(cuda_event_timer const&) = delete; + cuda_event_timer& operator=(cuda_event_timer const&) = delete; + cuda_event_timer(cuda_event_timer&&) = delete; + cuda_event_timer& operator=(cuda_event_timer&&) = delete; + private: - cudaEvent_t start; - cudaEvent_t stop; - rmm::cuda_stream_view stream; - benchmark::State* p_state; + cudaEvent_t start{}; + cudaEvent_t stop{}; + rmm::cuda_stream_view stream{}; + benchmark::State* p_state{}; }; diff --git a/benchmarks/utilities/log_parser.hpp b/benchmarks/utilities/log_parser.hpp index f4bbdbbc8..db939e65f 100644 --- a/benchmarks/utilities/log_parser.hpp +++ b/benchmarks/utilities/log_parser.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -31,8 +31,7 @@ #include #include -namespace rmm { -namespace detail { +namespace rmm::detail { enum class action : bool { ALLOCATE, FREE }; @@ -43,41 +42,57 @@ enum class action : bool { ALLOCATE, FREE }; struct event { event() = default; event(event const&) = default; - event(action a, std::size_t s, void const* p) - : act{a}, size{s}, pointer{reinterpret_cast(p)} + event& operator=(event const&) = default; + event(event&&) noexcept = default; + event& operator=(event&&) noexcept = default; + ~event() = default; + event(action act, std::size_t size, void const* ptr) + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + : act{act}, size{size}, pointer{reinterpret_cast(ptr)} { } - event(action a, std::size_t s, uintptr_t p) : act{a}, size{s}, pointer{p} {} + // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) + event(action act, std::size_t size, uintptr_t ptr) : act{act}, size{size}, pointer{ptr} {} - event(std::size_t tid, action a, std::size_t sz, uintptr_t p, uintptr_t s, std::size_t i) - : act{a}, size{sz}, pointer{p}, thread_id{tid}, stream{s}, index{i} + event(std::size_t tid, + action act, + std::size_t size, // NOLINT(bugprone-easily-swappable-parameters) + uintptr_t ptr, + uintptr_t stream, + std::size_t index) + : act{act}, size{size}, pointer{ptr}, thread_id{tid}, stream{stream}, index{index} { } - event(std::size_t tid, action a, std::size_t sz, void* p, uintptr_t s, std::size_t i) - : event{tid, a, sz, reinterpret_cast(p), s, i} + event( + std::size_t tid, action act, std::size_t size, void* ptr, uintptr_t stream, std::size_t index) + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + : event{tid, act, size, reinterpret_cast(ptr), stream, index} { } - friend std::ostream& operator<<(std::ostream& os, event const& e); + friend std::ostream& operator<<(std::ostream& os, event const& evt); - action act{}; ///< Indicates if the event is an allocation or a free - std::size_t size{}; ///< The size of the memory allocated or freed - uintptr_t pointer{}; ///< The pointer returned from an allocation, or the - ///< pointer freed - std::size_t thread_id; ///< ID of the thread that initiated the event - uintptr_t stream; ///< Numeric representation of the CUDA stream on which the event occurred - std::size_t index; ///< Original ordering index of the event + action act{}; ///< Indicates if the event is an allocation or a free + std::size_t size{}; ///< The size of the memory allocated or freed + uintptr_t pointer{}; ///< The pointer returned from an allocation, or the + ///< pointer freed + std::size_t thread_id{}; ///< ID of the thread that initiated the event + uintptr_t stream{}; ///< Numeric representation of the CUDA stream on which the event occurred + std::size_t index{}; ///< Original ordering index of the event }; -inline std::ostream& operator<<(std::ostream& os, event const& e) +inline std::ostream& operator<<(std::ostream& os, event const& evt) { - auto act_string = (e.act == action::ALLOCATE) ? "allocate" : "free"; + const auto* act_string = (evt.act == action::ALLOCATE) ? 
"allocate" : "free"; - os << "Thread: " << e.thread_id << std::setw(9) << act_string - << " Size: " << std::setw(std::numeric_limits::digits10) << e.size << " Pointer: " - << "0x" << std::hex << e.pointer << std::dec << " Stream: " << e.stream; + const auto format_width{9}; + + os << "Thread: " << evt.thread_id << std::setw(format_width) << act_string + << " Size: " << std::setw(std::numeric_limits::digits10) << evt.size + << " Pointer: " + << "0x" << std::hex << evt.pointer << std::dec << " Stream: " << evt.stream; return os; } @@ -105,11 +120,12 @@ inline std::chrono::time_point parse_time(std::string int seconds = std::stoi(str_time.substr(previous, current - previous)); int microseconds = std::stoi(str_time.substr(current + 1, str_time.length())); - std::tm tm{seconds, minutes, hours, 1, 0, 1970, 0, 0, 0}; + auto const epoch_year{1970}; + std::tm time{seconds, minutes, hours, 1, 0, epoch_year, 0, 0, 0}; - auto tp = std::chrono::system_clock::from_time_t(std::mktime(&tm)); - tp += std::chrono::microseconds{microseconds}; - return tp; + auto timepoint = std::chrono::system_clock::from_time_t(std::mktime(&time)); + timepoint += std::chrono::microseconds{microseconds}; + return timepoint; } /** @@ -128,8 +144,9 @@ inline std::vector parse_csv(std::string const& filename) std::vector tids = csv.GetColumn("Thread"); std::vector actions = csv.GetColumn("Action"); - auto parse_pointer = [](std::string const& s, uintptr_t& ptr) { - ptr = std::stoll(s, nullptr, 16); + auto parse_pointer = [](std::string const& str, uintptr_t& ptr) { + auto const base{16}; + ptr = std::stoll(str, nullptr, base); }; std::vector pointers = csv.GetColumn("Pointer", parse_pointer); @@ -140,19 +157,18 @@ inline std::vector parse_csv(std::string const& filename) RMM_EXPECTS(std::all_of(std::begin(size_list), std::end(size_list), - [size = sizes.size()](auto i) { return i == size; }), + [size = sizes.size()](auto val) { return val == size; }), "Size mismatch in columns of parsed log."); std::vector events(sizes.size()); for (std::size_t i = 0; i < actions.size(); ++i) { - auto const& a = actions[i]; - RMM_EXPECTS((a == "allocate") or (a == "free"), "Invalid action string."); - auto act = (a == "allocate") ? action::ALLOCATE : action::FREE; + auto const& action = actions[i]; + RMM_EXPECTS((action == "allocate") or (action == "free"), "Invalid action string."); + auto act = (action == "allocate") ? action::ALLOCATE : action::FREE; events[i] = event{tids[i], act, sizes[i], pointers[i], streams[i], i}; } return events; } -} // namespace detail -} // namespace rmm +} // namespace rmm::detail diff --git a/benchmarks/utilities/simulated_memory_resource.hpp b/benchmarks/utilities/simulated_memory_resource.hpp index 67883ad5d..44ee4798c 100644 --- a/benchmarks/utilities/simulated_memory_resource.hpp +++ b/benchmarks/utilities/simulated_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,8 +20,7 @@ #include -namespace rmm { -namespace mr { +namespace rmm::mr { /** * @brief A device memory resource that simulates a fix-sized GPU. @@ -39,14 +38,18 @@ class simulated_memory_resource final : public device_memory_resource { * @param memory_size_bytes The size of the memory to simulate. 
*/ explicit simulated_memory_resource(std::size_t memory_size_bytes) - : begin_{reinterpret_cast(0x100)}, - end_{reinterpret_cast(begin_ + memory_size_bytes)} + : begin_{reinterpret_cast(0x100)}, // NOLINT + end_{reinterpret_cast(begin_ + memory_size_bytes)} // NOLINT { } + ~simulated_memory_resource() override = default; + // Disable copy (and move) semantics. simulated_memory_resource(simulated_memory_resource const&) = delete; simulated_memory_resource& operator=(simulated_memory_resource const&) = delete; + simulated_memory_resource(simulated_memory_resource&&) = delete; + simulated_memory_resource& operator=(simulated_memory_resource&&) = delete; /** * @brief Query whether the resource supports use of non-null CUDA streams for @@ -54,14 +57,14 @@ class simulated_memory_resource final : public device_memory_resource { * * @returns bool false */ - bool supports_streams() const noexcept override { return false; } + [[nodiscard]] bool supports_streams() const noexcept override { return false; } /** * @brief Query whether the resource supports the get_mem_info API. * * @return false */ - bool supports_get_mem_info() const noexcept override { return false; } + [[nodiscard]] bool supports_get_mem_info() const noexcept override { return false; } private: /** @@ -74,24 +77,25 @@ class simulated_memory_resource final : public device_memory_resource { * @param bytes The size, in bytes, of the allocation * @return void* Pointer to the newly allocated memory */ - void* do_allocate(std::size_t bytes, cuda_stream_view) override + void* do_allocate(std::size_t bytes, cuda_stream_view /*stream*/) override { + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) RMM_EXPECTS(begin_ + bytes <= end_, rmm::bad_alloc, "Simulated memory size exceeded"); - auto p = static_cast(begin_); - begin_ += bytes; - return p; + auto* ptr = static_cast(begin_); + begin_ += bytes; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) + return ptr; } /** - * @brief Deallocate memory pointed to by \p p. + * @brief Deallocate memory pointed to by `p`. * * @note This call is ignored. * * @throws Nothing. * - * @param p Pointer to be deallocated + * @param ptr Pointer to be deallocated */ - void do_deallocate(void* p, std::size_t, cuda_stream_view) override {} + void do_deallocate(void* ptr, std::size_t /*bytes*/, cuda_stream_view /*stream*/) override {} /** * @brief Get free and available memory for memory resource. @@ -99,14 +103,13 @@ class simulated_memory_resource final : public device_memory_resource { * @param stream to execute on. * @return std::pair containing free_size and total_size of memory. 
*/ - std::pair do_get_mem_info(cuda_stream_view stream) const override + [[nodiscard]] std::pair do_get_mem_info( + cuda_stream_view stream) const override { return std::make_pair(0, 0); } - private: - char* begin_; - char* end_; + char* begin_{}; + char* end_{}; }; -} // namespace mr -} // namespace rmm +} // namespace rmm::mr diff --git a/tests/cuda_stream_tests.cpp b/tests/cuda_stream_tests.cpp index 55e3185fe..96cae868e 100644 --- a/tests/cuda_stream_tests.cpp +++ b/tests/cuda_stream_tests.cpp @@ -47,6 +47,7 @@ TEST_F(CudaStreamTest, MoveConstructor) rmm::cuda_stream stream_a; auto const view_a = stream_a.view(); rmm::cuda_stream stream_b = std::move(stream_a); + // NOLINTNEXTLINE(bugprone-use-after-move) EXPECT_FALSE(stream_a.is_valid()); // Any other operations on stream_a are UB, may segfault EXPECT_EQ(stream_b, view_a); } diff --git a/tests/device_buffer_tests.cu b/tests/device_buffer_tests.cu index 63841a67e..ff71dfba1 100644 --- a/tests/device_buffer_tests.cu +++ b/tests/device_buffer_tests.cu @@ -154,6 +154,7 @@ TYPED_TEST(DeviceBufferTest, CopyConstructor) // Initialize buffer thrust::sequence(rmm::exec_policy(rmm::cuda_stream_default), static_cast(buff.data()), + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) static_cast(buff.data()) + buff.size(), 0); @@ -168,6 +169,7 @@ TYPED_TEST(DeviceBufferTest, CopyConstructor) EXPECT_TRUE(thrust::equal(rmm::exec_policy(rmm::cuda_stream_default), static_cast(buff.data()), + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) static_cast(buff.data()) + buff.size(), static_cast(buff_copy.data()))); @@ -179,6 +181,7 @@ TYPED_TEST(DeviceBufferTest, CopyConstructor) EXPECT_TRUE(thrust::equal(rmm::exec_policy(rmm::cuda_stream_default), static_cast(buff.data()), + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) static_cast(buff.data()) + buff.size(), static_cast(buff_copy.data()))); } @@ -193,6 +196,7 @@ TYPED_TEST(DeviceBufferTest, CopyCapacityLargerThanSize) thrust::sequence(rmm::exec_policy(rmm::cuda_stream_default), static_cast(buff.data()), + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) static_cast(buff.data()) + buff.size(), 0); rmm::device_buffer buff_copy(buff, rmm::cuda_stream_default); @@ -208,6 +212,7 @@ TYPED_TEST(DeviceBufferTest, CopyCapacityLargerThanSize) EXPECT_TRUE(thrust::equal(rmm::exec_policy(rmm::cuda_stream_default), static_cast(buff.data()), + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) static_cast(buff.data()) + buff.size(), static_cast(buff_copy.data()))); } @@ -218,6 +223,7 @@ TYPED_TEST(DeviceBufferTest, CopyConstructorExplicitMr) thrust::sequence(rmm::exec_policy(rmm::cuda_stream_default), static_cast(buff.data()), + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) static_cast(buff.data()) + buff.size(), 0); rmm::device_buffer buff_copy(buff, this->stream, &this->mr); @@ -231,6 +237,7 @@ TYPED_TEST(DeviceBufferTest, CopyConstructorExplicitMr) EXPECT_TRUE(thrust::equal(rmm::exec_policy(buff_copy.stream()), static_cast(buff.data()), + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) static_cast(buff.data()) + buff.size(), static_cast(buff_copy.data()))); } @@ -245,6 +252,7 @@ TYPED_TEST(DeviceBufferTest, CopyCapacityLargerThanSizeExplicitMr) thrust::sequence(rmm::exec_policy(rmm::cuda_stream_default), static_cast(buff.data()), + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) static_cast(buff.data()) + buff.size(), 0); rmm::device_buffer buff_copy(buff, this->stream, 
&this->mr); @@ -261,6 +269,7 @@ TYPED_TEST(DeviceBufferTest, CopyCapacityLargerThanSizeExplicitMr) EXPECT_TRUE(thrust::equal(rmm::exec_policy(buff_copy.stream()), static_cast(buff.data()), + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) static_cast(buff.data()) + buff.size(), static_cast(buff_copy.data()))); } @@ -284,11 +293,11 @@ TYPED_TEST(DeviceBufferTest, MoveConstructor) EXPECT_EQ(mr, buff_new.memory_resource()); // Original buffer should be empty - EXPECT_EQ(nullptr, buff.data()); - EXPECT_EQ(0, buff.size()); - EXPECT_EQ(0, buff.capacity()); - EXPECT_EQ(rmm::cuda_stream_default, buff.stream()); - EXPECT_NE(nullptr, buff.memory_resource()); + EXPECT_EQ(nullptr, buff.data()); // NOLINT(bugprone-use-after-move) + EXPECT_EQ(0, buff.size()); // NOLINT(bugprone-use-after-move) + EXPECT_EQ(0, buff.capacity()); // NOLINT(bugprone-use-after-move) + EXPECT_EQ(rmm::cuda_stream_default, buff.stream()); // NOLINT(bugprone-use-after-move) + EXPECT_NE(nullptr, buff.memory_resource()); // NOLINT(bugprone-use-after-move) } TYPED_TEST(DeviceBufferTest, MoveConstructorStream) @@ -312,11 +321,11 @@ TYPED_TEST(DeviceBufferTest, MoveConstructorStream) EXPECT_EQ(mr, buff_new.memory_resource()); // Original buffer should be empty - EXPECT_EQ(nullptr, buff.data()); - EXPECT_EQ(0, buff.size()); - EXPECT_EQ(0, buff.capacity()); - EXPECT_EQ(rmm::cuda_stream_view{}, buff.stream()); - EXPECT_NE(nullptr, buff.memory_resource()); + EXPECT_EQ(nullptr, buff.data()); // NOLINT(bugprone-use-after-move) + EXPECT_EQ(0, buff.size()); // NOLINT(bugprone-use-after-move) + EXPECT_EQ(0, buff.capacity()); // NOLINT(bugprone-use-after-move) + EXPECT_EQ(rmm::cuda_stream_view{}, buff.stream()); // NOLINT(bugprone-use-after-move) + EXPECT_NE(nullptr, buff.memory_resource()); // NOLINT(bugprone-use-after-move) } TYPED_TEST(DeviceBufferTest, MoveAssignmentToDefault) @@ -399,6 +408,7 @@ TYPED_TEST(DeviceBufferTest, ResizeSmaller) thrust::sequence(rmm::exec_policy(rmm::cuda_stream_default), static_cast(buff.data()), + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) static_cast(buff.data()) + buff.size(), 0); @@ -422,6 +432,7 @@ TYPED_TEST(DeviceBufferTest, ResizeSmaller) EXPECT_TRUE(thrust::equal(rmm::exec_policy(rmm::cuda_stream_default), static_cast(buff.data()), + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) static_cast(buff.data()) + buff.size(), static_cast(old_content.data()))); } diff --git a/tests/mr/device/mr_multithreaded_tests.cpp b/tests/mr/device/mr_multithreaded_tests.cpp index 4bacb208f..838035d9f 100644 --- a/tests/mr/device/mr_multithreaded_tests.cpp +++ b/tests/mr/device/mr_multithreaded_tests.cpp @@ -76,8 +76,7 @@ TEST(DefaultTest, CurrentDeviceResourceIsCUDA_mt) TEST(DefaultTest, GetCurrentDeviceResource_mt) { spawn([]() { - rmm::mr::device_memory_resource* mr{nullptr}; - EXPECT_NO_THROW(mr = rmm::mr::get_current_device_resource()); + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(); EXPECT_NE(nullptr, mr); EXPECT_TRUE(mr->is_equal(rmm::mr::cuda_memory_resource{})); }); @@ -87,8 +86,7 @@ TEST_P(mr_test_mt, SetCurrentDeviceResource_mt) { // single thread changes default resource, then multiple threads use it - rmm::mr::device_memory_resource* old{nullptr}; - EXPECT_NO_THROW(old = rmm::mr::set_current_device_resource(this->mr.get())); + rmm::mr::device_memory_resource* old = rmm::mr::set_current_device_resource(this->mr.get()); EXPECT_NE(nullptr, old); spawn([mr = this->mr.get()]() { @@ -97,7 +95,7 @@ TEST_P(mr_test_mt, 
SetCurrentDeviceResource_mt) }); // setting default resource w/ nullptr should reset to initial - EXPECT_NO_THROW(rmm::mr::set_current_device_resource(nullptr)); + rmm::mr::set_current_device_resource(nullptr); EXPECT_TRUE(old->is_equal(*rmm::mr::get_current_device_resource())); } @@ -109,25 +107,25 @@ TEST_P(mr_test_mt, SetCurrentDeviceResourcePerThread_mt) std::vector threads; threads.reserve(num_devices); for (int i = 0; i < num_devices; ++i) { - threads.emplace_back(std::thread{ - [mr = this->mr.get()](auto dev_id) { - RMM_CUDA_TRY(cudaSetDevice(dev_id)); - rmm::mr::device_memory_resource* old{}; - EXPECT_NO_THROW(old = rmm::mr::set_current_device_resource(mr)); - EXPECT_NE(nullptr, old); - // initial resource for this device should be CUDA mr - EXPECT_TRUE(old->is_equal(rmm::mr::cuda_memory_resource{})); - // get_current_device_resource should equal the resource we just set - EXPECT_EQ(mr, rmm::mr::get_current_device_resource()); - // Setting current dev resource to nullptr should reset to cuda MR and return the MR we - // previously set - EXPECT_NO_THROW(old = rmm::mr::set_current_device_resource(nullptr)); - EXPECT_NE(nullptr, old); - EXPECT_EQ(old, mr); - EXPECT_TRUE( - rmm::mr::get_current_device_resource()->is_equal(rmm::mr::cuda_memory_resource{})); - }, - i}); + threads.emplace_back(std::thread{[mr = this->mr.get()](auto dev_id) { + RMM_CUDA_TRY(cudaSetDevice(dev_id)); + rmm::mr::device_memory_resource* old = + rmm::mr::set_current_device_resource(mr); + EXPECT_NE(nullptr, old); + // initial resource for this device should be CUDA mr + EXPECT_TRUE(old->is_equal(rmm::mr::cuda_memory_resource{})); + // get_current_device_resource should equal the resource we + // just set + EXPECT_EQ(mr, rmm::mr::get_current_device_resource()); + // Setting current dev resource to nullptr should reset to + // cuda MR and return the MR we previously set + old = rmm::mr::set_current_device_resource(nullptr); + EXPECT_NE(nullptr, old); + EXPECT_EQ(old, mr); + EXPECT_TRUE(rmm::mr::get_current_device_resource()->is_equal( + rmm::mr::cuda_memory_resource{})); + }, + i}); } for (auto& thread : threads) { @@ -187,8 +185,7 @@ void allocate_loop(rmm::mr::device_memory_resource* mr, for (std::size_t i = 0; i < num_allocations; ++i) { std::size_t size = size_distribution(generator); - void* ptr{}; - EXPECT_NO_THROW(ptr = mr->allocate(size, stream)); + void* ptr = mr->allocate(size, stream); { std::lock_guard lock(mtx); allocations.emplace_back(ptr, size); @@ -208,7 +205,7 @@ void deallocate_loop(rmm::mr::device_memory_resource* mr, i++; allocation alloc = allocations.front(); allocations.pop_front(); - EXPECT_NO_THROW(mr->deallocate(alloc.ptr, alloc.size, stream)); + mr->deallocate(alloc.ptr, alloc.size, stream); } } @@ -250,11 +247,9 @@ TEST_P(mr_test_mt, AllocFreeDifferentThreadsSameStream) TEST_P(mr_test_mt, AllocFreeDifferentThreadsDifferentStream) { - EXPECT_NO_THROW([this]() { - rmm::cuda_stream streamB; - test_allocate_free_different_threads(this->mr.get(), this->stream, streamB); - streamB.synchronize(); - }()); + rmm::cuda_stream streamB; + test_allocate_free_different_threads(this->mr.get(), this->stream, streamB); + streamB.synchronize(); } } // namespace diff --git a/tests/mr/host/mr_tests.cpp b/tests/mr/host/mr_tests.cpp index 24f52a88a..1cd59f5a6 100644 --- a/tests/mr/host/mr_tests.cpp +++ b/tests/mr/host/mr_tests.cpp @@ -158,10 +158,9 @@ TYPED_TEST(MRTest, RandomAllocations) EXPECT_TRUE(is_aligned(alloc.ptr)); }); - std::for_each( - allocations.begin(), allocations.end(), [generator, 
distribution, this](allocation& alloc) { - EXPECT_NO_THROW(this->mr->deallocate(alloc.ptr, alloc.size)); - }); + std::for_each(allocations.begin(), allocations.end(), [this](allocation& alloc) { + EXPECT_NO_THROW(this->mr->deallocate(alloc.ptr, alloc.size)); + }); } TYPED_TEST(MRTest, MixedRandomAllocationFree) From 41790ffc71be393875fb0f788011311286845f58 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 16:27:13 +1000 Subject: [PATCH 53/72] p->ptr --- benchmarks/replay/replay.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/replay/replay.cpp b/benchmarks/replay/replay.cpp index a1355cdb2..4e9bbffcf 100644 --- a/benchmarks/replay/replay.cpp +++ b/benchmarks/replay/replay.cpp @@ -220,7 +220,7 @@ struct replay_benchmark { set_allocation(event.pointer, allocation{ptr, event.size}); } else { auto alloc = remove_allocation(event.pointer); - mr_->deallocate(alloc.p, event.size); + mr_->deallocate(alloc.ptr, event.size); } event_index++; From 514a4f180d0d4c47f15328e0d3fb5a8cf95083aa Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 16:34:17 +1000 Subject: [PATCH 54/72] Revert threshold size type --- include/rmm/mr/device/cuda_async_memory_resource.hpp | 8 ++++---- tests/mr/device/cuda_async_mr_tests.cpp | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/rmm/mr/device/cuda_async_memory_resource.hpp b/include/rmm/mr/device/cuda_async_memory_resource.hpp index 19d52b16b..13eb99ec8 100644 --- a/include/rmm/mr/device/cuda_async_memory_resource.hpp +++ b/include/rmm/mr/device/cuda_async_memory_resource.hpp @@ -40,7 +40,6 @@ namespace rmm::mr { */ class cuda_async_memory_resource final : public device_memory_resource { public: - enum release_threshold_size_type : std::size_t {}; /** * @brief Constructs a cuda_async_memory_resource with the optionally specified initial pool size * and release threshold. @@ -55,8 +54,9 @@ class cuda_async_memory_resource final : public device_memory_resource { * @param release_threshold Optional release threshold size in bytes of the pool. If no value is * provided, the release threshold is set to the total amount of memory on the current device. 
*/ - cuda_async_memory_resource(thrust::optional initial_pool_size = {}, - thrust::optional release_threshold = {}) + // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) + cuda_async_memory_resource(thrust::optional initial_pool_size = {}, + thrust::optional release_threshold = {}) { #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT // Check if cudaMallocAsync Memory pool supported @@ -78,7 +78,7 @@ class cuda_async_memory_resource final : public device_memory_resource { auto const [free, total] = rmm::detail::available_device_memory(); // Need an l-value to take address to pass to cudaMemPoolSetAttribute - uint64_t threshold = release_threshold.value_or(release_threshold_size_type{total}); + uint64_t threshold = release_threshold.value_or(total); RMM_CUDA_TRY( cudaMemPoolSetAttribute(cuda_pool_handle_, cudaMemPoolAttrReleaseThreshold, &threshold)); diff --git a/tests/mr/device/cuda_async_mr_tests.cpp b/tests/mr/device/cuda_async_mr_tests.cpp index 5a507162c..4bf0c3d5b 100644 --- a/tests/mr/device/cuda_async_mr_tests.cpp +++ b/tests/mr/device/cuda_async_mr_tests.cpp @@ -47,7 +47,7 @@ TEST(PoolTest, ExplicitInitialPoolSize) TEST(PoolTest, ExplicitReleaseThreshold) { const auto pool_init_size{100}; - const auto pool_release_threshold = cuda_async_mr::release_threshold_size_type{1000}; + const auto pool_release_threshold{1000}; cuda_async_mr mr{pool_init_size, pool_release_threshold}; void* ptr = mr.allocate(pool_init_size); mr.deallocate(ptr, pool_init_size); From d3d2b08d9d3eddcfbb30760e7affaea438cd0c3b Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 9 Sep 2021 09:13:37 +1000 Subject: [PATCH 55/72] Disable readability-named-parameter --- .clang-tidy | 5 +++-- benchmarks/utilities/simulated_memory_resource.hpp | 4 ++-- include/rmm/mr/device/cuda_async_memory_resource.hpp | 4 ++-- include/rmm/mr/device/cuda_memory_resource.hpp | 7 +++---- include/rmm/mr/device/managed_memory_resource.hpp | 4 ++-- include/rmm/mr/device/owning_wrapper.hpp | 2 +- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index b76743aeb..263a2d2d6 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -8,6 +8,7 @@ Checks: 'clang-diagnostic-*, readability-*, llvm-*, -modernize-use-trailing-return-type, + -readability-named-parameter, -cppcoreguidelines-macro-usage' WarningsAsErrors: '' HeaderFilterRegex: '' @@ -54,8 +55,8 @@ CheckOptions: value: 'mr|os' - key: readability-identifier-length.IgnoredVariableNames value: 'mr|_' - - key: readability-function-cognitive-complexity.IgnoreMacros - value: '1' + #- key: readability-function-cognitive-complexity.IgnoreMacros + # value: '1' - key: bugprone-easily-swappable-parameters.IgnoredParameterNames value: 'alignment' ... 
diff --git a/benchmarks/utilities/simulated_memory_resource.hpp b/benchmarks/utilities/simulated_memory_resource.hpp index 44ee4798c..d8c7cf946 100644 --- a/benchmarks/utilities/simulated_memory_resource.hpp +++ b/benchmarks/utilities/simulated_memory_resource.hpp @@ -77,7 +77,7 @@ class simulated_memory_resource final : public device_memory_resource { * @param bytes The size, in bytes, of the allocation * @return void* Pointer to the newly allocated memory */ - void* do_allocate(std::size_t bytes, cuda_stream_view /*stream*/) override + void* do_allocate(std::size_t bytes, cuda_stream_view) override { // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) RMM_EXPECTS(begin_ + bytes <= end_, rmm::bad_alloc, "Simulated memory size exceeded"); @@ -95,7 +95,7 @@ class simulated_memory_resource final : public device_memory_resource { * * @param ptr Pointer to be deallocated */ - void do_deallocate(void* ptr, std::size_t /*bytes*/, cuda_stream_view /*stream*/) override {} + void do_deallocate(void* ptr, std::size_t, cuda_stream_view) override {} /** * @brief Get free and available memory for memory resource. diff --git a/include/rmm/mr/device/cuda_async_memory_resource.hpp b/include/rmm/mr/device/cuda_async_memory_resource.hpp index 13eb99ec8..9111a2da3 100644 --- a/include/rmm/mr/device/cuda_async_memory_resource.hpp +++ b/include/rmm/mr/device/cuda_async_memory_resource.hpp @@ -165,7 +165,7 @@ class cuda_async_memory_resource final : public device_memory_resource { * * @param p Pointer to be deallocated */ - void do_deallocate(void* ptr, std::size_t /*bytes*/, rmm::cuda_stream_view stream) override + void do_deallocate(void* ptr, std::size_t, rmm::cuda_stream_view stream) override { #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT if (ptr != nullptr) { RMM_ASSERT_CUDA_SUCCESS(cudaFreeAsync(ptr, stream.value())); } @@ -197,7 +197,7 @@ class cuda_async_memory_resource final : public device_memory_resource { * @return std::pair contaiing free_size and total_size of memory */ [[nodiscard]] std::pair do_get_mem_info( - rmm::cuda_stream_view /*stream*/) const override + rmm::cuda_stream_view) const override { return std::make_pair(0, 0); } diff --git a/include/rmm/mr/device/cuda_memory_resource.hpp b/include/rmm/mr/device/cuda_memory_resource.hpp index 59a729297..b5b3d87df 100644 --- a/include/rmm/mr/device/cuda_memory_resource.hpp +++ b/include/rmm/mr/device/cuda_memory_resource.hpp @@ -64,7 +64,7 @@ class cuda_memory_resource final : public device_memory_resource { * @param bytes The size, in bytes, of the allocation * @return void* Pointer to the newly allocated memory */ - void* do_allocate(std::size_t bytes, cuda_stream_view /*stream*/) override + void* do_allocate(std::size_t bytes, cuda_stream_view) override { void* ptr{nullptr}; RMM_CUDA_TRY(cudaMalloc(&ptr, bytes), rmm::bad_alloc); @@ -80,7 +80,7 @@ class cuda_memory_resource final : public device_memory_resource { * * @param p Pointer to be deallocated */ - void do_deallocate(void* ptr, std::size_t /*bytes*/, cuda_stream_view /*stream*/) override + void do_deallocate(void* ptr, std::size_t, cuda_stream_view) override { RMM_ASSERT_CUDA_SUCCESS(cudaFree(ptr)); } @@ -109,8 +109,7 @@ class cuda_memory_resource final : public device_memory_resource { * * @return std::pair contaiing free_size and total_size of memory */ - [[nodiscard]] std::pair do_get_mem_info( - cuda_stream_view /*stream*/) const override + [[nodiscard]] std::pair do_get_mem_info(cuda_stream_view) const override { std::size_t free_size{}; std::size_t total_size{}; diff --git 
a/include/rmm/mr/device/managed_memory_resource.hpp b/include/rmm/mr/device/managed_memory_resource.hpp index 3ed44a528..7cce644be 100644 --- a/include/rmm/mr/device/managed_memory_resource.hpp +++ b/include/rmm/mr/device/managed_memory_resource.hpp @@ -64,7 +64,7 @@ class managed_memory_resource final : public device_memory_resource { * @param bytes The size, in bytes, of the allocation * @return void* Pointer to the newly allocated memory */ - void* do_allocate(std::size_t bytes, cuda_stream_view /*stream*/) override + void* do_allocate(std::size_t bytes, cuda_stream_view) override { // FIXME: Unlike cudaMalloc, cudaMallocManaged will throw an error for 0 // size allocations. @@ -84,7 +84,7 @@ class managed_memory_resource final : public device_memory_resource { * * @param ptr Pointer to be deallocated */ - void do_deallocate(void* ptr, std::size_t /*bytes*/, cuda_stream_view /*stream*/) override + void do_deallocate(void* ptr, std::size_t, cuda_stream_view) override { RMM_ASSERT_CUDA_SUCCESS(cudaFree(ptr)); } diff --git a/include/rmm/mr/device/owning_wrapper.hpp b/include/rmm/mr/device/owning_wrapper.hpp index 7bcc3b2fa..17a5b4565 100644 --- a/include/rmm/mr/device/owning_wrapper.hpp +++ b/include/rmm/mr/device/owning_wrapper.hpp @@ -27,7 +27,7 @@ namespace detail { /// Converts a tuple into a parameter pack template auto make_resource_impl(UpstreamTuple const& upstreams, - std::index_sequence /*indices*/, + std::index_sequence, Args&&... args) { return std::make_unique(std::get(upstreams).get()..., From a859235112187616f0edc2f5c2fd91df87d3715c Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 9 Sep 2021 11:02:54 +1000 Subject: [PATCH 56/72] Use C++17 for all of Cython --- python/setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/setup.py b/python/setup.py index e05a73e4b..8101ba2ef 100644 --- a/python/setup.py +++ b/python/setup.py @@ -155,7 +155,7 @@ def get_cuda_version_from_header(cuda_include_dir): ], libraries=["cuda", "cudart"], language="c++", - extra_compile_args=["-std=c++14"], + extra_compile_args=["-std=c++17"], ) ], nthreads=nthreads, @@ -178,7 +178,7 @@ def get_cuda_version_from_header(cuda_include_dir): ], libraries=["cuda", "cudart"], language="c++", - extra_compile_args=["-std=c++14"], + extra_compile_args=["-std=c++17"], ) ], nthreads=nthreads, From 0ac66e10ac12fc29d76eb85a696b9cf0442b1629 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 9 Sep 2021 11:03:28 +1000 Subject: [PATCH 57/72] Avoid bounds check exception in `stack_trace` --- include/rmm/detail/stack_trace.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/rmm/detail/stack_trace.hpp b/include/rmm/detail/stack_trace.hpp index 2b83aa7cf..1f76af0a4 100644 --- a/include/rmm/detail/stack_trace.hpp +++ b/include/rmm/detail/stack_trace.hpp @@ -53,7 +53,7 @@ class stack_trace { const int MaxStackDepth = 64; std::array stack{}; auto const depth = backtrace(stack.begin(), MaxStackDepth); - stack_ptrs.insert(stack_ptrs.end(), stack.begin(), &stack.at(depth)); + stack_ptrs.insert(stack_ptrs.end(), stack.begin(), stack.begin() + depth); #endif // RMM_ENABLE_STACK_TRACES } From fe1d70de9fbdb4e3b3d1ef4f89cfba8bae94c2ca Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 9 Sep 2021 11:03:50 +1000 Subject: [PATCH 58/72] variable name --- .../multi_stream_allocations_bench.cu | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu 
b/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu index 9b6210a2d..dbcea2a45 100644 --- a/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu +++ b/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu @@ -117,9 +117,9 @@ inline auto make_binning() return mr; } -static void benchmark_range(benchmark::internal::Benchmark* b) +static void benchmark_range(benchmark::internal::Benchmark* bench) { - b // + bench // ->RangeMultiplier(2) ->Ranges({{1, 4}, {4, 4}, {false, true}}) ->Unit(benchmark::kMicrosecond); @@ -177,6 +177,7 @@ void declare_benchmark(std::string const& name) std::cout << "Error: invalid memory_resource name: " << name << std::endl; } +// NOLINTNEXTLINE(bugprone-easily-swappable-parameters) void run_profile(std::string const& resource_name, int kernel_count, int stream_count, bool prewarm) { auto mr_factory = get_mr_factory(resource_name); From fbf1c9c27c3148f87bc9bfae5392863a1cb324d8 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 9 Sep 2021 11:12:51 +1000 Subject: [PATCH 59/72] Revert magic numbers --- benchmarks/device_uvector/device_uvector_bench.cu | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/benchmarks/device_uvector/device_uvector_bench.cu b/benchmarks/device_uvector/device_uvector_bench.cu index 6665ccaa8..7e73451e6 100644 --- a/benchmarks/device_uvector/device_uvector_bench.cu +++ b/benchmarks/device_uvector/device_uvector_bench.cu @@ -40,13 +40,9 @@ static void BM_UvectorSizeConstruction(benchmark::State& state) rmm::mr::set_current_device_resource(nullptr); } -const auto range_multiplier{10}; -const auto range_start{10'000}; -const auto range_end{1'000'000'000}; - BENCHMARK(BM_UvectorSizeConstruction) - ->RangeMultiplier(range_multiplier) - ->Range(range_start, range_end) + ->RangeMultiplier(10) // NOLINT + ->Range(10'000, 1'000'000'000) // NOLINT ->Unit(benchmark::kMicrosecond); static void BM_ThrustVectorSizeConstruction(benchmark::State& state) @@ -66,8 +62,8 @@ static void BM_ThrustVectorSizeConstruction(benchmark::State& state) } BENCHMARK(BM_ThrustVectorSizeConstruction) - ->RangeMultiplier(range_multiplier) - ->Range(range_start, range_end) + ->RangeMultiplier(10) // NOLINT + ->Range(10'000, 1'000'000'000) // NOLINT ->Unit(benchmark::kMicrosecond); BENCHMARK_MAIN(); From 8d743a8e34d11a3c6e3fc140fc182b32e20eca65 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 14 Sep 2021 15:21:43 +1000 Subject: [PATCH 60/72] Fix merge problems --- include/rmm/logger.hpp | 13 +- .../rmm/mr/device/arena_memory_resource.hpp | 151 ++++---- include/rmm/mr/device/detail/arena.hpp | 350 +++++++++--------- .../rmm/mr/host/pinned_memory_resource.hpp | 6 +- 4 files changed, 270 insertions(+), 250 deletions(-) diff --git a/include/rmm/logger.hpp b/include/rmm/logger.hpp index b000b5ef0..43d4b6ea3 100644 --- a/include/rmm/logger.hpp +++ b/include/rmm/logger.hpp @@ -75,16 +75,17 @@ struct logger_wrapper { struct bytes { std::size_t value; - friend std::ostream& operator<<(std::ostream& os, bytes const& b) + friend std::ostream& operator<<(std::ostream& os, bytes const& value) { - std::string const units[] = {"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"}; - int i = 0; - auto size = static_cast(b.value); + static std::array const units{ + "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"}; + int index = 0; + auto size = static_cast(value.value); while (size > 1024) { size /= 1024; - i++; + index++; } - return os << size << ' ' << units[i]; + return os << size << ' ' 
<< units.at(index); } }; diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp index 28376142c..ce8737225 100644 --- a/include/rmm/mr/device/arena_memory_resource.hpp +++ b/include/rmm/mr/device/arena_memory_resource.hpp @@ -16,17 +16,20 @@ #pragma once #include +#include #include #include #include +#include +#include + #include #include #include -namespace rmm { -namespace mr { +namespace rmm::mr { /** * @brief A suballocator that emphasizes fragmentation avoidance and scalable concurrency support. @@ -87,14 +90,23 @@ class arena_memory_resource final : public device_memory_resource { */ explicit arena_memory_resource(Upstream* upstream_mr, std::size_t initial_size = global_arena::default_initial_size, - std::size_t maximum_size = global_arena::default_maximum_size) - : global_arena_{upstream_mr, initial_size, maximum_size} + std::size_t maximum_size = global_arena::default_maximum_size, + bool dump_log_on_failure = false) + : global_arena_{upstream_mr, initial_size, maximum_size}, + dump_log_on_failure_{dump_log_on_failure} { + if (dump_log_on_failure_) { + logger_ = spdlog::basic_logger_mt("arena_memory_dump", "rmm_arena_memory_dump.log"); + } } + ~arena_memory_resource() override = default; + // Disable copy (and move) semantics. arena_memory_resource(arena_memory_resource const&) = delete; arena_memory_resource& operator=(arena_memory_resource const&) = delete; + arena_memory_resource(arena_memory_resource&&) noexcept = delete; + arena_memory_resource& operator=(arena_memory_resource&&) noexcept = delete; /** * @brief Queries whether the resource supports use of non-null CUDA streams for @@ -130,69 +142,54 @@ class arena_memory_resource final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - if (bytes <= 0) return nullptr; + if (bytes <= 0) { return nullptr; } - bytes = detail::arena::align_up(bytes); - return get_arena(stream).allocate(bytes); + bytes = detail::arena::align_up(bytes); + auto& arena = get_arena(stream); + void* pointer = arena.allocate(bytes); + + if (pointer == nullptr) { + write_lock lock(mtx_); + defragment(); + pointer = arena.allocate(bytes); + if (pointer == nullptr) { + if (dump_log_on_failure_) { dump_memory_log(bytes); } + RMM_FAIL("Maximum pool size exceeded", rmm::bad_alloc); + } + } + + return pointer; } /** - * @brief Deallocate memory pointed to by `p`. + * @brief Deallocate memory pointed to by `ptr`. * - * @param p Pointer to be deallocated. + * @param ptr Pointer to be deallocated. * @param bytes The size in bytes of the allocation. This must be equal to the * value of `bytes` that was passed to the `allocate` call that returned `p`. * @param stream Stream on which to perform deallocation. */ - void do_deallocate(void* p, std::size_t bytes, cuda_stream_view stream) override + void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - if (p == nullptr || bytes <= 0) return; + if (ptr == nullptr || bytes <= 0) { return; } bytes = detail::arena::align_up(bytes); -#ifdef RMM_POOL_TRACK_ALLOCATIONS - if (!get_arena(stream).deallocate(p, bytes, stream)) { - deallocate_from_other_arena(p, bytes, stream); - } -#else - get_arena(stream).deallocate(p, bytes, stream); -#endif + get_arena(stream).deallocate(ptr, bytes, stream); } -#ifdef RMM_POOL_TRACK_ALLOCATIONS /** - * @brief Deallocate memory pointed to by `p` that was allocated in a different arena. - * - * @param p Pointer to be deallocated. 
- * @param bytes The size in bytes of the allocation. This must be equal to the - * value of `bytes` that was passed to the `allocate` call that returned `p`. - * @param stream Stream on which to perform deallocation. + * @brief Defragment memory by returning all free blocks to the global arena. */ - void deallocate_from_other_arena(void* p, std::size_t bytes, cuda_stream_view stream) + void defragment() { - stream.synchronize_no_throw(); - - read_lock lock(mtx_); - - if (use_per_thread_arena(stream)) { - auto const id = std::this_thread::get_id(); - for (auto& kv : thread_arenas_) { - // If the arena does not belong to the current thread, try to deallocate from it, and return - // if successful. - if (kv.first != id && kv.second->deallocate(p, bytes)) return; - } - } else { - for (auto& kv : stream_arenas_) { - // If the arena does not belong to the current stream, try to deallocate from it, and return - // if successful. - if (stream != kv.first && kv.second.deallocate(p, bytes)) return; - } + RMM_CUDA_TRY(cudaDeviceSynchronize()); + for (auto& thread_arena : thread_arenas_) { + thread_arena.second->clean(); + } + for (auto& stream_arena : stream_arenas_) { + stream_arena.second.clean(); } - - // The thread that originally allocated the block has terminated, deallocate directly in the - // global arena. - global_arena_.deallocate({p, bytes}); } -#endif /** * @brief Get the arena associated with the current thread or the given stream. @@ -202,11 +199,8 @@ class arena_memory_resource final : public device_memory_resource { */ arena& get_arena(cuda_stream_view stream) { - if (use_per_thread_arena(stream)) { - return get_thread_arena(); - } else { - return get_stream_arena(stream); - } + if (use_per_thread_arena(stream)) { return get_thread_arena(); } + return get_stream_arena(stream); } /** @@ -216,18 +210,18 @@ class arena_memory_resource final : public device_memory_resource { */ arena& get_thread_arena() { - auto const id = std::this_thread::get_id(); + auto const thread_id = std::this_thread::get_id(); { read_lock lock(mtx_); - auto const it = thread_arenas_.find(id); - if (it != thread_arenas_.end()) { return *it->second; } + auto const iter = thread_arenas_.find(thread_id); + if (iter != thread_arenas_.end()) { return *iter->second; } } { write_lock lock(mtx_); - auto a = std::make_shared(global_arena_); - thread_arenas_.emplace(id, a); - thread_local detail::arena::arena_cleaner cleaner{a}; - return *a; + auto thread_arena = std::make_shared(global_arena_); + thread_arenas_.emplace(thread_id, thread_arena); + thread_local detail::arena::arena_cleaner cleaner{thread_arena}; + return *thread_arena; } } @@ -241,8 +235,8 @@ class arena_memory_resource final : public device_memory_resource { RMM_LOGGING_ASSERT(!use_per_thread_arena(stream)); { read_lock lock(mtx_); - auto const it = stream_arenas_.find(stream.value()); - if (it != stream_arenas_.end()) { return it->second; } + auto const iter = stream_arenas_.find(stream.value()); + if (iter != stream_arenas_.end()) { return iter->second; } } { write_lock lock(mtx_); @@ -262,6 +256,32 @@ class arena_memory_resource final : public device_memory_resource { return std::make_pair(0, 0); } + /** + * Dump memory to log. 
+ * + * @param bytes the number of bytes requested for allocation + */ + void dump_memory_log(size_t bytes) + { + logger_->info("**************************************************"); + logger_->info("Ran out of memory trying to allocate {}.", rmm::detail::bytes{bytes}); + logger_->info("**************************************************"); + logger_->info("Global arena:"); + global_arena_.dump_memory_log(logger_); + logger_->info("Per-thread arenas:"); + for (auto const& thread_arena : thread_arenas_) { + logger_->info(" Thread {}:", thread_arena.first); + thread_arena.second->dump_memory_log(logger_); + } + if (!stream_arenas_.empty()) { + logger_->info("Per-stream arenas:"); + for (auto const& stream_arena : stream_arenas_) { + logger_->info(" Stream {}:", static_cast(stream_arena.first)); + stream_arena.second.dump_memory_log(logger_); + } + } + } + /** * @brief Should a per-thread arena be used given the CUDA stream. * @@ -281,9 +301,12 @@ class arena_memory_resource final : public device_memory_resource { /// Arenas for non-default streams, one per stream. /// Implementation note: for small sizes, map is more efficient than unordered_map. std::map stream_arenas_; + /// If true, dump memory information to log on allocation failure. + bool dump_log_on_failure_; + /// The logger for memory dump. + std::shared_ptr logger_{}; /// Mutex for read and write locks. mutable std::shared_timed_mutex mtx_; }; -} // namespace mr -} // namespace rmm +} // namespace rmm::mr diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index 7a449949c..6cfe94058 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -19,24 +19,26 @@ #include #include #include +#include #include +#include +#include + #include #include #include #include #include +#include #include #include -namespace rmm { -namespace mr { -namespace detail { -namespace arena { +namespace rmm::mr::detail::arena { /// Minimum size of a superblock (256 KiB). -constexpr std::size_t minimum_superblock_size = 1u << 18u; +constexpr std::size_t minimum_superblock_size = 1U << 18U; /** * @brief Represents a chunk of memory that can be allocated and deallocated. @@ -67,16 +69,16 @@ class block { block(void* pointer, std::size_t size) : pointer_(static_cast(pointer)), size_(size) {} /// Returns the underlying pointer. - void* pointer() const { return pointer_; } + [[nodiscard]] void* pointer() const { return pointer_; } /// Returns the size of the block. - std::size_t size() const { return size_; } + [[nodiscard]] std::size_t size() const { return size_; } /// Returns true if this block is valid (non-null), false otherwise. - bool is_valid() const { return pointer_ != nullptr; } + [[nodiscard]] bool is_valid() const { return pointer_ != nullptr; } /// Returns true if this block is a superblock, false otherwise. - bool is_superblock() const { return size_ >= minimum_superblock_size; } + [[nodiscard]] bool is_superblock() const { return size_ >= minimum_superblock_size; } /** * @brief Verifies whether this block can be merged to the beginning of block b. @@ -85,30 +87,32 @@ class block { * @return true Returns true if this block's `pointer` + `size` == `b.ptr`, and `not b.is_head`, false otherwise. 
*/ - bool is_contiguous_before(block const& b) const { return pointer_ + size_ == b.pointer_; } + [[nodiscard]] bool is_contiguous_before(block const& blk) const + { + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + return pointer_ + size_ == blk.pointer_; + } /** * @brief Is this block large enough to fit `sz` bytes? * - * @param sz The size in bytes to check for fit. + * @param size The size in bytes to check for fit. * @return true if this block is at least `sz` bytes. */ - bool fits(std::size_t sz) const { return size_ >= sz; } + [[nodiscard]] bool fits(std::size_t size) const { return size_ >= size; } /** * @brief Split this block into two by the given size. * - * @param sz The size in bytes of the first block. + * @param size The size in bytes of the first block. * @return std::pair A pair of blocks split by sz. */ - std::pair split(std::size_t sz) const + [[nodiscard]] std::pair split(std::size_t size) const { RMM_LOGGING_ASSERT(size_ >= sz); - if (size_ > sz) { - return {{pointer_, sz}, {pointer_ + sz, size_ - sz}}; - } else { - return {*this, {}}; - } + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + if (size_ > size) { return {{pointer_, size}, {pointer_ + size, size_ - size}}; } + return {*this, {}}; } /** @@ -119,29 +123,31 @@ class block { * @param b block to merge. * @return block The merged block. */ - block merge(block const& b) const + [[nodiscard]] block merge(block const& blk) const { RMM_LOGGING_ASSERT(is_contiguous_before(b)); - return {pointer_, size_ + b.size_}; + return {pointer_, size_ + blk.size_}; } /// Used by std::set to compare blocks. - bool operator<(block const& b) const { return pointer_ < b.pointer_; } + bool operator<(block const& blk) const { return pointer_ < blk.pointer_; } private: char* pointer_{}; ///< Raw memory pointer. std::size_t size_{}; ///< Size in bytes. }; +inline bool block_size_compare(block lhs, block rhs) { return lhs.size() < rhs.size(); } + /** * @brief Align up to the allocation alignment. * * @param[in] v value to align * @return Return the aligned value */ -constexpr std::size_t align_up(std::size_t v) noexcept +constexpr std::size_t align_up(std::size_t value) noexcept { - return rmm::detail::align_up(v, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + return rmm::detail::align_up(value, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); } /** @@ -150,9 +156,9 @@ constexpr std::size_t align_up(std::size_t v) noexcept * @param[in] v value to align * @return Return the aligned value */ -constexpr std::size_t align_down(std::size_t v) noexcept +constexpr std::size_t align_down(std::size_t value) noexcept { - return rmm::detail::align_down(v, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + return rmm::detail::align_down(value, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); } /** @@ -172,24 +178,20 @@ constexpr std::size_t align_down(std::size_t v) noexcept inline block first_fit(std::set& free_blocks, std::size_t size) { auto const iter = std::find_if( - free_blocks.cbegin(), free_blocks.cend(), [size](auto const& b) { return b.fits(size); }); - - if (iter == free_blocks.cend()) { - return {}; - } else { - // Remove the block from the free_list. - auto const b = *iter; - auto const i = free_blocks.erase(iter); - - if (b.size() > size) { - // Split the block and put the remainder back. 
- auto const split = b.split(size); - free_blocks.insert(i, split.second); - return split.first; - } else { - return b; - } + free_blocks.cbegin(), free_blocks.cend(), [size](auto const& blk) { return blk.fits(size); }); + + if (iter == free_blocks.cend()) { return {}; } + // Remove the block from the free_list. + auto const blk = *iter; + auto const next = free_blocks.erase(iter); + + if (blk.size() > size) { + // Split the block and put the remainder back. + auto const split = blk.split(size); + free_blocks.insert(next, split.second); + return split.first; } + return blk; } /** @@ -199,39 +201,47 @@ inline block first_fit(std::set& free_blocks, std::size_t size) * @param b The block to coalesce. * @return block The coalesced block. */ -inline block coalesce_block(std::set& free_blocks, block const& b) +inline block coalesce_block(std::set& free_blocks, block const& blk) { - if (!b.is_valid()) return b; + if (!blk.is_valid()) { return blk; } // Find the right place (in ascending address order) to insert the block. - auto const next = free_blocks.lower_bound(b); + auto const next = free_blocks.lower_bound(blk); auto const previous = next == free_blocks.cbegin() ? next : std::prev(next); // Coalesce with neighboring blocks. - bool const merge_prev = previous->is_contiguous_before(b); - bool const merge_next = next != free_blocks.cend() && b.is_contiguous_before(*next); + bool const merge_prev = previous->is_contiguous_before(blk); + bool const merge_next = next != free_blocks.cend() && blk.is_contiguous_before(*next); block merged{}; if (merge_prev && merge_next) { - merged = previous->merge(b).merge(*next); + merged = previous->merge(blk).merge(*next); free_blocks.erase(previous); - auto const i = free_blocks.erase(next); - free_blocks.insert(i, merged); + auto const iter = free_blocks.erase(next); + free_blocks.insert(iter, merged); } else if (merge_prev) { - merged = previous->merge(b); - auto const i = free_blocks.erase(previous); - free_blocks.insert(i, merged); + merged = previous->merge(blk); + auto const iter = free_blocks.erase(previous); + free_blocks.insert(iter, merged); } else if (merge_next) { - merged = b.merge(*next); - auto const i = free_blocks.erase(next); - free_blocks.insert(i, merged); + merged = blk.merge(*next); + auto const iter = free_blocks.erase(next); + free_blocks.insert(iter, merged); } else { - free_blocks.emplace(b); - merged = b; + free_blocks.emplace(blk); + merged = blk; } return merged; } +template +inline auto total_block_size(T const& blocks) +{ + return std::accumulate(blocks.cbegin(), blocks.cend(), std::size_t{}, [](auto lhs, auto rhs) { + return lhs + rhs.size(); + }); +} + /** * @brief The global arena for allocating memory from the upstream memory resource. * @@ -248,7 +258,7 @@ class global_arena final { /// The default maximum size for the global arena. static constexpr std::size_t default_maximum_size = std::numeric_limits::max(); /// Reserved memory that should not be allocated (64 MiB). - static constexpr std::size_t reserved_size = 1u << 26u; + static constexpr std::size_t reserved_size = 1U << 26U; /** * @brief Construct a global arena. 
@@ -275,7 +285,8 @@ class global_arena final { "Error, Maximum arena size required to be a multiple of 256 bytes"); if (initial_size == default_initial_size || maximum_size == default_maximum_size) { - std::size_t free{}, total{}; + std::size_t free{}; + std::size_t total{}; RMM_CUDA_TRY(cudaMemGetInfo(&free, &total)); if (initial_size == default_initial_size) { initial_size = align_up(std::min(free, total / 2)); @@ -290,8 +301,10 @@ class global_arena final { } // Disable copy (and move) semantics. - global_arena(const global_arena&) = delete; - global_arena& operator=(const global_arena&) = delete; + global_arena(global_arena const&) = delete; + global_arena& operator=(global_arena const&) = delete; + global_arena(global_arena&&) noexcept = delete; + global_arena& operator=(global_arena&&) noexcept = delete; /** * @brief Destroy the global arena and deallocate all memory it allocated using the upstream @@ -300,8 +313,8 @@ class global_arena final { ~global_arena() { lock_guard lock(mtx_); - for (auto const& b : upstream_blocks_) { - upstream_mr_->deallocate(b.pointer(), b.size()); + for (auto const& blk : upstream_blocks_) { + upstream_mr_->deallocate(blk.pointer(), blk.size()); } } @@ -320,16 +333,14 @@ class global_arena final { } /** - * @brief Deallocate memory pointed to by `p`. + * @brief Deallocate memory pointed to by `blk`. * - * @param p Pointer to be deallocated. - * @param bytes The size in bytes of the allocation. This must be equal to the value of `bytes` - * that was passed to the `allocate` call that returned `p`. + * @param blk Block to be deallocated. */ - void deallocate(block const& b) + void deallocate(block const& blk) { lock_guard lock(mtx_); - coalesce_block(free_blocks_, b); + coalesce_block(free_blocks_, blk); } /** @@ -340,11 +351,37 @@ class global_arena final { void deallocate(std::set const& free_blocks) { lock_guard lock(mtx_); - for (auto const& b : free_blocks) { - coalesce_block(free_blocks_, b); + for (auto const& blk : free_blocks) { + coalesce_block(free_blocks_, blk); } } + /** + * @brief Dump memory to log. + * + * @param logger the spdlog logger to use + */ + void dump_memory_log(std::shared_ptr const& logger) const + { + lock_guard lock(mtx_); + + logger->info(" Maximum size: {}", rmm::detail::bytes{maximum_size_}); + logger->info(" Current size: {}", rmm::detail::bytes{current_size_}); + + logger->info(" # free blocks: {}", free_blocks_.size()); + if (!free_blocks_.empty()) { + logger->info(" Total size of free blocks: {}", + rmm::detail::bytes{total_block_size(free_blocks_)}); + auto const largest_free = + *std::max_element(free_blocks_.begin(), free_blocks_.end(), block_size_compare); + logger->info(" Size of largest free block: {}", rmm::detail::bytes{largest_free.size()}); + } + + logger->info(" # upstream blocks={}", upstream_blocks_.size()); + logger->info(" Total size of upstream blocks: {}", + rmm::detail::bytes{total_block_size(upstream_blocks_)}); + } + private: using lock_guard = std::lock_guard; @@ -357,8 +394,8 @@ class global_arena final { block get_block(std::size_t size) { // Find the first-fit free block. - auto const b = first_fit(free_blocks_, size); - if (b.is_valid()) return b; + auto const blk = first_fit(free_blocks_, size); + if (blk.is_valid()) { return blk; } // No existing larger blocks available, so grow the arena. auto const upstream_block = expand_arena(size_to_grow(size)); @@ -372,13 +409,11 @@ class global_arena final { * This simply grows the global arena to the maximum size. 
* * @param size The number of bytes required. - * @return size The size for the arena to grow. + * @return size The size for the arena to grow, or 0 if no more memory. */ constexpr std::size_t size_to_grow(std::size_t size) const { - if (current_size_ + size > maximum_size_) { - RMM_FAIL("Maximum pool size exceeded", rmm::bad_alloc); - } + if (current_size_ + size > maximum_size_) { return 0; } return maximum_size_ - current_size_; } @@ -390,9 +425,12 @@ class global_arena final { */ block expand_arena(std::size_t size) { - upstream_blocks_.push_back({upstream_mr_->allocate(size), size}); - current_size_ += size; - return upstream_blocks_.back(); + if (size > 0) { + upstream_blocks_.push_back({upstream_mr_->allocate(size), size}); + current_size_ += size; + return upstream_blocks_.back(); + } + return {}; } /// The upstream resource to allocate memory from. @@ -428,9 +466,13 @@ class arena { */ explicit arena(global_arena& global_arena) : global_arena_{global_arena} {} + ~arena() = default; + // Disable copy (and move) semantics. - arena(const arena&) = delete; - arena& operator=(const arena&) = delete; + arena(arena const&) = delete; + arena& operator=(arena const&) = delete; + arena(arena&&) noexcept = delete; + arena& operator=(arena&&) noexcept = delete; /** * @brief Allocates memory of size at least `bytes`. @@ -443,101 +485,86 @@ class arena { void* allocate(std::size_t bytes) { lock_guard lock(mtx_); - auto const b = get_block(bytes); -#ifdef RMM_POOL_TRACK_ALLOCATIONS - allocated_blocks_.emplace(b.pointer(), b); -#endif - return b.pointer(); + auto const blk = get_block(bytes); + return blk.pointer(); } /** - * @brief Deallocate memory pointed to by `p`, and possibly return superblocks to upstream. + * @brief Deallocate memory pointed to by `ptr`, and possibly return superblocks to upstream. * - * @param p Pointer to be deallocated. + * @param ptr Pointer to be deallocated. * @param bytes The size in bytes of the allocation. This must be equal to the value of `bytes` * that was passed to the `allocate` call that returned `p`. * @param stream Stream on which to perform deallocation. - * @return true if the allocation is found, false otherwise. */ - bool deallocate(void* p, std::size_t bytes, cuda_stream_view stream) + void deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) { lock_guard lock(mtx_); -#ifdef RMM_POOL_TRACK_ALLOCATIONS - auto const b = free_block(p, bytes); -#else - block const b{p, bytes}; -#endif - if (b.is_valid()) { - auto const merged = coalesce_block(free_blocks_, b); - shrink_arena(merged, stream); - } - return b.is_valid(); + block const blk{ptr, bytes}; + auto const merged = coalesce_block(free_blocks_, blk); + shrink_arena(merged, stream); } -#ifdef RMM_POOL_TRACK_ALLOCATIONS /** - * @brief Deallocate memory pointed to by `p`, keeping all free superblocks. - * - * This is done when deallocating from another arena. Since we don't have access to the CUDA - * stream associated with this arena, we don't coalesce the freed block and return it directly to - * the global arena. - * - * @param p Pointer to be deallocated. - * @param bytes The size in bytes of the allocation. This must be equal to the value of `bytes` - * that was passed to the `allocate` call that returned `p`. - * @return true if the allocation is found, false otherwise. + * @brief Clean the arena and deallocate free blocks from the global arena. 
*/ - bool deallocate(void* p, std::size_t bytes) + void clean() { lock_guard lock(mtx_); - auto const b = free_block(p, bytes); - if (b.is_valid()) { global_arena_.deallocate(b); } - return b.is_valid(); + global_arena_.deallocate(free_blocks_); + free_blocks_.clear(); } -#endif /** - * @brief Clean the arena and deallocate free blocks from the global arena. + * Dump memory to log. * - * This is only needed when a per-thread arena is about to die. + * @param logger the spdlog logger to use */ - void clean() + void dump_memory_log(std::shared_ptr const& logger) const { lock_guard lock(mtx_); - global_arena_.deallocate(free_blocks_); - free_blocks_.clear(); -#ifdef RMM_POOL_TRACK_ALLOCATIONS - allocated_blocks_.clear(); -#endif + logger->info(" # free blocks: {}", free_blocks_.size()); + if (!free_blocks_.empty()) { + logger->info(" Total size of free blocks: {}", + rmm::detail::bytes{total_block_size(free_blocks_)}); + auto const largest_free = + *std::max_element(free_blocks_.begin(), free_blocks_.end(), block_size_compare); + logger->info(" Size of largest free block: {}", rmm::detail::bytes{largest_free.size()}); + } } private: using lock_guard = std::lock_guard; + /// Maximum number of free blocks to keep. + static constexpr int max_free_blocks = 16; /** * @brief Get an available memory block of at least `size` bytes. * * @param size The number of bytes to allocate. - * @return block A block of memory of at least `size` bytes. + * @return A block of memory of at least `size` bytes. */ block get_block(std::size_t size) { if (size < minimum_superblock_size) { // Find the first-fit free block. - auto const b = first_fit(free_blocks_, size); - if (b.is_valid()) { return b; } + auto const blk = first_fit(free_blocks_, size); + if (blk.is_valid()) { return blk; } } // No existing larger blocks available, so grow the arena and obtain a superblock. auto const superblock = expand_arena(size); - coalesce_block(free_blocks_, superblock); - return first_fit(free_blocks_, size); + if (superblock.is_valid()) { + coalesce_block(free_blocks_, superblock); + return first_fit(free_blocks_, size); + } + return superblock; } /** * @brief Allocate space from upstream to supply the arena and return a superblock. * - * @return block A superblock. + * @return A superblock. */ block expand_arena(std::size_t size) { @@ -545,55 +572,25 @@ class arena { return global_arena_.allocate(superblock_size); } -#ifdef RMM_POOL_TRACK_ALLOCATIONS - /** - * @brief Finds, frees and returns the block associated with pointer `p`. - * - * @param p The pointer to the memory to free. - * @param size The size of the memory to free. Must be equal to the original allocation size. - * @return The (now freed) block associated with `p`. The caller is expected to return the block - * to the arena. - */ - block free_block(void* p, std::size_t size) noexcept - { - auto const i = allocated_blocks_.find(p); - - // The pointer may be allocated in another arena. - if (i == allocated_blocks_.end()) { return {}; } - - auto const found = i->second; - RMM_LOGGING_ASSERT(found.size() == size); - allocated_blocks_.erase(i); - - return found; - } -#endif - /** * @brief Shrink this arena by returning free superblocks to upstream. * - * @param b The block that can be used to shrink the arena. + * @param blk The block that can be used to shrink the arena. * @param stream Stream on which to perform shrinking. 
*/ - void shrink_arena(block const& b, cuda_stream_view stream) + void shrink_arena(block const& blk, cuda_stream_view stream) { - // Don't shrink if b is not a superblock. - if (!b.is_superblock()) return; - - stream.synchronize_no_throw(); - - global_arena_.deallocate(b); - free_blocks_.erase(b); + if (blk.is_superblock() || free_blocks_.size() > max_free_blocks) { + stream.synchronize_no_throw(); + global_arena_.deallocate(blk); + free_blocks_.erase(blk); + } } /// The global arena to allocate superblocks from. global_arena& global_arena_; /// Free blocks. std::set free_blocks_; -#ifdef RMM_POOL_TRACK_ALLOCATIONS - //// Map of pointer address to allocated blocks. - std::unordered_map allocated_blocks_; -#endif /// Mutex for exclusive lock. mutable std::mutex mtx_; }; @@ -609,11 +606,13 @@ class arena { template class arena_cleaner { public: - explicit arena_cleaner(std::shared_ptr> const& a) : arena_(a) {} + explicit arena_cleaner(std::shared_ptr> const& arena) : arena_(arena) {} // Disable copy (and move) semantics. - arena_cleaner(const arena_cleaner&) = delete; - arena_cleaner& operator=(const arena_cleaner&) = delete; + arena_cleaner(arena_cleaner const&) = delete; + arena_cleaner& operator=(arena_cleaner const&) = delete; + arena_cleaner(arena_cleaner&&) noexcept = delete; + arena_cleaner& operator=(arena_cleaner&&) = delete; ~arena_cleaner() { @@ -628,7 +627,4 @@ class arena_cleaner { std::weak_ptr> arena_; }; -} // namespace arena -} // namespace detail -} // namespace mr -} // namespace rmm +} // namespace rmm::mr::detail::arena diff --git a/include/rmm/mr/host/pinned_memory_resource.hpp b/include/rmm/mr/host/pinned_memory_resource.hpp index 42be3644f..14b833684 100644 --- a/include/rmm/mr/host/pinned_memory_resource.hpp +++ b/include/rmm/mr/host/pinned_memory_resource.hpp @@ -62,10 +62,10 @@ class pinned_memory_resource final : public host_memory_resource { (detail::is_supported_alignment(alignment)) ? 
alignment : detail::RMM_DEFAULT_HOST_ALIGNMENT; return detail::aligned_allocate(bytes, alignment, [](std::size_t size) { - void* p{nullptr}; - auto status = cudaMallocHost(&p, size); + void* ptr{nullptr}; + auto status = cudaMallocHost(&ptr, size); if (cudaSuccess != status) { throw std::bad_alloc{}; } - return p; + return ptr; }); } From 397962b3d91aa56aed4eafe34214f7c706cfb0e0 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 14 Sep 2021 16:44:56 +1000 Subject: [PATCH 61/72] More tidy warnings found on command line --- .clang-tidy | 9 +- .../random_allocations/random_allocations.cpp | 110 +++++++++--------- include/rmm/device_uvector.hpp | 35 +++--- .../rmm/mr/device/binning_memory_resource.hpp | 4 +- include/rmm/mr/device/detail/free_list.hpp | 22 ++-- .../detail/stream_ordered_memory_resource.hpp | 74 ++++++------ .../mr/device/fixed_size_memory_resource.hpp | 2 +- include/rmm/mr/device/owning_wrapper.hpp | 4 +- .../rmm/mr/device/pool_memory_resource.hpp | 3 +- 9 files changed, 144 insertions(+), 119 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 263a2d2d6..04689c330 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -7,9 +7,10 @@ Checks: 'clang-diagnostic-*, performance-*, readability-*, llvm-*, + -cppcoreguidelines-macro-usage, + -llvm-header-guard, -modernize-use-trailing-return-type, - -readability-named-parameter, - -cppcoreguidelines-macro-usage' + -readability-named-parameter' WarningsAsErrors: '' HeaderFilterRegex: '' AnalyzeTemporaryDtors: false @@ -59,4 +60,8 @@ CheckOptions: # value: '1' - key: bugprone-easily-swappable-parameters.IgnoredParameterNames value: 'alignment' + - key: cppcoreguidelines-avoid-magic-numbers.IgnorePowersOf2IntegerValues + value: '1' + - key: readability-magic-numbers.IgnorePowersOf2IntegerValues + value: '1' ... 
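A minimal illustration (not part of the patch) of what the two IgnorePowersOf2IntegerValues entries added above change, assuming the documented behaviour of clang-tidy's magic-number checks; the helper functions below are hypothetical. Power-of-two literals stop being reported, while other literals still need the kind of NOLINT annotation used in the device_uvector benchmark earlier in this series.

#include <cstddef>

std::size_t scaled(std::size_t value)
{
  // Power-of-two literals are ignored by the magic-number checks
  // once IgnorePowersOf2IntegerValues is set to '1'.
  return value * 1024;
}

std::size_t padded(std::size_t value)
{
  // Non-power-of-two literals are still reported unless annotated.
  return value + 1000;  // NOLINT
}
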
diff --git a/benchmarks/random_allocations/random_allocations.cpp b/benchmarks/random_allocations/random_allocations.cpp index 65abd56b5..828561dd1 100644 --- a/benchmarks/random_allocations/random_allocations.cpp +++ b/benchmarks/random_allocations/random_allocations.cpp @@ -284,71 +284,73 @@ static void profile_random_allocations(MRFactoryFunc const& factory, int main(int argc, char** argv) { - // benchmark::Initialize will remove GBench command line arguments it - // recognizes and leave any remaining arguments - ::benchmark::Initialize(&argc, argv); - - // Parse for replay arguments: - cxxopts::Options options("RMM Random Allocations Benchmark", - "Benchmarks random allocations within a size range."); - - options.add_options()( - "p,profile", "Profiling mode: run once", cxxopts::value()->default_value("false")); - options.add_options()("r,resource", - "Type of device_memory_resource", - cxxopts::value()->default_value("pool")); - options.add_options()("n,numallocs", - "Number of allocations (default of 0 tests a range)", - cxxopts::value()->default_value("1000")); - options.add_options()("m,maxsize", - "Maximum allocation size (default of 0 tests a range)", - cxxopts::value()->default_value("4096")); - - auto args = options.parse(argc, argv); - num_allocations = args["numallocs"].as(); - max_size = args["maxsize"].as(); - - if (args.count("profile") > 0) { - std::map const funcs({{"arena", &make_arena}, - {"binning", &make_binning}, - {"cuda", &make_cuda}, + try { + // benchmark::Initialize will remove GBench command line arguments it + // recognizes and leave any remaining arguments + ::benchmark::Initialize(&argc, argv); + + // Parse for replay arguments: + cxxopts::Options options("RMM Random Allocations Benchmark", + "Benchmarks random allocations within a size range."); + + options.add_options()( + "p,profile", "Profiling mode: run once", cxxopts::value()->default_value("false")); + options.add_options()("r,resource", + "Type of device_memory_resource", + cxxopts::value()->default_value("pool")); + options.add_options()("n,numallocs", + "Number of allocations (default of 0 tests a range)", + cxxopts::value()->default_value("1000")); + options.add_options()("m,maxsize", + "Maximum allocation size (default of 0 tests a range)", + cxxopts::value()->default_value("4096")); + + auto args = options.parse(argc, argv); + num_allocations = args["numallocs"].as(); + max_size = args["maxsize"].as(); + + if (args.count("profile") > 0) { + std::map const funcs({{"arena", &make_arena}, + {"binning", &make_binning}, + {"cuda", &make_cuda}, #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT - {"cuda_async", &make_cuda_async}, + {"cuda_async", &make_cuda_async}, #endif - {"pool", &make_pool}}); - auto resource = args["resource"].as(); + {"pool", &make_pool}}); + auto resource = args["resource"].as(); - std::cout << "Profiling " << resource << " with " << num_allocations << " allocations of max " - << max_size << "B\n"; + std::cout << "Profiling " << resource << " with " << num_allocations << " allocations of max " + << max_size << "B\n"; - try { profile_random_allocations(funcs.at(resource), num_allocations, max_size); - } catch (std::exception const& e) { - std::cout << "Exception caught: " << e.what() << std::endl; - } - std::cout << "Finished\n"; - } else { - if (args.count("numallocs") == 0) { // if zero reset to -1 so we benchmark over a range - num_allocations = -1; - } - if (args.count("maxsize") == 0) { // if zero reset to -1 so we benchmark over a range - max_size = -1; - } - - if 
(args.count("resource") > 0) { - std::string mr_name = args["resource"].as(); - declare_benchmark(mr_name); + std::cout << "Finished\n"; } else { + if (args.count("numallocs") == 0) { // if zero reset to -1 so we benchmark over a range + num_allocations = -1; + } + if (args.count("maxsize") == 0) { // if zero reset to -1 so we benchmark over a range + max_size = -1; + } + + if (args.count("resource") > 0) { + std::string mr_name = args["resource"].as(); + declare_benchmark(mr_name); + } else { #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT - std::vector mrs{"pool", "binning", "arena", "cuda_async", "cuda"}; + std::vector mrs{"pool", "binning", "arena", "cuda_async", "cuda"}; #else - std::vector mrs{"pool", "binning", "arena", "cuda"}; + std::vector mrs{"pool", "binning", "arena", "cuda"}; #endif - std::for_each( - std::cbegin(mrs), std::cend(mrs), [](auto const& mr) { declare_benchmark(mr); }); + std::for_each( + std::cbegin(mrs), std::cend(mrs), [](auto const& mr) { declare_benchmark(mr); }); + } + ::benchmark::RunSpecifiedBenchmarks(); } - ::benchmark::RunSpecifiedBenchmarks(); + + } catch (std::exception const& e) { + std::cout << "Exception caught: " << e.what() << std::endl; } + return 0; } diff --git a/include/rmm/device_uvector.hpp b/include/rmm/device_uvector.hpp index f7f55a910..49d56a0de 100644 --- a/include/rmm/device_uvector.hpp +++ b/include/rmm/device_uvector.hpp @@ -147,7 +147,7 @@ class device_uvector { * @param element_index Index of the specified element. * @return T* Pointer to the desired element */ - pointer element_ptr(std::size_t element_index) noexcept + [[nodiscard]] pointer element_ptr(std::size_t element_index) noexcept { assert(element_index < size()); return data() + element_index; @@ -161,7 +161,7 @@ class device_uvector { * @param element_index Index of the specified element. * @return T* Pointer to the desired element */ - const_pointer element_ptr(std::size_t element_index) const noexcept + [[nodiscard]] const_pointer element_ptr(std::size_t element_index) const noexcept { assert(element_index < size()); return data() + element_index; @@ -323,7 +323,10 @@ class device_uvector { * @param stream The stream on which to perform the copy * @return The value of the first element */ - value_type front_element(cuda_stream_view stream) const { return element(0, stream); } + [[nodiscard]] value_type front_element(cuda_stream_view stream) const + { + return element(0, stream); + } /** * @brief Returns the last element. @@ -336,7 +339,10 @@ class device_uvector { * @param stream The stream on which to perform the copy * @return The value of the last element */ - value_type back_element(cuda_stream_view stream) const { return element(size() - 1, stream); } + [[nodiscard]] value_type back_element(cuda_stream_view stream) const + { + return element(size() - 1, stream); + } /** * @brief Resizes the vector to contain `new_size` elements. @@ -373,7 +379,7 @@ class device_uvector { * * @return The `device_buffer` used to store the vector elements */ - device_buffer release() noexcept { return std::move(_storage); } + [[nodiscard]] device_buffer release() noexcept { return std::move(_storage); } /** * @brief Returns the number of elements that can be held in currently allocated storage. @@ -394,7 +400,7 @@ class device_uvector { * * @return Raw pointer to element storage in device memory. 
*/ - pointer data() noexcept { return static_cast(_storage.data()); } + [[nodiscard]] pointer data() noexcept { return static_cast(_storage.data()); } /** * @brief Returns const pointer to underlying device storage. @@ -404,7 +410,10 @@ class device_uvector { * * @return const_pointer Raw const pointer to element storage in device memory. */ - const_pointer data() const noexcept { return static_cast(_storage.data()); } + [[nodiscard]] const_pointer data() const noexcept + { + return static_cast(_storage.data()); + } /** * @brief Returns an iterator to the first element. @@ -413,7 +422,7 @@ class device_uvector { * * @return Iterator to the first element. */ - iterator begin() noexcept { return data(); } + [[nodiscard]] iterator begin() noexcept { return data(); } /** * @brief Returns a const_iterator to the first element. @@ -422,7 +431,7 @@ class device_uvector { * * @return Immutable iterator to the first element. */ - const_iterator cbegin() const noexcept { return data(); } + [[nodiscard]] const_iterator cbegin() const noexcept { return data(); } /** * @brief Returns a const_iterator to the first element. @@ -431,7 +440,7 @@ class device_uvector { * * @return Immutable iterator to the first element. */ - const_iterator begin() const noexcept { return cbegin(); } + [[nodiscard]] const_iterator begin() const noexcept { return cbegin(); } /** * @brief Returns an iterator to the element following the last element of the vector. @@ -441,7 +450,7 @@ class device_uvector { * * @return Iterator to one past the last element. */ - iterator end() noexcept { return data() + size(); } + [[nodiscard]] iterator end() noexcept { return data() + size(); } /** * @brief Returns a const_iterator to the element following the last element of the vector. @@ -451,7 +460,7 @@ class device_uvector { * * @return Immutable iterator to one past the last element. */ - const_iterator cend() const noexcept { return data() + size(); } + [[nodiscard]] const_iterator cend() const noexcept { return data() + size(); } /** * @brief Returns an iterator to the element following the last element of the vector. @@ -461,7 +470,7 @@ class device_uvector { * * @return Immutable iterator to one past the last element. */ - const_iterator end() const noexcept { return cend(); } + [[nodiscard]] const_iterator end() const noexcept { return cend(); } /** * @brief Returns the number of elements in the vector. diff --git a/include/rmm/mr/device/binning_memory_resource.hpp b/include/rmm/mr/device/binning_memory_resource.hpp index 46a7e204d..5d4b156c8 100644 --- a/include/rmm/mr/device/binning_memory_resource.hpp +++ b/include/rmm/mr/device/binning_memory_resource.hpp @@ -69,7 +69,7 @@ class binning_memory_resource final : public device_memory_resource { * @param max_size_exponent The maximum base-2 exponent bin size. */ binning_memory_resource(Upstream* upstream_resource, - int8_t min_size_exponent, + int8_t min_size_exponent, // NOLINT(bugprone-easily-swappable-parameters) int8_t max_size_exponent) : upstream_mr_{[upstream_resource]() { RMM_EXPECTS(nullptr != upstream_resource, "Unexpected null upstream pointer."); @@ -113,7 +113,7 @@ class binning_memory_resource final : public device_memory_resource { * * @return UpstreamResource* the upstream memory resource. 
*/ - Upstream* get_upstream() const noexcept { return upstream_mr_; } + [[nodiscard]] Upstream* get_upstream() const noexcept { return upstream_mr_; } /** * @brief Add a bin allocator to this resource diff --git a/include/rmm/mr/device/detail/free_list.hpp b/include/rmm/mr/device/detail/free_list.hpp index f249c2f25..e561fe249 100644 --- a/include/rmm/mr/device/detail/free_list.hpp +++ b/include/rmm/mr/device/detail/free_list.hpp @@ -70,20 +70,26 @@ class free_list { using iterator = typename list_type::iterator; using const_iterator = typename list_type::const_iterator; - iterator begin() noexcept { return blocks.begin(); } /// beginning of the free list - const_iterator begin() const noexcept { return blocks.begin(); } /// beginning of the free list - const_iterator cbegin() const noexcept { return blocks.cbegin(); } /// beginning of the free list - - iterator end() noexcept { return blocks.end(); } /// end of the free list - const_iterator end() const noexcept { return blocks.end(); } /// end of the free list - const_iterator cend() const noexcept { return blocks.cend(); } /// end of the free list + /// beginning of the free list + [[nodiscard]] iterator begin() noexcept { return blocks.begin(); } + /// beginning of the free list + [[nodiscard]] const_iterator begin() const noexcept { return blocks.begin(); } + /// beginning of the free list + [[nodiscard]] const_iterator cbegin() const noexcept { return blocks.cbegin(); } + + /// end of the free list + [[nodiscard]] iterator end() noexcept { return blocks.end(); } + /// beginning of the free list + [[nodiscard]] const_iterator end() const noexcept { return blocks.end(); } + /// beginning of the free list + [[nodiscard]] const_iterator cend() const noexcept { return blocks.cend(); } /** * @brief The size of the free list in blocks. * * @return size_type The number of blocks in the free list. */ - size_type size() const noexcept { return blocks.size(); } + [[nodiscard]] size_type size() const noexcept { return blocks.size(); } /** * @brief checks whether the free_list is empty. diff --git a/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp b/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp index f9106e17d..2a726377d 100644 --- a/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp +++ b/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp @@ -46,8 +46,8 @@ namespace rmm::mr::detail { */ template struct crtp { - T& underlying() { return static_cast(*this); } - T const& underlying() const { return static_cast(*this); } + [[nodiscard]] T& underlying() { return static_cast(*this); } + [[nodiscard]] T const& underlying() const { return static_cast(*this); } }; /** @@ -288,7 +288,7 @@ class stream_ordered_memory_resource : public crtp, public device_ // instance ensures it is destroyed cleaned up only after all are finished with it. thread_local auto event_tls = std::make_shared(); default_stream_events.insert(event_tls); - return stream_event_pair{stream.value(), event_tls.get()->event}; + return stream_event_pair{stream.value(), event_tls->event}; } // We use cudaStreamLegacy as the event map key for the default stream for consistency between // PTDS and non-PTDS mode. 
In PTDS mode, the cudaStreamLegacy map key will only exist if the @@ -383,45 +383,47 @@ class stream_ordered_memory_resource : public crtp, public device_ free_list& blocks, bool merge_first) { - for (auto it = stream_free_blocks_.begin(), next_it = it; it != stream_free_blocks_.end(); - it = next_it) { - ++next_it; // Points to element after `it` to allow erasing `it` in the loop body - auto other_event = it->first.event; - if (other_event != stream_event.event) { - free_list& other_blocks = it->second; - - block_type const block = [&]() { - if (merge_first) { - merge_lists(stream_event, blocks, other_event, std::move(other_blocks)); - - RMM_LOG_DEBUG("[A][Stream {:p}][{}B][Merged stream {:p}]", - fmt::ptr(stream_event.stream), - size, - fmt::ptr(it->first.stream)); - - stream_free_blocks_.erase(it); - - block_type const block = - blocks.get_block(size); // get the best fit block in merged lists - if (block.is_valid()) { return allocate_and_insert_remainder(block, size, blocks); } - } else { - block_type const block = other_blocks.get_block(size); - if (block.is_valid()) { - // Since we found a block associated with a different stream, we have to insert a wait - // on the stream's associated event into the allocating stream. - RMM_CUDA_TRY(cudaStreamWaitEvent(stream_event.stream, other_event, 0)); - return allocate_and_insert_remainder(block, size, other_blocks); - } - } - return block_type{}; - }(); + auto find_block = [&](auto iter) { + auto other_event = iter->first.event; + auto& other_blocks = iter->second; + if (merge_first) { + merge_lists(stream_event, blocks, other_event, std::move(other_blocks)); + + RMM_LOG_DEBUG("[A][Stream {:p}][{}B][Merged stream {:p}]", + fmt::ptr(stream_event.stream), + size, + fmt::ptr(iter->first.stream)); + + stream_free_blocks_.erase(iter); + + block_type const block = blocks.get_block(size); // get the best fit block in merged lists + if (block.is_valid()) { return allocate_and_insert_remainder(block, size, blocks); } + } else { + block_type const block = other_blocks.get_block(size); + if (block.is_valid()) { + // Since we found a block associated with a different stream, we have to insert a wait + // on the stream's associated event into the allocating stream. + RMM_CUDA_TRY(cudaStreamWaitEvent(stream_event.stream, other_event, 0)); + return allocate_and_insert_remainder(block, size, other_blocks); + } + } + return block_type{}; + }; + + for (auto iter = stream_free_blocks_.begin(), next_iter = iter; + iter != stream_free_blocks_.end(); + iter = next_iter) { + ++next_iter; // Points to element after `iter` to allow erasing `iter` in the loop body + + if (iter->first.event != stream_event.event) { + block_type const block = find_block(iter); if (block.is_valid()) { RMM_LOG_DEBUG((merge_first) ? "[A][Stream {:p}][{}B][Found after merging stream {:p}]" : "[A][Stream {:p}][{}B][Taken from stream {:p}]", fmt::ptr(stream_event.stream), size, - fmt::ptr(it->first.stream)); + fmt::ptr(iter->first.stream)); return block; } } diff --git a/include/rmm/mr/device/fixed_size_memory_resource.hpp b/include/rmm/mr/device/fixed_size_memory_resource.hpp index 6ff02bcc3..4c29881fb 100644 --- a/include/rmm/mr/device/fixed_size_memory_resource.hpp +++ b/include/rmm/mr/device/fixed_size_memory_resource.hpp @@ -81,7 +81,7 @@ class fixed_size_memory_resource * @brief Destroy the `fixed_size_memory_resource` and free all memory allocated from upstream. 
* */ - ~fixed_size_memory_resource() { release(); } + ~fixed_size_memory_resource() override { release(); } fixed_size_memory_resource() = delete; fixed_size_memory_resource(fixed_size_memory_resource const&) = delete; diff --git a/include/rmm/mr/device/owning_wrapper.hpp b/include/rmm/mr/device/owning_wrapper.hpp index 17a5b4565..977ae0c11 100644 --- a/include/rmm/mr/device/owning_wrapper.hpp +++ b/include/rmm/mr/device/owning_wrapper.hpp @@ -119,13 +119,13 @@ class owning_wrapper : public device_memory_resource { * @brief Returns a constant reference to the wrapped resource. * */ - Resource const& wrapped() const noexcept { return *wrapped_; } + [[nodiscard]] Resource const& wrapped() const noexcept { return *wrapped_; } /** * @brief Returns reference to the wrapped resource. * */ - Resource& wrapped() noexcept { return *wrapped_; } + [[nodiscard]] Resource& wrapped() noexcept { return *wrapped_; } /** * @copydoc rmm::mr::device_memory_resource::supports_streams() diff --git a/include/rmm/mr/device/pool_memory_resource.hpp b/include/rmm/mr/device/pool_memory_resource.hpp index e446a9332..526852355 100644 --- a/include/rmm/mr/device/pool_memory_resource.hpp +++ b/include/rmm/mr/device/pool_memory_resource.hpp @@ -99,7 +99,7 @@ class pool_memory_resource final * @brief Destroy the `pool_memory_resource` and deallocate all memory it allocated using * the upstream resource. */ - ~pool_memory_resource() { release(); } + ~pool_memory_resource() override { release(); } pool_memory_resource() = delete; pool_memory_resource(pool_memory_resource const&) = delete; @@ -195,6 +195,7 @@ class pool_memory_resource final * @param initial_size The optional initial size for the pool * @param maximum_size The optional maximum size for the pool */ + // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) void initialize_pool(thrust::optional initial_size, thrust::optional maximum_size) { From 87959eccb742376680e94d8b70b477622857c92b Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 15 Sep 2021 12:48:18 +1000 Subject: [PATCH 62/72] Fix potential leak and exception warnings --- benchmarks/replay/replay.cpp | 179 ++++++++++++++++++----------------- 1 file changed, 94 insertions(+), 85 deletions(-) diff --git a/benchmarks/replay/replay.cpp b/benchmarks/replay/replay.cpp index 4e9bbffcf..aa8c077da 100644 --- a/benchmarks/replay/replay.cpp +++ b/benchmarks/replay/replay.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -45,7 +45,7 @@ #include /// MR factory functions -std::shared_ptr make_cuda(std::size_t /*unused*/ = 0) +std::shared_ptr make_cuda(std::size_t = 0) { return std::make_shared(); } @@ -57,18 +57,20 @@ std::shared_ptr make_simulated(std::size_t simu inline auto make_pool(std::size_t simulated_size) { - return simulated_size == 0 - ? rmm::mr::make_owning_wrapper(make_cuda()) - : rmm::mr::make_owning_wrapper( - make_simulated(simulated_size), simulated_size, simulated_size); + if (simulated_size > 0) { + return rmm::mr::make_owning_wrapper( + make_simulated(simulated_size), simulated_size, simulated_size); + } + return rmm::mr::make_owning_wrapper(make_cuda()); } inline auto make_arena(std::size_t simulated_size) { - return simulated_size == 0 - ? 
rmm::mr::make_owning_wrapper(make_cuda()) - : rmm::mr::make_owning_wrapper( - make_simulated(simulated_size), simulated_size, simulated_size); + if (simulated_size > 0) { + return rmm::mr::make_owning_wrapper( + make_simulated(simulated_size), simulated_size, simulated_size); + } + return rmm::mr::make_owning_wrapper(make_cuda()); } inline auto make_binning(std::size_t simulated_size) @@ -325,93 +327,100 @@ void declare_benchmark(std::string const& name, // Usage: REPLAY_BENCHMARK -f "path/to/log/file" int main(int argc, char** argv) { - // benchmark::Initialize will remove GBench command line arguments it - // recognizes and leave any remaining arguments - ::benchmark::Initialize(&argc, argv); - - // Parse for replay arguments: - auto args = [&argc, &argv]() { - cxxopts::Options options( - "RMM Replay Benchmark", - "Replays and benchmarks allocation activity captured from RMM logging."); - - options.add_options()("f,file", "Name of RMM log file.", cxxopts::value()); - options.add_options()("r,resource", - "Type of device_memory_resource", - cxxopts::value()->default_value("pool")); - options.add_options()("s,size", - "Size of simulated GPU memory in GiB. Not supported for the cuda memory " - "resource.", - cxxopts::value()->default_value("0")); - options.add_options()("v,verbose", - "Enable verbose printing of log events", - cxxopts::value()->default_value("false")); - - auto args = options.parse(argc, argv); - - if (args.count("file") == 0) { - std::cout << options.help() << std::endl; - exit(0); - } + try { + // benchmark::Initialize will remove GBench command line arguments it + // recognizes and leave any remaining arguments + ::benchmark::Initialize(&argc, argv); + + // Parse for replay arguments: + auto args = [&argc, &argv]() { + cxxopts::Options options( + "RMM Replay Benchmark", + "Replays and benchmarks allocation activity captured from RMM logging."); + + options.add_options()("f,file", "Name of RMM log file.", cxxopts::value()); + options.add_options()("r,resource", + "Type of device_memory_resource", + cxxopts::value()->default_value("pool")); + options.add_options()( + "s,size", + "Size of simulated GPU memory in GiB. 
Not supported for the cuda memory " + "resource.", + cxxopts::value()->default_value("0")); + options.add_options()("v,verbose", + "Enable verbose printing of log events", + cxxopts::value()->default_value("false")); + + auto args = options.parse(argc, argv); + + if (args.count("file") == 0) { + std::cout << options.help() << std::endl; + exit(0); + } - return args; - }(); + return args; + }(); - auto filename = args["file"].as(); + auto filename = args["file"].as(); - auto per_thread_events = [filename]() { - try { - auto events = parse_per_thread_events(filename); - return events; - } catch (std::exception const& e) { - std::cout << "Failed to parse events: " << e.what() << std::endl; - return std::vector>{}; - } - }(); + auto per_thread_events = [filename]() { + try { + auto events = parse_per_thread_events(filename); + return events; + } catch (std::exception const& e) { + std::cout << "Failed to parse events: " << e.what() << std::endl; + return std::vector>{}; + } + }(); #ifdef CUDA_API_PER_THREAD_DEFAULT_STREAM - std::cout << "Using CUDA per-thread default stream.\n"; + std::cout << "Using CUDA per-thread default stream.\n"; #endif - auto const simulated_size = - static_cast(args["size"].as() * static_cast(1U << 30U)); - if (simulated_size != 0 && args["resource"].as() != "cuda") { - std::cout << "Simulating GPU with memory size of " << simulated_size << " bytes.\n"; - } + auto const simulated_size = + static_cast(args["size"].as() * static_cast(1U << 30U)); + if (simulated_size != 0 && args["resource"].as() != "cuda") { + std::cout << "Simulating GPU with memory size of " << simulated_size << " bytes.\n"; + } - std::cout << "Total Events: " - << std::accumulate( - per_thread_events.begin(), - per_thread_events.end(), - 0, - [](std::size_t accum, auto const& events) { return accum + events.size(); }) - << std::endl; - - for (std::size_t thread = 0; thread < per_thread_events.size(); ++thread) { - std::cout << "Thread " << thread << ": " << per_thread_events[thread].size() << " events\n"; - if (args["verbose"].as()) { - for (auto const& event : per_thread_events[thread]) { - std::cout << event << std::endl; + std::cout << "Total Events: " + << std::accumulate( + per_thread_events.begin(), + per_thread_events.end(), + 0, + [](std::size_t accum, auto const& events) { return accum + events.size(); }) + << std::endl; + + for (std::size_t thread = 0; thread < per_thread_events.size(); ++thread) { + std::cout << "Thread " << thread << ": " << per_thread_events[thread].size() << " events\n"; + if (args["verbose"].as()) { + for (auto const& event : per_thread_events[thread]) { + std::cout << event << std::endl; + } } } - } - auto const num_threads = per_thread_events.size(); - - // Uncomment to enable / change default log level - // rmm::logger().set_level(spdlog::level::trace); + auto const num_threads = per_thread_events.size(); + + // Uncomment to enable / change default log level + // rmm::logger().set_level(spdlog::level::trace); + + if (args.count("resource") > 0) { + std::string mr_name = args["resource"].as(); + declare_benchmark(mr_name, simulated_size, per_thread_events, num_threads); + } else { + std::array mrs{"pool", "arena", "binning", "cuda"}; + std::for_each(std::cbegin(mrs), + std::cend(mrs), + [&simulated_size, &per_thread_events, &num_threads](auto const& mr) { + declare_benchmark(mr, simulated_size, per_thread_events, num_threads); + }); + } - if (args.count("resource") > 0) { - std::string mr_name = args["resource"].as(); - declare_benchmark(mr_name, simulated_size, 
per_thread_events, num_threads); - } else { - std::array mrs{"pool", "arena", "binning", "cuda"}; - std::for_each(std::cbegin(mrs), - std::cend(mrs), - [&simulated_size, &per_thread_events, &num_threads](auto const& mr) { - declare_benchmark(mr, simulated_size, per_thread_events, num_threads); - }); + ::benchmark::RunSpecifiedBenchmarks(); + } catch (std::exception const& e) { + std::cout << "Exception caught: " << e.what() << std::endl; } - ::benchmark::RunSpecifiedBenchmarks(); + return 0; } From cca3880da2da41c0f49d0a713750531410c5ed3e Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 15 Sep 2021 13:12:43 +1000 Subject: [PATCH 63/72] Fix uninitialized member clang-tidy warning --- include/rmm/mr/device/detail/free_list.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/rmm/mr/device/detail/free_list.hpp b/include/rmm/mr/device/detail/free_list.hpp index e561fe249..1e8623431 100644 --- a/include/rmm/mr/device/detail/free_list.hpp +++ b/include/rmm/mr/device/detail/free_list.hpp @@ -25,6 +25,9 @@ namespace rmm::mr::detail { struct block_base { void* ptr{}; ///< Raw memory pointer + block_base() = default; + block_base(void* ptr) : ptr{ptr} {}; + /// Returns the raw pointer for this block [[nodiscard]] inline void* pointer() const { return ptr; } /// Returns true if this block is valid (non-null), false otherwise From 496474fecf13c1eeb3d27cd06c08f24c188dc9f0 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 15 Sep 2021 13:34:36 +1000 Subject: [PATCH 64/72] nodiscard --- include/rmm/device_uvector.hpp | 2 +- tests/device_uvector_tests.cpp | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/include/rmm/device_uvector.hpp b/include/rmm/device_uvector.hpp index 49d56a0de..a52cb2ea8 100644 --- a/include/rmm/device_uvector.hpp +++ b/include/rmm/device_uvector.hpp @@ -301,7 +301,7 @@ class device_uvector { * @param stream The stream on which to perform the copy * @return The value of the specified element */ - value_type element(std::size_t element_index, cuda_stream_view stream) const + [[nodiscard]] value_type element(std::size_t element_index, cuda_stream_view stream) const { RMM_EXPECTS( element_index < size(), rmm::out_of_range, "Attempt to access out of bounds element."); diff --git a/tests/device_uvector_tests.cpp b/tests/device_uvector_tests.cpp index 09aa7e527..00ad66832 100644 --- a/tests/device_uvector_tests.cpp +++ b/tests/device_uvector_tests.cpp @@ -158,7 +158,9 @@ TYPED_TEST(TypedUVectorTest, OOBGetElement) { auto const size{12345}; rmm::device_uvector vec(size, this->stream()); - EXPECT_THROW(vec.element(vec.size() + 1, this->stream()), rmm::out_of_range); + // avoid error due to nodiscard function + auto foo = [&]() { return vec.element(vec.size() + 1, this->stream()); }; + EXPECT_THROW(foo(), rmm::out_of_range); } TYPED_TEST(TypedUVectorTest, GetSetElement) From 6f4d73926ecda54bc4d9cfee5bd55e426fca1625 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 15 Sep 2021 13:35:07 +1000 Subject: [PATCH 65/72] tidying --- .../multi_stream_allocations_bench.cu | 129 ++++++++++-------- 1 file changed, 69 insertions(+), 60 deletions(-) diff --git a/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu b/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu index dbcea2a45..5ed1b31f9 100644 --- a/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu +++ b/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu @@ -113,7 +113,10 @@ inline auto make_binning() auto pool = make_pool(); // 
Add a binning_memory_resource with fixed-size bins of sizes 256, 512, 1024, 2048 and 4096KiB // Larger allocations will use the pool resource - auto mr = rmm::mr::make_owning_wrapper(pool, 18, 22); + constexpr auto min_bin_pow2{18}; + constexpr auto max_bin_pow2{22}; + auto mr = rmm::mr::make_owning_wrapper( + pool, min_bin_pow2, max_bin_pow2); return mr; } @@ -191,69 +194,75 @@ void run_profile(std::string const& resource_name, int kernel_count, int stream_ int main(int argc, char** argv) { - ::benchmark::Initialize(&argc, argv); - - // Parse for replay arguments: - cxxopts::Options options( - "RMM Multi Stream Allocations Benchmark", - "Benchmarks interleaving temporary allocations with compute-bound kernels."); - - options.add_options()( // - "p,profile", - "Profiling mode: run once", - cxxopts::value()->default_value("false")); - - options.add_options()( // - "r,resource", - "Type of device_memory_resource", - cxxopts::value()->default_value("pool")); - - options.add_options()( // - "k,kernels", - "Number of kernels to run: (default: 8)", - cxxopts::value()->default_value("8")); - - options.add_options()( // - "s,streams", - "Number of streams in stream pool (default: 8)", - cxxopts::value()->default_value("8")); - - options.add_options()( // - "w,warm", - "Ensure each stream has enough memory to satisfy allocations.", - cxxopts::value()->default_value("false")); - - auto args = options.parse(argc, argv); - - if (args.count("profile") > 0) { - auto resource_name = args["resource"].as(); - auto num_kernels = args["kernels"].as(); - auto num_streams = args["streams"].as(); - auto prewarm = args["warm"].as(); - try { - run_profile(resource_name, num_kernels, num_streams, prewarm); - } catch (std::exception const& e) { - std::cout << "Exception caught: " << e.what() << std::endl; - } - } else { - auto resource_names = std::vector(); - - if (args.count("resource") > 0) { - resource_names.emplace_back(args["resource"].as()); + try { + ::benchmark::Initialize(&argc, argv); + + // Parse for replay arguments: + cxxopts::Options options( + "RMM Multi Stream Allocations Benchmark", + "Benchmarks interleaving temporary allocations with compute-bound kernels."); + + options.add_options()( // + "p,profile", + "Profiling mode: run once", + cxxopts::value()->default_value("false")); + + options.add_options()( // + "r,resource", + "Type of device_memory_resource", + cxxopts::value()->default_value("pool")); + + options.add_options()( // + "k,kernels", + "Number of kernels to run: (default: 8)", + cxxopts::value()->default_value("8")); + + options.add_options()( // + "s,streams", + "Number of streams in stream pool (default: 8)", + cxxopts::value()->default_value("8")); + + options.add_options()( // + "w,warm", + "Ensure each stream has enough memory to satisfy allocations.", + cxxopts::value()->default_value("false")); + + auto args = options.parse(argc, argv); + + if (args.count("profile") > 0) { + auto resource_name = args["resource"].as(); + auto num_kernels = args["kernels"].as(); + auto num_streams = args["streams"].as(); + auto prewarm = args["warm"].as(); + try { + run_profile(resource_name, num_kernels, num_streams, prewarm); + } catch (std::exception const& e) { + std::cout << "Exception caught: " << e.what() << std::endl; + } } else { - resource_names.emplace_back("cuda"); + auto resource_names = std::vector(); + + if (args.count("resource") > 0) { + resource_names.emplace_back(args["resource"].as()); + } else { + resource_names.emplace_back("cuda"); #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT - 
resource_names.emplace_back("cuda_async"); + resource_names.emplace_back("cuda_async"); #endif - resource_names.emplace_back("pool"); - resource_names.emplace_back("arena"); - resource_names.emplace_back("binning"); - } + resource_names.emplace_back("pool"); + resource_names.emplace_back("arena"); + resource_names.emplace_back("binning"); + } - for (auto& resource_name : resource_names) { - declare_benchmark(resource_name); - } + for (auto& resource_name : resource_names) { + declare_benchmark(resource_name); + } - ::benchmark::RunSpecifiedBenchmarks(); + ::benchmark::RunSpecifiedBenchmarks(); + } + } catch (std::exception const& e) { + std::cout << "Exception caught: " << e.what() << std::endl; } + + return 0; } From 8459c72bbbedee03d98474bb9697e06eafec6468 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 15 Sep 2021 14:39:04 +1000 Subject: [PATCH 66/72] nodiscard --- include/rmm/mr/device/limiting_resource_adaptor.hpp | 2 +- include/rmm/mr/device/polymorphic_allocator.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/rmm/mr/device/limiting_resource_adaptor.hpp b/include/rmm/mr/device/limiting_resource_adaptor.hpp index 810228715..c2a4dfe35 100644 --- a/include/rmm/mr/device/limiting_resource_adaptor.hpp +++ b/include/rmm/mr/device/limiting_resource_adaptor.hpp @@ -69,7 +69,7 @@ class limiting_resource_adaptor final : public device_memory_resource { * * @return Upstream* Pointer to the upstream resource. */ - Upstream* get_upstream() const noexcept { return upstream_; } + [[nodiscard]] Upstream* get_upstream() const noexcept { return upstream_; } /** * @brief Checks whether the upstream resource supports streams. diff --git a/include/rmm/mr/device/polymorphic_allocator.hpp b/include/rmm/mr/device/polymorphic_allocator.hpp index 643d1b6fb..5c87ef7f7 100644 --- a/include/rmm/mr/device/polymorphic_allocator.hpp +++ b/include/rmm/mr/device/polymorphic_allocator.hpp @@ -221,7 +221,7 @@ class stream_allocator_adaptor { * @brief Returns the underlying stream-ordered allocator * */ - Allocator underlying_allocator() const noexcept { return alloc_; } + [[nodiscard]] Allocator underlying_allocator() const noexcept { return alloc_; } private: Allocator alloc_; ///< Underlying allocator used for (de)allocation From 5b9159823df0c226b513a766b8bb01915bc16579 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 15 Sep 2021 14:39:16 +1000 Subject: [PATCH 67/72] nolints --- tests/cuda_stream_tests.cpp | 2 +- tests/device_buffer_tests.cu | 22 ++++++++++++---------- tests/device_scalar_tests.cpp | 5 +++-- tests/device_uvector_tests.cpp | 14 +++++++------- 4 files changed, 23 insertions(+), 20 deletions(-) diff --git a/tests/cuda_stream_tests.cpp b/tests/cuda_stream_tests.cpp index 96cae868e..f801226c6 100644 --- a/tests/cuda_stream_tests.cpp +++ b/tests/cuda_stream_tests.cpp @@ -47,7 +47,7 @@ TEST_F(CudaStreamTest, MoveConstructor) rmm::cuda_stream stream_a; auto const view_a = stream_a.view(); rmm::cuda_stream stream_b = std::move(stream_a); - // NOLINTNEXTLINE(bugprone-use-after-move) + // NOLINTNEXTLINE(bugprone-use-after-move, clang-analyzer-cplusplus.Move) EXPECT_FALSE(stream_a.is_valid()); // Any other operations on stream_a are UB, may segfault EXPECT_EQ(stream_b, view_a); } diff --git a/tests/device_buffer_tests.cu b/tests/device_buffer_tests.cu index ff71dfba1..2f8d34bad 100644 --- a/tests/device_buffer_tests.cu +++ b/tests/device_buffer_tests.cu @@ -293,9 +293,10 @@ TYPED_TEST(DeviceBufferTest, MoveConstructor) EXPECT_EQ(mr, buff_new.memory_resource()); // 
Original buffer should be empty - EXPECT_EQ(nullptr, buff.data()); // NOLINT(bugprone-use-after-move) - EXPECT_EQ(0, buff.size()); // NOLINT(bugprone-use-after-move) - EXPECT_EQ(0, buff.capacity()); // NOLINT(bugprone-use-after-move) + EXPECT_EQ(nullptr, + buff.data()); // NOLINT(bugprone-use-after-move, clang-analyzer-cplusplus.Move) + EXPECT_EQ(0, buff.size()); // NOLINT(bugprone-use-after-move) + EXPECT_EQ(0, buff.capacity()); // NOLINT(bugprone-use-after-move) EXPECT_EQ(rmm::cuda_stream_default, buff.stream()); // NOLINT(bugprone-use-after-move) EXPECT_NE(nullptr, buff.memory_resource()); // NOLINT(bugprone-use-after-move) } @@ -321,9 +322,10 @@ TYPED_TEST(DeviceBufferTest, MoveConstructorStream) EXPECT_EQ(mr, buff_new.memory_resource()); // Original buffer should be empty - EXPECT_EQ(nullptr, buff.data()); // NOLINT(bugprone-use-after-move) - EXPECT_EQ(0, buff.size()); // NOLINT(bugprone-use-after-move) - EXPECT_EQ(0, buff.capacity()); // NOLINT(bugprone-use-after-move) + EXPECT_EQ(nullptr, + buff.data()); // NOLINT(bugprone-use-after-move, clang-analyzer-cplusplus.Move) + EXPECT_EQ(0, buff.size()); // NOLINT(bugprone-use-after-move) + EXPECT_EQ(0, buff.capacity()); // NOLINT(bugprone-use-after-move) EXPECT_EQ(rmm::cuda_stream_view{}, buff.stream()); // NOLINT(bugprone-use-after-move) EXPECT_NE(nullptr, buff.memory_resource()); // NOLINT(bugprone-use-after-move) } @@ -349,7 +351,7 @@ TYPED_TEST(DeviceBufferTest, MoveAssignmentToDefault) EXPECT_EQ(mr, dest.memory_resource()); // `from` should be empty - EXPECT_EQ(nullptr, src.data()); + EXPECT_EQ(nullptr, src.data()); // NOLINT(bugprone-use-after-move,clang-analyzer-cplusplus.Move) EXPECT_EQ(0, src.size()); EXPECT_EQ(0, src.capacity()); EXPECT_EQ(rmm::cuda_stream_default, src.stream()); @@ -377,7 +379,7 @@ TYPED_TEST(DeviceBufferTest, MoveAssignment) EXPECT_EQ(mr, dest.memory_resource()); // `from` should be empty - EXPECT_EQ(nullptr, src.data()); + EXPECT_EQ(nullptr, src.data()); // NOLINT(bugprone-use-after-move,clang-analyzer-cplusplus.Move) EXPECT_EQ(0, src.size()); EXPECT_EQ(0, src.capacity()); EXPECT_EQ(rmm::cuda_stream_default, src.stream()); @@ -393,8 +395,8 @@ TYPED_TEST(DeviceBufferTest, SelfMoveAssignment) auto* mr = buff.memory_resource(); auto stream = buff.stream(); - buff = std::move(buff); // self-move-assignment shouldn't modify the buffer - EXPECT_NE(nullptr, buff.data()); + buff = std::move(buff); // self-move-assignment shouldn't modify the buffer + EXPECT_NE(nullptr, buff.data()); // NOLINT(bugprone-use-after-move,clang-analyzer-cplusplus.Move) EXPECT_EQ(ptr, buff.data()); EXPECT_EQ(size, buff.size()); EXPECT_EQ(capacity, buff.capacity()); diff --git a/tests/device_scalar_tests.cpp b/tests/device_scalar_tests.cpp index 65f963132..63c471094 100644 --- a/tests/device_scalar_tests.cpp +++ b/tests/device_scalar_tests.cpp @@ -93,13 +93,14 @@ TYPED_TEST(DeviceScalarTest, MoveCtor) EXPECT_NE(nullptr, scalar.data()); EXPECT_EQ(this->value, scalar.value(this->stream)); - auto original_pointer = scalar.data(); - auto original_value = scalar.value(this->stream); + auto* original_pointer = scalar.data(); + auto original_value = scalar.value(this->stream); rmm::device_scalar moved_to{std::move(scalar)}; EXPECT_NE(nullptr, moved_to.data()); EXPECT_EQ(moved_to.data(), original_pointer); EXPECT_EQ(moved_to.value(this->stream), original_value); + // NOLINTNEXTLINE(bugprone-use-after-move,clang-analyzer-cplusplus.Move) EXPECT_EQ(nullptr, scalar.data()); } diff --git a/tests/device_uvector_tests.cpp b/tests/device_uvector_tests.cpp 
index 00ad66832..dce2cbbf6 100644 --- a/tests/device_uvector_tests.cpp +++ b/tests/device_uvector_tests.cpp @@ -65,8 +65,8 @@ TYPED_TEST(TypedUVectorTest, ResizeSmaller) { auto const original_size{12345}; rmm::device_uvector vec(original_size, this->stream()); - auto original_data = vec.data(); - auto original_begin = vec.begin(); + auto* original_data = vec.data(); + auto* original_begin = vec.begin(); auto smaller_size = vec.size() - 1; vec.resize(smaller_size, this->stream()); @@ -86,8 +86,8 @@ TYPED_TEST(TypedUVectorTest, ResizeLarger) { auto const original_size{12345}; rmm::device_uvector vec(original_size, this->stream()); - auto original_data = vec.data(); - auto original_begin = vec.begin(); + auto* original_data = vec.data(); + auto* original_begin = vec.begin(); auto larger_size = vec.size() + 1; vec.resize(larger_size, this->stream()); @@ -97,8 +97,8 @@ TYPED_TEST(TypedUVectorTest, ResizeLarger) EXPECT_EQ(vec.size(), larger_size); EXPECT_EQ(vec.capacity(), larger_size); - auto larger_data = vec.data(); - auto larger_begin = vec.begin(); + auto* larger_data = vec.data(); + auto* larger_begin = vec.begin(); // shrink_to_fit shouldn't have any effect vec.shrink_to_fit(this->stream()); @@ -127,7 +127,7 @@ TYPED_TEST(TypedUVectorTest, Release) auto const original_size{12345}; rmm::device_uvector vec(original_size, this->stream()); - auto original_data = vec.data(); + auto* original_data = vec.data(); rmm::device_buffer storage = vec.release(); From 3bfaa55184c145b14b57b765925c8c63bc008e00 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 15 Sep 2021 14:39:31 +1000 Subject: [PATCH 68/72] nodiscard --- include/rmm/device_scalar.hpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/include/rmm/device_scalar.hpp b/include/rmm/device_scalar.hpp index ff8461599..099abc08e 100644 --- a/include/rmm/device_scalar.hpp +++ b/include/rmm/device_scalar.hpp @@ -147,7 +147,10 @@ class device_scalar { * @return T The value of the scalar. * @param stream CUDA stream on which to perform the copy and synchronize. */ - value_type value(cuda_stream_view stream) const { return _storage.front_element(stream); } + [[nodiscard]] value_type value(cuda_stream_view stream) const + { + return _storage.front_element(stream); + } /** * @brief Sets the value of the `device_scalar` to the value of `v`. @@ -222,7 +225,7 @@ class device_scalar { * streams (e.g. using `cudaStreamWaitEvent()` or `cudaStreamSynchronize()`), otherwise there may * be a race condition. */ - pointer data() noexcept { return static_cast(_storage.data()); } + [[nodiscard]] pointer data() noexcept { return static_cast(_storage.data()); } /** * @brief Returns const pointer to object in device memory. @@ -232,7 +235,10 @@ class device_scalar { * streams (e.g. using `cudaStreamWaitEvent()` or `cudaStreamSynchronize()`), otherwise there may * be a race condition. 
*/ - const_pointer data() const noexcept { return static_cast(_storage.data()); } + [[nodiscard]] const_pointer data() const noexcept + { + return static_cast(_storage.data()); + } private: rmm::device_uvector _storage; From b7ea38c9ce312140f0eb1ef66828afe285b044ce Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 15 Sep 2021 14:39:47 +1000 Subject: [PATCH 69/72] nodiscard and nolint --- .../rmm/mr/device/logging_resource_adaptor.hpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/include/rmm/mr/device/logging_resource_adaptor.hpp b/include/rmm/mr/device/logging_resource_adaptor.hpp index 0bb707a6c..26448f887 100644 --- a/include/rmm/mr/device/logging_resource_adaptor.hpp +++ b/include/rmm/mr/device/logging_resource_adaptor.hpp @@ -123,7 +123,7 @@ class logging_resource_adaptor final : public device_memory_resource { * * @return Upstream* Pointer to the upstream resource. */ - Upstream* get_upstream() const noexcept { return upstream_; } + [[nodiscard]] Upstream* get_upstream() const noexcept { return upstream_; } /** * @brief Checks whether the upstream resource supports streams. @@ -162,12 +162,6 @@ class logging_resource_adaptor final : public device_memory_resource { } private: - // make_logging_adaptor needs access to private get_default_filename - template - friend logging_resource_adaptor make_logging_adaptor(T* upstream, - std::string const& filename, - bool auto_flush); - /** * @brief Return the value of the environment variable RMM_LOG_FILE. * @@ -273,6 +267,13 @@ class logging_resource_adaptor final : public device_memory_resource { return upstream_->get_mem_info(stream); } + // make_logging_adaptor needs access to private get_default_filename + template + // NOLINTNEXTLINE(readability-redundant-declaration) + friend logging_resource_adaptor make_logging_adaptor(T* upstream, + std::string const& filename, + bool auto_flush); + std::shared_ptr logger_; ///< spdlog logger object Upstream* upstream_; ///< The upstream resource used for satisfying From 098e08ad42e9762a4d6d8bf0ae598bd372126809 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 15 Sep 2021 14:40:21 +1000 Subject: [PATCH 70/72] cmake style --- cmake/thirdparty/get_gtest.cmake | 1 - 1 file changed, 1 deletion(-) diff --git a/cmake/thirdparty/get_gtest.cmake b/cmake/thirdparty/get_gtest.cmake index 8473fdbf2..a515b4e5a 100644 --- a/cmake/thirdparty/get_gtest.cmake +++ b/cmake/thirdparty/get_gtest.cmake @@ -12,7 +12,6 @@ # the License. # ============================================================================= - function(find_and_configure_gtest) include(${rapids-cmake-dir}/cpm/gtest.cmake) rapids_cpm_gtest() From c1f98725aad95006e009d6451999b5d9c4b025a6 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 15 Sep 2021 14:55:01 +1000 Subject: [PATCH 71/72] cmake docstring --- cmake/thirdparty/get_gtest.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/cmake/thirdparty/get_gtest.cmake b/cmake/thirdparty/get_gtest.cmake index a515b4e5a..4d4daff44 100644 --- a/cmake/thirdparty/get_gtest.cmake +++ b/cmake/thirdparty/get_gtest.cmake @@ -12,6 +12,7 @@ # the License. 
# ============================================================================= +# Use CPM to find or clone gtest function(find_and_configure_gtest) include(${rapids-cmake-dir}/cpm/gtest.cmake) rapids_cpm_gtest() From ce3b3edcbb5538a2e7a0f41497b6f367fa5a5ae5 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 16 Sep 2021 09:59:11 +1000 Subject: [PATCH 72/72] Fix device_uvector::set_element_async for non-fundamental types. --- include/rmm/device_uvector.hpp | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/include/rmm/device_uvector.hpp b/include/rmm/device_uvector.hpp index a52cb2ea8..b4b12c824 100644 --- a/include/rmm/device_uvector.hpp +++ b/include/rmm/device_uvector.hpp @@ -209,15 +209,22 @@ class device_uvector { { RMM_EXPECTS( element_index < size(), rmm::out_of_range, "Attempt to access out of bounds element."); + if constexpr (std::is_same::value) { RMM_CUDA_TRY( cudaMemsetAsync(element_ptr(element_index), value, sizeof(value), stream.value())); - } else if (std::is_fundamental::value and value == value_type{0}) { - set_element_to_zero_async(element_index, stream); - } else { - RMM_CUDA_TRY(cudaMemcpyAsync( - element_ptr(element_index), &value, sizeof(value), cudaMemcpyDefault, stream.value())); + return; + } + + if constexpr (std::is_fundamental::value) { + if (value == value_type{0}) { + set_element_to_zero_async(element_index, stream); + return; + } } + + RMM_CUDA_TRY(cudaMemcpyAsync( + element_ptr(element_index), &value, sizeof(value), cudaMemcpyDefault, stream.value())); } // We delete the r-value reference overload to prevent asynchronously copying from a literal or @@ -379,7 +386,7 @@ class device_uvector { * * @return The `device_buffer` used to store the vector elements */ - [[nodiscard]] device_buffer release() noexcept { return std::move(_storage); } + device_buffer release() noexcept { return std::move(_storage); } /** * @brief Returns the number of elements that can be held in currently allocated storage.
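
Note on the final patch: it works because `if constexpr` discards the untaken branch at compile time, so the `value == value_type{0}` comparison is never instantiated when `value_type` is not a fundamental type. With the previous run-time `else if`, that comparison had to compile for every element type, which broke element types lacking a suitable `operator==` or construction from `0`. Below is a minimal, host-only sketch of the same dispatch pattern; the `set_value` helper and `pair_t` type are illustrative stand-ins, not part of RMM.

#include <cstring>
#include <type_traits>

// Illustrative element type: no operator== and not constructible from 0.
struct pair_t {
  int first;
  int second;
};

// Same shape as the fixed device_uvector::set_element_async dispatch:
// the fundamental-only zero-check branch is discarded at compile time
// for other types, so pair_t never has to support `value == pair_t{0}`.
template <typename T>
void set_value(T* dst, T const& value)
{
  if constexpr (std::is_fundamental<T>::value) {
    if (value == T{0}) {
      std::memset(dst, 0, sizeof(T));  // stands in for set_element_to_zero_async
      return;
    }
  }
  std::memcpy(dst, &value, sizeof(T));  // stands in for cudaMemcpyAsync
}

int main()
{
  int scalar{};
  set_value(&scalar, 42);          // fundamental: zero check is evaluated at run time
  pair_t pair{};
  set_value(&pair, pair_t{1, 2});  // non-fundamental: zero check never instantiated
  return 0;
}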