From debe302290c8520265167f952a9978a2c3672eda Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 25 Aug 2021 13:26:14 +1000 Subject: [PATCH 01/72] Clean up: use std::size_t, include cstddef and aligned.hpp where missing --- include/rmm/cuda_stream_pool.hpp | 5 +-- include/rmm/detail/stack_trace.hpp | 3 +- include/rmm/device_buffer.hpp | 2 ++ include/rmm/device_uvector.hpp | 1 + include/rmm/exec_policy.hpp | 2 +- .../mr/device/aligned_resource_adaptor.hpp | 12 ++++--- .../rmm/mr/device/arena_memory_resource.hpp | 3 +- .../mr/device/cuda_async_memory_resource.hpp | 8 +++-- .../rmm/mr/device/cuda_memory_resource.hpp | 8 +++-- include/rmm/mr/device/detail/arena.hpp | 9 ++--- .../mr/device/detail/coalescing_free_list.hpp | 20 ++++++----- .../mr/device/detail/fixed_size_free_list.hpp | 5 +-- include/rmm/mr/device/detail/free_list.hpp | 4 +-- .../detail/stream_ordered_memory_resource.hpp | 33 ++++++++++--------- .../mr/device/fixed_size_memory_resource.hpp | 11 ++++--- .../mr/device/limiting_resource_adaptor.hpp | 8 +++-- .../mr/device/logging_resource_adaptor.hpp | 16 +++------ .../rmm/mr/device/managed_memory_resource.hpp | 8 +++-- include/rmm/mr/device/owning_wrapper.hpp | 4 +-- .../rmm/mr/device/polymorphic_allocator.hpp | 9 ++--- .../rmm/mr/device/pool_memory_resource.hpp | 25 +++++++------- .../mr/device/statistics_resource_adaptor.hpp | 6 ++-- .../device/thread_safe_resource_adaptor.hpp | 5 +-- .../mr/device/thrust_allocator_adaptor.hpp | 8 ++--- .../mr/device/tracking_resource_adaptor.hpp | 8 +++-- include/rmm/mr/host/new_delete_resource.hpp | 4 +-- .../rmm/mr/host/pinned_memory_resource.hpp | 3 +- 27 files changed, 130 insertions(+), 100 deletions(-) diff --git a/include/rmm/cuda_stream_pool.hpp b/include/rmm/cuda_stream_pool.hpp index 2e77f2047..27decc9fd 100644 --- a/include/rmm/cuda_stream_pool.hpp +++ b/include/rmm/cuda_stream_pool.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ #include #include +#include #include namespace rmm { @@ -84,7 +85,7 @@ class cuda_stream_pool { * * @return the number of streams in the pool */ - size_t get_pool_size() const noexcept { return streams_.size(); } + std::size_t get_pool_size() const noexcept { return streams_.size(); } private: std::vector streams_; diff --git a/include/rmm/detail/stack_trace.hpp b/include/rmm/detail/stack_trace.hpp index 358e95080..1e218fa53 100644 --- a/include/rmm/detail/stack_trace.hpp +++ b/include/rmm/detail/stack_trace.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -28,6 +28,7 @@ #include #include #include +#include #include #include #endif diff --git a/include/rmm/device_buffer.hpp b/include/rmm/device_buffer.hpp index ad8655180..f034b28fe 100644 --- a/include/rmm/device_buffer.hpp +++ b/include/rmm/device_buffer.hpp @@ -21,7 +21,9 @@ #include #include + #include +#include #include #include diff --git a/include/rmm/device_uvector.hpp b/include/rmm/device_uvector.hpp index 537801658..ca4cf6d30 100644 --- a/include/rmm/device_uvector.hpp +++ b/include/rmm/device_uvector.hpp @@ -23,6 +23,7 @@ #include #include +#include #include namespace rmm { diff --git a/include/rmm/exec_policy.hpp b/include/rmm/exec_policy.hpp index d984f0b47..98cd91cd4 100644 --- a/include/rmm/exec_policy.hpp +++ b/include/rmm/exec_policy.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/include/rmm/mr/device/aligned_resource_adaptor.hpp b/include/rmm/mr/device/aligned_resource_adaptor.hpp index adb668395..4e29b90b3 100644 --- a/include/rmm/mr/device/aligned_resource_adaptor.hpp +++ b/include/rmm/mr/device/aligned_resource_adaptor.hpp @@ -15,15 +15,16 @@ */ #pragma once -#include -#include -#include - #include #include #include #include +#include +#include +#include +#include + namespace rmm::mr { /** * @brief Resource that adapts `Upstream` memory resource to allocate memory in a specified @@ -195,7 +196,8 @@ class aligned_resource_adaptor final : public device_memory_resource { * @param stream Stream on which to get the mem info. * @return std::pair containing free_size and total_size of memory */ - [[nodiscard]] std::pair do_get_mem_info(cuda_stream_view stream) const override + [[nodiscard]] std::pair do_get_mem_info( + cuda_stream_view stream) const override { return upstream_->get_mem_info(stream); } diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp index 84bb9b129..28376142c 100644 --- a/include/rmm/mr/device/arena_memory_resource.hpp +++ b/include/rmm/mr/device/arena_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -21,6 +21,7 @@ #include +#include #include #include diff --git a/include/rmm/mr/device/cuda_async_memory_resource.hpp b/include/rmm/mr/device/cuda_async_memory_resource.hpp index cb7bd004f..9dab79893 100644 --- a/include/rmm/mr/device/cuda_async_memory_resource.hpp +++ b/include/rmm/mr/device/cuda_async_memory_resource.hpp @@ -15,16 +15,18 @@ */ #pragma once -#include #include #include #include #include #include +#include + #include -#include +#include +#include #if CUDART_VERSION >= 11020 // 11.2 introduced cudaMallocAsync #define RMM_CUDA_MALLOC_ASYNC_SUPPORT @@ -194,7 +196,7 @@ class cuda_async_memory_resource final : public device_memory_resource { * * @return std::pair contaiing free_size and total_size of memory */ - std::pair do_get_mem_info(rmm::cuda_stream_view) const override + std::pair do_get_mem_info(rmm::cuda_stream_view) const override { return std::make_pair(0, 0); } diff --git a/include/rmm/mr/device/cuda_memory_resource.hpp b/include/rmm/mr/device/cuda_memory_resource.hpp index f3ebf7e93..d419ce335 100644 --- a/include/rmm/mr/device/cuda_memory_resource.hpp +++ b/include/rmm/mr/device/cuda_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,11 +15,13 @@ */ #pragma once -#include "device_memory_resource.hpp" +#include #include #include +#include + namespace rmm { namespace mr { /** @@ -108,7 +110,7 @@ class cuda_memory_resource final : public device_memory_resource { * * @return std::pair contaiing free_size and total_size of memory */ - std::pair do_get_mem_info(cuda_stream_view) const override + std::pair do_get_mem_info(cuda_stream_view) const override { std::size_t free_size; std::size_t total_size; diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index 3d29b361e..7a449949c 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,6 +23,7 @@ #include #include +#include #include #include #include @@ -55,7 +56,7 @@ class block { * @param pointer The address for the beginning of the block. * @param size The size of the block. */ - block(char* pointer, size_t size) : pointer_(pointer), size_(size) {} + block(char* pointer, std::size_t size) : pointer_(pointer), size_(size) {} /** * @brief Construct a block given a void pointer and size. @@ -63,13 +64,13 @@ class block { * @param pointer The address for the beginning of the block. * @param size The size of the block. */ - block(void* pointer, size_t size) : pointer_(static_cast(pointer)), size_(size) {} + block(void* pointer, std::size_t size) : pointer_(static_cast(pointer)), size_(size) {} /// Returns the underlying pointer. void* pointer() const { return pointer_; } /// Returns the size of the block. - size_t size() const { return size_; } + std::size_t size() const { return size_; } /// Returns true if this block is valid (non-null), false otherwise. 
bool is_valid() const { return pointer_ != nullptr; } diff --git a/include/rmm/mr/device/detail/coalescing_free_list.hpp b/include/rmm/mr/device/detail/coalescing_free_list.hpp index 6050f4f6f..6d7aa9612 100644 --- a/include/rmm/mr/device/detail/coalescing_free_list.hpp +++ b/include/rmm/mr/device/detail/coalescing_free_list.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ #include #include +#include #include #include @@ -35,7 +36,10 @@ namespace detail { */ struct block : public block_base { block() = default; - block(char* ptr, size_t size, bool is_head) : block_base{ptr}, size_bytes{size}, head{is_head} {} + block(char* ptr, std::size_t size, bool is_head) + : block_base{ptr}, size_bytes{size}, head{is_head} + { + } /** * @brief Returns the pointer to the memory represented by this block. @@ -49,7 +53,7 @@ struct block : public block_base { * * @return the size in bytes of the memory represented by this block. */ - inline size_t size() const { return size_bytes; } + inline std::size_t size() const { return size_bytes; } /** * @brief Returns whether this block is the start of an allocation from an upstream allocator. @@ -104,7 +108,7 @@ struct block : public block_base { * @param sz The size in bytes to check for fit. * @return true if this block is at least `sz` bytes */ - inline bool fits(size_t sz) const noexcept { return size() >= sz; } + inline bool fits(std::size_t sz) const noexcept { return size() >= sz; } /** * @brief Is this block a better fit for `sz` bytes than block `b`? @@ -114,7 +118,7 @@ struct block : public block_base { * @return true If this block is a tighter fit for `sz` bytes than block `b`. * @return false If this block does not fit `sz` bytes or `b` is a tighter fit. */ - inline bool is_better_fit(size_t sz, block const& b) const noexcept + inline bool is_better_fit(std::size_t sz, block const& b) const noexcept { return fits(sz) && (size() < b.size() || b.size() < sz); } @@ -128,8 +132,8 @@ struct block : public block_base { } private: - size_t size_bytes{}; ///< Size in bytes - bool head{}; ///< Indicates whether ptr was allocated from the heap + std::size_t size_bytes{}; ///< Size in bytes + bool head{}; ///< Indicates whether ptr was allocated from the heap }; /// Print block on an ostream @@ -222,7 +226,7 @@ struct coalescing_free_list : free_list { * @param size The size in bytes of the desired block. * @return block A block large enough to store `size` bytes. */ - block_type get_block(size_t size) + block_type get_block(std::size_t size) { // find best fit block auto const iter = diff --git a/include/rmm/mr/device/detail/fixed_size_free_list.hpp b/include/rmm/mr/device/detail/fixed_size_free_list.hpp index 02a316c7d..f74ab1135 100644 --- a/include/rmm/mr/device/detail/fixed_size_free_list.hpp +++ b/include/rmm/mr/device/detail/fixed_size_free_list.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ #include +#include #include namespace rmm { @@ -63,7 +64,7 @@ struct fixed_size_free_list : free_list { * @param size The size in bytes of the desired block (unused). * @return block A block large enough to store `size` bytes. 
*/ - block_type get_block(size_t size) + block_type get_block(std::size_t size) { if (is_empty()) return block_type{}; diff --git a/include/rmm/mr/device/detail/free_list.hpp b/include/rmm/mr/device/detail/free_list.hpp index 45dbf8ad7..18b5ad493 100644 --- a/include/rmm/mr/device/detail/free_list.hpp +++ b/include/rmm/mr/device/detail/free_list.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -50,7 +50,7 @@ inline std::ostream& operator<<(std::ostream& out, const block_base& b) * * - `void insert(block_type const& b) // insert a block into the free list` * - `void insert(free_list&& other) // insert / merge another free list` - * - `block_type get_block(size_t size) // get a block of at least size bytes + * - `block_type get_block(std::size_t size) // get a block of at least size bytes * - `void print() // print the block` * * @tparam list_type the type of the internal list data structure. diff --git a/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp b/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp index b6123e772..bdef45546 100644 --- a/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp +++ b/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,14 +15,16 @@ */ #pragma once -#include +#include #include #include #include #include +#include #include +#include #include #include #include @@ -66,10 +68,10 @@ struct crtp { * Classes derived from stream_ordered_memory_resource must implement the following four methods, * documented separately: * - * 1. `size_t get_maximum_allocation_size() const` - * 2. `block_type expand_pool(size_t size, free_list& blocks, cuda_stream_view stream)` - * 3. `split_block allocate_from_block(block_type const& b, size_t size)` - * 4. `block_type free_block(void* p, size_t size) noexcept` + * 1. `std::size_t get_maximum_allocation_size() const` + * 2. `block_type expand_pool(std::size_t size, free_list& blocks, cuda_stream_view stream)` + * 3. `split_block allocate_from_block(block_type const& b, std::size_t size)` + * 4. `block_type free_block(void* p, std::size_t size) noexcept` */ template class stream_ordered_memory_resource : public crtp, public device_memory_resource { @@ -93,12 +95,13 @@ class stream_ordered_memory_resource : public crtp, public device_ * @brief Get the maximum size of a single allocation supported by this suballocator memory * resource * - * Default implementation is the maximum `size_t` value, but fixed-size allocators will have a - * lower limit. Override this function in derived classes as necessary. + * Default implementation is the maximum `std::size_t` value, but fixed-size allocators will have + * a lower limit. Override this function in derived classes as necessary. 
* - * @return size_t The maximum size of a single allocation supported by this memory resource + * @return std::size_t The maximum size of a single allocation supported by this memory resource */ - // size_t get_maximum_allocation_size() const { return std::numeric_limits::max(); } + // std::size_t get_maximum_allocation_size() const { return + // std::numeric_limits::max(); } /** * @brief Allocate space (typically from upstream) to supply the suballocation pool and return @@ -114,7 +117,7 @@ class stream_ordered_memory_resource : public crtp, public device_ * @param stream The stream on which the memory is to be used. * @return block_type a block of at least `size` bytes */ - // block_type expand_pool(size_t size, free_list& blocks, cuda_stream_view stream) + // block_type expand_pool(std::size_t size, free_list& blocks, cuda_stream_view stream) /// Struct representing a block that has been split for allocation struct split_block { @@ -134,7 +137,7 @@ class stream_ordered_memory_resource : public crtp, public device_ * @return A `split_block` comprising the allocated pointer and any unallocated remainder of the * input block. */ - // split_block allocate_from_block(block_type const& b, size_t size) + // split_block allocate_from_block(block_type const& b, std::size_t size) /** * @brief Finds, frees and returns the block associated with pointer `p`. @@ -144,7 +147,7 @@ class stream_ordered_memory_resource : public crtp, public device_ * @return The (now freed) block associated with `p`. The caller is expected to return the block * to the pool. */ - // block_type free_block(void* p, size_t size) noexcept + // block_type free_block(void* p, std::size_t size) noexcept /** * @brief Returns the block `b` (last used on stream `stream_event`) to the pool. @@ -307,7 +310,7 @@ class stream_ordered_memory_resource : public crtp, public device_ * @param stream_event The stream and associated event on which the allocation will be used. * @return block_type A block of memory of at least `size` bytes */ - block_type get_block(size_t size, stream_event_pair stream_event) + block_type get_block(std::size_t size, stream_event_pair stream_event) { // Try to find a satisfactory block in free list for the same stream (no sync required) auto iter = stream_free_blocks_.find(stream_event); @@ -352,7 +355,7 @@ class stream_ordered_memory_resource : public crtp, public device_ * @return A block with non-null pointer and size >= `size`, or a nullptr block if none is * available in `blocks`. 
*/ - block_type get_block_from_other_stream(size_t size, + block_type get_block_from_other_stream(std::size_t size, stream_event_pair stream_event, free_list& blocks, bool merge_first) diff --git a/include/rmm/mr/device/fixed_size_memory_resource.hpp b/include/rmm/mr/device/fixed_size_memory_resource.hpp index fe5f9707b..b744910a0 100644 --- a/include/rmm/mr/device/fixed_size_memory_resource.hpp +++ b/include/rmm/mr/device/fixed_size_memory_resource.hpp @@ -16,6 +16,7 @@ #pragma once #include +#include #include #include #include @@ -129,10 +130,10 @@ class fixed_size_memory_resource /** * @brief Get the (fixed) size of allocations supported by this memory resource * - * @return size_t The (fixed) maximum size of a single allocation supported by this memory + * @return std::size_t The (fixed) maximum size of a single allocation supported by this memory * resource */ - size_t get_maximum_allocation_size() const { return get_block_size(); } + std::size_t get_maximum_allocation_size() const { return get_block_size(); } /** * @brief Allocate a block from upstream to supply the suballocation pool. @@ -144,7 +145,7 @@ class fixed_size_memory_resource * @param stream The stream on which the memory is to be used. * @return block_type The allocated block */ - block_type expand_pool(size_t size, free_list& blocks, cuda_stream_view stream) + block_type expand_pool(std::size_t size, free_list& blocks, cuda_stream_view stream) { blocks.insert(std::move(blocks_from_upstream(stream))); return blocks.get_block(size); @@ -181,7 +182,7 @@ class fixed_size_memory_resource * @return A pair comprising the allocated pointer and any unallocated remainder of the input * block. */ - split_block allocate_from_block(block_type const& b, size_t size) + split_block allocate_from_block(block_type const& b, std::size_t size) { return split_block{b.pointer(), block_type{nullptr}}; } @@ -195,7 +196,7 @@ class fixed_size_memory_resource * @return The (now freed) block associated with `p`. The caller is expected to return the block * to the pool. */ - block_type free_block(void* p, size_t size) noexcept + block_type free_block(void* p, std::size_t size) noexcept { // Deallocating a fixed-size block just inserts it in the free list, which is // handled by the parent class diff --git a/include/rmm/mr/device/limiting_resource_adaptor.hpp b/include/rmm/mr/device/limiting_resource_adaptor.hpp index d2ce01a1a..5002962d5 100644 --- a/include/rmm/mr/device/limiting_resource_adaptor.hpp +++ b/include/rmm/mr/device/limiting_resource_adaptor.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,8 @@ #include #include +#include + namespace rmm { namespace mr { /** @@ -185,7 +187,7 @@ class limiting_resource_adaptor final : public device_memory_resource { * @param stream Stream on which to get the mem info. 
* @return std::pair contaiing free_size and total_size of memory */ - std::pair do_get_mem_info(cuda_stream_view stream) const override + std::pair do_get_mem_info(cuda_stream_view stream) const override { return {allocation_limit_ - allocated_bytes_, allocation_limit_}; } @@ -213,7 +215,7 @@ class limiting_resource_adaptor final : public device_memory_resource { */ template limiting_resource_adaptor make_limiting_adaptor(Upstream* upstream, - size_t allocation_limit) + std::size_t allocation_limit) { return limiting_resource_adaptor{upstream, allocation_limit}; } diff --git a/include/rmm/mr/device/logging_resource_adaptor.hpp b/include/rmm/mr/device/logging_resource_adaptor.hpp index bd5c38685..1148afa88 100644 --- a/include/rmm/mr/device/logging_resource_adaptor.hpp +++ b/include/rmm/mr/device/logging_resource_adaptor.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,22 +15,16 @@ */ #pragma once -#include -#include - #include #include +#include -// If using GCC, temporary workaround for older libcudacxx defining _LIBCPP_VERSION -// undefine it before including spdlog, due to fmtlib checking if it is defined -// TODO: remove once libcudacxx is on Github and RAPIDS depends on it -#ifdef __GNUG__ -#undef _LIBCPP_VERSION -#endif +#include #include #include #include +#include #include #include @@ -274,7 +268,7 @@ class logging_resource_adaptor final : public device_memory_resource { * @param stream Stream on which to get the mem info. * @return std::pair contaiing free_size and total_size of memory */ - std::pair do_get_mem_info(cuda_stream_view stream) const override + std::pair do_get_mem_info(cuda_stream_view stream) const override { return upstream_->get_mem_info(stream); } diff --git a/include/rmm/mr/device/managed_memory_resource.hpp b/include/rmm/mr/device/managed_memory_resource.hpp index d0ec75de8..ebce40bf5 100644 --- a/include/rmm/mr/device/managed_memory_resource.hpp +++ b/include/rmm/mr/device/managed_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,11 +15,13 @@ */ #pragma once -#include "device_memory_resource.hpp" +#include #include #include +#include + namespace rmm { namespace mr { /** @@ -113,7 +115,7 @@ class managed_memory_resource final : public device_memory_resource { * @param stream to execute on * @return std::pair contaiing free_size and total_size of memory */ - std::pair do_get_mem_info(cuda_stream_view stream) const override + std::pair do_get_mem_info(cuda_stream_view stream) const override { std::size_t free_size{}; std::size_t total_size{}; diff --git a/include/rmm/mr/device/owning_wrapper.hpp b/include/rmm/mr/device/owning_wrapper.hpp index cee32e0fe..6abe950b0 100644 --- a/include/rmm/mr/device/owning_wrapper.hpp +++ b/include/rmm/mr/device/owning_wrapper.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -15,7 +15,7 @@ */ #pragma once -#include "device_memory_resource.hpp" +#include #include #include diff --git a/include/rmm/mr/device/polymorphic_allocator.hpp b/include/rmm/mr/device/polymorphic_allocator.hpp index a43b3bd36..4f97cf568 100644 --- a/include/rmm/mr/device/polymorphic_allocator.hpp +++ b/include/rmm/mr/device/polymorphic_allocator.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,13 +16,14 @@ #pragma once -#include -#include - #include #include #include +#include +#include +#include + namespace rmm { namespace mr { diff --git a/include/rmm/mr/device/pool_memory_resource.hpp b/include/rmm/mr/device/pool_memory_resource.hpp index 8b09e5208..cfdebf786 100644 --- a/include/rmm/mr/device/pool_memory_resource.hpp +++ b/include/rmm/mr/device/pool_memory_resource.hpp @@ -31,7 +31,7 @@ #include #include -#include +#include #include #include #include @@ -141,11 +141,14 @@ class pool_memory_resource final * @brief Get the maximum size of allocations supported by this memory resource * * Note this does not depend on the memory size of the device. It simply returns the maximum - * value of `size_t` + * value of `std::size_t` * - * @return size_t The maximum size of a single allocation supported by this memory resource + * @return std::size_t The maximum size of a single allocation supported by this memory resource */ - size_t get_maximum_allocation_size() const { return std::numeric_limits::max(); } + std::size_t get_maximum_allocation_size() const + { + return std::numeric_limits::max(); + } /** * @brief Try to expand the pool by allocating a block of at least `min_size` bytes from @@ -247,7 +250,7 @@ class pool_memory_resource final * Returns 0 if the requested size cannot be satisfied. * * @param size The size of the minimum allocation immediately needed - * @return size_t The computed size to grow the pool. + * @return std::size_t The computed size to grow the pool. */ std::size_t size_to_grow(std::size_t size) const { @@ -268,7 +271,7 @@ class pool_memory_resource final * @param stream The stream on which the memory is to be used. * @return block_type The allocated block */ - thrust::optional block_from_upstream(size_t size, cuda_stream_view stream) + thrust::optional block_from_upstream(std::size_t size, cuda_stream_view stream) { RMM_LOG_DEBUG("[A][Stream {}][Upstream {}B]", fmt::ptr(stream.value()), size); @@ -294,7 +297,7 @@ class pool_memory_resource final * @return A pair comprising the allocated pointer and any unallocated remainder of the input * block. */ - split_block allocate_from_block(block_type const& b, size_t size) + split_block allocate_from_block(block_type const& b, std::size_t size) { block_type const alloc{b.pointer(), size, b.is_head()}; #ifdef RMM_POOL_TRACK_ALLOCATIONS @@ -315,7 +318,7 @@ class pool_memory_resource final * @return The (now freed) block associated with `p`. The caller is expected to return the block * to the pool. */ - block_type free_block(void* p, size_t size) noexcept + block_type free_block(void* p, std::size_t size) noexcept { #ifdef RMM_POOL_TRACK_ALLOCATIONS if (p == nullptr) return block_type{}; @@ -338,9 +341,9 @@ class pool_memory_resource final * * Includes allocated as well as free memory. * - * @return size_t The total size of the currently allocated pool. 
+ * @return std::size_t The total size of the currently allocated pool. */ - size_t pool_size() const noexcept { return current_pool_size_; } + std::size_t pool_size() const noexcept { return current_pool_size_; } /** * @brief Free all memory allocated from the upstream memory_resource. @@ -419,7 +422,7 @@ class pool_memory_resource final * @param stream to execute on * @return std::pair contaiing free_size and total_size of memory */ - std::pair do_get_mem_info(cuda_stream_view stream) const override + std::pair do_get_mem_info(cuda_stream_view stream) const override { std::size_t free_size{}; std::size_t total_size{}; diff --git a/include/rmm/mr/device/statistics_resource_adaptor.hpp b/include/rmm/mr/device/statistics_resource_adaptor.hpp index d71dc52d4..bcc0bf10b 100644 --- a/include/rmm/mr/device/statistics_resource_adaptor.hpp +++ b/include/rmm/mr/device/statistics_resource_adaptor.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,8 +15,10 @@ */ #pragma once -#include #include + +#include +#include #include namespace rmm { diff --git a/include/rmm/mr/device/thread_safe_resource_adaptor.hpp b/include/rmm/mr/device/thread_safe_resource_adaptor.hpp index b0b2f1273..2675a4df2 100644 --- a/include/rmm/mr/device/thread_safe_resource_adaptor.hpp +++ b/include/rmm/mr/device/thread_safe_resource_adaptor.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ #include #include +#include #include namespace rmm { @@ -142,7 +143,7 @@ class thread_safe_resource_adaptor final : public device_memory_resource { * @param stream Stream on which to get the mem info. * @return std::pair contaiing free_size and total_size of memory */ - std::pair do_get_mem_info(cuda_stream_view stream) const override + std::pair do_get_mem_info(cuda_stream_view stream) const override { lock_t lock(mtx); return upstream_->get_mem_info(stream); diff --git a/include/rmm/mr/device/thrust_allocator_adaptor.hpp b/include/rmm/mr/device/thrust_allocator_adaptor.hpp index e7acd00fd..d841304a6 100644 --- a/include/rmm/mr/device/thrust_allocator_adaptor.hpp +++ b/include/rmm/mr/device/thrust_allocator_adaptor.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,12 +16,12 @@ #pragma once -#include -#include - #include #include +#include +#include + namespace rmm { namespace mr { /** diff --git a/include/rmm/mr/device/tracking_resource_adaptor.hpp b/include/rmm/mr/device/tracking_resource_adaptor.hpp index 0e2ca9c6e..1a32a1c44 100644 --- a/include/rmm/mr/device/tracking_resource_adaptor.hpp +++ b/include/rmm/mr/device/tracking_resource_adaptor.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -15,11 +15,13 @@ */ #pragma once -#include -#include #include #include #include + +#include +#include +#include #include #include diff --git a/include/rmm/mr/host/new_delete_resource.hpp b/include/rmm/mr/host/new_delete_resource.hpp index aaf1d9c25..0f27cbf3c 100644 --- a/include/rmm/mr/host/new_delete_resource.hpp +++ b/include/rmm/mr/host/new_delete_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,7 +15,7 @@ */ #pragma once -#include "host_memory_resource.hpp" +#include #include diff --git a/include/rmm/mr/host/pinned_memory_resource.hpp b/include/rmm/mr/host/pinned_memory_resource.hpp index 5188644cd..d00a5cffe 100644 --- a/include/rmm/mr/host/pinned_memory_resource.hpp +++ b/include/rmm/mr/host/pinned_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,6 +15,7 @@ */ #pragma once +#include #include #include From 86e78590e673c3d6b440af43667d66bac82ea915 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 26 Aug 2021 08:53:02 +1000 Subject: [PATCH 02/72] Fix copyright. --- include/rmm/cuda_stream_pool.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/rmm/cuda_stream_pool.hpp b/include/rmm/cuda_stream_pool.hpp index 27decc9fd..c0ef1dbce 100644 --- a/include/rmm/cuda_stream_pool.hpp +++ b/include/rmm/cuda_stream_pool.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
From b9f7b42f844d0101531bd23428571c182f614d47 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 31 Aug 2021 09:18:44 +1000 Subject: [PATCH 03/72] More missed std::size_t --- .../multi_stream_allocations_bench.cu | 4 ++- .../random_allocations/random_allocations.cpp | 35 ++++++++++--------- tests/mr/device/mr_multithreaded_tests.cpp | 2 +- tests/mr/device/mr_test.hpp | 4 +-- 4 files changed, 24 insertions(+), 21 deletions(-) diff --git a/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu b/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu index 6c763fcb7..7d0a8a17a 100644 --- a/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu +++ b/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu @@ -31,6 +31,8 @@ #include +#include + __global__ void compute_bound_kernel(int64_t* out) { clock_t clock_begin = clock64(); @@ -56,7 +58,7 @@ static void run_prewarm(rmm::cuda_stream_pool& stream_pool, rmm::mr::device_memo } } -static void run_test(size_t num_kernels, +static void run_test(std::size_t num_kernels, rmm::cuda_stream_pool& stream_pool, rmm::mr::device_memory_resource* mr) { diff --git a/benchmarks/random_allocations/random_allocations.cpp b/benchmarks/random_allocations/random_allocations.cpp index 6d43b0677..a69b26b91 100644 --- a/benchmarks/random_allocations/random_allocations.cpp +++ b/benchmarks/random_allocations/random_allocations.cpp @@ -27,6 +27,7 @@ #include #include +#include #include #include #include @@ -61,8 +62,8 @@ allocation remove_at(allocation_vector& allocs, std::size_t index) template void random_allocation_free(rmm::mr::device_memory_resource& mr, SizeDistribution size_distribution, - size_t num_allocations, - size_t max_usage, // in MiB + std::size_t num_allocations, + std::size_t max_usage, // in MiB rmm::cuda_stream_view stream = {}) { std::default_random_engine generator; @@ -77,11 +78,11 @@ void random_allocation_free(rmm::mr::device_memory_resource& mr, std::size_t allocation_count{0}; allocation_vector allocations{}; - size_t allocation_size{0}; + std::size_t allocation_size{0}; for (std::size_t i = 0; i < num_allocations * 2; ++i) { bool do_alloc = true; - size_t size = static_cast(size_distribution(generator)); + auto size = static_cast(size_distribution(generator)); if (active_allocations > 0) { int chance = op_distribution(generator); @@ -113,7 +114,7 @@ void random_allocation_free(rmm::mr::device_memory_resource& mr, #endif } else { // dealloc, or alloc failed if (active_allocations > 0) { - size_t index = index_distribution(generator) % active_allocations; + std::size_t index = index_distribution(generator) % active_allocations; active_allocations--; allocation to_free = remove_at(allocations, index); mr.deallocate(to_free.p, to_free.size, stream); @@ -136,9 +137,9 @@ void random_allocation_free(rmm::mr::device_memory_resource& mr, } // namespace void uniform_random_allocations(rmm::mr::device_memory_resource& mr, - size_t num_allocations, - size_t max_allocation_size, // in MiB - size_t max_usage, + std::size_t num_allocations, + std::size_t max_allocation_size, // in MiB + std::size_t max_usage, rmm::cuda_stream_view stream = {}) { std::uniform_int_distribution size_distribution(1, max_allocation_size * size_mb); @@ -147,10 +148,10 @@ void uniform_random_allocations(rmm::mr::device_memory_resource& mr, // TODO figure out how to map a normal distribution to integers between 1 and max_allocation_size /*void normal_random_allocations(rmm::mr::device_memory_resource& mr, - size_t 
num_allocations = 1000, - size_t mean_allocation_size = 500, // in MiB - size_t stddev_allocation_size = 500, // in MiB - size_t max_usage = 8 << 20, + std::size_t num_allocations = 1000, + std::size_t mean_allocation_size = 500, // in MiB + std::size_t stddev_allocation_size = 500, // in MiB + std::size_t max_usage = 8 << 20, cuda_stream_view stream) { std::normal_distribution size_distribution(, max_allocation_size * size_mb); }*/ @@ -181,14 +182,14 @@ inline auto make_binning() using MRFactoryFunc = std::function()>; -constexpr size_t max_usage = 16000; +constexpr std::size_t max_usage = 16000; static void BM_RandomAllocations(benchmark::State& state, MRFactoryFunc factory) { auto mr = factory(); - size_t num_allocations = state.range(0); - size_t max_size = state.range(1); + std::size_t num_allocations = state.range(0); + std::size_t max_size = state.range(1); try { for (auto _ : state) @@ -252,8 +253,8 @@ void declare_benchmark(std::string name) } static void profile_random_allocations(MRFactoryFunc factory, - size_t num_allocations, - size_t max_size) + std::size_t num_allocations, + std::size_t max_size) { auto mr = factory(); diff --git a/tests/mr/device/mr_multithreaded_tests.cpp b/tests/mr/device/mr_multithreaded_tests.cpp index dbacaba71..233686f7e 100644 --- a/tests/mr/device/mr_multithreaded_tests.cpp +++ b/tests/mr/device/mr_multithreaded_tests.cpp @@ -175,7 +175,7 @@ void allocate_loop(rmm::mr::device_memory_resource* mr, std::uniform_int_distribution size_distribution(1, max_size); for (std::size_t i = 0; i < num_allocations; ++i) { - size_t size = size_distribution(generator); + std::size_t size = size_distribution(generator); void* ptr{}; EXPECT_NO_THROW(ptr = mr->allocate(size, stream)); { diff --git a/tests/mr/device/mr_test.hpp b/tests/mr/device/mr_test.hpp index 51a7e41ac..27da69fbe 100644 --- a/tests/mr/device/mr_test.hpp +++ b/tests/mr/device/mr_test.hpp @@ -179,7 +179,7 @@ inline void test_mixed_random_allocation_free(rmm::mr::device_memory_resource* m } if (do_alloc) { - size_t size = size_distribution(generator); + std::size_t size = size_distribution(generator); active_allocations++; allocation_count++; EXPECT_NO_THROW(allocations.emplace_back(mr->allocate(size, stream), size)); @@ -187,7 +187,7 @@ inline void test_mixed_random_allocation_free(rmm::mr::device_memory_resource* m EXPECT_NE(nullptr, new_allocation.p); EXPECT_TRUE(is_pointer_aligned(new_allocation.p)); } else { - size_t index = index_distribution(generator) % active_allocations; + std::size_t index = index_distribution(generator) % active_allocations; active_allocations--; allocation to_free = allocations[index]; allocations.erase(std::next(allocations.begin(), index)); From d7f1a32c6c41f2dcc1f1fd81eca74c0706a8ae18 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 31 Aug 2021 09:19:34 +1000 Subject: [PATCH 04/72] doc --- include/rmm/mr/device/device_memory_resource.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/rmm/mr/device/device_memory_resource.hpp b/include/rmm/mr/device/device_memory_resource.hpp index 4b5011d1d..d9817a933 100644 --- a/include/rmm/mr/device/device_memory_resource.hpp +++ b/include/rmm/mr/device/device_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -162,8 +162,8 @@ class device_memory_resource { * * @param stream the stream whose memory manager we want to retrieve * - * @returns a std::pair which contains free memory in bytes - * in .first and total amount of memory in .second + * @returns a pair containing the free memory in bytes in .first and total amount of memory in + * .second */ std::pair get_mem_info(cuda_stream_view stream) const { From 97f5571412f0de94552f2d880448fdd6828b6dde Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 31 Aug 2021 11:06:54 +1000 Subject: [PATCH 05/72] .clang-tidy and initial fixes --- .clang-tidy | 56 ++++++ .../random_allocations/random_allocations.cpp | 118 +++++++----- include/rmm/cuda_stream_view.hpp | 56 +++--- include/rmm/detail/aligned.hpp | 22 +-- include/rmm/detail/cuda_util.hpp | 9 +- include/rmm/detail/error.hpp | 11 +- include/rmm/detail/stack_trace.hpp | 31 ++-- include/rmm/device_uvector.hpp | 80 ++++---- include/rmm/logger.hpp | 6 +- include/rmm/mr/device/detail/arena.hpp | 172 +++++++++--------- 10 files changed, 318 insertions(+), 243 deletions(-) create mode 100644 .clang-tidy diff --git a/.clang-tidy b/.clang-tidy new file mode 100644 index 000000000..997fd9e6e --- /dev/null +++ b/.clang-tidy @@ -0,0 +1,56 @@ +--- +Checks: 'clang-diagnostic-*, + clang-analyzer-*, + cppcoreguidelines-*, + modernize-*, + bugprone-*, + performance-*, + readability-*, + llvm-*, + -modernize-use-trailing-return-type' +WarningsAsErrors: '' +HeaderFilterRegex: '' +AnalyzeTemporaryDtors: false +FormatStyle: none +CheckOptions: + - key: cert-dcl16-c.NewSuffixes + value: 'L;LL;LU;LLU' + - key: cert-oop54-cpp.WarnOnlyIfThisHasSuspiciousField + value: '0' + - key: cert-str34-c.DiagnoseSignedUnsignedCharComparisons + value: '0' + - key: cppcoreguidelines-explicit-virtual-functions.IgnoreDestructors + value: '1' + - key: cppcoreguidelines-non-private-member-variables-in-classes.IgnoreClassesWithAllMemberVariablesBeingPublic + value: '1' + - key: google-readability-braces-around-statements.ShortStatementLines + value: '1' + - key: google-readability-function-size.StatementThreshold + value: '800' + - key: google-readability-namespace-comments.ShortNamespaceLines + value: '10' + - key: google-readability-namespace-comments.SpacesBeforeComments + value: '2' + - key: llvm-else-after-return.WarnOnConditionVariables + value: '0' + - key: llvm-else-after-return.WarnOnUnfixable + value: '0' + - key: llvm-qualified-auto.AddConstToQualified + value: '0' + - key: modernize-loop-convert.MaxCopySize + value: '16' + - key: modernize-loop-convert.MinConfidence + value: reasonable + - key: modernize-loop-convert.NamingStyle + value: CamelCase + - key: modernize-pass-by-value.IncludeStyle + value: llvm + - key: modernize-replace-auto-ptr.IncludeStyle + value: llvm + - key: modernize-use-nullptr.NullMacros + value: 'NULL' + - key: readability-identifier-length.IgnoredParameterNames + value: 'mr|os' + - key: readability-identifier-length.IgnoredVariableNames + value: 'mr|_' +... 
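For reference, a minimal, hypothetical C++ sketch (not part of this patch; `grow_pool`, `pool_size`, and `example` are made-up names) of the kind of rewrite the checks configured in .clang-tidy above drive in the diffs that follow: short identifiers are lengthened per readability-identifier-length (with `mr` and `os` exempted via IgnoredParameterNames), single-statement branches gain braces, byte counts use std::size_t, and intentional exceptions are annotated with NOLINT, mirroring the suppressions added in random_allocations.cpp.

    #include <cstddef>
    #include <iostream>

    namespace example {

    // A mutable global kept for illustration; the NOLINT mirrors the suppression
    // style used in the benchmark sources below.
    std::size_t pool_size = 0;  // NOLINT(cppcoreguidelines-avoid-non-const-global-variables)

    // Before such a cleanup this might have been `void grow(int n, std::ostream& os)`
    // with an unbraced if: readability-identifier-length flags `n`, while `os` is
    // exempted by the configuration above.
    void grow_pool(std::size_t num_bytes, std::ostream& os)
    {
      if (num_bytes > 0) {  // braces added to satisfy readability-braces-around-statements
        pool_size += num_bytes;
      }
      os << "pool size is now " << pool_size << " bytes\n";
    }

    }  // namespace example

    int main()
    {
      example::grow_pool(256, std::cout);
      return 0;
    }
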
diff --git a/benchmarks/random_allocations/random_allocations.cpp b/benchmarks/random_allocations/random_allocations.cpp index a69b26b91..dab65b769 100644 --- a/benchmarks/random_allocations/random_allocations.cpp +++ b/benchmarks/random_allocations/random_allocations.cpp @@ -38,9 +38,9 @@ namespace { constexpr std::size_t size_mb{1 << 20}; struct allocation { - void* p{nullptr}; + void* ptr{nullptr}; std::size_t size{0}; - allocation(void* _p, std::size_t _size) : p{_p}, size{_size} {} + allocation(void* ptr, std::size_t size) : ptr{ptr}, size{size} {} allocation() = default; }; @@ -70,9 +70,10 @@ void random_allocation_free(rmm::mr::device_memory_resource& mr, max_usage *= size_mb; // convert to bytes - constexpr int allocation_probability = 73; // percent - std::uniform_int_distribution op_distribution(0, 99); - std::uniform_int_distribution index_distribution(0, num_allocations - 1); + constexpr int allocation_probability{73}; // percent + constexpr int max_op_chance{99}; + std::uniform_int_distribution op_distribution(0, max_op_chance); + std::uniform_int_distribution index_distribution(0, static_cast(num_allocations) - 1); int active_allocations{0}; std::size_t allocation_count{0}; @@ -117,7 +118,7 @@ void random_allocation_free(rmm::mr::device_memory_resource& mr, std::size_t index = index_distribution(generator) % active_allocations; active_allocations--; allocation to_free = remove_at(allocations, index); - mr.deallocate(to_free.p, to_free.size, stream); + mr.deallocate(to_free.ptr, to_free.size, stream); allocation_size -= to_free.size; #if VERBOSE @@ -136,11 +137,12 @@ void random_allocation_free(rmm::mr::device_memory_resource& mr, } } // namespace -void uniform_random_allocations(rmm::mr::device_memory_resource& mr, - std::size_t num_allocations, - std::size_t max_allocation_size, // in MiB - std::size_t max_usage, - rmm::cuda_stream_view stream = {}) +void uniform_random_allocations( + rmm::mr::device_memory_resource& mr, + std::size_t num_allocations, // NOLINT(bugprone-easily-swappable-parameters) + std::size_t max_allocation_size, // size in MiB + std::size_t max_usage, + rmm::cuda_stream_view stream = {}) { std::uniform_int_distribution size_distribution(1, max_allocation_size * size_mb); random_allocation_free(mr, size_distribution, num_allocations, max_usage, stream); @@ -176,7 +178,10 @@ inline auto make_binning() auto pool = make_pool(); // Add a binning_memory_resource with fixed-size bins of sizes 256, 512, 1024, 2048 and 4096KiB // Larger allocations will use the pool resource - auto mr = rmm::mr::make_owning_wrapper(pool, 18, 22); + constexpr auto min_bin_pow2{18}; + constexpr auto max_bin_pow2{22}; + auto mr = rmm::mr::make_owning_wrapper( + pool, min_bin_pow2, max_bin_pow2); return mr; } @@ -184,7 +189,7 @@ using MRFactoryFunc = std::function{1000, 10000, 100000}) - b->Args({num_allocations, size})->Unit(benchmark::kMillisecond); + for (int num_allocations : std::vector{1000, 10000, 100000}) { + bench->Args({num_allocations, size})->Unit(benchmark::kMillisecond); + } } -static void size_range(benchmark::internal::Benchmark* b, int num) +static void size_range(benchmark::internal::Benchmark* bench, int num) { - for (int max_size : std::vector{1, 4, 64, 256, 1024, 4096}) - b->Args({num, max_size})->Unit(benchmark::kMillisecond); + for (int max_size : std::vector{1, 4, 64, 256, 1024, 4096}) { + bench->Args({num, max_size})->Unit(benchmark::kMillisecond); + } } -static void num_size_range(benchmark::internal::Benchmark* b) +static void 
num_size_range(benchmark::internal::Benchmark* bench) { - for (int num_allocations : std::vector{1000, 10000, 100000}) - size_range(b, num_allocations); + for (int num_allocations : std::vector{1000, 10000, 100000}) { + size_range(bench, num_allocations); + } } -int num_allocations = -1; -int max_size = -1; +int num_allocations = -1; // NOLINT(cppcoreguidelines-avoid-non-const-global-variables) +int max_size = -1; // NOLINT(cppcoreguidelines-avoid-non-const-global-variables) -static void benchmark_range(benchmark::internal::Benchmark* b) +void benchmark_range(benchmark::internal::Benchmark* bench) { if (num_allocations > 0) { - if (max_size > 0) - b->Args({num_allocations, max_size})->Unit(benchmark::kMillisecond); - else - size_range(b, num_allocations); + if (max_size > 0) { + bench->Args({num_allocations, max_size})->Unit(benchmark::kMillisecond); + } else { + size_range(bench, num_allocations); + } } else { - if (max_size > 0) - num_range(b, max_size); - else - num_size_range(b); + if (max_size > 0) { + num_range(bench, max_size); + } else { + num_size_range(bench); + } } } -void declare_benchmark(std::string name) +void declare_benchmark(const std::string& name) { - if (name == "cuda") - BENCHMARK_CAPTURE(BM_RandomAllocations, cuda_mr, &make_cuda)->Apply(benchmark_range); - if (name == "cuda_async") - BENCHMARK_CAPTURE(BM_RandomAllocations, cuda_async_mr, &make_cuda_async) + if (name == "cuda") { + BENCHMARK_CAPTURE(BM_RandomAllocations, cuda_mr, &make_cuda) // NOLINT ->Apply(benchmark_range); - else if (name == "binning") - BENCHMARK_CAPTURE(BM_RandomAllocations, binning_mr, &make_binning)->Apply(benchmark_range); - else if (name == "pool") - BENCHMARK_CAPTURE(BM_RandomAllocations, pool_mr, &make_pool)->Apply(benchmark_range); - else if (name == "arena") - BENCHMARK_CAPTURE(BM_RandomAllocations, arena_mr, &make_arena)->Apply(benchmark_range); - else + } + if (name == "cuda_async") { + BENCHMARK_CAPTURE(BM_RandomAllocations, cuda_async_mr, &make_cuda_async) // NOLINT + ->Apply(benchmark_range); + } else if (name == "binning") { + BENCHMARK_CAPTURE(BM_RandomAllocations, binning_mr, &make_binning) // NOLINT + ->Apply(benchmark_range); + } else if (name == "pool") { + BENCHMARK_CAPTURE(BM_RandomAllocations, pool_mr, &make_pool) // NOLINT + ->Apply(benchmark_range); + } else if (name == "arena") { + BENCHMARK_CAPTURE(BM_RandomAllocations, arena_mr, &make_arena) // NOLINT + ->Apply(benchmark_range); + } else { std::cout << "Error: invalid memory_resource name: " << name << "\n"; + } } -static void profile_random_allocations(MRFactoryFunc factory, +static void profile_random_allocations(const MRFactoryFunc& factory, std::size_t num_allocations, std::size_t max_size) { @@ -320,11 +337,12 @@ int main(int argc, char** argv) declare_benchmark(mr_name); } else { #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT - std::array mrs{"pool", "binning", "arena", "cuda_async", "cuda"}; + std::vector mrs{"pool", "binning", "arena", "cuda_async", "cuda"}; #else - std::array mrs{"pool", "binning", "arena", "cuda"}; + std::vector mrs{"pool", "binning", "arena", "cuda"}; #endif - std::for_each(std::cbegin(mrs), std::cend(mrs), [](auto const& s) { declare_benchmark(s); }); + std::for_each( + std::cbegin(mrs), std::cend(mrs), [](auto const& mr) { declare_benchmark(mr); }); } ::benchmark::RunSpecifiedBenchmarks(); } diff --git a/include/rmm/cuda_stream_view.hpp b/include/rmm/cuda_stream_view.hpp index c80d4de2f..f913609f9 100644 --- a/include/rmm/cuda_stream_view.hpp +++ b/include/rmm/cuda_stream_view.hpp @@ -54,7 +54,7 @@ 
class cuda_stream_view { * * @return cudaStream_t The wrapped stream. */ - constexpr cudaStream_t value() const noexcept { return stream_; } + [[nodiscard]] constexpr cudaStream_t value() const noexcept { return stream_; } /** * @brief Implicit conversion to cudaStream_t. @@ -64,26 +64,12 @@ class cuda_stream_view { /** * @brief Return true if the wrapped stream is the CUDA per-thread default stream. */ - bool is_per_thread_default() const noexcept - { -#ifdef CUDA_API_PER_THREAD_DEFAULT_STREAM - return value() == cudaStreamPerThread || value() == 0; -#else - return value() == cudaStreamPerThread; -#endif - } + [[nodiscard]] inline bool is_per_thread_default() const noexcept; /** * @brief Return true if the wrapped stream is explicitly the CUDA legacy default stream. */ - bool is_default() const noexcept - { -#ifdef CUDA_API_PER_THREAD_DEFAULT_STREAM - return value() == cudaStreamLegacy; -#else - return value() == cudaStreamLegacy || value() == 0; -#endif - } + [[nodiscard]] inline bool is_default() const noexcept; /** * @brief Synchronize the viewed CUDA stream. @@ -105,7 +91,7 @@ class cuda_stream_view { } private: - cudaStream_t stream_{0}; + cudaStream_t stream_{}; }; /** @@ -116,12 +102,38 @@ static constexpr cuda_stream_view cuda_stream_default{}; /** * @brief Static cuda_stream_view of cudaStreamLegacy, for convenience */ -static cuda_stream_view cuda_stream_legacy{cudaStreamLegacy}; + +static const cuda_stream_view cuda_stream_legacy{ + cudaStreamLegacy // NOLINT(cppcoreguidelines-pro-type-cstyle-cast) +}; /** * @brief Static cuda_stream_view of cudaStreamPerThread, for convenience */ -static cuda_stream_view cuda_stream_per_thread{cudaStreamPerThread}; +static const cuda_stream_view cuda_stream_per_thread{ + cudaStreamPerThread // NOLINT(cppcoreguidelines-pro-type-cstyle-cast) +}; + +[[nodiscard]] inline bool cuda_stream_view::is_per_thread_default() const noexcept +{ +#ifdef CUDA_API_PER_THREAD_DEFAULT_STREAM + return value() == cuda_stream_per_thread || value() == nullptr; +#else + return value() == cuda_stream_per_thread; +#endif +} + +/** + * @brief Return true if the wrapped stream is explicitly the CUDA legacy default stream. + */ +[[nodiscard]] inline bool cuda_stream_view::is_default() const noexcept +{ +#ifdef CUDA_API_PER_THREAD_DEFAULT_STREAM + return value() == cuda_stream_legacy; +#else + return value() == cuda_stream_legacy || value() == nullptr; +#endif +} /** * @brief Equality comparison operator for streams @@ -151,9 +163,9 @@ inline bool operator!=(cuda_stream_view lhs, cuda_stream_view rhs) { return not( * @param sv The cuda_stream_view to output * @return std::ostream& The output ostream */ -inline std::ostream& operator<<(std::ostream& os, cuda_stream_view sv) +inline std::ostream& operator<<(std::ostream& os, cuda_stream_view stream) { - os << sv.value(); + os << stream.value(); return os; } diff --git a/include/rmm/detail/aligned.hpp b/include/rmm/detail/aligned.hpp index 17973d033..d4612844e 100644 --- a/include/rmm/detail/aligned.hpp +++ b/include/rmm/detail/aligned.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,8 +22,7 @@ #include #include -namespace rmm { -namespace detail { +namespace rmm::detail { /** * @brief Default alignment used for host memory allocated by RMM. 
@@ -41,7 +40,7 @@ static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256}; * @brief Returns whether or not `n` is a power of 2. * */ -constexpr bool is_pow2(std::size_t n) { return (0 == (n & (n - 1))); } +constexpr bool is_pow2(std::size_t value) { return (0 == (value & (value - 1))); } /** * @brief Returns whether or not `alignment` is a valid memory alignment. @@ -57,10 +56,10 @@ constexpr bool is_supported_alignment(std::size_t alignment) { return is_pow2(al * * @return Return the aligned value, as one would expect */ -constexpr std::size_t align_up(std::size_t v, std::size_t align_bytes) noexcept +constexpr std::size_t align_up(std::size_t value, std::size_t align_bytes) noexcept { assert(is_supported_alignment(align_bytes)); - return (v + (align_bytes - 1)) & ~(align_bytes - 1); + return (value + (align_bytes - 1)) & ~(align_bytes - 1); } /** @@ -71,10 +70,10 @@ constexpr std::size_t align_up(std::size_t v, std::size_t align_bytes) noexcept * * @return Return the aligned value, as one would expect */ -constexpr std::size_t align_down(std::size_t v, std::size_t align_bytes) noexcept +constexpr std::size_t align_down(std::size_t value, std::size_t align_bytes) noexcept { assert(is_supported_alignment(align_bytes)); - return v & ~(align_bytes - 1); + return value & ~(align_bytes - 1); } /** @@ -85,10 +84,10 @@ constexpr std::size_t align_down(std::size_t v, std::size_t align_bytes) noexcep * * @return true if aligned */ -constexpr bool is_aligned(std::size_t v, std::size_t align_bytes) noexcept +constexpr bool is_aligned(std::size_t value, std::size_t align_bytes) noexcept { assert(is_supported_alignment(align_bytes)); - return v == align_down(v, align_bytes); + return value == align_down(value, align_bytes); } /** @@ -171,5 +170,4 @@ void aligned_deallocate(void* p, std::size_t bytes, std::size_t alignment, Deall dealloc(original); } -} // namespace detail -} // namespace rmm +} // namespace rmm::detail diff --git a/include/rmm/detail/cuda_util.hpp b/include/rmm/detail/cuda_util.hpp index d60bb560f..613b8d156 100644 --- a/include/rmm/detail/cuda_util.hpp +++ b/include/rmm/detail/cuda_util.hpp @@ -17,16 +17,15 @@ #include -namespace rmm { -namespace detail { +namespace rmm::detail { /// Gets the available and total device memory in bytes for the current device inline std::pair available_device_memory() { - std::size_t free{}, total{}; + std::size_t free{}; + std::size_t total{}; RMM_CUDA_TRY(cudaMemGetInfo(&free, &total)); return {free, total}; } -} // namespace detail -} // namespace rmm +} // namespace rmm::detail diff --git a/include/rmm/detail/error.hpp b/include/rmm/detail/error.hpp index 057f67ba3..7c052ea8d 100644 --- a/include/rmm/detail/error.hpp +++ b/include/rmm/detail/error.hpp @@ -51,15 +51,10 @@ struct cuda_error : public std::runtime_error { */ class bad_alloc : public std::bad_alloc { public: - bad_alloc(const char* w) : std::bad_alloc{}, _what{std::string{std::bad_alloc::what()} + ": " + w} - { - } - - bad_alloc(std::string const& w) : bad_alloc(w.c_str()) {} - - virtual ~bad_alloc() = default; + bad_alloc(const char* msg) : _what{std::string{std::bad_alloc::what()} + ": " + msg} {} + bad_alloc(std::string const& msg) : bad_alloc(msg.c_str()) {} - virtual const char* what() const noexcept { return _what.c_str(); } + [[nodiscard]] const char* what() const noexcept override { return _what.c_str(); } private: std::string _what; diff --git a/include/rmm/detail/stack_trace.hpp b/include/rmm/detail/stack_trace.hpp index 1e218fa53..93d8fe555 100644 --- 
a/include/rmm/detail/stack_trace.hpp +++ b/include/rmm/detail/stack_trace.hpp @@ -33,9 +33,7 @@ #include #endif -namespace rmm { - -namespace detail { +namespace rmm::detail { /** * @brief stack_trace is a class that will capture a stack on instatiation for output later. @@ -52,36 +50,37 @@ class stack_trace { { #if defined(RMM_ENABLE_STACK_TRACES) const int MaxStackDepth = 64; - void* stack[MaxStackDepth]; - auto const depth = backtrace(stack, MaxStackDepth); - stack_ptrs.insert(stack_ptrs.end(), &stack[0], &stack[depth]); + std::array stack{}; + auto const depth = backtrace(stack.begin(), MaxStackDepth); + stack_ptrs.insert(stack_ptrs.end(), stack.begin(), &stack.at(depth)); #endif // RMM_ENABLE_STACK_TRACES } - friend std::ostream& operator<<(std::ostream& os, const stack_trace& st) + friend std::ostream& operator<<(std::ostream& os, const stack_trace& trace) { #if defined(RMM_ENABLE_STACK_TRACES) std::unique_ptr strings( - backtrace_symbols(st.stack_ptrs.data(), st.stack_ptrs.size()), &::free); + backtrace_symbols(trace.stack_ptrs.data(), static_cast(trace.stack_ptrs.size())), + &::free); - if (strings.get() == nullptr) { + if (strings == nullptr) { os << "But no stack trace could be found!" << std::endl; } else { // Iterate over the stack pointers converting to a string - for (std::size_t i = 0; i < st.stack_ptrs.size(); ++i) { + for (std::size_t i = 0; i < trace.stack_ptrs.size(); ++i) { // Leading index os << "#" << i << " in "; auto const str = [&] { Dl_info info; - if (dladdr(st.stack_ptrs[i], &info)) { + if (dladdr(trace.stack_ptrs[i], &info) != 0) { int status = -1; // Demangle the name. This can occasionally fail std::unique_ptr demangled( - abi::__cxa_demangle(info.dli_sname, nullptr, 0, &status), &::free); + abi::__cxa_demangle(info.dli_sname, nullptr, nullptr, &status), &::free); // If it fails, fallback to the dli_name. - if (status == 0 or info.dli_sname) { - auto name = status == 0 ? demangled.get() : info.dli_sname; + if (status == 0 or (info.dli_sname != nullptr)) { + auto const* name = status == 0 ? 
demangled.get() : info.dli_sname; return name + std::string(" from ") + info.dli_fname; } } @@ -103,6 +102,4 @@ class stack_trace { #endif // RMM_ENABLE_STACK_TRACES }; -} // namespace detail - -} // namespace rmm +} // namespace rmm::detail diff --git a/include/rmm/device_uvector.hpp b/include/rmm/device_uvector.hpp index ca4cf6d30..f7f55a910 100644 --- a/include/rmm/device_uvector.hpp +++ b/include/rmm/device_uvector.hpp @@ -84,9 +84,9 @@ class device_uvector { ~device_uvector() = default; RMM_EXEC_CHECK_DISABLE - device_uvector(device_uvector&&) = default; + device_uvector(device_uvector&&) noexcept = default; - device_uvector& operator=(device_uvector&&) = default; + device_uvector& operator=(device_uvector&&) noexcept = default; /** * @brief Copy ctor is deleted as it doesn't allow a stream argument @@ -203,24 +203,20 @@ class device_uvector { * @param v The value to copy to the specified element * @param s The stream on which to perform the copy */ - void set_element_async(std::size_t element_index, value_type const& v, cuda_stream_view s) + void set_element_async(std::size_t element_index, + value_type const& value, + cuda_stream_view stream) { RMM_EXPECTS( element_index < size(), rmm::out_of_range, "Attempt to access out of bounds element."); - if constexpr (std::is_fundamental::value) { - if constexpr (std::is_same::value) { - RMM_CUDA_TRY(cudaMemsetAsync(element_ptr(element_index), v, sizeof(v), s.value())); - } else { - if (v == value_type{0}) { - set_element_to_zero_async(element_index, s); - } else { - RMM_CUDA_TRY(cudaMemcpyAsync( - element_ptr(element_index), &v, sizeof(v), cudaMemcpyDefault, s.value())); - } - } - } else { + if constexpr (std::is_same::value) { RMM_CUDA_TRY( - cudaMemcpyAsync(element_ptr(element_index), &v, sizeof(v), cudaMemcpyDefault, s.value())); + cudaMemsetAsync(element_ptr(element_index), value, sizeof(value), stream.value())); + } else if (std::is_fundamental::value and value == value_type{0}) { + set_element_to_zero_async(element_index, stream); + } else { + RMM_CUDA_TRY(cudaMemcpyAsync( + element_ptr(element_index), &value, sizeof(value), cudaMemcpyDefault, stream.value())); } } @@ -250,11 +246,12 @@ class device_uvector { * @param element_index Index of the target element * @param s The stream on which to perform the copy */ - void set_element_to_zero_async(std::size_t element_index, cuda_stream_view s) + void set_element_to_zero_async(std::size_t element_index, cuda_stream_view stream) { RMM_EXPECTS( element_index < size(), rmm::out_of_range, "Attempt to access out of bounds element."); - RMM_CUDA_TRY(cudaMemsetAsync(element_ptr(element_index), 0, sizeof(value_type), s.value())); + RMM_CUDA_TRY( + cudaMemsetAsync(element_ptr(element_index), 0, sizeof(value_type), stream.value())); } /** @@ -283,13 +280,13 @@ class device_uvector { * @throws rmm::out_of_range exception if `element_index >= size()` * * @param element_index Index of the target element - * @param v The value to copy to the specified element - * @param s The stream on which to perform the copy + * @param value The value to copy to the specified element + * @param stream The stream on which to perform the copy */ - void set_element(std::size_t element_index, T const& v, cuda_stream_view s) + void set_element(std::size_t element_index, T const& value, cuda_stream_view stream) { - set_element_async(element_index, v, s); - s.synchronize_no_throw(); + set_element_async(element_index, value, stream); + stream.synchronize_no_throw(); } /** @@ -301,18 +298,18 @@ class device_uvector { * 
@throws rmm::out_of_range exception if `element_index >= size()` * * @param element_index Index of the desired element - * @param s The stream on which to perform the copy + * @param stream The stream on which to perform the copy * @return The value of the specified element */ - value_type element(std::size_t element_index, cuda_stream_view s) const + value_type element(std::size_t element_index, cuda_stream_view stream) const { RMM_EXPECTS( element_index < size(), rmm::out_of_range, "Attempt to access out of bounds element."); - value_type v; - RMM_CUDA_TRY( - cudaMemcpyAsync(&v, element_ptr(element_index), sizeof(v), cudaMemcpyDefault, s.value())); - s.synchronize(); - return v; + value_type value; + RMM_CUDA_TRY(cudaMemcpyAsync( + &value, element_ptr(element_index), sizeof(value), cudaMemcpyDefault, stream.value())); + stream.synchronize(); + return value; } /** @@ -323,10 +320,10 @@ class device_uvector { * * @throws rmm::out_of_range exception if the vector is empty. * - * @param s The stream on which to perform the copy + * @param stream The stream on which to perform the copy * @return The value of the first element */ - value_type front_element(cuda_stream_view s) const { return element(0, s); } + value_type front_element(cuda_stream_view stream) const { return element(0, stream); } /** * @brief Returns the last element. @@ -336,10 +333,10 @@ class device_uvector { * * @throws rmm::out_of_range exception if the vector is empty. * - * @param s The stream on which to perform the copy + * @param stream The stream on which to perform the copy * @return The value of the last element */ - value_type back_element(cuda_stream_view s) const { return element(size() - 1, s); } + value_type back_element(cuda_stream_view stream) const { return element(size() - 1, stream); } /** * @brief Resizes the vector to contain `new_size` elements. @@ -384,7 +381,10 @@ class device_uvector { * @return std::size_t The number of elements that can be stored without requiring a new * allocation. */ - std::size_t capacity() const noexcept { return bytes_to_elements(_storage.capacity()); } + [[nodiscard]] std::size_t capacity() const noexcept + { + return bytes_to_elements(_storage.capacity()); + } /** * @brief Returns pointer to underlying device storage. @@ -468,7 +468,7 @@ class device_uvector { * * @return The number of elements. */ - std::size_t size() const noexcept { return bytes_to_elements(_storage.size()); } + [[nodiscard]] std::size_t size() const noexcept { return bytes_to_elements(_storage.size()); } /** * @brief Returns true if the vector contains no elements, i.e., `size() == 0`. @@ -476,14 +476,14 @@ class device_uvector { * @return true The vector is empty * @return false The vector is not empty */ - bool is_empty() const noexcept { return size() == 0; } + [[nodiscard]] bool is_empty() const noexcept { return size() == 0; } /** * @brief Returns pointer to the resource used to allocate and deallocate the device storage. 
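As a quick reference for the element accessors renamed above, here is a minimal sketch of how they are used together. It assumes a working CUDA device and the current default memory resource; the size and values are arbitrary.

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>

int main()
{
  rmm::cuda_stream_view stream{};            // default stream
  rmm::device_uvector<int> vec(8, stream);   // 8 uninitialized device elements

  vec.set_element(0, 42, stream);            // copies the value and synchronizes

  int last{13};
  vec.set_element_async(vec.size() - 1, last, stream);  // `last` must outlive the async copy
  stream.synchronize();

  auto const front = vec.front_element(stream);  // synchronous device-to-host copy of element 0
  auto const back  = vec.back_element(stream);   // element size() - 1
  return (front == 42 && back == 13) ? 0 : 1;
}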
* * @return Pointer to underlying resource */ - mr::device_memory_resource* memory_resource() const noexcept + [[nodiscard]] mr::device_memory_resource* memory_resource() const noexcept { return _storage.memory_resource(); } @@ -491,12 +491,12 @@ class device_uvector { private: device_buffer _storage{}; ///< Device memory storage for vector elements - std::size_t constexpr elements_to_bytes(std::size_t num_elements) const noexcept + [[nodiscard]] std::size_t constexpr elements_to_bytes(std::size_t num_elements) const noexcept { return num_elements * sizeof(value_type); } - std::size_t constexpr bytes_to_elements(std::size_t num_bytes) const noexcept + [[nodiscard]] std::size_t constexpr bytes_to_elements(std::size_t num_bytes) const noexcept { return num_bytes / sizeof(value_type); } diff --git a/include/rmm/logger.hpp b/include/rmm/logger.hpp index 99a0f7b2e..d0ce63bac 100644 --- a/include/rmm/logger.hpp +++ b/include/rmm/logger.hpp @@ -42,7 +42,7 @@ namespace detail { */ inline std::string default_log_filename() { - auto filename = std::getenv("RMM_DEBUG_LOG_FILE"); + auto* filename = std::getenv("RMM_DEBUG_LOG_FILE"); return (filename == nullptr) ? std::string{"rmm_log.txt"} : std::string{filename}; } @@ -80,8 +80,8 @@ struct logger_wrapper { */ inline spdlog::logger& logger() { - static detail::logger_wrapper w{}; - return w.logger_; + static detail::logger_wrapper wrapped{}; + return wrapped.logger_; } // The default is INFO, but it should be used sparingly, so that by default a log file is only diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index 7a449949c..f99c6bf97 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -30,13 +30,10 @@ #include #include -namespace rmm { -namespace mr { -namespace detail { -namespace arena { +namespace rmm::mr::detail::arena { /// Minimum size of a superblock (256 KiB). -constexpr std::size_t minimum_superblock_size = 1u << 18u; +constexpr std::size_t minimum_superblock_size = 1U << 18U; /** * @brief Represents a chunk of memory that can be allocated and deallocated. @@ -67,16 +64,16 @@ class block { block(void* pointer, std::size_t size) : pointer_(static_cast(pointer)), size_(size) {} /// Returns the underlying pointer. - void* pointer() const { return pointer_; } + [[nodiscard]] void* pointer() const { return pointer_; } /// Returns the size of the block. - std::size_t size() const { return size_; } + [[nodiscard]] std::size_t size() const { return size_; } /// Returns true if this block is valid (non-null), false otherwise. - bool is_valid() const { return pointer_ != nullptr; } + [[nodiscard]] bool is_valid() const { return pointer_ != nullptr; } /// Returns true if this block is a superblock, false otherwise. - bool is_superblock() const { return size_ >= minimum_superblock_size; } + [[nodiscard]] bool is_superblock() const { return size_ >= minimum_superblock_size; } /** * @brief Verifies whether this block can be merged to the beginning of block b. @@ -85,7 +82,10 @@ class block { * @return true Returns true if this block's `pointer` + `size` == `b.ptr`, and `not b.is_head`, false otherwise. */ - bool is_contiguous_before(block const& b) const { return pointer_ + size_ == b.pointer_; } + [[nodiscard]] bool is_contiguous_before(block const& blk) const + { + return pointer_ + size_ == blk.pointer_; + } /** * @brief Is this block large enough to fit `sz` bytes? @@ -93,7 +93,7 @@ class block { * @param sz The size in bytes to check for fit. 
* @return true if this block is at least `sz` bytes. */ - bool fits(std::size_t sz) const { return size_ >= sz; } + [[nodiscard]] bool fits(std::size_t size) const { return size_ >= size; } /** * @brief Split this block into two by the given size. @@ -101,14 +101,11 @@ class block { * @param sz The size in bytes of the first block. * @return std::pair A pair of blocks split by sz. */ - std::pair split(std::size_t sz) const + [[nodiscard]] std::pair split(std::size_t size) const { RMM_LOGGING_ASSERT(size_ >= sz); - if (size_ > sz) { - return {{pointer_, sz}, {pointer_ + sz, size_ - sz}}; - } else { - return {*this, {}}; - } + if (size_ > size) { return {{pointer_, size}, {pointer_ + size, size_ - size}}; } + return {*this, {}}; } /** @@ -119,14 +116,14 @@ class block { * @param b block to merge. * @return block The merged block. */ - block merge(block const& b) const + [[nodiscard]] block merge(block const& blk) const { RMM_LOGGING_ASSERT(is_contiguous_before(b)); - return {pointer_, size_ + b.size_}; + return {pointer_, size_ + blk.size_}; } /// Used by std::set to compare blocks. - bool operator<(block const& b) const { return pointer_ < b.pointer_; } + bool operator<(block const& blk) const { return pointer_ < blk.pointer_; } private: char* pointer_{}; ///< Raw memory pointer. @@ -139,9 +136,9 @@ class block { * @param[in] v value to align * @return Return the aligned value */ -constexpr std::size_t align_up(std::size_t v) noexcept +constexpr std::size_t align_up(std::size_t value) noexcept { - return rmm::detail::align_up(v, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + return rmm::detail::align_up(value, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); } /** @@ -150,9 +147,9 @@ constexpr std::size_t align_up(std::size_t v) noexcept * @param[in] v value to align * @return Return the aligned value */ -constexpr std::size_t align_down(std::size_t v) noexcept +constexpr std::size_t align_down(std::size_t value) noexcept { - return rmm::detail::align_down(v, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + return rmm::detail::align_down(value, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); } /** @@ -172,24 +169,21 @@ constexpr std::size_t align_down(std::size_t v) noexcept inline block first_fit(std::set& free_blocks, std::size_t size) { auto const iter = std::find_if( - free_blocks.cbegin(), free_blocks.cend(), [size](auto const& b) { return b.fits(size); }); + free_blocks.cbegin(), free_blocks.cend(), [size](auto const& blk) { return blk.fits(size); }); - if (iter == free_blocks.cend()) { - return {}; - } else { - // Remove the block from the free_list. - auto const b = *iter; - auto const i = free_blocks.erase(iter); - - if (b.size() > size) { - // Split the block and put the remainder back. - auto const split = b.split(size); - free_blocks.insert(i, split.second); - return split.first; - } else { - return b; - } + if (iter == free_blocks.cend()) { return {}; } + + // Remove the block from the free_list. + auto const blk = *iter; + auto const next = free_blocks.erase(iter); + + if (blk.size() > size) { + // Split the block and put the remainder back. + auto const split = blk.split(size); + free_blocks.insert(next, split.second); + return split.first; } + return blk; } /** @@ -199,35 +193,35 @@ inline block first_fit(std::set& free_blocks, std::size_t size) * @param b The block to coalesce. * @return block The coalesced block. 
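To make the first-fit behaviour above concrete, here is a deliberately simplified, self-contained sketch. It models free blocks as (offset, size) pairs instead of the block class, and it ignores alignment and coalescing; the numbers are arbitrary.

#include <cstddef>
#include <set>
#include <utility>

using simple_block = std::pair<std::size_t, std::size_t>;  // (start offset, size), ordered by offset

// Return the first free block large enough for `size`, splitting off any remainder.
simple_block simple_first_fit(std::set<simple_block>& free_list, std::size_t size)
{
  for (auto iter = free_list.begin(); iter != free_list.end(); ++iter) {
    if (iter->second >= size) {
      auto const blk = *iter;
      free_list.erase(iter);
      if (blk.second > size) {
        free_list.emplace(blk.first + size, blk.second - size);  // put the remainder back
      }
      return {blk.first, size};
    }
  }
  return {0, 0};  // no block fits
}

int main()
{
  std::set<simple_block> free_list{{0, 256}, {512, 1024}};
  auto const alloc = simple_first_fit(free_list, 300);  // skips the 256-byte block, splits the 1024-byte one
  bool const ok = alloc.first == 512 && free_list.count({812, 724}) == 1;
  return ok ? 0 : 1;
}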
*/ -inline block coalesce_block(std::set& free_blocks, block const& b) +inline block coalesce_block(std::set& free_blocks, block const& blk) { - if (!b.is_valid()) return b; + if (!blk.is_valid()) { return blk; } // Find the right place (in ascending address order) to insert the block. - auto const next = free_blocks.lower_bound(b); + auto const next = free_blocks.lower_bound(blk); auto const previous = next == free_blocks.cbegin() ? next : std::prev(next); // Coalesce with neighboring blocks. - bool const merge_prev = previous->is_contiguous_before(b); - bool const merge_next = next != free_blocks.cend() && b.is_contiguous_before(*next); + bool const merge_prev = previous->is_contiguous_before(blk); + bool const merge_next = next != free_blocks.cend() && blk.is_contiguous_before(*next); block merged{}; if (merge_prev && merge_next) { - merged = previous->merge(b).merge(*next); + merged = previous->merge(blk).merge(*next); free_blocks.erase(previous); - auto const i = free_blocks.erase(next); - free_blocks.insert(i, merged); + auto const iter = free_blocks.erase(next); + free_blocks.insert(iter, merged); } else if (merge_prev) { - merged = previous->merge(b); - auto const i = free_blocks.erase(previous); - free_blocks.insert(i, merged); + merged = previous->merge(blk); + auto const iter = free_blocks.erase(previous); + free_blocks.insert(iter, merged); } else if (merge_next) { - merged = b.merge(*next); - auto const i = free_blocks.erase(next); - free_blocks.insert(i, merged); + merged = blk.merge(*next); + auto const iter = free_blocks.erase(next); + free_blocks.insert(iter, merged); } else { - free_blocks.emplace(b); - merged = b; + free_blocks.emplace(blk); + merged = blk; } return merged; } @@ -248,7 +242,7 @@ class global_arena final { /// The default maximum size for the global arena. static constexpr std::size_t default_maximum_size = std::numeric_limits::max(); /// Reserved memory that should not be allocated (64 MiB). - static constexpr std::size_t reserved_size = 1u << 26u; + static constexpr std::size_t reserved_size = 1U << 26U; /** * @brief Construct a global arena. @@ -275,7 +269,8 @@ class global_arena final { "Error, Maximum arena size required to be a multiple of 256 bytes"); if (initial_size == default_initial_size || maximum_size == default_maximum_size) { - std::size_t free{}, total{}; + std::size_t free{}; + std::size_t total{}; RMM_CUDA_TRY(cudaMemGetInfo(&free, &total)); if (initial_size == default_initial_size) { initial_size = align_up(std::min(free, total / 2)); @@ -292,6 +287,8 @@ class global_arena final { // Disable copy (and move) semantics. global_arena(const global_arena&) = delete; global_arena& operator=(const global_arena&) = delete; + global_arena(global_arena&&) = delete; + global_arena& operator=(global_arena&&) = delete; /** * @brief Destroy the global arena and deallocate all memory it allocated using the upstream @@ -300,8 +297,8 @@ class global_arena final { ~global_arena() { lock_guard lock(mtx_); - for (auto const& b : upstream_blocks_) { - upstream_mr_->deallocate(b.pointer(), b.size()); + for (auto const& blk : upstream_blocks_) { + upstream_mr_->deallocate(blk.pointer(), blk.size()); } } @@ -326,10 +323,10 @@ class global_arena final { * @param bytes The size in bytes of the allocation. This must be equal to the value of `bytes` * that was passed to the `allocate` call that returned `p`. 
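The merge conditions inside coalesce_block above can be illustrated with plain offsets. This is only an illustration: the span struct and the constants below are made up for the example, while the real code works on raw device pointers.

#include <cstddef>

struct span {
  std::size_t offset;
  std::size_t size;
};

// A block can be merged into the one that follows it when they are contiguous in address order.
constexpr bool contiguous_before(span lhs, span rhs) { return lhs.offset + lhs.size == rhs.offset; }
constexpr span merge(span lhs, span rhs) { return {lhs.offset, lhs.size + rhs.size}; }

int main()
{
  constexpr span prev{0, 256};
  constexpr span freed{256, 128};
  constexpr span next{384, 64};

  // Both neighbours are contiguous, so freeing the middle block yields one 448-byte block.
  static_assert(contiguous_before(prev, freed) && contiguous_before(freed, next));
  constexpr span merged = merge(merge(prev, freed), next);
  static_assert(merged.offset == 0 && merged.size == 448);
  return 0;
}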
*/ - void deallocate(block const& b) + void deallocate(block const& blk) { lock_guard lock(mtx_); - coalesce_block(free_blocks_, b); + coalesce_block(free_blocks_, blk); } /** @@ -340,8 +337,8 @@ class global_arena final { void deallocate(std::set const& free_blocks) { lock_guard lock(mtx_); - for (auto const& b : free_blocks) { - coalesce_block(free_blocks_, b); + for (auto const& blk : free_blocks) { + coalesce_block(free_blocks_, blk); } } @@ -357,8 +354,8 @@ class global_arena final { block get_block(std::size_t size) { // Find the first-fit free block. - auto const b = first_fit(free_blocks_, size); - if (b.is_valid()) return b; + auto const blk = first_fit(free_blocks_, size); + if (blk.is_valid()) { return blk; } // No existing larger blocks available, so grow the arena. auto const upstream_block = expand_arena(size_to_grow(size)); @@ -427,10 +424,13 @@ class arena { * @param global_arena The global arena from which to allocate superblocks. */ explicit arena(global_arena& global_arena) : global_arena_{global_arena} {} + ~arena() = default; // Disable copy (and move) semantics. arena(const arena&) = delete; arena& operator=(const arena&) = delete; + arena(arena&&) = delete; + arena& operator=(arena&&) = delete; /** * @brief Allocates memory of size at least `bytes`. @@ -443,11 +443,11 @@ class arena { void* allocate(std::size_t bytes) { lock_guard lock(mtx_); - auto const b = get_block(bytes); + auto const blk = get_block(bytes); #ifdef RMM_POOL_TRACK_ALLOCATIONS allocated_blocks_.emplace(b.pointer(), b); #endif - return b.pointer(); + return blk.pointer(); } /** @@ -459,19 +459,19 @@ class arena { * @param stream Stream on which to perform deallocation. * @return true if the allocation is found, false otherwise. */ - bool deallocate(void* p, std::size_t bytes, cuda_stream_view stream) + bool deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) { lock_guard lock(mtx_); #ifdef RMM_POOL_TRACK_ALLOCATIONS auto const b = free_block(p, bytes); #else - block const b{p, bytes}; + block const blk{ptr, bytes}; #endif - if (b.is_valid()) { - auto const merged = coalesce_block(free_blocks_, b); + if (blk.is_valid()) { + auto const merged = coalesce_block(free_blocks_, blk); shrink_arena(merged, stream); } - return b.is_valid(); + return blk.is_valid(); } #ifdef RMM_POOL_TRACK_ALLOCATIONS @@ -524,8 +524,8 @@ class arena { { if (size < minimum_superblock_size) { // Find the first-fit free block. - auto const b = first_fit(free_blocks_, size); - if (b.is_valid()) { return b; } + auto const blk = first_fit(free_blocks_, size); + if (blk.is_valid()) { return blk; } } // No existing larger blocks available, so grow the arena and obtain a superblock. @@ -575,15 +575,15 @@ class arena { * @param b The block that can be used to shrink the arena. * @param stream Stream on which to perform shrinking. */ - void shrink_arena(block const& b, cuda_stream_view stream) + void shrink_arena(block const& blk, cuda_stream_view stream) { // Don't shrink if b is not a superblock. - if (!b.is_superblock()) return; + if (!blk.is_superblock()) { return; } stream.synchronize_no_throw(); - global_arena_.deallocate(b); - free_blocks_.erase(b); + global_arena_.deallocate(blk); + free_blocks_.erase(blk); } /// The global arena to allocate superblocks from. @@ -609,11 +609,14 @@ class arena { template class arena_cleaner { public: - explicit arena_cleaner(std::shared_ptr> const& a) : arena_(a) {} + explicit arena_cleaner(std::shared_ptr> const& arena) : arena_(arena) {} // Disable copy (and move) semantics. 
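The cleaner introduced above stores a weak reference to the arena and acts on destruction only when the owning object is still alive. A generic sketch of that pattern follows; the resource type and its clean() member are stand-ins for illustration, not the arena API.

#include <memory>

struct resource {
  void clean() { cleaned = true; }
  bool cleaned{false};
};

class scoped_cleaner {
 public:
  explicit scoped_cleaner(std::shared_ptr<resource> const& res) : res_{res} {}
  scoped_cleaner(scoped_cleaner const&) = delete;
  scoped_cleaner& operator=(scoped_cleaner const&) = delete;
  scoped_cleaner(scoped_cleaner&&) = delete;
  scoped_cleaner& operator=(scoped_cleaner&&) = delete;
  ~scoped_cleaner()
  {
    // Only clean if the resource has not already been destroyed.
    if (!res_.expired()) { res_.lock()->clean(); }
  }

 private:
  std::weak_ptr<resource> res_;
};

int main()
{
  auto res = std::make_shared<resource>();
  {
    scoped_cleaner cleaner{res};
  }  // cleaner destroyed while `res` is still alive, so clean() runs
  return res->cleaned ? 0 : 1;
}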
- arena_cleaner(const arena_cleaner&) = delete; - arena_cleaner& operator=(const arena_cleaner&) = delete; + arena_cleaner() = delete; + arena_cleaner(arena_cleaner const&) = delete; + arena_cleaner& operator=(arena_cleaner const&) = delete; + arena_cleaner(arena_cleaner&&) = delete; + arena_cleaner& operator=(arena_cleaner&&) = delete; ~arena_cleaner() { @@ -628,7 +631,4 @@ class arena_cleaner { std::weak_ptr> arena_; }; -} // namespace arena -} // namespace detail -} // namespace mr -} // namespace rmm +} // namespace rmm::mr::detail::arena From d9b9ab49b7eec42c349ce4216864c25d857218f7 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 31 Aug 2021 11:16:22 +1000 Subject: [PATCH 06/72] Suppress cppcoreguidelines-macro-usage --- .clang-tidy | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.clang-tidy b/.clang-tidy index 997fd9e6e..3d19ae996 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -7,7 +7,8 @@ Checks: 'clang-diagnostic-*, performance-*, readability-*, llvm-*, - -modernize-use-trailing-return-type' + -modernize-use-trailing-return-type', + -cppcoreguidelines-macro-usage WarningsAsErrors: '' HeaderFilterRegex: '' AnalyzeTemporaryDtors: false From f65249b75b64cb915672e0a1c0abef41c9a555b8 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 31 Aug 2021 11:16:39 +1000 Subject: [PATCH 07/72] parameter name --- include/rmm/detail/aligned.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/rmm/detail/aligned.hpp b/include/rmm/detail/aligned.hpp index d4612844e..8b15d5463 100644 --- a/include/rmm/detail/aligned.hpp +++ b/include/rmm/detail/aligned.hpp @@ -159,14 +159,14 @@ void* aligned_allocate(std::size_t bytes, std::size_t alignment, Alloc alloc) * @tparam Dealloc A unary callable type that deallocates memory. */ template -void aligned_deallocate(void* p, std::size_t bytes, std::size_t alignment, Dealloc dealloc) +void aligned_deallocate(void* ptr, std::size_t bytes, std::size_t alignment, Dealloc dealloc) { (void)alignment; // Get offset from the location immediately prior to the aligned pointer - std::ptrdiff_t const offset = *(reinterpret_cast(p) - 1); + std::ptrdiff_t const offset = *(reinterpret_cast(ptr) - 1); - void* const original = static_cast(p) - offset; + void* const original = static_cast(ptr) - offset; dealloc(original); } From 401f2aed6331d73f553210f6954f8334dfe3d65e Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 2 Sep 2021 11:29:38 +1000 Subject: [PATCH 08/72] tidying --- .clang-tidy | 4 +- include/rmm/detail/error.hpp | 22 +++--- include/rmm/detail/stack_trace.hpp | 1 + tests/cuda_stream_pool_tests.cpp | 1 - tests/device_buffer_tests.cu | 107 +++++++++++++++-------------- 5 files changed, 69 insertions(+), 66 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 3d19ae996..a0bf9994a 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -7,8 +7,8 @@ Checks: 'clang-diagnostic-*, performance-*, readability-*, llvm-*, - -modernize-use-trailing-return-type', - -cppcoreguidelines-macro-usage + -modernize-use-trailing-return-type, + -cppcoreguidelines-macro-usage' WarningsAsErrors: '' HeaderFilterRegex: '' AnalyzeTemporaryDtors: false diff --git a/include/rmm/detail/error.hpp b/include/rmm/detail/error.hpp index 7c052ea8d..50ae70b2f 100644 --- a/include/rmm/detail/error.hpp +++ b/include/rmm/detail/error.hpp @@ -101,7 +101,7 @@ class out_of_range : public std::out_of_range { (__VA_ARGS__) #define GET_RMM_EXPECTS_MACRO(_1, _2, _3, NAME, ...) 
NAME #define RMM_EXPECTS_3(_condition, _exception_type, _reason) \ - (!!(_condition)) ? static_cast(0) : throw _exception_type \ + (!!(_condition)) ? static_cast(0) : throw(_exception_type) \ { \ "RMM failure at: " __FILE__ ":" RMM_STRINGIFY(__LINE__) ": " _reason \ } @@ -124,7 +124,7 @@ class out_of_range : public std::out_of_range { (__VA_ARGS__) #define GET_RMM_FAIL_MACRO(_1, _2, NAME, ...) NAME #define RMM_FAIL_2(_what, _exception_type) \ - throw _exception_type{"RMM failure at:" __FILE__ ":" RMM_STRINGIFY(__LINE__) ": " _what}; + throw(_exception_type){"RMM failure at:" __FILE__ ":" RMM_STRINGIFY(__LINE__) ": " _what}; #define RMM_FAIL_1(_what) RMM_FAIL_2(_what, rmm::logic_error) /** @@ -152,15 +152,15 @@ class out_of_range : public std::out_of_range { GET_RMM_CUDA_TRY_MACRO(__VA_ARGS__, RMM_CUDA_TRY_2, RMM_CUDA_TRY_1) \ (__VA_ARGS__) #define GET_RMM_CUDA_TRY_MACRO(_1, _2, NAME, ...) NAME -#define RMM_CUDA_TRY_2(_call, _exception_type) \ - do { \ - cudaError_t const error = (_call); \ - if (cudaSuccess != error) { \ - cudaGetLastError(); \ - throw _exception_type{std::string{"CUDA error at: "} + __FILE__ + ":" + \ - RMM_STRINGIFY(__LINE__) + ": " + cudaGetErrorName(error) + " " + \ - cudaGetErrorString(error)}; \ - } \ +#define RMM_CUDA_TRY_2(_call, _exception_type) \ + do { \ + cudaError_t const error = (_call); \ + if (cudaSuccess != error) { \ + cudaGetLastError(); \ + throw(_exception_type){std::string{"CUDA error at: "} + __FILE__ + ":" + \ + RMM_STRINGIFY(__LINE__) + ": " + cudaGetErrorName(error) + " " + \ + cudaGetErrorString(error)}; \ + } \ } while (0) #define RMM_CUDA_TRY_1(_call) RMM_CUDA_TRY_2(_call, rmm::cuda_error) diff --git a/include/rmm/detail/stack_trace.hpp b/include/rmm/detail/stack_trace.hpp index 93d8fe555..2b83aa7cf 100644 --- a/include/rmm/detail/stack_trace.hpp +++ b/include/rmm/detail/stack_trace.hpp @@ -28,6 +28,7 @@ #include #include #include + #include #include #include diff --git a/tests/cuda_stream_pool_tests.cpp b/tests/cuda_stream_pool_tests.cpp index 1e14e2abf..b1f61a5fb 100644 --- a/tests/cuda_stream_pool_tests.cpp +++ b/tests/cuda_stream_pool_tests.cpp @@ -37,7 +37,6 @@ TEST_F(CudaStreamPoolTest, Unequal) TEST_F(CudaStreamPoolTest, Nondefault) { auto const stream_a = this->pool.get_stream(); - auto const stream_b = this->pool.get_stream(); // pool streams are explicit, non-default streams EXPECT_FALSE(stream_a.is_default()); diff --git a/tests/device_buffer_tests.cu b/tests/device_buffer_tests.cu index fa36a2751..05ecef4c4 100644 --- a/tests/device_buffer_tests.cu +++ b/tests/device_buffer_tests.cu @@ -42,7 +42,10 @@ struct DeviceBufferTest : public ::testing::Test { DeviceBufferTest() { std::default_random_engine generator; - std::uniform_int_distribution distribution(1000, 100000); + + auto constexpr range_min{1000}; + auto constexpr range_max{100000}; + std::uniform_int_distribution distribution(range_min, range_max); size = distribution(generator); } }; @@ -263,16 +266,16 @@ TYPED_TEST(DeviceBufferTest, CopyCapacityLargerThanSizeExplicitMr) TYPED_TEST(DeviceBufferTest, MoveConstructor) { rmm::device_buffer buff(this->size, rmm::cuda_stream_default, &this->mr); - auto p = buff.data(); + auto* ptr = buff.data(); auto size = buff.size(); auto capacity = buff.capacity(); - auto mr = buff.memory_resource(); + auto* mr = buff.memory_resource(); auto stream = buff.stream(); // New buffer should have the same contents as the original rmm::device_buffer buff_new(std::move(buff)); EXPECT_NE(nullptr, buff_new.data()); - EXPECT_EQ(p, buff_new.data()); + 
EXPECT_EQ(ptr, buff_new.data()); EXPECT_EQ(size, buff_new.size()); EXPECT_EQ(capacity, buff_new.capacity()); EXPECT_EQ(stream, buff_new.stream()); @@ -290,17 +293,17 @@ TYPED_TEST(DeviceBufferTest, MoveConstructorStream) { rmm::device_buffer buff(this->size, this->stream, &this->mr); this->stream.synchronize(); - auto p = buff.data(); + auto* ptr = buff.data(); auto size = buff.size(); auto capacity = buff.capacity(); - auto mr = buff.memory_resource(); + auto* mr = buff.memory_resource(); auto stream = buff.stream(); // New buffer should have the same contents as the original rmm::device_buffer buff_new(std::move(buff)); this->stream.synchronize(); EXPECT_NE(nullptr, buff_new.data()); - EXPECT_EQ(p, buff_new.data()); + EXPECT_EQ(ptr, buff_new.data()); EXPECT_EQ(size, buff_new.size()); EXPECT_EQ(capacity, buff_new.capacity()); EXPECT_EQ(stream, buff_new.stream()); @@ -316,72 +319,72 @@ TYPED_TEST(DeviceBufferTest, MoveConstructorStream) TYPED_TEST(DeviceBufferTest, MoveAssignmentToDefault) { - rmm::device_buffer from(this->size, rmm::cuda_stream_default, &this->mr); - auto p = from.data(); - auto size = from.size(); - auto capacity = from.capacity(); - auto mr = from.memory_resource(); - auto stream = from.stream(); + rmm::device_buffer src(this->size, rmm::cuda_stream_default, &this->mr); + auto* ptr = src.data(); + auto size = src.size(); + auto capacity = src.capacity(); + auto* mr = src.memory_resource(); + auto stream = src.stream(); - rmm::device_buffer to; - EXPECT_NO_THROW(to = std::move(from)); + rmm::device_buffer dest; + dest = std::move(src); // contents of `from` should be in `to` - EXPECT_NE(nullptr, to.data()); - EXPECT_EQ(p, to.data()); - EXPECT_EQ(size, to.size()); - EXPECT_EQ(capacity, to.capacity()); - EXPECT_EQ(stream, to.stream()); - EXPECT_EQ(mr, to.memory_resource()); + EXPECT_NE(nullptr, dest.data()); + EXPECT_EQ(ptr, dest.data()); + EXPECT_EQ(size, dest.size()); + EXPECT_EQ(capacity, dest.capacity()); + EXPECT_EQ(stream, dest.stream()); + EXPECT_EQ(mr, dest.memory_resource()); // `from` should be empty - EXPECT_EQ(nullptr, from.data()); - EXPECT_EQ(0, from.size()); - EXPECT_EQ(0, from.capacity()); - EXPECT_EQ(rmm::cuda_stream_default, from.stream()); - EXPECT_NE(nullptr, from.memory_resource()); + EXPECT_EQ(nullptr, src.data()); + EXPECT_EQ(0, src.size()); + EXPECT_EQ(0, src.capacity()); + EXPECT_EQ(rmm::cuda_stream_default, src.stream()); + EXPECT_NE(nullptr, src.memory_resource()); } TYPED_TEST(DeviceBufferTest, MoveAssignment) { - rmm::device_buffer from(this->size, rmm::cuda_stream_default, &this->mr); - auto p = from.data(); - auto size = from.size(); - auto capacity = from.capacity(); - auto mr = from.memory_resource(); - auto stream = from.stream(); + rmm::device_buffer src(this->size, rmm::cuda_stream_default, &this->mr); + auto* ptr = src.data(); + auto size = src.size(); + auto capacity = src.capacity(); + auto* mr = src.memory_resource(); + auto stream = src.stream(); - rmm::device_buffer to(this->size - 1, rmm::cuda_stream_default, &this->mr); - EXPECT_NO_THROW(to = std::move(from)); + rmm::device_buffer dest(this->size - 1, rmm::cuda_stream_default, &this->mr); + dest = std::move(src); // contents of `from` should be in `to` - EXPECT_NE(nullptr, to.data()); - EXPECT_EQ(p, to.data()); - EXPECT_EQ(size, to.size()); - EXPECT_EQ(capacity, to.capacity()); - EXPECT_EQ(stream, to.stream()); - EXPECT_EQ(mr, to.memory_resource()); + EXPECT_NE(nullptr, dest.data()); + EXPECT_EQ(ptr, dest.data()); + EXPECT_EQ(size, dest.size()); + EXPECT_EQ(capacity, 
dest.capacity()); + EXPECT_EQ(stream, dest.stream()); + EXPECT_EQ(mr, dest.memory_resource()); // `from` should be empty - EXPECT_EQ(nullptr, from.data()); - EXPECT_EQ(0, from.size()); - EXPECT_EQ(0, from.capacity()); - EXPECT_EQ(rmm::cuda_stream_default, from.stream()); - EXPECT_NE(nullptr, from.memory_resource()); + EXPECT_EQ(nullptr, src.data()); + EXPECT_EQ(0, src.size()); + EXPECT_EQ(0, src.capacity()); + EXPECT_EQ(rmm::cuda_stream_default, src.stream()); + EXPECT_NE(nullptr, src.memory_resource()); } TYPED_TEST(DeviceBufferTest, SelfMoveAssignment) { rmm::device_buffer buff(this->size, rmm::cuda_stream_default, &this->mr); - auto p = buff.data(); + auto* ptr = buff.data(); auto size = buff.size(); auto capacity = buff.capacity(); - auto mr = buff.memory_resource(); + auto* mr = buff.memory_resource(); auto stream = buff.stream(); buff = std::move(buff); // self-move-assignment shouldn't modify the buffer EXPECT_NE(nullptr, buff.data()); - EXPECT_EQ(p, buff.data()); + EXPECT_EQ(ptr, buff.data()); EXPECT_EQ(size, buff.size()); EXPECT_EQ(capacity, buff.capacity()); EXPECT_EQ(stream, buff.stream()); @@ -397,7 +400,7 @@ TYPED_TEST(DeviceBufferTest, ResizeSmaller) static_cast(buff.data()) + buff.size(), 0); - auto old_data = buff.data(); + auto* old_data = buff.data(); rmm::device_buffer old_content( old_data, buff.size(), rmm::cuda_stream_default, &this->mr); // for comparison @@ -408,7 +411,7 @@ TYPED_TEST(DeviceBufferTest, ResizeSmaller) // Resizing smaller means the existing allocation should remain unchanged EXPECT_EQ(old_data, buff.data()); - EXPECT_NO_THROW(buff.shrink_to_fit(rmm::cuda_stream_default)); + buff.shrink_to_fit(rmm::cuda_stream_default); EXPECT_NE(nullptr, buff.data()); // A reallocation should have occured EXPECT_NE(old_data, buff.data()); @@ -424,8 +427,8 @@ TYPED_TEST(DeviceBufferTest, ResizeSmaller) TYPED_TEST(DeviceBufferTest, ResizeBigger) { rmm::device_buffer buff(this->size, rmm::cuda_stream_default, &this->mr); - auto old_data = buff.data(); - auto new_size = this->size + 1; + auto* old_data = buff.data(); + auto new_size = this->size + 1; buff.resize(new_size, rmm::cuda_stream_default); EXPECT_EQ(new_size, buff.size()); EXPECT_EQ(new_size, buff.capacity()); From 573dd3610be17efcb6a40250148790aa359814c3 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 2 Sep 2021 11:51:10 +1000 Subject: [PATCH 09/72] tidy some tests --- tests/cuda_stream_pool_tests.cpp | 11 +- tests/device_scalar_tests.cpp | 7 +- tests/device_uvector_tests.cpp | 186 ++++++++++++++++--------------- tests/logger_tests.cpp | 90 +++++++++------ 4 files changed, 160 insertions(+), 134 deletions(-) diff --git a/tests/cuda_stream_pool_tests.cpp b/tests/cuda_stream_pool_tests.cpp index b1f61a5fb..3f1919600 100644 --- a/tests/cuda_stream_pool_tests.cpp +++ b/tests/cuda_stream_pool_tests.cpp @@ -49,13 +49,14 @@ TEST_F(CudaStreamPoolTest, ValidStreams) auto const stream_b = this->pool.get_stream(); // Operations on the streams should work correctly and without throwing exceptions - auto v = rmm::device_uvector{100, stream_a}; - RMM_CUDA_TRY(cudaMemsetAsync(v.data(), 0xcc, 100, stream_a.value())); + auto constexpr vector_size{100}; + auto vec1 = rmm::device_uvector{vector_size, stream_a}; + RMM_CUDA_TRY(cudaMemsetAsync(vec1.data(), 0xcc, 100, stream_a.value())); stream_a.synchronize(); - auto v2 = rmm::device_uvector{v, stream_b}; - auto x = v2.front_element(stream_b); - EXPECT_EQ(x, 0xcc); + auto vec2 = rmm::device_uvector{vec1, stream_b}; + auto element = vec2.front_element(stream_b); + 
EXPECT_EQ(element, 0xcc); } TEST_F(CudaStreamPoolTest, PoolSize) { EXPECT_GE(this->pool.get_pool_size(), 1); } diff --git a/tests/device_scalar_tests.cpp b/tests/device_scalar_tests.cpp index e4c1a42ff..f58655951 100644 --- a/tests/device_scalar_tests.cpp +++ b/tests/device_scalar_tests.cpp @@ -22,6 +22,7 @@ #include #include + #include #include #include @@ -34,7 +35,7 @@ struct DeviceScalarTest : public ::testing::Test { rmm::mr::device_memory_resource* mr{rmm::mr::get_current_device_resource()}; std::default_random_engine generator{}; - DeviceScalarTest() { value = random_value(); } + DeviceScalarTest() : value{random_value()} {} template ::value, bool> = true> U random_value() @@ -56,7 +57,9 @@ struct DeviceScalarTest : public ::testing::Test { template ::value, bool> = true> U random_value() { - static std::normal_distribution distribution{100, 20}; + auto const mean{100}; + auto const stddev{20}; + static std::normal_distribution distribution(mean, stddev); return distribution(generator); } }; diff --git a/tests/device_uvector_tests.cpp b/tests/device_uvector_tests.cpp index db06f7c47..b3c06885b 100644 --- a/tests/device_uvector_tests.cpp +++ b/tests/device_uvector_tests.cpp @@ -23,7 +23,7 @@ template struct TypedUVectorTest : ::testing::Test { - rmm::cuda_stream_view stream() const noexcept { return rmm::cuda_stream_view{}; } + [[nodiscard]] rmm::cuda_stream_view stream() const noexcept { return rmm::cuda_stream_view{}; } }; using TestTypes = ::testing::Types; @@ -32,27 +32,29 @@ TYPED_TEST_CASE(TypedUVectorTest, TestTypes); TYPED_TEST(TypedUVectorTest, ZeroSizeConstructor) { - rmm::device_uvector uv(0, this->stream()); - EXPECT_EQ(uv.size(), 0); - EXPECT_EQ(uv.end(), uv.begin()); - EXPECT_TRUE(uv.is_empty()); + rmm::device_uvector vec(0, this->stream()); + EXPECT_EQ(vec.size(), 0); + EXPECT_EQ(vec.end(), vec.begin()); + EXPECT_TRUE(vec.is_empty()); } TYPED_TEST(TypedUVectorTest, NonZeroSizeConstructor) { - rmm::device_uvector uv(12345, this->stream()); - EXPECT_EQ(uv.size(), 12345); - EXPECT_NE(uv.data(), nullptr); - EXPECT_EQ(uv.end(), uv.begin() + uv.size()); - EXPECT_FALSE(uv.is_empty()); - EXPECT_NE(uv.element_ptr(0), nullptr); + auto const size{12345}; + rmm::device_uvector vec(size, this->stream()); + EXPECT_EQ(vec.size(), 12345); + EXPECT_NE(vec.data(), nullptr); + EXPECT_EQ(vec.end(), vec.begin() + vec.size()); + EXPECT_FALSE(vec.is_empty()); + EXPECT_NE(vec.element_ptr(0), nullptr); } TYPED_TEST(TypedUVectorTest, CopyConstructor) { - rmm::device_uvector uv(12345, this->stream()); - rmm::device_uvector uv_copy(uv, this->stream()); - EXPECT_EQ(uv_copy.size(), uv.size()); + auto const size{12345}; + rmm::device_uvector vec(size, this->stream()); + rmm::device_uvector uv_copy(vec, this->stream()); + EXPECT_EQ(uv_copy.size(), vec.size()); EXPECT_NE(uv_copy.data(), nullptr); EXPECT_EQ(uv_copy.end(), uv_copy.begin() + uv_copy.size()); EXPECT_FALSE(uv_copy.is_empty()); @@ -61,145 +63,145 @@ TYPED_TEST(TypedUVectorTest, CopyConstructor) TYPED_TEST(TypedUVectorTest, ResizeSmaller) { - auto original_size = 12345; - rmm::device_uvector uv(original_size, this->stream()); - auto original_data = uv.data(); - auto original_begin = uv.begin(); + auto const original_size{12345}; + rmm::device_uvector vec(original_size, this->stream()); + auto original_data = vec.data(); + auto original_begin = vec.begin(); - auto smaller_size = uv.size() - 1; - uv.resize(smaller_size, this->stream()); + auto smaller_size = vec.size() - 1; + vec.resize(smaller_size, this->stream()); - EXPECT_EQ(original_data, 
uv.data()); - EXPECT_EQ(original_begin, uv.begin()); - EXPECT_EQ(uv.size(), smaller_size); - EXPECT_EQ(uv.capacity(), original_size); + EXPECT_EQ(original_data, vec.data()); + EXPECT_EQ(original_begin, vec.begin()); + EXPECT_EQ(vec.size(), smaller_size); + EXPECT_EQ(vec.capacity(), original_size); // shrink_to_fit should force a new allocation - uv.shrink_to_fit(this->stream()); - EXPECT_EQ(uv.size(), smaller_size); - EXPECT_EQ(uv.capacity(), smaller_size); + vec.shrink_to_fit(this->stream()); + EXPECT_EQ(vec.size(), smaller_size); + EXPECT_EQ(vec.capacity(), smaller_size); } TYPED_TEST(TypedUVectorTest, ResizeLarger) { - auto original_size = 12345; - rmm::device_uvector uv(original_size, this->stream()); - auto original_data = uv.data(); - auto original_begin = uv.begin(); + auto const original_size{12345}; + rmm::device_uvector vec(original_size, this->stream()); + auto original_data = vec.data(); + auto original_begin = vec.begin(); - auto larger_size = uv.size() + 1; - uv.resize(larger_size, this->stream()); + auto larger_size = vec.size() + 1; + vec.resize(larger_size, this->stream()); - EXPECT_NE(uv.data(), original_data); - EXPECT_NE(uv.begin(), original_begin); - EXPECT_EQ(uv.size(), larger_size); - EXPECT_EQ(uv.capacity(), larger_size); + EXPECT_NE(vec.data(), original_data); + EXPECT_NE(vec.begin(), original_begin); + EXPECT_EQ(vec.size(), larger_size); + EXPECT_EQ(vec.capacity(), larger_size); - auto larger_data = uv.data(); - auto larger_begin = uv.begin(); + auto larger_data = vec.data(); + auto larger_begin = vec.begin(); // shrink_to_fit shouldn't have any effect - uv.shrink_to_fit(this->stream()); - EXPECT_EQ(uv.size(), larger_size); - EXPECT_EQ(uv.capacity(), larger_size); - EXPECT_EQ(uv.data(), larger_data); - EXPECT_EQ(uv.begin(), larger_begin); + vec.shrink_to_fit(this->stream()); + EXPECT_EQ(vec.size(), larger_size); + EXPECT_EQ(vec.capacity(), larger_size); + EXPECT_EQ(vec.data(), larger_data); + EXPECT_EQ(vec.begin(), larger_begin); } TYPED_TEST(TypedUVectorTest, ResizeToZero) { - auto original_size = 12345; - rmm::device_uvector uv(original_size, this->stream()); - uv.resize(0, this->stream()); + auto const original_size{12345}; + rmm::device_uvector vec(original_size, this->stream()); + vec.resize(0, this->stream()); - EXPECT_EQ(uv.size(), 0); - EXPECT_TRUE(uv.is_empty()); - EXPECT_EQ(uv.capacity(), original_size); + EXPECT_EQ(vec.size(), 0); + EXPECT_TRUE(vec.is_empty()); + EXPECT_EQ(vec.capacity(), original_size); - uv.shrink_to_fit(this->stream()); - EXPECT_EQ(uv.capacity(), 0); + vec.shrink_to_fit(this->stream()); + EXPECT_EQ(vec.capacity(), 0); } TYPED_TEST(TypedUVectorTest, Release) { - auto original_size = 12345; - rmm::device_uvector uv(original_size, this->stream()); + auto const original_size{12345}; + rmm::device_uvector vec(original_size, this->stream()); - auto original_data = uv.data(); + auto original_data = vec.data(); - rmm::device_buffer storage = uv.release(); + rmm::device_buffer storage = vec.release(); - EXPECT_EQ(uv.size(), 0); - EXPECT_EQ(uv.capacity(), 0); - EXPECT_TRUE(uv.is_empty()); + EXPECT_EQ(vec.size(), 0); + EXPECT_EQ(vec.capacity(), 0); + EXPECT_TRUE(vec.is_empty()); EXPECT_EQ(storage.data(), original_data); EXPECT_EQ(storage.size(), original_size * sizeof(TypeParam)); } TYPED_TEST(TypedUVectorTest, ElementPointer) { - auto size = 12345; - rmm::device_uvector uv(size, this->stream()); - for (std::size_t i = 0; i < uv.size(); ++i) { - EXPECT_NE(uv.element_ptr(i), nullptr); + auto const size{12345}; + rmm::device_uvector vec(size, 
this->stream()); + for (std::size_t i = 0; i < vec.size(); ++i) { + EXPECT_NE(vec.element_ptr(i), nullptr); } } TYPED_TEST(TypedUVectorTest, OOBSetElement) { - auto size = 12345; - rmm::device_uvector uv(size, this->stream()); - EXPECT_THROW(uv.set_element(uv.size() + 1, 42, this->stream()), rmm::out_of_range); + auto const size{12345}; + rmm::device_uvector vec(size, this->stream()); + EXPECT_THROW(vec.set_element(vec.size() + 1, 42, this->stream()), rmm::out_of_range); } TYPED_TEST(TypedUVectorTest, OOBGetElement) { - auto size = 12345; - rmm::device_uvector uv(size, this->stream()); - EXPECT_THROW(uv.element(uv.size() + 1, this->stream()), rmm::out_of_range); + auto const size{12345}; + rmm::device_uvector vec(size, this->stream()); + EXPECT_THROW(vec.element(vec.size() + 1, this->stream()), rmm::out_of_range); } TYPED_TEST(TypedUVectorTest, GetSetElement) { - auto size = 12345; - rmm::device_uvector uv(size, this->stream()); - for (std::size_t i = 0; i < uv.size(); ++i) { - uv.set_element(i, i, this->stream()); - EXPECT_EQ(static_cast(i), uv.element(i, this->stream())); + auto const size{12345}; + rmm::device_uvector vec(size, this->stream()); + for (std::size_t i = 0; i < vec.size(); ++i) { + vec.set_element(i, i, this->stream()); + EXPECT_EQ(static_cast(i), vec.element(i, this->stream())); } } TYPED_TEST(TypedUVectorTest, GetSetElementAsync) { - auto size = 12345; - rmm::device_uvector uv(size, this->stream()); - for (std::size_t i = 0; i < uv.size(); ++i) { + auto const size{12345}; + rmm::device_uvector vec(size, this->stream()); + for (std::size_t i = 0; i < vec.size(); ++i) { auto init = static_cast(i); - uv.set_element_async(i, init, this->stream()); - EXPECT_EQ(init, uv.element(i, this->stream())); + vec.set_element_async(i, init, this->stream()); + EXPECT_EQ(init, vec.element(i, this->stream())); } } TYPED_TEST(TypedUVectorTest, SetElementZeroAsync) { - auto size = 12345; - rmm::device_uvector uv(size, this->stream()); - for (std::size_t i = 0; i < uv.size(); ++i) { - uv.set_element_to_zero_async(i, this->stream()); - EXPECT_EQ(TypeParam{0}, uv.element(i, this->stream())); + auto const size{12345}; + rmm::device_uvector vec(size, this->stream()); + for (std::size_t i = 0; i < vec.size(); ++i) { + vec.set_element_to_zero_async(i, this->stream()); + EXPECT_EQ(TypeParam{0}, vec.element(i, this->stream())); } } TYPED_TEST(TypedUVectorTest, FrontBackElement) { - auto size = 12345; - rmm::device_uvector uv(size, this->stream()); + auto const size{12345}; + rmm::device_uvector vec(size, this->stream()); - auto first = TypeParam{42}; - auto last = TypeParam{13}; - uv.set_element(0, first, this->stream()); - uv.set_element(uv.size() - 1, last, this->stream()); + auto const first = TypeParam{42}; + auto const last = TypeParam{13}; + vec.set_element(0, first, this->stream()); + vec.set_element(vec.size() - 1, last, this->stream()); - EXPECT_EQ(first, uv.front_element(this->stream())); - EXPECT_EQ(last, uv.back_element(this->stream())); + EXPECT_EQ(first, vec.front_element(this->stream())); + EXPECT_EQ(last, vec.back_element(this->stream())); } diff --git a/tests/logger_tests.cpp b/tests/logger_tests.cpp index b343c7e35..1da32a137 100644 --- a/tests/logger_tests.cpp +++ b/tests/logger_tests.cpp @@ -26,7 +26,7 @@ class raii_restore_env { public: raii_restore_env(char const* name) : name_(name) { - auto const value_or_null = getenv(name); + auto* const value_or_null = getenv(name); if (value_or_null != nullptr) { value_ = value_or_null; is_set_ = true; @@ -42,6 +42,11 @@ class 
raii_restore_env { } } + raii_restore_env(raii_restore_env const&) = default; + raii_restore_env& operator=(raii_restore_env const&) = default; + raii_restore_env(raii_restore_env&&) = default; + raii_restore_env& operator=(raii_restore_env&&) = default; + private: std::string name_{}; std::string value_{}; @@ -88,19 +93,22 @@ TEST(Adaptor, FilenameConstructor) rmm::mr::cuda_memory_resource upstream; rmm::mr::logging_resource_adaptor log_mr{&upstream, filename}; - auto p0 = log_mr.allocate(100); - auto p1 = log_mr.allocate(42); - log_mr.deallocate(p0, 100); - log_mr.deallocate(p1, 42); + auto const size0{100}; + auto const size1{42}; + + auto* ptr0 = log_mr.allocate(size0); + auto* ptr1 = log_mr.allocate(size1); + log_mr.deallocate(ptr0, size0); + log_mr.deallocate(ptr1, size1); log_mr.flush(); using rmm::detail::action; using rmm::detail::event; - std::vector expected_events{{action::ALLOCATE, 100, p0}, - {action::ALLOCATE, 42, p1}, - {action::FREE, 100, p0}, - {action::FREE, 42, p1}}; + std::vector expected_events{{action::ALLOCATE, size0, ptr0}, + {action::ALLOCATE, size1, ptr1}, + {action::FREE, size0, ptr0}, + {action::FREE, size1, ptr1}}; expect_log_events(filename, expected_events); } @@ -117,19 +125,22 @@ TEST(Adaptor, MultiSinkConstructor) rmm::mr::logging_resource_adaptor log_mr{&upstream, {file_sink1, file_sink2}}; - auto p0 = log_mr.allocate(100); - auto p1 = log_mr.allocate(42); - log_mr.deallocate(p0, 100); - log_mr.deallocate(p1, 42); + auto const size0{100}; + auto const size1{42}; + + auto* ptr0 = log_mr.allocate(size0); + auto* ptr1 = log_mr.allocate(size1); + log_mr.deallocate(ptr0, size0); + log_mr.deallocate(ptr1, size1); log_mr.flush(); using rmm::detail::action; using rmm::detail::event; - std::vector expected_events{{action::ALLOCATE, 100, p0}, - {action::ALLOCATE, 42, p1}, - {action::FREE, 100, p0}, - {action::FREE, 42, p1}}; + std::vector expected_events{{action::ALLOCATE, size0, ptr0}, + {action::ALLOCATE, size1, ptr1}, + {action::FREE, size0, ptr0}, + {action::FREE, size1, ptr1}}; expect_log_events(filename1, expected_events); expect_log_events(filename2, expected_events); @@ -142,19 +153,22 @@ TEST(Adaptor, Factory) auto log_mr = rmm::mr::make_logging_adaptor(&upstream, filename); - auto p0 = log_mr.allocate(99); - log_mr.deallocate(p0, 99); - auto p1 = log_mr.allocate(42); - log_mr.deallocate(p1, 42); + auto const size0{99}; + auto const size1{42}; + + auto* ptr0 = log_mr.allocate(size0); + log_mr.deallocate(ptr0, size0); + auto* ptr1 = log_mr.allocate(size1); + log_mr.deallocate(ptr1, size1); log_mr.flush(); using rmm::detail::action; using rmm::detail::event; - std::vector expected_events{{action::ALLOCATE, 99, p0}, - {action::FREE, 99, p0}, - {action::ALLOCATE, 42, p1}, - {action::FREE, 42, p1}}; + std::vector expected_events{{action::ALLOCATE, size0, ptr0}, + {action::FREE, size0, ptr0}, + {action::ALLOCATE, size1, ptr1}, + {action::FREE, size1, ptr1}}; expect_log_events(filename, expected_events); } @@ -178,8 +192,10 @@ TEST(Adaptor, EnvironmentPath) // use log file location specified in environment variable RMM_LOG_FILE auto log_mr = rmm::mr::make_logging_adaptor(&upstream); - auto p = log_mr.allocate(100); - log_mr.deallocate(p, 100); + auto const size{100}; + + auto* ptr = log_mr.allocate(size); + log_mr.deallocate(ptr, size); log_mr.flush(); @@ -187,8 +203,8 @@ TEST(Adaptor, EnvironmentPath) using rmm::detail::event; std::vector expected_events{ - {action::ALLOCATE, 100, p}, - {action::FREE, 100, p}, + {action::ALLOCATE, size, ptr}, + {action::FREE, 
size, ptr}, }; expect_log_events(filename, expected_events); @@ -202,11 +218,13 @@ TEST(Adaptor, STDOUT) auto log_mr = rmm::mr::make_logging_adaptor(&upstream, std::cout); - auto p = log_mr.allocate(100); - log_mr.deallocate(p, 100); + auto const size{100}; + + auto* p = log_mr.allocate(size); + log_mr.deallocate(p, size); std::string output = testing::internal::GetCapturedStdout(); - std::string header = output.substr(0, output.find("\n")); + std::string header = output.substr(0, output.find('\n')); ASSERT_EQ(header, log_mr.header()); } @@ -218,10 +236,12 @@ TEST(Adaptor, STDERR) auto log_mr = rmm::mr::make_logging_adaptor(&upstream, std::cerr); - auto p = log_mr.allocate(100); - log_mr.deallocate(p, 100); + auto const size{100}; + + auto* p = log_mr.allocate(size); + log_mr.deallocate(p, size); std::string output = testing::internal::GetCapturedStderr(); - std::string header = output.substr(0, output.find("\n")); + std::string header = output.substr(0, output.find('\n')); ASSERT_EQ(header, log_mr.header()); } From be955033804934a34a9cba5383c85e42f1640f0b Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 2 Sep 2021 12:24:02 +1000 Subject: [PATCH 10/72] tidy cuda_stream.hpp --- include/rmm/cuda_stream.hpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/rmm/cuda_stream.hpp b/include/rmm/cuda_stream.hpp index 10d944c8f..6a3304e2c 100644 --- a/include/rmm/cuda_stream.hpp +++ b/include/rmm/cuda_stream.hpp @@ -57,13 +57,13 @@ class cuda_stream { */ cuda_stream() : stream_{[]() { - cudaStream_t* s = new cudaStream_t; + auto* s = new cudaStream_t; RMM_CUDA_TRY(cudaStreamCreate(s)); return s; }(), - [](cudaStream_t* s) { - RMM_ASSERT_CUDA_SUCCESS(cudaStreamDestroy(*s)); - delete s; + [](cudaStream_t* stream) { + RMM_ASSERT_CUDA_SUCCESS(cudaStreamDestroy(*stream)); + delete stream; }} { } @@ -74,14 +74,14 @@ class cuda_stream { * @return true If the owned stream has not been explicitly moved and is therefore non-null. * @return false If the owned stream has been explicitly moved and is therefore null. */ - bool is_valid() const { return stream_ != nullptr; } + [[nodiscard]] bool is_valid() const { return stream_ != nullptr; } /** * @brief Get the value of the wrapped CUDA stream. * * @return cudaStream_t The wrapped CUDA stream. */ - cudaStream_t value() const + [[nodiscard]] cudaStream_t value() const { RMM_LOGGING_ASSERT(is_valid()); return *stream_; @@ -97,7 +97,7 @@ class cuda_stream { * * @return rmm::cuda_stream_view The view of the CUDA stream */ - cuda_stream_view view() const { return cuda_stream_view{value()}; } + [[nodiscard]] cuda_stream_view view() const { return cuda_stream_view{value()}; } /** * @brief Implicit conversion to cuda_stream_view From 7c2653d54ce606ef96724096efc5e02386326e53 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 7 Sep 2021 08:37:54 +1000 Subject: [PATCH 11/72] Remove incorrect fix for warning --- include/rmm/detail/error.hpp | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/include/rmm/detail/error.hpp b/include/rmm/detail/error.hpp index 50ae70b2f..7c052ea8d 100644 --- a/include/rmm/detail/error.hpp +++ b/include/rmm/detail/error.hpp @@ -101,7 +101,7 @@ class out_of_range : public std::out_of_range { (__VA_ARGS__) #define GET_RMM_EXPECTS_MACRO(_1, _2, _3, NAME, ...) NAME #define RMM_EXPECTS_3(_condition, _exception_type, _reason) \ - (!!(_condition)) ? static_cast(0) : throw(_exception_type) \ + (!!(_condition)) ? 
static_cast(0) : throw _exception_type \ { \ "RMM failure at: " __FILE__ ":" RMM_STRINGIFY(__LINE__) ": " _reason \ } @@ -124,7 +124,7 @@ class out_of_range : public std::out_of_range { (__VA_ARGS__) #define GET_RMM_FAIL_MACRO(_1, _2, NAME, ...) NAME #define RMM_FAIL_2(_what, _exception_type) \ - throw(_exception_type){"RMM failure at:" __FILE__ ":" RMM_STRINGIFY(__LINE__) ": " _what}; + throw _exception_type{"RMM failure at:" __FILE__ ":" RMM_STRINGIFY(__LINE__) ": " _what}; #define RMM_FAIL_1(_what) RMM_FAIL_2(_what, rmm::logic_error) /** @@ -152,15 +152,15 @@ class out_of_range : public std::out_of_range { GET_RMM_CUDA_TRY_MACRO(__VA_ARGS__, RMM_CUDA_TRY_2, RMM_CUDA_TRY_1) \ (__VA_ARGS__) #define GET_RMM_CUDA_TRY_MACRO(_1, _2, NAME, ...) NAME -#define RMM_CUDA_TRY_2(_call, _exception_type) \ - do { \ - cudaError_t const error = (_call); \ - if (cudaSuccess != error) { \ - cudaGetLastError(); \ - throw(_exception_type){std::string{"CUDA error at: "} + __FILE__ + ":" + \ - RMM_STRINGIFY(__LINE__) + ": " + cudaGetErrorName(error) + " " + \ - cudaGetErrorString(error)}; \ - } \ +#define RMM_CUDA_TRY_2(_call, _exception_type) \ + do { \ + cudaError_t const error = (_call); \ + if (cudaSuccess != error) { \ + cudaGetLastError(); \ + throw _exception_type{std::string{"CUDA error at: "} + __FILE__ + ":" + \ + RMM_STRINGIFY(__LINE__) + ": " + cudaGetErrorName(error) + " " + \ + cudaGetErrorString(error)}; \ + } \ } while (0) #define RMM_CUDA_TRY_1(_call) RMM_CUDA_TRY_2(_call, rmm::cuda_error) From d108e80c3be02e0f531bf966983ac218aa1e24c6 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 7 Sep 2021 08:38:02 +1000 Subject: [PATCH 12/72] include order --- tests/device_buffer_tests.cu | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/device_buffer_tests.cu b/tests/device_buffer_tests.cu index 05ecef4c4..63841a67e 100644 --- a/tests/device_buffer_tests.cu +++ b/tests/device_buffer_tests.cu @@ -27,9 +27,11 @@ #include #include -#include #include #include + +#include + #include #include From d4ee0d4ce1cfd92e94b88193ee721e0ab44b1916 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 7 Sep 2021 08:38:30 +1000 Subject: [PATCH 13/72] tidying --- include/rmm/cuda_device.hpp | 6 +- tests/mr/device/aligned_mr_tests.cpp | 179 ++++++++++++++++++--------- 2 files changed, 125 insertions(+), 60 deletions(-) diff --git a/include/rmm/cuda_device.hpp b/include/rmm/cuda_device.hpp index cad929de7..ab225490e 100644 --- a/include/rmm/cuda_device.hpp +++ b/include/rmm/cuda_device.hpp @@ -33,10 +33,10 @@ struct cuda_device_id { * * @param id The device's integer identifier */ - explicit constexpr cuda_device_id(value_type id) noexcept : id_{id} {} + explicit constexpr cuda_device_id(value_type dev_id) noexcept : id_{dev_id} {} /// Returns the wrapped integer value - constexpr value_type value() const noexcept { return id_; } + [[nodiscard]] constexpr value_type value() const noexcept { return id_; } private: value_type id_; @@ -52,7 +52,7 @@ namespace detail { */ inline cuda_device_id current_device() { - int dev_id; + int dev_id{}; RMM_CUDA_TRY(cudaGetDevice(&dev_id)); return cuda_device_id{dev_id}; } diff --git a/tests/mr/device/aligned_mr_tests.cpp b/tests/mr/device/aligned_mr_tests.cpp index 3eafd624e..a39dcdbfd 100644 --- a/tests/mr/device/aligned_mr_tests.cpp +++ b/tests/mr/device/aligned_mr_tests.cpp @@ -49,7 +49,9 @@ TEST(AlignedTest, ThrowOnNullUpstream) TEST(AlignedTest, ThrowOnInvalidAllocationAlignment) { mock_resource mock; - auto construct_alignment = [](auto* r, std::size_t 
a) { aligned_mock mr{r, a}; }; + auto construct_alignment = [](auto* memres, std::size_t align) { + aligned_mock mr{memres, align}; + }; EXPECT_THROW(construct_alignment(&mock, 255), rmm::logic_error); EXPECT_NO_THROW(construct_alignment(&mock, 256)); EXPECT_THROW(construct_alignment(&mock, 768), rmm::logic_error); @@ -85,97 +87,160 @@ TEST(AlignedTest, DefaultAllocationAlignmentPassthrough) aligned_mock mr{&mock}; cuda_stream_view stream; - void* pointer = reinterpret_cast(123); + auto const unaligned_address{123}; + void* const pointer = reinterpret_cast(unaligned_address); // device_memory_resource aligns to 8. - EXPECT_CALL(mock, do_allocate(8, stream)).WillOnce(Return(pointer)); - EXPECT_CALL(mock, do_deallocate(pointer, 8, stream)).Times(1); - EXPECT_EQ(mr.allocate(5, stream), pointer); - mr.deallocate(pointer, 5, stream); + { + auto const size{8}; + EXPECT_CALL(mock, do_allocate(size, stream)).WillOnce(Return(pointer)); + EXPECT_CALL(mock, do_deallocate(pointer, size, stream)).Times(1); + } + + { + auto const size{5}; + EXPECT_EQ(mr.allocate(size, stream), pointer); + mr.deallocate(pointer, size, stream); + } } TEST(AlignedTest, BelowAlignmentThresholdPassthrough) { mock_resource mock; - aligned_mock mr{&mock, 4096, 65536}; + auto const alignment{4096}; + auto const threshold{65536}; + aligned_mock mr{&mock, alignment, threshold}; cuda_stream_view stream; - void* pointer = reinterpret_cast(123); + auto const unaligned_address1{123}; + void* const pointer = reinterpret_cast(unaligned_address1); // device_memory_resource aligns to 8. - EXPECT_CALL(mock, do_allocate(8, stream)).WillOnce(Return(pointer)); - EXPECT_CALL(mock, do_deallocate(pointer, 8, stream)).Times(1); - EXPECT_EQ(mr.allocate(3, stream), pointer); - mr.deallocate(pointer, 3, stream); - - void* pointer1 = reinterpret_cast(456); - EXPECT_CALL(mock, do_allocate(65528, stream)).WillOnce(Return(pointer1)); - EXPECT_CALL(mock, do_deallocate(pointer1, 65528, stream)).Times(1); - EXPECT_EQ(mr.allocate(65528, stream), pointer1); - mr.deallocate(pointer1, 65528, stream); + { + auto const size{8}; + EXPECT_CALL(mock, do_allocate(size, stream)).WillOnce(Return(pointer)); + EXPECT_CALL(mock, do_deallocate(pointer, size, stream)).Times(1); + } + + { + auto const size{3}; + EXPECT_EQ(mr.allocate(size, stream), pointer); + mr.deallocate(pointer, size, stream); + } + + { + auto const unaligned_address2{456}; + auto const size{65528}; + void* const pointer1 = reinterpret_cast(unaligned_address2); + EXPECT_CALL(mock, do_allocate(size, stream)).WillOnce(Return(pointer1)); + EXPECT_CALL(mock, do_deallocate(pointer1, size, stream)).Times(1); + EXPECT_EQ(mr.allocate(size, stream), pointer1); + mr.deallocate(pointer1, size, stream); + } } TEST(AlignedTest, UpstreamAddressAlreadyAligned) { mock_resource mock; - aligned_mock mr{&mock, 4096, 65536}; + auto const alignment{4096}; + auto const threshold{65536}; + aligned_mock mr{&mock, alignment, threshold}; cuda_stream_view stream; - void* pointer = reinterpret_cast(4096); - EXPECT_CALL(mock, do_allocate(69376, stream)).WillOnce(Return(pointer)); - EXPECT_CALL(mock, do_deallocate(pointer, 69376, stream)).Times(1); - - EXPECT_EQ(mr.allocate(65536, stream), pointer); - mr.deallocate(pointer, 65536, stream); + auto const aligned_address{4096}; + void* const pointer = reinterpret_cast(aligned_address); + + { + auto const size{69376}; + EXPECT_CALL(mock, do_allocate(size, stream)).WillOnce(Return(pointer)); + EXPECT_CALL(mock, do_deallocate(pointer, size, stream)).Times(1); + } + + { + auto const 
size{65536}; + EXPECT_EQ(mr.allocate(size, stream), pointer); + mr.deallocate(pointer, size, stream); + } } TEST(AlignedTest, AlignUpstreamAddress) { mock_resource mock; - aligned_mock mr{&mock, 4096, 65536}; + auto const alignment{4096}; + auto const threshold{65536}; + aligned_mock mr{&mock, alignment, threshold}; cuda_stream_view stream; - void* pointer = reinterpret_cast(256); - EXPECT_CALL(mock, do_allocate(69376, stream)).WillOnce(Return(pointer)); - EXPECT_CALL(mock, do_deallocate(pointer, 69376, stream)).Times(1); - - void* expected_pointer = reinterpret_cast(4096); - EXPECT_EQ(mr.allocate(65536, stream), expected_pointer); - mr.deallocate(expected_pointer, 65536, stream); + { + auto const address{256}; + void* const pointer = reinterpret_cast(address); + auto const size{69376}; + EXPECT_CALL(mock, do_allocate(size, stream)).WillOnce(Return(pointer)); + EXPECT_CALL(mock, do_deallocate(pointer, size, stream)).Times(1); + } + + { + auto const address{4096}; + void* const expected_pointer = reinterpret_cast(address); + auto const size{65536}; + EXPECT_EQ(mr.allocate(size, stream), expected_pointer); + mr.deallocate(expected_pointer, size, stream); + } } TEST(AlignedTest, AlignMultiple) { mock_resource mock; - aligned_mock mr{&mock, 4096, 65536}; + auto const alignment{4096}; + auto const threshold{65536}; + aligned_mock mr{&mock, alignment, threshold}; cuda_stream_view stream; - void* pointer = reinterpret_cast(256); - void* pointer1 = reinterpret_cast(131584); - void* pointer2 = reinterpret_cast(263168); - EXPECT_CALL(mock, do_allocate(69376, stream)).WillOnce(Return(pointer)); - EXPECT_CALL(mock, do_allocate(77568, stream)).WillOnce(Return(pointer1)); - EXPECT_CALL(mock, do_allocate(81664, stream)).WillOnce(Return(pointer2)); - EXPECT_CALL(mock, do_deallocate(pointer, 69376, stream)).Times(1); - EXPECT_CALL(mock, do_deallocate(pointer1, 77568, stream)).Times(1); - EXPECT_CALL(mock, do_deallocate(pointer2, 81664, stream)).Times(1); - - void* expected_pointer = reinterpret_cast(4096); - void* expected_pointer1 = reinterpret_cast(135168); - void* expected_pointer2 = reinterpret_cast(266240); - EXPECT_EQ(mr.allocate(65536, stream), expected_pointer); - EXPECT_EQ(mr.allocate(73728, stream), expected_pointer1); - EXPECT_EQ(mr.allocate(77800, stream), expected_pointer2); - mr.deallocate(expected_pointer1, 73728, stream); - mr.deallocate(expected_pointer, 65536, stream); - mr.deallocate(expected_pointer2, 77800, stream); + + { + auto const address1{256}; + auto const address2{131584}; + auto const address3{263168}; + void* const pointer1 = reinterpret_cast(address1); + void* const pointer2 = reinterpret_cast(address2); + void* const pointer3 = reinterpret_cast(address3); + auto const size1{69376}; + auto const size2{77568}; + auto const size3{81664}; + EXPECT_CALL(mock, do_allocate(size1, stream)).WillOnce(Return(pointer1)); + EXPECT_CALL(mock, do_allocate(size2, stream)).WillOnce(Return(pointer2)); + EXPECT_CALL(mock, do_allocate(size3, stream)).WillOnce(Return(pointer3)); + EXPECT_CALL(mock, do_deallocate(pointer1, size1, stream)).Times(1); + EXPECT_CALL(mock, do_deallocate(pointer2, size2, stream)).Times(1); + EXPECT_CALL(mock, do_deallocate(pointer3, size3, stream)).Times(1); + } + + { + auto const expected_address1{4096}; + auto const expected_address2{135168}; + auto const expected_address3{266240}; + void* const expected_pointer1 = reinterpret_cast(expected_address1); + void* const expected_pointer2 = reinterpret_cast(expected_address2); + void* const expected_pointer3 = 
reinterpret_cast(expected_address3); + auto const size1{65536}; + auto const size2{73728}; + auto const size3{77800}; + EXPECT_EQ(mr.allocate(size1, stream), expected_pointer1); + EXPECT_EQ(mr.allocate(size2, stream), expected_pointer2); + EXPECT_EQ(mr.allocate(size3, stream), expected_pointer3); + mr.deallocate(expected_pointer1, size1, stream); + mr.deallocate(expected_pointer2, size2, stream); + mr.deallocate(expected_pointer3, size3, stream); + } } TEST(AlignedTest, AlignRealPointer) { - aligned_real mr{rmm::mr::get_current_device_resource(), 4096, 65536}; - void* alloc = mr.allocate(65536); + auto const alignment{4096}; + auto const threshold{65536}; + aligned_real mr{rmm::mr::get_current_device_resource(), alignment, threshold}; + void* alloc = mr.allocate(threshold); auto const address = reinterpret_cast(alloc); - EXPECT_TRUE(address % 4096 == 0); - mr.deallocate(alloc, 65536); + EXPECT_TRUE(address % alignment == 0); + mr.deallocate(alloc, threshold); } } // namespace From 7aafe385e5adf59ddebd3dd7e13592ec006e8144 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 7 Sep 2021 10:46:07 +1000 Subject: [PATCH 14/72] Use temporary fork of gtest --- cmake/thirdparty/get_gtest.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/thirdparty/get_gtest.cmake b/cmake/thirdparty/get_gtest.cmake index 0b9287734..7dd02eb8b 100644 --- a/cmake/thirdparty/get_gtest.cmake +++ b/cmake/thirdparty/get_gtest.cmake @@ -23,8 +23,8 @@ function(find_and_configure_gtest VERSION) GTest ${VERSION} GLOBAL_TARGETS gmock gmock_main gtest gtest_main GTest::gmock GTest::gtest GTest::gtest_main CPM_ARGS - GIT_REPOSITORY https://github.com/google/googletest.git - GIT_TAG release-${VERSION} + GIT_REPOSITORY https://github.com/harrism/googletest.git + GIT_TAG fix-clang-tidy-nolint GIT_SHALLOW TRUE OPTIONS "INSTALL_GTEST OFF" # googletest >= 1.10.0 provides a cmake config file -- use it if it exists @@ -40,4 +40,4 @@ function(find_and_configure_gtest VERSION) endfunction() -find_and_configure_gtest(1.10.0) +find_and_configure_gtest(1.11.0) From b7b43007fd43cc07d50c3356503757681443e268 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 7 Sep 2021 10:46:46 +1000 Subject: [PATCH 15/72] function-cognitive-complexity.IgnoreMacros=1 --- .clang-tidy | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.clang-tidy b/.clang-tidy index a0bf9994a..b8fba1f09 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -54,4 +54,6 @@ CheckOptions: value: 'mr|os' - key: readability-identifier-length.IgnoredVariableNames value: 'mr|_' + - key: readability-function-cognitive-complexity.IgnoreMacros + value: '1' ... 
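The clang-tidy option added above, readability-function-cognitive-complexity.IgnoreMacros set to '1', stops the cognitive-complexity check from counting code expanded from macros, which matters for GoogleTest-heavy files where most of a test body is EXPECT_*/ASSERT_* expansions. The following is a minimal sketch of the kind of test this affects; the test and suite names and the values are illustrative, not taken from RMM.

#include <gtest/gtest.h>

#include <stdexcept>

namespace {

// Each EXPECT_* below expands to branching code. With
// readability-function-cognitive-complexity.IgnoreMacros set to '1',
// clang-tidy no longer charges those expansions against this function's
// cognitive complexity, so macro-heavy tests stop tripping the check.
TEST(IgnoreMacrosExample, MacroHeavyTest)
{
  auto const value{42};
  EXPECT_EQ(value, 42);
  EXPECT_NE(value, 0);
  EXPECT_THROW(throw std::logic_error{"example"}, std::logic_error);
  EXPECT_NO_THROW(static_cast<void>(value + 1));
}

}  // namespace

A per-site NOLINTNEXTLINE(readability-function-cognitive-complexity) comment is the narrower alternative; the temporary googletest fork pinned above (tag fix-clang-tidy-nolint) appears to exist so that NOLINT-style suppressions survive expansion of the gtest macros.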
From 6143ffa15287666fe3845898d24443e17384808a Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 7 Sep 2021 10:47:00 +1000 Subject: [PATCH 16/72] Tidy cuda_async_mr_tests --- tests/mr/device/cuda_async_mr_tests.cpp | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/tests/mr/device/cuda_async_mr_tests.cpp b/tests/mr/device/cuda_async_mr_tests.cpp index a43f68fab..4bf0c3d5b 100644 --- a/tests/mr/device/cuda_async_mr_tests.cpp +++ b/tests/mr/device/cuda_async_mr_tests.cpp @@ -19,8 +19,7 @@ #include -namespace rmm { -namespace test { +namespace rmm::test { namespace { using cuda_async_mr = rmm::mr::cuda_async_memory_resource; @@ -38,24 +37,24 @@ TEST(PoolTest, ThrowIfNotSupported) #if defined(RMM_CUDA_MALLOC_ASYNC_SUPPORT) TEST(PoolTest, ExplicitInitialPoolSize) { - cuda_async_mr mr{100}; - void* p; - EXPECT_NO_THROW(p = mr.allocate(100)); - EXPECT_NO_THROW(mr.deallocate(p, 100)); + const auto pool_init_size{100}; + cuda_async_mr mr{pool_init_size}; + void* ptr = mr.allocate(pool_init_size); + mr.deallocate(ptr, pool_init_size); RMM_CUDA_TRY(cudaDeviceSynchronize()); } TEST(PoolTest, ExplicitReleaseThreshold) { - cuda_async_mr mr{100, 1000}; - void* p; - EXPECT_NO_THROW(p = mr.allocate(100)); - EXPECT_NO_THROW(mr.deallocate(p, 100)); + const auto pool_init_size{100}; + const auto pool_release_threshold{1000}; + cuda_async_mr mr{pool_init_size, pool_release_threshold}; + void* ptr = mr.allocate(pool_init_size); + mr.deallocate(ptr, pool_init_size); RMM_CUDA_TRY(cudaDeviceSynchronize()); } #endif } // namespace -} // namespace test -} // namespace rmm +} // namespace rmm::test From 2419b1c8e20a6e80637c00f59d88d5b70d6c0cf0 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 7 Sep 2021 11:23:34 +1000 Subject: [PATCH 17/72] Factor out byte_literals for reuse --- tests/byte_literals.hpp | 35 +++++++++++++++++++++++++++++++++++ tests/mr/device/mr_test.hpp | 12 +++--------- 2 files changed, 38 insertions(+), 9 deletions(-) create mode 100644 tests/byte_literals.hpp diff --git a/tests/byte_literals.hpp b/tests/byte_literals.hpp new file mode 100644 index 000000000..b22e98a1c --- /dev/null +++ b/tests/byte_literals.hpp @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +namespace rmm::test { + +constexpr auto kilo{long{1} << 10}; +constexpr auto mega{long{1} << 20}; +constexpr auto giga{long{1} << 30}; +constexpr auto tera{long{1} << 40}; +constexpr auto peta{long{1} << 50}; + +// user-defined Byte literals +constexpr unsigned long long operator""_B(unsigned long long val) { return val; } +constexpr unsigned long long operator""_KiB(unsigned long long const val) { return kilo * val; } +constexpr unsigned long long operator""_MiB(unsigned long long const val) { return mega * val; } +constexpr unsigned long long operator""_GiB(unsigned long long const val) { return giga * val; } +constexpr unsigned long long operator""_TiB(unsigned long long const val) { return tera * val; } +constexpr unsigned long long operator""_PiB(unsigned long long const val) { return peta * val; } + +} // namespace rmm::test diff --git a/tests/mr/device/mr_test.hpp b/tests/mr/device/mr_test.hpp index 27da69fbe..72387d71d 100644 --- a/tests/mr/device/mr_test.hpp +++ b/tests/mr/device/mr_test.hpp @@ -16,7 +16,7 @@ #pragma once -#include +#include "../../byte_literals.hpp" #include #include @@ -31,6 +31,8 @@ #include #include +#include + #include #include @@ -61,14 +63,6 @@ inline bool is_device_memory(void* p) #endif } -// some useful allocation sizes -constexpr long operator""_B(unsigned long long const x) { return x; } -constexpr long operator""_KiB(unsigned long long const x) { return x * (long{1} << 10); } -constexpr long operator""_MiB(unsigned long long const x) { return x * (long{1} << 20); } -constexpr long operator""_GiB(unsigned long long const x) { return x * (long{1} << 30); } -constexpr long operator""_TiB(unsigned long long const x) { return x * (long{1} << 40); } -constexpr long operator""_PiB(unsigned long long const x) { return x * (long{1} << 50); } - struct allocation { void* p{nullptr}; std::size_t size{0}; From 507176a0aeca7b9d2379477f456f5152ec38fcda Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 7 Sep 2021 11:23:47 +1000 Subject: [PATCH 18/72] tidy limiting_mr_tests --- tests/mr/device/limiting_mr_tests.cpp | 84 ++++++++++++++++----------- 1 file changed, 50 insertions(+), 34 deletions(-) diff --git a/tests/mr/device/limiting_mr_tests.cpp b/tests/mr/device/limiting_mr_tests.cpp index 64ec688be..3bc643abc 100644 --- a/tests/mr/device/limiting_mr_tests.cpp +++ b/tests/mr/device/limiting_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,65 +14,81 @@ * limitations under the License. 
*/ +#include "../../byte_literals.hpp" + #include #include #include #include -#include "mr_test.hpp" -namespace rmm { -namespace test { +namespace rmm::test { namespace { + using Limiting_adaptor = rmm::mr::limiting_resource_adaptor; + TEST(LimitingTest, ThrowOnNullUpstream) { - auto construct_nullptr = []() { Limiting_adaptor mr{nullptr, 5_MiB}; }; + auto const max_size{5_MiB}; + auto construct_nullptr = []() { Limiting_adaptor mr{nullptr, max_size}; }; EXPECT_THROW(construct_nullptr(), rmm::logic_error); } TEST(LimitingTest, TooBig) { - Limiting_adaptor mr{rmm::mr::get_current_device_resource(), 1_MiB}; - EXPECT_THROW(mr.allocate(5_MiB), rmm::bad_alloc); + auto const max_size{5_MiB}; + Limiting_adaptor mr{rmm::mr::get_current_device_resource(), max_size}; + EXPECT_THROW(mr.allocate(max_size + 1), rmm::bad_alloc); } TEST(LimitingTest, UnderLimitDueToFrees) { - Limiting_adaptor mr{rmm::mr::get_current_device_resource(), 10_MiB}; - auto p1 = mr.allocate(4_MiB); - EXPECT_EQ(mr.get_allocated_bytes(), 4_MiB); - EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), 6_MiB); - auto p2 = mr.allocate(4_MiB); - EXPECT_EQ(mr.get_allocated_bytes(), 8_MiB); - EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), 2_MiB); - mr.deallocate(p1, 4_MiB); - EXPECT_EQ(mr.get_allocated_bytes(), 4_MiB); - EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), 6_MiB); + auto const max_size{10_MiB}; + Limiting_adaptor mr{rmm::mr::get_current_device_resource(), max_size}; + auto const size1{4_MiB}; + auto* ptr1 = mr.allocate(size1); + auto allocated_bytes = size1; + EXPECT_EQ(mr.get_allocated_bytes(), allocated_bytes); + EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), max_size - allocated_bytes); + auto* ptr2 = mr.allocate(size1); + allocated_bytes += size1; + EXPECT_EQ(mr.get_allocated_bytes(), allocated_bytes); + EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), max_size - allocated_bytes); + mr.deallocate(ptr1, size1); + allocated_bytes -= size1; + EXPECT_EQ(mr.get_allocated_bytes(), allocated_bytes); + EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), max_size - allocated_bytes); // note that we don't keep track of fragmentation or things like page size // so this should fill 100% of the memory even though it is probably over. 
- EXPECT_NO_THROW(mr.allocate(6_MiB)); - EXPECT_EQ(mr.get_allocated_bytes(), 10_MiB); + auto const size2{6_MiB}; + auto* ptr3 = mr.allocate(size2); + allocated_bytes += size2; + EXPECT_EQ(mr.get_allocated_bytes(), allocated_bytes); EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), 0); - mr.deallocate(p2, 4_MiB); + mr.deallocate(ptr2, size1); + mr.deallocate(ptr3, size2); } TEST(LimitingTest, OverLimit) { - Limiting_adaptor mr{rmm::mr::get_current_device_resource(), 10_MiB}; - auto p1 = mr.allocate(4_MiB); - EXPECT_EQ(mr.get_allocated_bytes(), 4_MiB); - EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), 6_MiB); - auto p2 = mr.allocate(4_MiB); - EXPECT_EQ(mr.get_allocated_bytes(), 8_MiB); - EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), 2_MiB); - EXPECT_THROW(mr.allocate(3_MiB), rmm::bad_alloc); - EXPECT_EQ(mr.get_allocated_bytes(), 8_MiB); - EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), 2_MiB); - mr.deallocate(p1, 4_MiB); - mr.deallocate(p2, 4_MiB); + auto const max_size{10_MiB}; + Limiting_adaptor mr{rmm::mr::get_current_device_resource(), max_size}; + auto const size1{4_MiB}; + auto* ptr1 = mr.allocate(size1); + auto allocated_bytes = size1; + EXPECT_EQ(mr.get_allocated_bytes(), allocated_bytes); + EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), max_size - allocated_bytes); + auto* ptr2 = mr.allocate(size1); + allocated_bytes += size1; + EXPECT_EQ(mr.get_allocated_bytes(), allocated_bytes); + EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), max_size - allocated_bytes); + auto const size2{3_MiB}; + EXPECT_THROW(mr.allocate(size2), rmm::bad_alloc); + EXPECT_EQ(mr.get_allocated_bytes(), allocated_bytes); + EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), max_size - allocated_bytes); + mr.deallocate(ptr1, 4_MiB); + mr.deallocate(ptr2, 4_MiB); } } // namespace -} // namespace test -} // namespace rmm +} // namespace rmm::test From 3820f1bbe53d632152723fb0ce247213ee318e2c Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 7 Sep 2021 13:02:00 +1000 Subject: [PATCH 19/72] tidy logger_tests --- tests/logger_tests.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/logger_tests.cpp b/tests/logger_tests.cpp index 1da32a137..498a96722 100644 --- a/tests/logger_tests.cpp +++ b/tests/logger_tests.cpp @@ -220,8 +220,8 @@ TEST(Adaptor, STDOUT) auto const size{100}; - auto* p = log_mr.allocate(size); - log_mr.deallocate(p, size); + auto* ptr = log_mr.allocate(size); + log_mr.deallocate(ptr, size); std::string output = testing::internal::GetCapturedStdout(); std::string header = output.substr(0, output.find('\n')); @@ -238,8 +238,8 @@ TEST(Adaptor, STDERR) auto const size{100}; - auto* p = log_mr.allocate(size); - log_mr.deallocate(p, size); + auto* ptr = log_mr.allocate(size); + log_mr.deallocate(ptr, size); std::string output = testing::internal::GetCapturedStderr(); std::string header = output.substr(0, output.find('\n')); From 2580056f8c16821cf09e6f872a0264209702cb24 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 7 Sep 2021 13:57:56 +1000 Subject: [PATCH 20/72] tidy mr_tests --- include/rmm/detail/aligned.hpp | 11 ++ tests/mr/device/mr_multithreaded_tests.cpp | 53 +++++---- tests/mr/device/mr_test.hpp | 128 +++++++++++---------- tests/mr/device/mr_tests.cpp | 35 +++--- 4 files changed, 124 insertions(+), 103 deletions(-) diff --git a/include/rmm/detail/aligned.hpp b/include/rmm/detail/aligned.hpp index 8b15d5463..c949ad035 100644 --- 
a/include/rmm/detail/aligned.hpp +++ b/include/rmm/detail/aligned.hpp @@ -90,6 +90,12 @@ constexpr bool is_aligned(std::size_t value, std::size_t align_bytes) noexcept return value == align_down(value, align_bytes); } +inline bool is_pointer_aligned(void* ptr, std::size_t alignment = CUDA_ALLOCATION_ALIGNMENT) +{ + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + return rmm::detail::is_aligned(reinterpret_cast(ptr), alignment); +} + /** * @brief Allocates sufficient memory to satisfy the requested size `bytes` with * alignment `alignment` using the unary callable `alloc` to allocate memory. @@ -129,6 +135,7 @@ void* aligned_allocate(std::size_t bytes, std::size_t alignment, Alloc alloc) char* const original = static_cast(alloc(padded_allocation_size)); // account for storage of offset immediately prior to the aligned pointer + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) void* aligned{original + sizeof(std::ptrdiff_t)}; // std::align modifies `aligned` to point to the first aligned location @@ -138,6 +145,7 @@ void* aligned_allocate(std::size_t bytes, std::size_t alignment, Alloc alloc) std::ptrdiff_t offset = static_cast(aligned) - original; // Store the offset immediately before the aligned pointer + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) *(static_cast(aligned) - 1) = offset; return aligned; @@ -159,13 +167,16 @@ void* aligned_allocate(std::size_t bytes, std::size_t alignment, Alloc alloc) * @tparam Dealloc A unary callable type that deallocates memory. */ template +// NOLINTNEXTLINE(bugprone-easily-swappable-parameters) void aligned_deallocate(void* ptr, std::size_t bytes, std::size_t alignment, Dealloc dealloc) { (void)alignment; // Get offset from the location immediately prior to the aligned pointer + // NOLINTNEXTLINE std::ptrdiff_t const offset = *(reinterpret_cast(ptr) - 1); + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) void* const original = static_cast(ptr) - offset; dealloc(original); diff --git a/tests/mr/device/mr_multithreaded_tests.cpp b/tests/mr/device/mr_multithreaded_tests.cpp index 233686f7e..a80af38fe 100644 --- a/tests/mr/device/mr_multithreaded_tests.cpp +++ b/tests/mr/device/mr_multithreaded_tests.cpp @@ -28,8 +28,7 @@ #include #include -namespace rmm { -namespace test { +namespace rmm::test { namespace { struct mr_test_mt : public mr_test { @@ -49,11 +48,13 @@ void spawn_n(std::size_t num_threads, Task task, Arguments&&... 
args) { std::vector threads; threads.reserve(num_threads); - for (std::size_t i = 0; i < num_threads; ++i) + for (std::size_t i = 0; i < num_threads; ++i) { threads.emplace_back(std::thread(task, std::forward(args)...)); + } - for (auto& t : threads) - t.join(); + for (auto& thread : threads) { + thread.join(); + } } template @@ -102,7 +103,7 @@ TEST_P(mr_test_mt, SetCurrentDeviceResource_mt) TEST_P(mr_test_mt, SetCurrentDeviceResourcePerThread_mt) { - int num_devices; + int num_devices{}; RMM_CUDA_TRY(cudaGetDeviceCount(&num_devices)); std::vector threads; @@ -111,7 +112,7 @@ TEST_P(mr_test_mt, SetCurrentDeviceResourcePerThread_mt) threads.emplace_back(std::thread{ [mr = this->mr.get()](auto dev_id) { RMM_CUDA_TRY(cudaSetDevice(dev_id)); - rmm::mr::device_memory_resource* old; + rmm::mr::device_memory_resource* old{}; EXPECT_NO_THROW(old = rmm::mr::set_current_device_resource(mr)); EXPECT_NE(nullptr, old); // initial resource for this device should be CUDA mr @@ -129,8 +130,9 @@ TEST_P(mr_test_mt, SetCurrentDeviceResourcePerThread_mt) i}); } - for (auto& t : threads) - t.join(); + for (auto& thread : threads) { + thread.join(); + } } TEST_P(mr_test_mt, AllocateDefaultStream) @@ -145,22 +147,31 @@ TEST_P(mr_test_mt, AllocateOnStream) TEST_P(mr_test_mt, RandomAllocationsDefaultStream) { - spawn(test_random_allocations, this->mr.get(), 100, 5_MiB, rmm::cuda_stream_view{}); + spawn(test_random_allocations, + this->mr.get(), + default_num_allocations, + default_max_size, + rmm::cuda_stream_view{}); } TEST_P(mr_test_mt, RandomAllocationsStream) { - spawn(test_random_allocations, this->mr.get(), 100, 5_MiB, this->stream.view()); + spawn(test_random_allocations, + this->mr.get(), + default_num_allocations, + default_max_size, + this->stream.view()); } TEST_P(mr_test_mt, MixedRandomAllocationFreeDefaultStream) { - spawn(test_mixed_random_allocation_free, this->mr.get(), 5_MiB, rmm::cuda_stream_view{}); + spawn( + test_mixed_random_allocation_free, this->mr.get(), default_max_size, rmm::cuda_stream_view{}); } TEST_P(mr_test_mt, MixedRandomAllocationFreeStream) { - spawn(test_mixed_random_allocation_free, this->mr.get(), 5_MiB, this->stream.view()); + spawn(test_mixed_random_allocation_free, this->mr.get(), default_max_size, this->stream.view()); } void allocate_loop(rmm::mr::device_memory_resource* mr, @@ -193,14 +204,11 @@ void deallocate_loop(rmm::mr::device_memory_resource* mr, { for (std::size_t i = 0; i < num_allocations;) { std::lock_guard lock(mtx); - if (allocations.empty()) - continue; - else { - i++; - allocation alloc = allocations.front(); - allocations.pop_front(); - EXPECT_NO_THROW(mr->deallocate(alloc.p, alloc.size, stream)); - } + if (allocations.empty()) { continue; } + i++; + allocation alloc = allocations.front(); + allocations.pop_front(); + EXPECT_NO_THROW(mr->deallocate(alloc.ptr, alloc.size, stream)); } } @@ -250,5 +258,4 @@ TEST_P(mr_test_mt, AllocFreeDifferentThreadsDifferentStream) } } // namespace -} // namespace test -} // namespace rmm +} // namespace rmm::test diff --git a/tests/mr/device/mr_test.hpp b/tests/mr/device/mr_test.hpp index 72387d71d..35ff7d544 100644 --- a/tests/mr/device/mr_test.hpp +++ b/tests/mr/device/mr_test.hpp @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -39,23 +40,18 @@ #include #include #include +#include -namespace rmm { -namespace test { - -inline bool is_pointer_aligned(void* p, std::size_t alignment = 256) -{ - return (0 == reinterpret_cast(p) % alignment); -} +namespace rmm::test { /** * @brief Returns if a 
pointer points to a device memory or managed memory * allocation. */ -inline bool is_device_memory(void* p) +inline bool is_device_memory(void* ptr) { cudaPointerAttributes attributes{}; - if (cudaSuccess != cudaPointerGetAttributes(&attributes, p)) { return false; } + if (cudaSuccess != cudaPointerGetAttributes(&attributes, ptr)) { return false; } #if CUDART_VERSION < 10000 // memoryType is deprecated in CUDA 10 return attributes.memoryType == cudaMemoryTypeDevice; #else @@ -63,10 +59,15 @@ inline bool is_device_memory(void* p) #endif } +enum size_in_bytes : size_t {}; + +constexpr auto default_num_allocations{100}; +constexpr size_in_bytes default_max_size{5_MiB}; + struct allocation { - void* p{nullptr}; + void* ptr{nullptr}; std::size_t size{0}; - allocation(void* _p, std::size_t _size) : p{_p}, size{_size} {} + allocation(void* ptr, std::size_t size) : ptr{ptr}, size{size} {} allocation() = default; }; @@ -75,36 +76,33 @@ struct allocation { inline void test_get_current_device_resource() { EXPECT_NE(nullptr, rmm::mr::get_current_device_resource()); - void* p{nullptr}; - EXPECT_NO_THROW(p = rmm::mr::get_current_device_resource()->allocate(1_MiB)); - EXPECT_NE(nullptr, p); - EXPECT_TRUE(is_pointer_aligned(p)); - EXPECT_TRUE(is_device_memory(p)); - EXPECT_NO_THROW(rmm::mr::get_current_device_resource()->deallocate(p, 1_MiB)); + void* ptr = rmm::mr::get_current_device_resource()->allocate(1_MiB); + EXPECT_NE(nullptr, ptr); + EXPECT_TRUE(rmm::detail::is_pointer_aligned(ptr)); + EXPECT_TRUE(is_device_memory(ptr)); + rmm::mr::get_current_device_resource()->deallocate(ptr, 1_MiB); } inline void test_allocate(rmm::mr::device_memory_resource* mr, std::size_t bytes, cuda_stream_view stream = {}) { - void* p{nullptr}; - EXPECT_NO_THROW(p = mr->allocate(bytes)); - if (not stream.is_default()) stream.synchronize(); - EXPECT_NE(nullptr, p); - EXPECT_TRUE(is_pointer_aligned(p)); - EXPECT_TRUE(is_device_memory(p)); - EXPECT_NO_THROW(mr->deallocate(p, bytes)); - if (not stream.is_default()) stream.synchronize(); + void* ptr = mr->allocate(bytes); + if (not stream.is_default()) { stream.synchronize(); } + EXPECT_NE(nullptr, ptr); + EXPECT_TRUE(rmm::detail::is_pointer_aligned(ptr)); + EXPECT_TRUE(is_device_memory(ptr)); + mr->deallocate(ptr, bytes); + if (not stream.is_default()) { stream.synchronize(); } } inline void test_various_allocations(rmm::mr::device_memory_resource* mr, cuda_stream_view stream) { // test allocating zero bytes on non-default stream { - void* p{nullptr}; - EXPECT_NO_THROW(p = mr->allocate(0, stream)); + void* ptr = mr->allocate(0, stream); stream.synchronize(); - EXPECT_NO_THROW(mr->deallocate(p, 0, stream)); + EXPECT_NO_THROW(mr->deallocate(ptr, 0, stream)); stream.synchronize(); } @@ -115,15 +113,15 @@ inline void test_various_allocations(rmm::mr::device_memory_resource* mr, cuda_s // should fail to allocate too much { - void* p{nullptr}; - EXPECT_THROW(p = mr->allocate(1_PiB, stream), rmm::bad_alloc); - EXPECT_EQ(nullptr, p); + void* ptr{nullptr}; + EXPECT_THROW(ptr = mr->allocate(1_PiB, stream), rmm::bad_alloc); + EXPECT_EQ(nullptr, ptr); } } inline void test_random_allocations(rmm::mr::device_memory_resource* mr, - std::size_t num_allocations = 100, - std::size_t max_size = 5_MiB, + std::size_t num_allocations = default_num_allocations, + size_in_bytes max_size = default_max_size, cuda_stream_view stream = {}) { std::vector allocations(num_allocations); @@ -131,24 +129,25 @@ inline void test_random_allocations(rmm::mr::device_memory_resource* mr, std::default_random_engine 
generator; std::uniform_int_distribution distribution(1, max_size); - // 100 allocations from [0,5MB) - std::for_each( - allocations.begin(), allocations.end(), [&generator, &distribution, stream, mr](allocation& a) { - a.size = distribution(generator); - EXPECT_NO_THROW(a.p = mr->allocate(a.size, stream)); - if (not stream.is_default()) stream.synchronize(); - EXPECT_NE(nullptr, a.p); - EXPECT_TRUE(is_pointer_aligned(a.p)); - }); - - std::for_each(allocations.begin(), allocations.end(), [stream, mr](allocation& a) { - EXPECT_NO_THROW(mr->deallocate(a.p, a.size, stream)); - if (not stream.is_default()) stream.synchronize(); + // num_allocations allocations from [0,max_size) + std::for_each(allocations.begin(), + allocations.end(), + [&generator, &distribution, stream, mr](allocation& alloc) { + alloc.size = distribution(generator); + EXPECT_NO_THROW(alloc.ptr = mr->allocate(alloc.size, stream)); + if (not stream.is_default()) { stream.synchronize(); } + EXPECT_NE(nullptr, alloc.ptr); + EXPECT_TRUE(rmm::detail::is_pointer_aligned(alloc.ptr)); + }); + + std::for_each(allocations.begin(), allocations.end(), [stream, mr](allocation& alloc) { + EXPECT_NO_THROW(mr->deallocate(alloc.ptr, alloc.size, stream)); + if (not stream.is_default()) { stream.synchronize(); } }); } inline void test_mixed_random_allocation_free(rmm::mr::device_memory_resource* mr, - std::size_t max_size = 5_MiB, + size_in_bytes max_size = default_max_size, cuda_stream_view stream = {}) { std::default_random_engine generator; @@ -156,8 +155,9 @@ inline void test_mixed_random_allocation_free(rmm::mr::device_memory_resource* m std::uniform_int_distribution size_distribution(1, max_size); - constexpr int allocation_probability = 53; // percent - std::uniform_int_distribution op_distribution(0, 99); + constexpr int allocation_probability{53}; // percent + constexpr int max_probability{99}; + std::uniform_int_distribution op_distribution(0, max_probability); std::uniform_int_distribution index_distribution(0, num_allocations - 1); std::size_t active_allocations{0}; @@ -178,14 +178,14 @@ inline void test_mixed_random_allocation_free(rmm::mr::device_memory_resource* m allocation_count++; EXPECT_NO_THROW(allocations.emplace_back(mr->allocate(size, stream), size)); auto new_allocation = allocations.back(); - EXPECT_NE(nullptr, new_allocation.p); - EXPECT_TRUE(is_pointer_aligned(new_allocation.p)); + EXPECT_NE(nullptr, new_allocation.ptr); + EXPECT_TRUE(rmm::detail::is_pointer_aligned(new_allocation.ptr)); } else { - std::size_t index = index_distribution(generator) % active_allocations; + auto const index = static_cast(index_distribution(generator) % active_allocations); active_allocations--; allocation to_free = allocations[index]; allocations.erase(std::next(allocations.begin(), index)); - EXPECT_NO_THROW(mr->deallocate(to_free.p, to_free.size, stream)); + EXPECT_NO_THROW(mr->deallocate(to_free.ptr, to_free.size, stream)); } } @@ -197,18 +197,21 @@ using MRFactoryFunc = std::function { void SetUp() override { - auto factory = GetParam().f; + auto factory = GetParam().factory; mr = factory(); } @@ -243,9 +246,12 @@ inline auto make_binning() auto pool = make_pool(); // Add a binning_memory_resource with fixed-size bins of sizes 256, 512, 1024, 2048 and 4096KiB // Larger allocations will use the pool resource - auto mr = rmm::mr::make_owning_wrapper(pool, 18, 22); + auto const bin_range_start{18}; + auto const bin_range_end{22}; + + auto mr = rmm::mr::make_owning_wrapper( + pool, bin_range_start, bin_range_end); return mr; } -} // 
namespace test -} // namespace rmm +} // namespace rmm::test diff --git a/tests/mr/device/mr_tests.cpp b/tests/mr/device/mr_tests.cpp index 5a8ac02b1..7dbe225b9 100644 --- a/tests/mr/device/mr_tests.cpp +++ b/tests/mr/device/mr_tests.cpp @@ -20,21 +20,20 @@ #include -namespace rmm { -namespace test { +namespace rmm::test { namespace { -INSTANTIATE_TEST_CASE_P(ResourceTests, - mr_test, - ::testing::Values(mr_factory{"CUDA", &make_cuda}, +INSTANTIATE_TEST_SUITE_P(ResourceTests, + mr_test, + ::testing::Values(mr_factory{"CUDA", &make_cuda}, #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT - mr_factory{"CUDA_Async", &make_cuda_async}, + mr_factory{"CUDA_Async", &make_cuda_async}, #endif - mr_factory{"Managed", &make_managed}, - mr_factory{"Pool", &make_pool}, - mr_factory{"Arena", &make_arena}, - mr_factory{"Binning", &make_binning}), - [](auto const& info) { return info.param.name; }); + mr_factory{"Managed", &make_managed}, + mr_factory{"Pool", &make_pool}, + mr_factory{"Arena", &make_arena}, + mr_factory{"Binning", &make_binning}), + [](auto const& info) { return info.param.name; }); TEST(DefaultTest, CurrentDeviceResourceIsCUDA) { @@ -46,8 +45,7 @@ TEST(DefaultTest, UseCurrentDeviceResource) { test_get_current_device_resource() TEST(DefaultTest, GetCurrentDeviceResource) { - rmm::mr::device_memory_resource* mr; - EXPECT_NO_THROW(mr = rmm::mr::get_current_device_resource()); + auto* mr = rmm::mr::get_current_device_resource(); EXPECT_NE(nullptr, mr); EXPECT_TRUE(mr->is_equal(rmm::mr::cuda_memory_resource{})); } @@ -84,17 +82,17 @@ TEST_P(mr_test, RandomAllocations) { test_random_allocations(this->mr.get()); } TEST_P(mr_test, RandomAllocationsStream) { - test_random_allocations(this->mr.get(), 100, 5_MiB, this->stream); + test_random_allocations(this->mr.get(), default_num_allocations, default_max_size, this->stream); } TEST_P(mr_test, MixedRandomAllocationFree) { - test_mixed_random_allocation_free(this->mr.get(), 5_MiB, cuda_stream_view{}); + test_mixed_random_allocation_free(this->mr.get(), default_max_size, cuda_stream_view{}); } TEST_P(mr_test, MixedRandomAllocationFreeStream) { - test_mixed_random_allocation_free(this->mr.get(), 5_MiB, this->stream); + test_mixed_random_allocation_free(this->mr.get(), default_max_size, this->stream); } TEST_P(mr_test, GetMemInfo) @@ -102,7 +100,7 @@ TEST_P(mr_test, GetMemInfo) if (this->mr->supports_get_mem_info()) { std::pair mem_info; EXPECT_NO_THROW(mem_info = this->mr->get_mem_info(rmm::cuda_stream_view{})); - std::size_t allocation_size = 16 * 256; + const auto allocation_size{16 * 256}; void* ptr{nullptr}; EXPECT_NO_THROW(ptr = this->mr->allocate(allocation_size)); EXPECT_NO_THROW(mem_info = this->mr->get_mem_info(rmm::cuda_stream_view{})); @@ -111,5 +109,4 @@ TEST_P(mr_test, GetMemInfo) } } } // namespace -} // namespace test -} // namespace rmm +} // namespace rmm::test From 633261a0c88830cc3bfd519f3fb9f2efe15f0363 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 7 Sep 2021 13:58:13 +1000 Subject: [PATCH 21/72] Fix device_scalar_test hang --- include/rmm/device_scalar.hpp | 4 ++-- tests/device_scalar_tests.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/rmm/device_scalar.hpp b/include/rmm/device_scalar.hpp index f44ba1c28..f48aba84b 100644 --- a/include/rmm/device_scalar.hpp +++ b/include/rmm/device_scalar.hpp @@ -47,9 +47,9 @@ class device_scalar { ~device_scalar() = default; RMM_EXEC_CHECK_DISABLE - device_scalar(device_scalar&&) = default; + device_scalar(device_scalar&&) noexcept = default; - device_scalar& 
operator=(device_scalar&&) = default; + device_scalar& operator=(device_scalar&&) noexcept = default; /** * @brief Copy ctor is deleted as it doesn't allow a stream argument diff --git a/tests/device_scalar_tests.cpp b/tests/device_scalar_tests.cpp index f58655951..65f963132 100644 --- a/tests/device_scalar_tests.cpp +++ b/tests/device_scalar_tests.cpp @@ -30,10 +30,10 @@ template struct DeviceScalarTest : public ::testing::Test { + std::default_random_engine generator{}; T value{}; rmm::cuda_stream stream{}; rmm::mr::device_memory_resource* mr{rmm::mr::get_current_device_resource()}; - std::default_random_engine generator{}; DeviceScalarTest() : value{random_value()} {} From 0390808482f2f98ba5f031000c2ba170f5b4ebf5 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 7 Sep 2021 13:58:22 +1000 Subject: [PATCH 22/72] Fix arena.hpp debug build --- include/rmm/mr/device/detail/arena.hpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index f99c6bf97..6795fb3bd 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -98,12 +98,12 @@ class block { /** * @brief Split this block into two by the given size. * - * @param sz The size in bytes of the first block. - * @return std::pair A pair of blocks split by sz. + * @param size The size in bytes of the first block. + * @return std::pair A pair of blocks split by size. */ [[nodiscard]] std::pair split(std::size_t size) const { - RMM_LOGGING_ASSERT(size_ >= sz); + RMM_LOGGING_ASSERT(size_ >= size); if (size_ > size) { return {{pointer_, size}, {pointer_ + size, size_ - size}}; } return {*this, {}}; } @@ -113,12 +113,12 @@ class block { * * `this->is_contiguous_before(b)` must be true. * - * @param b block to merge. + * @param blk block to merge. * @return block The merged block. 
*/ [[nodiscard]] block merge(block const& blk) const { - RMM_LOGGING_ASSERT(is_contiguous_before(b)); + RMM_LOGGING_ASSERT(is_contiguous_before(blk)); return {pointer_, size_ + blk.size_}; } From 973bc998acf97cdc7638c4ae39dfe6415f969bf2 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 7 Sep 2021 14:01:34 +1000 Subject: [PATCH 23/72] tidy device_scalar --- include/rmm/device_scalar.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/rmm/device_scalar.hpp b/include/rmm/device_scalar.hpp index f48aba84b..ff8461599 100644 --- a/include/rmm/device_scalar.hpp +++ b/include/rmm/device_scalar.hpp @@ -186,9 +186,9 @@ class device_scalar { * @param v The host value which will be copied to device * @param stream CUDA stream on which to perform the copy */ - void set_value_async(value_type const& v, cuda_stream_view s) + void set_value_async(value_type const& value, cuda_stream_view stream) { - _storage.set_element_async(0, v, s); + _storage.set_element_async(0, value, stream); } // Disallow passing literals to set_value to avoid race conditions where the memory holding the @@ -209,9 +209,9 @@ class device_scalar { * * @param stream CUDA stream on which to perform the copy */ - void set_value_to_zero_async(cuda_stream_view s) + void set_value_to_zero_async(cuda_stream_view stream) { - _storage.set_element_to_zero_async(value_type{0}, s); + _storage.set_element_to_zero_async(value_type{0}, stream); } /** From c2402b6508b53c57d2f93a62d69feb3e08da2aaa Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 7 Sep 2021 14:06:47 +1000 Subject: [PATCH 24/72] suppress pointer arith warnings --- include/rmm/mr/device/detail/arena.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index 6795fb3bd..29da10c28 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -84,6 +84,7 @@ class block { */ [[nodiscard]] bool is_contiguous_before(block const& blk) const { + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) return pointer_ + size_ == blk.pointer_; } @@ -104,6 +105,7 @@ class block { [[nodiscard]] std::pair split(std::size_t size) const { RMM_LOGGING_ASSERT(size_ >= size); + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) if (size_ > size) { return {{pointer_, size}, {pointer_ + size, size_ - size}}; } return {*this, {}}; } From 27fbde47c39df0f487855594407e5087156df5d0 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 7 Sep 2021 14:12:53 +1000 Subject: [PATCH 25/72] tidy device_buffer --- include/rmm/device_buffer.hpp | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/include/rmm/device_buffer.hpp b/include/rmm/device_buffer.hpp index f034b28fe..ee8e4e927 100644 --- a/include/rmm/device_buffer.hpp +++ b/include/rmm/device_buffer.hpp @@ -88,10 +88,7 @@ class device_buffer { // `__host__ __device__` specifiers to the defaulted constructor when it is called within the // context of both host and device functions. Specifically, the `cudf::type_dispatcher` is a host- // device function. This causes warnings/errors because this ctor invokes host-only functions. 
- device_buffer() - : _data{nullptr}, _size{}, _capacity{}, _stream{}, _mr{rmm::mr::get_current_device_resource()} - { - } + device_buffer() : _mr{rmm::mr::get_current_device_resource()} {} /** * @brief Constructs a new device buffer of `size` uninitialized bytes @@ -310,7 +307,7 @@ class device_buffer { /** * @brief Returns raw pointer to underlying device memory allocation */ - void const* data() const noexcept { return _data; } + [[nodiscard]] void const* data() const noexcept { return _data; } /** * @brief Returns raw pointer to underlying device memory allocation @@ -321,7 +318,7 @@ class device_buffer { * @brief Returns size in bytes that was requested for the device memory * allocation */ - std::size_t size() const noexcept { return _size; } + [[nodiscard]] std::size_t size() const noexcept { return _size; } /** * @brief Returns whether the size in bytes of the `device_buffer` is zero. @@ -330,19 +327,19 @@ class device_buffer { * if `capacity() > 0`. * */ - bool is_empty() const noexcept { return 0 == size(); } + [[nodiscard]] bool is_empty() const noexcept { return 0 == size(); } /** * @brief Returns actual size in bytes of device memory allocation. * * The invariant `size() <= capacity()` holds. */ - std::size_t capacity() const noexcept { return _capacity; } + [[nodiscard]] std::size_t capacity() const noexcept { return _capacity; } /** * @brief Returns stream most recently specified for allocation/deallocation */ - cuda_stream_view stream() const noexcept { return _stream; } + [[nodiscard]] cuda_stream_view stream() const noexcept { return _stream; } /** * @brief Sets the stream to be used for deallocation @@ -360,7 +357,7 @@ class device_buffer { * @brief Returns pointer to the memory resource used to allocate and * deallocate the device memory */ - mr::device_memory_resource* memory_resource() const noexcept { return _mr; } + [[nodiscard]] mr::device_memory_resource* memory_resource() const noexcept { return _mr; } private: void* _data{nullptr}; ///< Pointer to device memory allocation From c47674208170aec50a043634c15a46ee0e2a20dd Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 7 Sep 2021 14:26:06 +1000 Subject: [PATCH 26/72] Suppress owning-memory warnings --- include/rmm/cuda_stream.hpp | 8 ++++---- include/rmm/thrust_rmm_allocator.h | 7 ++++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/include/rmm/cuda_stream.hpp b/include/rmm/cuda_stream.hpp index 6a3304e2c..185cd049e 100644 --- a/include/rmm/cuda_stream.hpp +++ b/include/rmm/cuda_stream.hpp @@ -57,13 +57,13 @@ class cuda_stream { */ cuda_stream() : stream_{[]() { - auto* s = new cudaStream_t; - RMM_CUDA_TRY(cudaStreamCreate(s)); - return s; + auto* stream = new cudaStream_t; // NOLINT(cppcoreguidelines-owning-memory) + RMM_CUDA_TRY(cudaStreamCreate(stream)); + return stream; }(), [](cudaStream_t* stream) { RMM_ASSERT_CUDA_SUCCESS(cudaStreamDestroy(*stream)); - delete stream; + delete stream; // NOLINT(cppcoreguidelines-owning-memory) }} { } diff --git a/include/rmm/thrust_rmm_allocator.h b/include/rmm/thrust_rmm_allocator.h index 889faa3bd..894f402a1 100644 --- a/include/rmm/thrust_rmm_allocator.h +++ b/include/rmm/thrust_rmm_allocator.h @@ -38,12 +38,13 @@ using exec_policy_t = std::unique_ptr; * allocation. 
*/ [[deprecated("Use new exec_policy in rmm/exec_policy.hpp")]] inline exec_policy_t exec_policy( - cudaStream_t stream = 0) + cudaStream_t stream = nullptr) { + // NOLINTNEXTLINE(cppcoreguidelines-owning-memory) auto* alloc = new rmm::mr::thrust_allocator(cuda_stream_view{stream}); auto deleter = [alloc](par_t* pointer) { - delete alloc; - delete pointer; + delete alloc; // NOLINT(cppcoreguidelines-owning-memory) + delete pointer; // NOLINT(cppcoreguidelines-owning-memory) }; exec_policy_t policy{new par_t(*alloc), deleter}; From a1162b13a3b0ee4db96b5641fa62d9afc5ff12fb Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 7 Sep 2021 14:30:23 +1000 Subject: [PATCH 27/72] NOLINT macro parentheses --- include/rmm/detail/error.hpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/include/rmm/detail/error.hpp b/include/rmm/detail/error.hpp index 7c052ea8d..1f550f75e 100644 --- a/include/rmm/detail/error.hpp +++ b/include/rmm/detail/error.hpp @@ -100,10 +100,11 @@ class out_of_range : public std::out_of_range { GET_RMM_EXPECTS_MACRO(__VA_ARGS__, RMM_EXPECTS_3, RMM_EXPECTS_2) \ (__VA_ARGS__) #define GET_RMM_EXPECTS_MACRO(_1, _2, _3, NAME, ...) NAME -#define RMM_EXPECTS_3(_condition, _exception_type, _reason) \ - (!!(_condition)) ? static_cast(0) : throw _exception_type \ - { \ - "RMM failure at: " __FILE__ ":" RMM_STRINGIFY(__LINE__) ": " _reason \ +#define RMM_EXPECTS_3(_condition, _exception_type, _reason) \ + (!!(_condition)) ? static_cast(0) \ + : throw _exception_type /*NOLINT(bugprone-macro-parentheses)*/ \ + { \ + "RMM failure at: " __FILE__ ":" RMM_STRINGIFY(__LINE__) ": " _reason \ } #define RMM_EXPECTS_2(_condition, _reason) RMM_EXPECTS_3(_condition, rmm::logic_error, _reason) @@ -123,7 +124,8 @@ class out_of_range : public std::out_of_range { GET_RMM_FAIL_MACRO(__VA_ARGS__, RMM_FAIL_2, RMM_FAIL_1) \ (__VA_ARGS__) #define GET_RMM_FAIL_MACRO(_1, _2, NAME, ...) 
NAME -#define RMM_FAIL_2(_what, _exception_type) \ +#define RMM_FAIL_2(_what, _exception_type) \ + /*NOLINTNEXTLINE(bugprone-macro-parentheses)*/ \ throw _exception_type{"RMM failure at:" __FILE__ ":" RMM_STRINGIFY(__LINE__) ": " _what}; #define RMM_FAIL_1(_what) RMM_FAIL_2(_what, rmm::logic_error) @@ -157,6 +159,7 @@ class out_of_range : public std::out_of_range { cudaError_t const error = (_call); \ if (cudaSuccess != error) { \ cudaGetLastError(); \ + /*NOLINTNEXTLINE(bugprone-macro-parentheses)*/ \ throw _exception_type{std::string{"CUDA error at: "} + __FILE__ + ":" + \ RMM_STRINGIFY(__LINE__) + ": " + cudaGetErrorName(error) + " " + \ cudaGetErrorString(error)}; \ From 77fdc940f76714e871bd678a6a24fad3096f4b6a Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 7 Sep 2021 14:59:18 +1000 Subject: [PATCH 28/72] tidy free lists --- .../mr/device/detail/coalescing_free_list.hpp | 60 +++++++++---------- .../mr/device/detail/fixed_size_free_list.hpp | 27 ++++----- include/rmm/mr/device/detail/free_list.hpp | 28 ++++----- 3 files changed, 51 insertions(+), 64 deletions(-) diff --git a/include/rmm/mr/device/detail/coalescing_free_list.hpp b/include/rmm/mr/device/detail/coalescing_free_list.hpp index d0c0f399e..a32469c73 100644 --- a/include/rmm/mr/device/detail/coalescing_free_list.hpp +++ b/include/rmm/mr/device/detail/coalescing_free_list.hpp @@ -25,9 +25,7 @@ #include #include -namespace rmm { -namespace mr { -namespace detail { +namespace rmm::mr::detail { /** * @brief A simple block structure specifying the size and location of a block @@ -46,14 +44,14 @@ struct block : public block_base { * * @return the pointer to the memory represented by this block. */ - inline char* pointer() const { return static_cast(ptr); } + [[nodiscard]] inline char* pointer() const { return static_cast(ptr); } /** * @brief Returns the size of the memory represented by this block. * * @return the size in bytes of the memory represented by this block. */ - inline std::size_t size() const { return size_bytes; } + [[nodiscard]] inline std::size_t size() const { return size_bytes; } /** * @brief Returns whether this block is the start of an allocation from an upstream allocator. @@ -62,7 +60,7 @@ struct block : public block_base { * * @return true if this block is the start of an allocation from an upstream allocator. */ - inline bool is_head() const { return head; } + [[nodiscard]] inline bool is_head() const { return head; } /** * @brief Comparison operator to enable comparing blocks and storing in ordered containers. @@ -84,10 +82,10 @@ struct block : public block_base { * @param b block to merge * @return block The merged block */ - inline block merge(block const& b) const noexcept + [[nodiscard]] inline block merge(block const& blk) const noexcept { assert(is_contiguous_before(b)); - return block(pointer(), size() + b.size(), is_head()); + return {pointer(), size() + blk.size(), is_head()}; } /** @@ -97,9 +95,10 @@ struct block : public block_base { * @return true Returns true if this blocks's `ptr` + `size` == `b.ptr`, and `not b.is_head`, false otherwise. */ - inline bool is_contiguous_before(block const& b) const noexcept + [[nodiscard]] inline bool is_contiguous_before(block const& blk) const noexcept { - return (pointer() + size() == b.ptr) and not(b.is_head()); + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + return (pointer() + size() == blk.ptr) and not(blk.is_head()); } /** @@ -108,7 +107,7 @@ struct block : public block_base { * @param sz The size in bytes to check for fit. 
* @return true if this block is at least `sz` bytes */ - inline bool fits(std::size_t sz) const noexcept { return size() >= sz; } + [[nodiscard]] inline bool fits(std::size_t bytes) const noexcept { return size() >= bytes; } /** * @brief Is this block a better fit for `sz` bytes than block `b`? @@ -118,9 +117,9 @@ struct block : public block_base { * @return true If this block is a tighter fit for `sz` bytes than block `b`. * @return false If this block does not fit `sz` bytes or `b` is a tighter fit. */ - inline bool is_better_fit(std::size_t sz, block const& b) const noexcept + [[nodiscard]] inline bool is_better_fit(std::size_t bytes, block const& blk) const noexcept { - return fits(sz) && (size() < b.size() || b.size() < sz); + return fits(bytes) && (size() < blk.size() || blk.size() < bytes); } /** @@ -128,7 +127,7 @@ struct block : public block_base { */ inline void print() const { - std::cout << reinterpret_cast(pointer()) << " " << size() << " B\n"; + std::cout << fmt::format("{} {} B", fmt::ptr(pointer()), size()) << std::endl; } private: @@ -137,9 +136,9 @@ struct block : public block_base { }; /// Print block on an ostream -inline std::ostream& operator<<(std::ostream& out, const block& b) +inline std::ostream& operator<<(std::ostream& out, const block& blk) { - out << b.pointer() << " " << b.size() << " B\n"; + out << fmt::format("{} {} B\n", fmt::ptr(blk.pointer()), blk.size()); return out; } @@ -166,8 +165,8 @@ struct compare_blocks { * @tparam list_type the type of the internal list data structure. */ struct coalescing_free_list : free_list { - coalescing_free_list() = default; - ~coalescing_free_list() = default; + coalescing_free_list() = default; + ~coalescing_free_list() override = default; coalescing_free_list(coalescing_free_list const&) = delete; coalescing_free_list& operator=(coalescing_free_list const&) = delete; @@ -180,31 +179,32 @@ struct coalescing_free_list : free_list { * * @param b The block to insert. */ - void insert(block_type const& b) + void insert(block_type const& block) { if (is_empty()) { - free_list::insert(cend(), b); + free_list::insert(cend(), block); return; } // Find the right place (in ascending ptr order) to insert the block // Can't use binary_search because it's a linked list and will be quadratic - auto const next = std::find_if(begin(), end(), [b](block_type const& i) { return b < i; }); + auto const next = + std::find_if(begin(), end(), [block](block_type const& blk) { return block < blk; }); auto const previous = (next == cbegin()) ? 
next : std::prev(next); // Coalesce with neighboring blocks or insert the new block if it can't be coalesced - bool const merge_prev = previous->is_contiguous_before(b); - bool const merge_next = (next != cend()) && b.is_contiguous_before(*next); + bool const merge_prev = previous->is_contiguous_before(block); + bool const merge_next = (next != cend()) && block.is_contiguous_before(*next); if (merge_prev && merge_next) { - *previous = previous->merge(b).merge(*next); + *previous = previous->merge(block).merge(*next); erase(next); } else if (merge_prev) { - *previous = previous->merge(b); + *previous = previous->merge(block); } else if (merge_next) { - *next = b.merge(*next); + *next = block.merge(*next); } else { - free_list::insert(next, b); // cannot be coalesced, just insert + free_list::insert(next, block); // cannot be coalesced, just insert } } @@ -220,7 +220,7 @@ struct coalescing_free_list : free_list { { std::for_each(std::make_move_iterator(other.begin()), std::make_move_iterator(other.end()), - [this](block_type&& b) { this->insert(std::move(b)); }); + [this](block_type&& block) { this->insert(block); }); } /** @@ -259,6 +259,4 @@ struct coalescing_free_list : free_list { } }; // coalescing_free_list -} // namespace detail -} // namespace mr -} // namespace rmm +} // namespace rmm::mr::detail diff --git a/include/rmm/mr/device/detail/fixed_size_free_list.hpp b/include/rmm/mr/device/detail/fixed_size_free_list.hpp index a7794c9b2..4d2f7253f 100644 --- a/include/rmm/mr/device/detail/fixed_size_free_list.hpp +++ b/include/rmm/mr/device/detail/fixed_size_free_list.hpp @@ -21,13 +21,11 @@ #include #include -namespace rmm { -namespace mr { -namespace detail { +namespace rmm::mr::detail { struct fixed_size_free_list : free_list { - fixed_size_free_list() = default; - ~fixed_size_free_list() = default; + fixed_size_free_list() = default; + ~fixed_size_free_list() override = default; fixed_size_free_list(fixed_size_free_list const&) = delete; fixed_size_free_list& operator=(fixed_size_free_list const&) = delete; @@ -44,7 +42,7 @@ struct fixed_size_free_list : free_list { template fixed_size_free_list(InputIt first, InputIt last) { - std::for_each(first, last, [this](block_type const& b) { insert(b); }); + std::for_each(first, last, [this](block_type const& block) { insert(block); }); } /** @@ -53,7 +51,7 @@ struct fixed_size_free_list : free_list { * * @param b The block to insert. */ - void insert(block_type const& b) { push_back(b); } + void insert(block_type const& block) { push_back(block); } /** * @brief Splices blocks from range `[first, last)` onto the free_list. 
@@ -71,16 +69,11 @@ struct fixed_size_free_list : free_list { */ block_type get_block(std::size_t size) { - if (is_empty()) - return block_type{}; - else { - block_type b = *begin(); - pop_front(); - return b; - } + if (is_empty()) { return block_type{}; } + block_type block = *begin(); + pop_front(); + return block; } }; -} // namespace detail -} // namespace mr -} // namespace rmm +} // namespace rmm::mr::detail diff --git a/include/rmm/mr/device/detail/free_list.hpp b/include/rmm/mr/device/detail/free_list.hpp index e6f4effc0..6f302e7b9 100644 --- a/include/rmm/mr/device/detail/free_list.hpp +++ b/include/rmm/mr/device/detail/free_list.hpp @@ -20,25 +20,23 @@ #include #include -namespace rmm { -namespace mr { -namespace detail { +namespace rmm::mr::detail { struct block_base { void* ptr{}; ///< Raw memory pointer /// Returns the raw pointer for this block - inline void* pointer() const { return ptr; } + [[nodiscard]] inline void* pointer() const { return ptr; } /// Returns true if this block is valid (non-null), false otherwise - inline bool is_valid() const { return pointer() != nullptr; } + [[nodiscard]] inline bool is_valid() const { return pointer() != nullptr; } /// Prints the block to stdout inline void print() const { std::cout << pointer(); } }; /// Print block_base on an ostream -inline std::ostream& operator<<(std::ostream& out, const block_base& b) +inline std::ostream& operator<<(std::ostream& out, const block_base& block) { - out << b.pointer(); + out << block.pointer(); return out; } @@ -93,7 +91,7 @@ class free_list { * @return true If there are blocks in the free_list. * @return false If there are no blocks in the free_list. */ - bool is_empty() const noexcept { return blocks.empty(); } + [[nodiscard]] bool is_empty() const noexcept { return blocks.empty(); } /** * @brief Removes the block indicated by `iter` from the free list. @@ -114,8 +112,8 @@ class free_list { void print() const { std::cout << size() << std::endl; - for (auto const& b : blocks) { - std::cout << b << std::endl; + for (auto const& block : blocks) { + std::cout << block << std::endl; } } @@ -126,7 +124,7 @@ class free_list { * @param pos iterator before which the block will be inserted. pos may be the end() iterator. * @param b The block to insert. */ - void insert(const_iterator pos, block_type const& b) { blocks.insert(pos, b); } + void insert(const_iterator pos, block_type const& block) { blocks.insert(pos, block); } /** * @brief Inserts a list of blocks in the free list before the specified position @@ -144,14 +142,14 @@ class free_list { * * @param b The block to append. */ - void push_back(const block_type& b) { blocks.push_back(b); } + void push_back(const block_type& block) { blocks.push_back(block); } /** * @brief Appends the given block to the end of the free list. `b` is moved to the new element. * * @param b The block to append. */ - void push_back(block_type&& b) { blocks.push_back(std::move(b)); } + void push_back(block_type&& block) { blocks.push_back(std::move(block)); } /** * @brief Removes the first element of the free list. 
If there are no elements in the free list, @@ -165,6 +163,4 @@ class free_list { list_type blocks; // The internal container of blocks }; -} // namespace detail -} // namespace mr -} // namespace rmm +} // namespace rmm::mr::detail From c54c513a9a16b63dcdc9f19090be6027b6f4bdec Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 7 Sep 2021 15:27:09 +1000 Subject: [PATCH 29/72] tidy stream_ordered_memory_resource and free_lists --- .../mr/device/detail/coalescing_free_list.hpp | 29 +++-- .../mr/device/detail/fixed_size_free_list.hpp | 9 +- include/rmm/mr/device/detail/free_list.hpp | 8 +- .../detail/stream_ordered_memory_resource.hpp | 105 ++++++++++-------- 4 files changed, 79 insertions(+), 72 deletions(-) diff --git a/include/rmm/mr/device/detail/coalescing_free_list.hpp b/include/rmm/mr/device/detail/coalescing_free_list.hpp index a32469c73..bbdd98ec9 100644 --- a/include/rmm/mr/device/detail/coalescing_free_list.hpp +++ b/include/rmm/mr/device/detail/coalescing_free_list.hpp @@ -79,8 +79,8 @@ struct block : public block_base { * `this` must immediately precede `b` and both `this` and `b` must be from the same upstream * allocation. That is, `this->is_contiguous_before(b)`. Otherwise behavior is undefined. * - * @param b block to merge - * @return block The merged block + * @param blk block to merge + * @return The merged block */ [[nodiscard]] inline block merge(block const& blk) const noexcept { @@ -91,9 +91,9 @@ struct block : public block_base { /** * @brief Verifies whether this block can be merged to the beginning of block b. * - * @param b The block to check for contiguity. - * @return true Returns true if this blocks's `ptr` + `size` == `b.ptr`, and `not b.is_head`, - false otherwise. + * @param blk The block to check for contiguity. + * @return Returns true if this blocks's `ptr` + `size` == `b.ptr`, and `not b.is_head`, + false otherwise. */ [[nodiscard]] inline bool is_contiguous_before(block const& blk) const noexcept { @@ -104,18 +104,18 @@ struct block : public block_base { /** * @brief Is this block large enough to fit `sz` bytes? * - * @param sz The size in bytes to check for fit. - * @return true if this block is at least `sz` bytes + * @param bytes The size in bytes to check for fit. + * @return true if this block is at least `bytes` bytes */ [[nodiscard]] inline bool fits(std::size_t bytes) const noexcept { return size() >= bytes; } /** * @brief Is this block a better fit for `sz` bytes than block `b`? * - * @param sz The size in bytes to check for best fit. - * @param b The other block to check for fit. - * @return true If this block is a tighter fit for `sz` bytes than block `b`. - * @return false If this block does not fit `sz` bytes or `b` is a tighter fit. + * @param bytes The size in bytes to check for best fit. + * @param blk The other block to check for fit. + * @return true If this block is a tighter fit for `bytes` bytes than block `blk`. + * @return false If this block does not fit `bytes` bytes or `blk` is a tighter fit. */ [[nodiscard]] inline bool is_better_fit(std::size_t bytes, block const& blk) const noexcept { @@ -209,12 +209,11 @@ struct coalescing_free_list : free_list { } /** - * @brief Moves blocks from range `[first, last)` into the free_list in their correct order, + * @brief Moves blocks from free_list `other` into this free_list in their correct order, * coalescing them with their preceding and following blocks if they are contiguous. 
* * @tparam InputIt iterator type - * @param first The beginning of the range of blocks to insert - * @param last The end of the range of blocks to insert. + * @param other free_list of blocks to insert */ void insert(free_list&& other) { @@ -229,7 +228,7 @@ struct coalescing_free_list : free_list { * This is a "best fit" search. * * @param size The size in bytes of the desired block. - * @return block A block large enough to store `size` bytes. + * @return A block large enough to store `size` bytes. */ block_type get_block(std::size_t size) { diff --git a/include/rmm/mr/device/detail/fixed_size_free_list.hpp b/include/rmm/mr/device/detail/fixed_size_free_list.hpp index 4d2f7253f..1ca1656b0 100644 --- a/include/rmm/mr/device/detail/fixed_size_free_list.hpp +++ b/include/rmm/mr/device/detail/fixed_size_free_list.hpp @@ -49,15 +49,14 @@ struct fixed_size_free_list : free_list { * @brief Inserts a block into the `free_list` in the correct order, coalescing it with the * preceding and following blocks if either is contiguous. * - * @param b The block to insert. + * @param block The block to insert. */ void insert(block_type const& block) { push_back(block); } /** - * @brief Splices blocks from range `[first, last)` onto the free_list. + * @brief Inserts blocks from another free list into this free_list. * - * @param first The beginning of the range of blocks to insert - * @param last The end of the range of blocks to insert. + * @param other The free_list to insert into this free_list. */ void insert(free_list&& other) { splice(cend(), std::move(other)); } @@ -65,7 +64,7 @@ struct fixed_size_free_list : free_list { * @brief Returns the first block in the free list. * * @param size The size in bytes of the desired block (unused). - * @return block A block large enough to store `size` bytes. + * @return A block large enough to store `size` bytes. */ block_type get_block(std::size_t size) { diff --git a/include/rmm/mr/device/detail/free_list.hpp b/include/rmm/mr/device/detail/free_list.hpp index 6f302e7b9..f249c2f25 100644 --- a/include/rmm/mr/device/detail/free_list.hpp +++ b/include/rmm/mr/device/detail/free_list.hpp @@ -122,7 +122,7 @@ class free_list { * @brief Insert a block in the free list before the specified position * * @param pos iterator before which the block will be inserted. pos may be the end() iterator. - * @param b The block to insert. + * @param block The block to insert. */ void insert(const_iterator pos, block_type const& block) { blocks.insert(pos, block); } @@ -130,7 +130,7 @@ class free_list { * @brief Inserts a list of blocks in the free list before the specified position * * @param pos iterator before which the block will be inserted. pos may be the end() iterator. - * @param b The block to insert. + * @param other The free list to insert. */ void splice(const_iterator pos, free_list&& other) { @@ -140,14 +140,14 @@ class free_list { /** * @brief Appends the given block to the end of the free list. * - * @param b The block to append. + * @param block The block to append. */ void push_back(const block_type& block) { blocks.push_back(block); } /** * @brief Appends the given block to the end of the free list. `b` is moved to the new element. * - * @param b The block to append. + * @param block The block to append. 
*/ void push_back(block_type&& block) { blocks.push_back(std::move(block)); } diff --git a/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp b/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp index 8824e0ad9..f9106e17d 100644 --- a/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp +++ b/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp @@ -31,9 +31,7 @@ #include #include -namespace rmm { -namespace mr { -namespace detail { +namespace rmm::mr::detail { /** * @brief A CRTP helper function @@ -76,7 +74,7 @@ struct crtp { template class stream_ordered_memory_resource : public crtp, public device_memory_resource { public: - ~stream_ordered_memory_resource() { release(); } + ~stream_ordered_memory_resource() override { release(); } stream_ordered_memory_resource() = default; stream_ordered_memory_resource(stream_ordered_memory_resource const&) = delete; @@ -148,12 +146,12 @@ class stream_ordered_memory_resource : public crtp, public device_ /** * @brief Returns the block `b` (last used on stream `stream_event`) to the pool. * - * @param b The block to insert into the pool. + * @param block The block to insert into the pool. * @param stream The stream on which the memory was last used. */ - void insert_block(block_type const& b, cuda_stream_view stream) + void insert_block(block_type const& block, cuda_stream_view stream) { - stream_free_blocks_[get_event(stream)].insert(b); + stream_free_blocks_[get_event(stream)].insert(block); } void insert_blocks(free_list&& blocks, cuda_stream_view stream) @@ -164,9 +162,10 @@ class stream_ordered_memory_resource : public crtp, public device_ void print_free_blocks() const { std::cout << "stream free blocks: "; - for (auto& s : stream_free_blocks_) { - std::cout << "stream: " << s.first.stream << " event: " << s.first.event << " "; - s.second.print(); + for (auto& free_blocks : stream_free_blocks_) { + std::cout << "stream: " << free_blocks.first.stream << " event: " << free_blocks.first.event + << " "; + free_blocks.second.print(); std::cout << std::endl; } std::cout << std::endl; @@ -193,32 +192,34 @@ class stream_ordered_memory_resource : public crtp, public device_ * * @throws `std::bad_alloc` if the requested allocation could not be fulfilled * - * @param bytes The size in bytes of the allocation - * @param stream The stream to associate this allocation with + * @param size The size in bytes of the allocation + * @param stream The stream in which to order this allocation * @return void* Pointer to the newly allocated memory */ - virtual void* do_allocate(std::size_t bytes, cuda_stream_view stream) override + void* do_allocate(std::size_t size, cuda_stream_view stream) override { RMM_LOG_TRACE("[A][stream {:p}][{}B]", fmt::ptr(stream.value()), bytes); - if (bytes <= 0) return nullptr; + if (size <= 0) { return nullptr; } lock_guard lock(mtx_); auto stream_event = get_event(stream); - bytes = rmm::detail::align_up(bytes, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); - RMM_EXPECTS(bytes <= this->underlying().get_maximum_allocation_size(), + size = rmm::detail::align_up(size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + RMM_EXPECTS(size <= this->underlying().get_maximum_allocation_size(), rmm::bad_alloc, "Maximum allocation size exceeded"); - auto const b = this->underlying().get_block(bytes, stream_event); + auto const block = this->underlying().get_block(size, stream_event); - RMM_LOG_TRACE( - "[A][stream {:p}][{}B][{:p}]", fmt::ptr(stream_event.stream), bytes, fmt::ptr(b.pointer())); + 
RMM_LOG_TRACE("[A][stream {:p}][{}B][{:p}]", + fmt::ptr(stream_event.stream), + bytes, + fmt::ptr(block.pointer())); log_summary_trace(); - return b.pointer(); + return block.pointer(); } /** @@ -227,25 +228,27 @@ class stream_ordered_memory_resource : public crtp, public device_ * @throws nothing * * @param p Pointer to be deallocated + * @param size The size in bytes of the allocation to deallocate + * @param stream The stream in which to order this deallocation */ - virtual void do_deallocate(void* p, std::size_t bytes, cuda_stream_view stream) override + void do_deallocate(void* ptr, std::size_t size, cuda_stream_view stream) override { RMM_LOG_TRACE("[D][stream {:p}][{}B][{:p}]", fmt::ptr(stream.value()), bytes, p); - if (bytes <= 0 || p == nullptr) return; + if (size <= 0 || ptr == nullptr) { return; } lock_guard lock(mtx_); auto stream_event = get_event(stream); - bytes = rmm::detail::align_up(bytes, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); - auto const b = this->underlying().free_block(p, bytes); + size = rmm::detail::align_up(size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + auto const block = this->underlying().free_block(ptr, size); // TODO: cudaEventRecord has significant overhead on deallocations. For the non-PTDS case // we may be able to delay recording the event in some situations. But using events rather than // streams allows stealing from deleted streams. RMM_ASSERT_CUDA_SUCCESS(cudaEventRecord(stream_event.event, stream.value())); - stream_free_blocks_[stream_event].insert(b); + stream_free_blocks_[stream_event].insert(block); log_summary_trace(); } @@ -261,6 +264,11 @@ class stream_ordered_memory_resource : public crtp, public device_ } ~event_wrapper() { RMM_ASSERT_CUDA_SUCCESS(cudaEventDestroy(event)); } cudaEvent_t event{}; + + event_wrapper(event_wrapper const&) = delete; + event_wrapper& operator=(event_wrapper const&) = delete; + event_wrapper(event_wrapper&&) noexcept = delete; + event_wrapper& operator=(event_wrapper&&) = delete; }; /** @@ -287,13 +295,15 @@ class stream_ordered_memory_resource : public crtp, public device_ // user explicitly passes it, so it is used as the default location for the free list // at construction. For consistency, the same key is used for null stream free lists in non-PTDS // mode. - auto const stream_to_store = stream.is_default() ? cudaStreamLegacy : stream.value(); + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) + auto* const stream_to_store = stream.is_default() ? cudaStreamLegacy : stream.value(); auto const iter = stream_events_.find(stream_to_store); return (iter != stream_events_.end()) ? iter->second : [&]() { stream_event_pair stream_event{stream_to_store}; RMM_ASSERT_CUDA_SUCCESS( cudaEventCreateWithFlags(&stream_event.event, cudaEventDisableTiming)); + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) stream_events_[stream_to_store] = stream_event; return stream_event; }(); @@ -303,14 +313,14 @@ class stream_ordered_memory_resource : public crtp, public device_ * @brief Splits a block into an allocated block of `size` bytes and a remainder block, and * inserts the remainder into a free list. * - * @param b The block to split into allocated and remainder portions. + * @param block The block to split into allocated and remainder portions. * @param size The size of the block to allocate from `b`. * @param blocks The `free_list` in which to insert the remainder block. * @return The allocated block. 
*/ - block_type allocate_and_insert_remainder(block_type b, std::size_t size, free_list& blocks) + block_type allocate_and_insert_remainder(block_type block, std::size_t size, free_list& blocks) { - auto const [allocated, remainder] = this->underlying().allocate_from_block(b, size); + auto const [allocated, remainder] = this->underlying().allocate_from_block(block, size); if (remainder.is_valid()) { blocks.insert(remainder); } return allocated; } @@ -327,8 +337,8 @@ class stream_ordered_memory_resource : public crtp, public device_ // Try to find a satisfactory block in free list for the same stream (no sync required) auto iter = stream_free_blocks_.find(stream_event); if (iter != stream_free_blocks_.end()) { - block_type const b = iter->second.get_block(size); - if (b.is_valid()) { return allocate_and_insert_remainder(b, size, iter->second); } + block_type const block = iter->second.get_block(size); + if (block.is_valid()) { return allocate_and_insert_remainder(block, size, iter->second); } } free_list& blocks = @@ -336,23 +346,23 @@ class stream_ordered_memory_resource : public crtp, public device_ // Try to find an existing block in another stream { - block_type const b = get_block_from_other_stream(size, stream_event, blocks, false); - if (b.is_valid()) return b; + block_type const block = get_block_from_other_stream(size, stream_event, blocks, false); + if (block.is_valid()) { return block; } } // no large enough blocks available on other streams, so sync and merge until we find one { - block_type const b = get_block_from_other_stream(size, stream_event, blocks, true); - if (b.is_valid()) return b; + block_type const block = get_block_from_other_stream(size, stream_event, blocks, true); + if (block.is_valid()) { return block; } } log_summary_trace(); // no large enough blocks available after merging, so grow the pool - block_type const b = + block_type const block = this->underlying().expand_pool(size, blocks, cuda_stream_view{stream_event.stream}); - return allocate_and_insert_remainder(b, size, blocks); + return allocate_and_insert_remainder(block, size, blocks); } /** @@ -380,7 +390,7 @@ class stream_ordered_memory_resource : public crtp, public device_ if (other_event != stream_event.event) { free_list& other_blocks = it->second; - block_type const b = [&]() { + block_type const block = [&]() { if (merge_first) { merge_lists(stream_event, blocks, other_event, std::move(other_blocks)); @@ -391,27 +401,28 @@ class stream_ordered_memory_resource : public crtp, public device_ stream_free_blocks_.erase(it); - block_type const b = blocks.get_block(size); // get the best fit block in merged lists - if (b.is_valid()) { return allocate_and_insert_remainder(b, size, blocks); } + block_type const block = + blocks.get_block(size); // get the best fit block in merged lists + if (block.is_valid()) { return allocate_and_insert_remainder(block, size, blocks); } } else { - block_type const b = other_blocks.get_block(size); - if (b.is_valid()) { + block_type const block = other_blocks.get_block(size); + if (block.is_valid()) { // Since we found a block associated with a different stream, we have to insert a wait // on the stream's associated event into the allocating stream. RMM_CUDA_TRY(cudaStreamWaitEvent(stream_event.stream, other_event, 0)); - return allocate_and_insert_remainder(b, size, other_blocks); + return allocate_and_insert_remainder(block, size, other_blocks); } } return block_type{}; }(); - if (b.is_valid()) { + if (block.is_valid()) { RMM_LOG_DEBUG((merge_first) ? 
"[A][Stream {:p}][{}B][Found after merging stream {:p}]" : "[A][Stream {:p}][{}B][Taken from stream {:p}]", fmt::ptr(stream_event.stream), size, fmt::ptr(it->first.stream)); - return b; + return block; } } } @@ -486,6 +497,4 @@ class stream_ordered_memory_resource : public crtp, public device_ std::mutex mtx_; // mutex for thread-safe access }; // namespace detail -} // namespace detail -} // namespace mr -} // namespace rmm +} // namespace rmm::mr::detail From a4e0d9a510d6c639d1731fc432f7e0e3318d4cfd Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 09:52:12 +1000 Subject: [PATCH 30/72] tidy device_memory_resource and aligned_resource_adaptor --- include/rmm/detail/aligned.hpp | 32 +++++----- .../mr/device/aligned_resource_adaptor.hpp | 58 +++++++++---------- .../rmm/mr/device/device_memory_resource.hpp | 39 ++++++++----- 3 files changed, 70 insertions(+), 59 deletions(-) diff --git a/include/rmm/detail/aligned.hpp b/include/rmm/detail/aligned.hpp index c949ad035..6a8d70597 100644 --- a/include/rmm/detail/aligned.hpp +++ b/include/rmm/detail/aligned.hpp @@ -24,17 +24,19 @@ namespace rmm::detail { +enum alignment_type : std::size_t {}; + /** * @brief Default alignment used for host memory allocated by RMM. * */ -static constexpr std::size_t RMM_DEFAULT_HOST_ALIGNMENT{alignof(std::max_align_t)}; +static constexpr alignment_type RMM_DEFAULT_HOST_ALIGNMENT{alignof(std::max_align_t)}; /** * @brief Default alignment used for CUDA memory allocation. * */ -static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256}; +static constexpr alignment_type CUDA_ALLOCATION_ALIGNMENT{256}; /** * @brief Returns whether or not `n` is a power of 2. @@ -46,7 +48,7 @@ constexpr bool is_pow2(std::size_t value) { return (0 == (value & (value - 1))); * @brief Returns whether or not `alignment` is a valid memory alignment. 
* */ -constexpr bool is_supported_alignment(std::size_t alignment) { return is_pow2(alignment); } +constexpr bool is_supported_alignment(alignment_type alignment) { return is_pow2(alignment); } /** * @brief Align up to nearest multiple of specified power of 2 @@ -56,10 +58,10 @@ constexpr bool is_supported_alignment(std::size_t alignment) { return is_pow2(al * * @return Return the aligned value, as one would expect */ -constexpr std::size_t align_up(std::size_t value, std::size_t align_bytes) noexcept +constexpr std::size_t align_up(std::size_t value, alignment_type alignment) noexcept { - assert(is_supported_alignment(align_bytes)); - return (value + (align_bytes - 1)) & ~(align_bytes - 1); + assert(is_supported_alignment(alignment)); + return (value + (alignment - 1)) & ~(alignment - 1); } /** @@ -70,10 +72,10 @@ constexpr std::size_t align_up(std::size_t value, std::size_t align_bytes) noexc * * @return Return the aligned value, as one would expect */ -constexpr std::size_t align_down(std::size_t value, std::size_t align_bytes) noexcept +constexpr std::size_t align_down(std::size_t value, alignment_type alignment) noexcept { - assert(is_supported_alignment(align_bytes)); - return value & ~(align_bytes - 1); + assert(is_supported_alignment(alignment)); + return value & ~(alignment - 1); } /** @@ -84,13 +86,13 @@ constexpr std::size_t align_down(std::size_t value, std::size_t align_bytes) noe * * @return true if aligned */ -constexpr bool is_aligned(std::size_t value, std::size_t align_bytes) noexcept +constexpr bool is_aligned(std::size_t value, alignment_type alignment) noexcept { - assert(is_supported_alignment(align_bytes)); - return value == align_down(value, align_bytes); + assert(is_supported_alignment(alignment)); + return value == align_down(value, alignment); } -inline bool is_pointer_aligned(void* ptr, std::size_t alignment = CUDA_ALLOCATION_ALIGNMENT) +inline bool is_pointer_aligned(void* ptr, alignment_type alignment = CUDA_ALLOCATION_ALIGNMENT) { // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) return rmm::detail::is_aligned(reinterpret_cast(ptr), alignment); @@ -124,7 +126,7 @@ inline bool is_pointer_aligned(void* ptr, std::size_t alignment = CUDA_ALLOCATIO * `alignment`. 
*/ template -void* aligned_allocate(std::size_t bytes, std::size_t alignment, Alloc alloc) +void* aligned_allocate(std::size_t bytes, alignment_type alignment, Alloc alloc) { assert(is_pow2(alignment)); @@ -168,7 +170,7 @@ void* aligned_allocate(std::size_t bytes, std::size_t alignment, Alloc alloc) */ template // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) -void aligned_deallocate(void* ptr, std::size_t bytes, std::size_t alignment, Dealloc dealloc) +void aligned_deallocate(void* ptr, std::size_t bytes, alignment_type alignment, Dealloc dealloc) { (void)alignment; diff --git a/include/rmm/mr/device/aligned_resource_adaptor.hpp b/include/rmm/mr/device/aligned_resource_adaptor.hpp index 4e29b90b3..3d70596ff 100644 --- a/include/rmm/mr/device/aligned_resource_adaptor.hpp +++ b/include/rmm/mr/device/aligned_resource_adaptor.hpp @@ -61,8 +61,8 @@ class aligned_resource_adaptor final : public device_memory_resource { */ explicit aligned_resource_adaptor( Upstream* upstream, - std::size_t allocation_alignment = rmm::detail::CUDA_ALLOCATION_ALIGNMENT, - std::size_t alignment_threshold = default_alignment_threshold) + rmm::detail::alignment_type allocation_alignment = rmm::detail::CUDA_ALLOCATION_ALIGNMENT, + std::size_t alignment_threshold = default_alignment_threshold) : upstream_{upstream}, allocation_alignment_{allocation_alignment}, alignment_threshold_{alignment_threshold} @@ -124,18 +124,20 @@ class aligned_resource_adaptor final : public device_memory_resource { if (allocation_alignment_ == rmm::detail::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) { return upstream_->allocate(bytes, stream); - } else { - auto const size = upstream_allocation_size(bytes); - void* pointer = upstream_->allocate(size, stream); - auto const address = reinterpret_cast(pointer); - auto const aligned_address = rmm::detail::align_up(address, allocation_alignment_); - void* aligned_pointer = reinterpret_cast(aligned_address); - if (pointer != aligned_pointer) { - lock_guard lock(mtx_); - pointers_.emplace(aligned_pointer, pointer); - } - return aligned_pointer; } + auto const size = upstream_allocation_size(bytes); + void* pointer = upstream_->allocate(size, stream); + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + auto const address = reinterpret_cast(pointer); + auto const aligned_address = + rmm::detail::align_up(address, rmm::detail::alignment_type{allocation_alignment_}); + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast,performance-no-int-to-ptr) + void* aligned_pointer = reinterpret_cast(aligned_address); + if (pointer != aligned_pointer) { + lock_guard lock(mtx_); + pointers_.emplace(aligned_pointer, pointer); + } + return aligned_pointer; } /** @@ -147,21 +149,21 @@ class aligned_resource_adaptor final : public device_memory_resource { * @param bytes Size of the allocation * @param stream Stream on which to perform the deallocation */ - void do_deallocate(void* p, std::size_t bytes, cuda_stream_view stream) override + void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { if (allocation_alignment_ == rmm::detail::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) { - upstream_->deallocate(p, bytes, stream); + upstream_->deallocate(ptr, bytes, stream); } else { { lock_guard lock(mtx_); - auto const i = pointers_.find(p); - if (i != pointers_.end()) { - p = i->second; - pointers_.erase(i); + auto const iter = pointers_.find(ptr); + if (iter != pointers_.end()) { + ptr = iter->second; + pointers_.erase(iter); } } - 
upstream_->deallocate(p, upstream_allocation_size(bytes), stream); + upstream_->deallocate(ptr, upstream_allocation_size(bytes), stream); } } @@ -176,14 +178,11 @@ class aligned_resource_adaptor final : public device_memory_resource { */ [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override { - if (this == &other) - return true; - else { - auto cast = dynamic_cast const*>(&other); - return cast != nullptr && upstream_->is_equal(*cast->get_upstream()) && - allocation_alignment_ == cast->allocation_alignment_ && - alignment_threshold_ == cast->alignment_threshold_; - } + if (this == &other) { return true; } + auto cast = dynamic_cast const*>(&other); + return cast != nullptr && upstream_->is_equal(*cast->get_upstream()) && + allocation_alignment_ == cast->allocation_alignment_ && + alignment_threshold_ == cast->alignment_threshold_; } /** @@ -211,7 +210,8 @@ class aligned_resource_adaptor final : public device_memory_resource { */ std::size_t upstream_allocation_size(std::size_t bytes) const { - auto const aligned_size = rmm::detail::align_up(bytes, allocation_alignment_); + auto const aligned_size = + rmm::detail::align_up(bytes, rmm::detail::alignment_type{allocation_alignment_}); return aligned_size + allocation_alignment_ - rmm::detail::CUDA_ALLOCATION_ALIGNMENT; } diff --git a/include/rmm/mr/device/device_memory_resource.hpp b/include/rmm/mr/device/device_memory_resource.hpp index d9817a933..9200dfd00 100644 --- a/include/rmm/mr/device/device_memory_resource.hpp +++ b/include/rmm/mr/device/device_memory_resource.hpp @@ -21,9 +21,8 @@ #include #include -namespace rmm { +namespace rmm::mr { -namespace mr { /** * @brief Base class for all libcudf device memory allocation. * @@ -82,7 +81,11 @@ namespace mr { */ class device_memory_resource { public: - virtual ~device_memory_resource() = default; + virtual ~device_memory_resource() = default; + device_memory_resource(device_memory_resource const&) = default; + device_memory_resource& operator=(device_memory_resource const&) = default; + device_memory_resource(device_memory_resource&&) = default; + device_memory_resource& operator=(device_memory_resource&&) = default; /** * @brief Allocates memory of size at least \p bytes. @@ -101,7 +104,7 @@ class device_memory_resource { */ void* allocate(std::size_t bytes, cuda_stream_view stream = cuda_stream_view{}) { - return do_allocate(rmm::detail::align_up(bytes, 8), stream); + return do_allocate(rmm::detail::align_up(bytes, allocation_size_alignment), stream); } /** @@ -122,9 +125,9 @@ class device_memory_resource { * value of `bytes` that was passed to the `allocate` call that returned `p`. 
* @param stream Stream on which to perform deallocation */ - void deallocate(void* p, std::size_t bytes, cuda_stream_view stream = cuda_stream_view{}) + void deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream = cuda_stream_view{}) { - do_deallocate(p, rmm::detail::align_up(bytes, 8), stream); + do_deallocate(ptr, rmm::detail::align_up(bytes, allocation_size_alignment), stream); } /** @@ -140,7 +143,10 @@ class device_memory_resource { * @param other The other resource to compare to * @returns If the two resources are equivalent */ - bool is_equal(device_memory_resource const& other) const noexcept { return do_is_equal(other); } + [[nodiscard]] bool is_equal(device_memory_resource const& other) const noexcept + { + return do_is_equal(other); + } /** * @brief Query whether the resource supports use of non-null CUDA streams for @@ -148,14 +154,14 @@ class device_memory_resource { * * @returns bool true if the resource supports non-null CUDA streams. */ - virtual bool supports_streams() const noexcept = 0; + [[nodiscard]] virtual bool supports_streams() const noexcept = 0; /** * @brief Query whether the resource supports the get_mem_info API. * * @return bool true if the resource supports get_mem_info, false otherwise. */ - virtual bool supports_get_mem_info() const noexcept = 0; + [[nodiscard]] virtual bool supports_get_mem_info() const noexcept = 0; /** * @brief Queries the amount of free and total memory for the resource. @@ -165,12 +171,15 @@ class device_memory_resource { * @returns a pair containing the free memory in bytes in .first and total amount of memory in * .second */ - std::pair get_mem_info(cuda_stream_view stream) const + [[nodiscard]] std::pair get_mem_info(cuda_stream_view stream) const { return do_get_mem_info(stream); } private: + // All allocations are padded to a multiple of allocation_size_alignment bytes. + static constexpr auto allocation_size_alignment = rmm::detail::alignment_type{8}; + /** * @brief Allocates memory of size at least \p bytes. * @@ -196,7 +205,7 @@ class device_memory_resource { * value of `bytes` that was passed to the `allocate` call that returned `p`. * @param stream Stream on which to perform deallocation */ - virtual void do_deallocate(void* p, std::size_t bytes, cuda_stream_view stream) = 0; + virtual void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) = 0; /** * @brief Compare this resource to another. 
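Both allocate and deallocate above round the requested size up to a multiple of allocation_size_alignment (8 bytes) before calling the do_allocate / do_deallocate hooks, using the same power-of-two bit trick as rmm::detail::align_up. A standalone illustration of that arithmetic (plain C++, independent of RMM; only the formulas visible in detail/aligned.hpp above are used, and the helper names here are local to the example):

    #include <cstddef>

    // Same bit trick as rmm::detail::align_up / align_down; valid only when
    // `alignment` is a power of two, which is what is_supported_alignment checks.
    constexpr std::size_t round_up(std::size_t value, std::size_t alignment) noexcept
    {
      return (value + (alignment - 1)) & ~(alignment - 1);
    }

    constexpr std::size_t round_down(std::size_t value, std::size_t alignment) noexcept
    {
      return value & ~(alignment - 1);
    }

    // device_memory_resource pads allocation sizes to multiples of 8 bytes...
    static_assert(round_up(1, 8) == 8);
    static_assert(round_up(24, 8) == 24);
    static_assert(round_up(0, 8) == 0);
    // ...while CUDA allocations use CUDA_ALLOCATION_ALIGNMENT (256 bytes).
    static_assert(round_up(257, 256) == 512);
    static_assert(round_down(257, 256) == 256);

The same worst-case padding argument explains why aligned_resource_adaptor::upstream_allocation_size adds alignment - CUDA_ALLOCATION_ALIGNMENT on top of the aligned size: the upstream already guarantees 256-byte alignment, so that is the most extra space ever needed to reach the next alignment-byte boundary.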
@@ -212,7 +221,7 @@ class device_memory_resource { * @return true If the two resources are equivalent * @return false If the two resources are not equal */ - virtual bool do_is_equal(device_memory_resource const& other) const noexcept + [[nodiscard]] virtual bool do_is_equal(device_memory_resource const& other) const noexcept { return this == &other; } @@ -225,7 +234,7 @@ class device_memory_resource { * @param stream the stream being executed on * @return std::pair with available and free memory for resource */ - virtual std::pair do_get_mem_info(cuda_stream_view stream) const = 0; + [[nodiscard]] virtual std::pair do_get_mem_info( + cuda_stream_view stream) const = 0; }; -} // namespace mr -} // namespace rmm +} // namespace rmm::mr From a6a0cab4470b6f95e62f978fb24faaa14c73958a Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 10:16:40 +1000 Subject: [PATCH 31/72] Fix nodiscard compilation error --- tests/mr/device/mr_tests.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/mr/device/mr_tests.cpp b/tests/mr/device/mr_tests.cpp index 0e2b63dcc..f1248dbc0 100644 --- a/tests/mr/device/mr_tests.cpp +++ b/tests/mr/device/mr_tests.cpp @@ -109,14 +109,20 @@ TEST_P(mr_test, MixedRandomAllocationFreeStream) TEST_P(mr_test, GetMemInfo) { if (this->mr->supports_get_mem_info()) { - this->mr->get_mem_info(rmm::cuda_stream_view{}); const auto allocation_size{16 * 256}; + { + auto const [free, total] = this->mr->get_mem_info(rmm::cuda_stream_view{}); + EXPECT_TRUE(free >= allocation_size); + } + void* ptr{nullptr}; ptr = this->mr->allocate(allocation_size); + { auto const [free, total] = this->mr->get_mem_info(rmm::cuda_stream_view{}); EXPECT_TRUE(free >= allocation_size); } + this->mr->deallocate(ptr, allocation_size); } } From a12bb90c2c58cc4ce068b1e3f3982f3467227ce1 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 10:17:00 +1000 Subject: [PATCH 32/72] tidying more MRs --- include/rmm/detail/aligned.hpp | 3 +- .../mr/device/cuda_async_memory_resource.hpp | 50 +++++++++---------- .../rmm/mr/device/cuda_memory_resource.hpp | 33 ++++++------ .../rmm/mr/device/device_memory_resource.hpp | 1 + include/rmm/mr/host/new_delete_resource.hpp | 7 +-- tests/mr/device/cuda_async_mr_tests.cpp | 2 +- 6 files changed, 49 insertions(+), 47 deletions(-) diff --git a/include/rmm/detail/aligned.hpp b/include/rmm/detail/aligned.hpp index 6a8d70597..954bf082c 100644 --- a/include/rmm/detail/aligned.hpp +++ b/include/rmm/detail/aligned.hpp @@ -24,7 +24,8 @@ namespace rmm::detail { -enum alignment_type : std::size_t {}; +// enum alignment_type : std::size_t {}; +using alignment_type = std::size_t; /** * @brief Default alignment used for host memory allocated by RMM. diff --git a/include/rmm/mr/device/cuda_async_memory_resource.hpp b/include/rmm/mr/device/cuda_async_memory_resource.hpp index 57ec97ff8..19d52b16b 100644 --- a/include/rmm/mr/device/cuda_async_memory_resource.hpp +++ b/include/rmm/mr/device/cuda_async_memory_resource.hpp @@ -32,8 +32,7 @@ #define RMM_CUDA_MALLOC_ASYNC_SUPPORT #endif -namespace rmm { -namespace mr { +namespace rmm::mr { /** * @brief `device_memory_resource` derived class that uses `cudaMallocAsync`/`cudaFreeAsync` for @@ -41,6 +40,7 @@ namespace mr { */ class cuda_async_memory_resource final : public device_memory_resource { public: + enum release_threshold_size_type : std::size_t {}; /** * @brief Constructs a cuda_async_memory_resource with the optionally specified initial pool size * and release threshold. 
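A usage sketch for the constructor documented above, mirroring the ExplicitReleaseThreshold test that appears later in this series. The pool sizes are arbitrary illustration values, and construction throws if the CUDA runtime/driver does not support cudaMallocAsync (the cudaDevAttrMemoryPoolsSupported check in the hunk below):

    #include <rmm/cuda_stream_view.hpp>
    #include <rmm/mr/device/cuda_async_memory_resource.hpp>

    #include <cstddef>

    int main()
    {
      using async_mr = rmm::mr::cuda_async_memory_resource;

      // Prime the pool with 1 MiB and let it cache up to 8 MiB across synchronizations.
      // Both constructor arguments are optional.
      std::size_t const initial_pool_size{1U << 20U};
      auto const release_threshold = async_mr::release_threshold_size_type{1U << 23U};
      async_mr mr{initial_pool_size, release_threshold};

      // Allocations are stream-ordered; sizes are padded to a multiple of 8 bytes
      // by device_memory_resource::allocate before reaching do_allocate.
      void* ptr = mr.allocate(256, rmm::cuda_stream_default);
      mr.deallocate(ptr, 256, rmm::cuda_stream_default);
      return 0;
    }

The release_threshold_size_type strong type introduced in this patch keeps the two size arguments from being swapped accidentally, which is the same concern the bugprone-easily-swappable-parameters clang-tidy check addresses elsewhere in the series.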
@@ -55,16 +55,16 @@ class cuda_async_memory_resource final : public device_memory_resource { * @param release_threshold Optional release threshold size in bytes of the pool. If no value is * provided, the release threshold is set to the total amount of memory on the current device. */ - cuda_async_memory_resource(thrust::optional initial_pool_size = {}, - thrust::optional release_threshold = {}) + cuda_async_memory_resource(thrust::optional initial_pool_size = {}, + thrust::optional release_threshold = {}) { #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT // Check if cudaMallocAsync Memory pool supported auto const device = rmm::detail::current_device(); int cuda_pool_supported{}; - auto e = + auto result = cudaDeviceGetAttribute(&cuda_pool_supported, cudaDevAttrMemoryPoolsSupported, device.value()); - RMM_EXPECTS(e == cudaSuccess && cuda_pool_supported, + RMM_EXPECTS(result == cudaSuccess && cuda_pool_supported, "cudaMallocAsync not supported with this CUDA driver/runtime version"); // Construct explicit pool @@ -78,15 +78,15 @@ class cuda_async_memory_resource final : public device_memory_resource { auto const [free, total] = rmm::detail::available_device_memory(); // Need an l-value to take address to pass to cudaMemPoolSetAttribute - uint64_t threshold = release_threshold.value_or(total); + uint64_t threshold = release_threshold.value_or(release_threshold_size_type{total}); RMM_CUDA_TRY( cudaMemPoolSetAttribute(cuda_pool_handle_, cudaMemPoolAttrReleaseThreshold, &threshold)); // Allocate and immediately deallocate the initial_pool_size to prime the pool with the // specified size - auto const pool_size = initial_pool_size.value_or(free * 0.5); - auto p = do_allocate(pool_size, cuda_stream_default); - do_deallocate(p, pool_size, cuda_stream_default); + auto const pool_size = initial_pool_size.value_or(free / 2); + auto* ptr = do_allocate(pool_size, cuda_stream_default); + do_deallocate(ptr, pool_size, cuda_stream_default); #else RMM_FAIL( @@ -99,10 +99,10 @@ class cuda_async_memory_resource final : public device_memory_resource { * @brief Returns the underlying native handle to the CUDA pool * */ - cudaMemPool_t pool_handle() const noexcept { return cuda_pool_handle_; } + [[nodiscard]] cudaMemPool_t pool_handle() const noexcept { return cuda_pool_handle_; } #endif - ~cuda_async_memory_resource() + ~cuda_async_memory_resource() override { #if defined(RMM_CUDA_MALLOC_ASYNC_SUPPORT) RMM_ASSERT_CUDA_SUCCESS(cudaMemPoolDestroy(pool_handle())); @@ -119,18 +119,18 @@ class cuda_async_memory_resource final : public device_memory_resource { * * @returns bool true */ - bool supports_streams() const noexcept override { return true; } + [[nodiscard]] bool supports_streams() const noexcept override { return true; } /** * @brief Query whether the resource supports the get_mem_info API. 
* * @return true */ - bool supports_get_mem_info() const noexcept override { return false; } + [[nodiscard]] bool supports_get_mem_info() const noexcept override { return false; } private: #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT - cudaMemPool_t cuda_pool_handle_; + cudaMemPool_t cuda_pool_handle_{}; #endif /** @@ -145,17 +145,17 @@ class cuda_async_memory_resource final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override { - void* p{nullptr}; + void* ptr{nullptr}; #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT if (bytes > 0) { - RMM_CUDA_TRY(cudaMallocFromPoolAsync(&p, bytes, pool_handle(), stream.value()), + RMM_CUDA_TRY(cudaMallocFromPoolAsync(&ptr, bytes, pool_handle(), stream.value()), rmm::bad_alloc); } #else (void)bytes; (void)stream; #endif - return p; + return ptr; } /** @@ -165,12 +165,12 @@ class cuda_async_memory_resource final : public device_memory_resource { * * @param p Pointer to be deallocated */ - void do_deallocate(void* p, std::size_t, rmm::cuda_stream_view stream) override + void do_deallocate(void* ptr, std::size_t /*bytes*/, rmm::cuda_stream_view stream) override { #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT - if (p != nullptr) { RMM_ASSERT_CUDA_SUCCESS(cudaFreeAsync(p, stream.value())); } + if (ptr != nullptr) { RMM_ASSERT_CUDA_SUCCESS(cudaFreeAsync(ptr, stream.value())); } #else - (void)p; + (void)ptr; (void)stream; #endif } @@ -184,7 +184,7 @@ class cuda_async_memory_resource final : public device_memory_resource { * @return true If the two resources are equivalent * @return false If the two resources are not equal */ - bool do_is_equal(device_memory_resource const& other) const noexcept override + [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override { return dynamic_cast(&other) != nullptr; } @@ -196,11 +196,11 @@ class cuda_async_memory_resource final : public device_memory_resource { * * @return std::pair contaiing free_size and total_size of memory */ - std::pair do_get_mem_info(rmm::cuda_stream_view) const override + [[nodiscard]] std::pair do_get_mem_info( + rmm::cuda_stream_view /*stream*/) const override { return std::make_pair(0, 0); } }; -} // namespace mr -} // namespace rmm +} // namespace rmm::mr diff --git a/include/rmm/mr/device/cuda_memory_resource.hpp b/include/rmm/mr/device/cuda_memory_resource.hpp index d419ce335..59a729297 100644 --- a/include/rmm/mr/device/cuda_memory_resource.hpp +++ b/include/rmm/mr/device/cuda_memory_resource.hpp @@ -22,8 +22,7 @@ #include -namespace rmm { -namespace mr { +namespace rmm::mr { /** * @brief `device_memory_resource` derived class that uses cudaMalloc/Free for * allocation/deallocation. @@ -31,7 +30,7 @@ namespace mr { class cuda_memory_resource final : public device_memory_resource { public: cuda_memory_resource() = default; - ~cuda_memory_resource() = default; + ~cuda_memory_resource() override = default; cuda_memory_resource(cuda_memory_resource const&) = default; cuda_memory_resource(cuda_memory_resource&&) = default; cuda_memory_resource& operator=(cuda_memory_resource const&) = default; @@ -43,14 +42,14 @@ class cuda_memory_resource final : public device_memory_resource { * * @returns bool false */ - bool supports_streams() const noexcept override { return false; } + [[nodiscard]] bool supports_streams() const noexcept override { return false; } /** * @brief Query whether the resource supports the get_mem_info API. 
* * @return true */ - bool supports_get_mem_info() const noexcept override { return true; } + [[nodiscard]] bool supports_get_mem_info() const noexcept override { return true; } private: /** @@ -65,11 +64,11 @@ class cuda_memory_resource final : public device_memory_resource { * @param bytes The size, in bytes, of the allocation * @return void* Pointer to the newly allocated memory */ - void* do_allocate(std::size_t bytes, cuda_stream_view) override + void* do_allocate(std::size_t bytes, cuda_stream_view /*stream*/) override { - void* p{nullptr}; - RMM_CUDA_TRY(cudaMalloc(&p, bytes), rmm::bad_alloc); - return p; + void* ptr{nullptr}; + RMM_CUDA_TRY(cudaMalloc(&ptr, bytes), rmm::bad_alloc); + return ptr; } /** @@ -81,9 +80,9 @@ class cuda_memory_resource final : public device_memory_resource { * * @param p Pointer to be deallocated */ - void do_deallocate(void* p, std::size_t, cuda_stream_view) override + void do_deallocate(void* ptr, std::size_t /*bytes*/, cuda_stream_view /*stream*/) override { - RMM_ASSERT_CUDA_SUCCESS(cudaFree(p)); + RMM_ASSERT_CUDA_SUCCESS(cudaFree(ptr)); } /** @@ -98,7 +97,7 @@ class cuda_memory_resource final : public device_memory_resource { * @return true If the two resources are equivalent * @return false If the two resources are not equal */ - bool do_is_equal(device_memory_resource const& other) const noexcept override + [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override { return dynamic_cast(&other) != nullptr; } @@ -110,13 +109,13 @@ class cuda_memory_resource final : public device_memory_resource { * * @return std::pair contaiing free_size and total_size of memory */ - std::pair do_get_mem_info(cuda_stream_view) const override + [[nodiscard]] std::pair do_get_mem_info( + cuda_stream_view /*stream*/) const override { - std::size_t free_size; - std::size_t total_size; + std::size_t free_size{}; + std::size_t total_size{}; RMM_CUDA_TRY(cudaMemGetInfo(&free_size, &total_size)); return std::make_pair(free_size, total_size); } }; -} // namespace mr -} // namespace rmm +} // namespace rmm::mr diff --git a/include/rmm/mr/device/device_memory_resource.hpp b/include/rmm/mr/device/device_memory_resource.hpp index 9200dfd00..6105db207 100644 --- a/include/rmm/mr/device/device_memory_resource.hpp +++ b/include/rmm/mr/device/device_memory_resource.hpp @@ -81,6 +81,7 @@ namespace rmm::mr { */ class device_memory_resource { public: + device_memory_resource() = default; virtual ~device_memory_resource() = default; device_memory_resource(device_memory_resource const&) = default; device_memory_resource& operator=(device_memory_resource const&) = default; diff --git a/include/rmm/mr/host/new_delete_resource.hpp b/include/rmm/mr/host/new_delete_resource.hpp index 0f27cbf3c..6028e0ead 100644 --- a/include/rmm/mr/host/new_delete_resource.hpp +++ b/include/rmm/mr/host/new_delete_resource.hpp @@ -56,11 +56,12 @@ class new_delete_resource final : public host_memory_resource { std::size_t alignment = detail::RMM_DEFAULT_HOST_ALIGNMENT) override { // If the requested alignment isn't supported, use default - alignment = - (detail::is_supported_alignment(alignment)) ? alignment : detail::RMM_DEFAULT_HOST_ALIGNMENT; + auto align = (detail::is_supported_alignment(rmm::detail::alignment_type{alignment})) + ? 
rmm::detail::alignment_type{alignment} + : detail::RMM_DEFAULT_HOST_ALIGNMENT; return detail::aligned_allocate( - bytes, alignment, [](std::size_t size) { return ::operator new(size); }); + bytes, align, [](std::size_t size) { return ::operator new(size); }); } /**---------------------------------------------------------------------------* diff --git a/tests/mr/device/cuda_async_mr_tests.cpp b/tests/mr/device/cuda_async_mr_tests.cpp index 4bf0c3d5b..5a507162c 100644 --- a/tests/mr/device/cuda_async_mr_tests.cpp +++ b/tests/mr/device/cuda_async_mr_tests.cpp @@ -47,7 +47,7 @@ TEST(PoolTest, ExplicitInitialPoolSize) TEST(PoolTest, ExplicitReleaseThreshold) { const auto pool_init_size{100}; - const auto pool_release_threshold{1000}; + const auto pool_release_threshold = cuda_async_mr::release_threshold_size_type{1000}; cuda_async_mr mr{pool_init_size, pool_release_threshold}; void* ptr = mr.allocate(pool_init_size); mr.deallocate(ptr, pool_init_size); From 2b1b49d69a81db46f99691492cb8aeb574767da2 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 10:43:45 +1000 Subject: [PATCH 33/72] Remove `alignment_type` and ignore swappable parameters when one is called `alignment`. --- .clang-tidy | 2 ++ include/rmm/detail/aligned.hpp | 21 +++++------ .../mr/device/aligned_resource_adaptor.hpp | 36 ++++++++----------- .../rmm/mr/device/device_memory_resource.hpp | 2 +- include/rmm/mr/host/new_delete_resource.hpp | 19 +++++----- 5 files changed, 34 insertions(+), 46 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index b8fba1f09..b76743aeb 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -56,4 +56,6 @@ CheckOptions: value: 'mr|_' - key: readability-function-cognitive-complexity.IgnoreMacros value: '1' + - key: bugprone-easily-swappable-parameters.IgnoredParameterNames + value: 'alignment' ... diff --git a/include/rmm/detail/aligned.hpp b/include/rmm/detail/aligned.hpp index 954bf082c..321be53b5 100644 --- a/include/rmm/detail/aligned.hpp +++ b/include/rmm/detail/aligned.hpp @@ -24,20 +24,17 @@ namespace rmm::detail { -// enum alignment_type : std::size_t {}; -using alignment_type = std::size_t; - /** * @brief Default alignment used for host memory allocated by RMM. * */ -static constexpr alignment_type RMM_DEFAULT_HOST_ALIGNMENT{alignof(std::max_align_t)}; +static constexpr std::size_t RMM_DEFAULT_HOST_ALIGNMENT{alignof(std::max_align_t)}; /** * @brief Default alignment used for CUDA memory allocation. * */ -static constexpr alignment_type CUDA_ALLOCATION_ALIGNMENT{256}; +static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256}; /** * @brief Returns whether or not `n` is a power of 2. @@ -49,7 +46,7 @@ constexpr bool is_pow2(std::size_t value) { return (0 == (value & (value - 1))); * @brief Returns whether or not `alignment` is a valid memory alignment. 
* */ -constexpr bool is_supported_alignment(alignment_type alignment) { return is_pow2(alignment); } +constexpr bool is_supported_alignment(std::size_t alignment) { return is_pow2(alignment); } /** * @brief Align up to nearest multiple of specified power of 2 @@ -59,7 +56,7 @@ constexpr bool is_supported_alignment(alignment_type alignment) { return is_pow2 * * @return Return the aligned value, as one would expect */ -constexpr std::size_t align_up(std::size_t value, alignment_type alignment) noexcept +constexpr std::size_t align_up(std::size_t value, std::size_t alignment) noexcept { assert(is_supported_alignment(alignment)); return (value + (alignment - 1)) & ~(alignment - 1); @@ -73,7 +70,7 @@ constexpr std::size_t align_up(std::size_t value, alignment_type alignment) noex * * @return Return the aligned value, as one would expect */ -constexpr std::size_t align_down(std::size_t value, alignment_type alignment) noexcept +constexpr std::size_t align_down(std::size_t value, std::size_t alignment) noexcept { assert(is_supported_alignment(alignment)); return value & ~(alignment - 1); @@ -87,13 +84,13 @@ constexpr std::size_t align_down(std::size_t value, alignment_type alignment) no * * @return true if aligned */ -constexpr bool is_aligned(std::size_t value, alignment_type alignment) noexcept +constexpr bool is_aligned(std::size_t value, std::size_t alignment) noexcept { assert(is_supported_alignment(alignment)); return value == align_down(value, alignment); } -inline bool is_pointer_aligned(void* ptr, alignment_type alignment = CUDA_ALLOCATION_ALIGNMENT) +inline bool is_pointer_aligned(void* ptr, std::size_t alignment = CUDA_ALLOCATION_ALIGNMENT) { // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) return rmm::detail::is_aligned(reinterpret_cast(ptr), alignment); @@ -127,7 +124,7 @@ inline bool is_pointer_aligned(void* ptr, alignment_type alignment = CUDA_ALLOCA * `alignment`. */ template -void* aligned_allocate(std::size_t bytes, alignment_type alignment, Alloc alloc) +void* aligned_allocate(std::size_t bytes, std::size_t alignment, Alloc alloc) { assert(is_pow2(alignment)); @@ -171,7 +168,7 @@ void* aligned_allocate(std::size_t bytes, alignment_type alignment, Alloc alloc) */ template // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) -void aligned_deallocate(void* ptr, std::size_t bytes, alignment_type alignment, Dealloc dealloc) +void aligned_deallocate(void* ptr, std::size_t bytes, std::size_t alignment, Dealloc dealloc) { (void)alignment; diff --git a/include/rmm/mr/device/aligned_resource_adaptor.hpp b/include/rmm/mr/device/aligned_resource_adaptor.hpp index 3d70596ff..31b23a442 100644 --- a/include/rmm/mr/device/aligned_resource_adaptor.hpp +++ b/include/rmm/mr/device/aligned_resource_adaptor.hpp @@ -55,20 +55,17 @@ class aligned_resource_adaptor final : public device_memory_resource { * @throws `rmm::logic_error` if `allocation_alignment` is not a power of 2 * * @param upstream The resource used for allocating/deallocating device memory. - * @param allocation_alignment The size used for allocation alignment. + * @param alignment The size used for allocation alignment. * @param alignment_threshold Only allocations with a size larger than or equal to this threshold * are aligned. 
*/ - explicit aligned_resource_adaptor( - Upstream* upstream, - rmm::detail::alignment_type allocation_alignment = rmm::detail::CUDA_ALLOCATION_ALIGNMENT, - std::size_t alignment_threshold = default_alignment_threshold) - : upstream_{upstream}, - allocation_alignment_{allocation_alignment}, - alignment_threshold_{alignment_threshold} + explicit aligned_resource_adaptor(Upstream* upstream, + std::size_t alignment = rmm::detail::CUDA_ALLOCATION_ALIGNMENT, + std::size_t alignment_threshold = default_alignment_threshold) + : upstream_{upstream}, alignment_{alignment}, alignment_threshold_{alignment_threshold} { RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); - RMM_EXPECTS(rmm::detail::is_supported_alignment(allocation_alignment), + RMM_EXPECTS(rmm::detail::is_supported_alignment(alignment), "Allocation alignment is not a power of 2."); } @@ -121,16 +118,14 @@ class aligned_resource_adaptor final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - if (allocation_alignment_ == rmm::detail::CUDA_ALLOCATION_ALIGNMENT || - bytes < alignment_threshold_) { + if (alignment_ == rmm::detail::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) { return upstream_->allocate(bytes, stream); } auto const size = upstream_allocation_size(bytes); void* pointer = upstream_->allocate(size, stream); // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) - auto const address = reinterpret_cast(pointer); - auto const aligned_address = - rmm::detail::align_up(address, rmm::detail::alignment_type{allocation_alignment_}); + auto const address = reinterpret_cast(pointer); + auto const aligned_address = rmm::detail::align_up(address, alignment_); // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast,performance-no-int-to-ptr) void* aligned_pointer = reinterpret_cast(aligned_address); if (pointer != aligned_pointer) { @@ -151,8 +146,7 @@ class aligned_resource_adaptor final : public device_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - if (allocation_alignment_ == rmm::detail::CUDA_ALLOCATION_ALIGNMENT || - bytes < alignment_threshold_) { + if (alignment_ == rmm::detail::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) { upstream_->deallocate(ptr, bytes, stream); } else { { @@ -181,8 +175,7 @@ class aligned_resource_adaptor final : public device_memory_resource { if (this == &other) { return true; } auto cast = dynamic_cast const*>(&other); return cast != nullptr && upstream_->is_equal(*cast->get_upstream()) && - allocation_alignment_ == cast->allocation_alignment_ && - alignment_threshold_ == cast->alignment_threshold_; + alignment_ == cast->alignment_ && alignment_threshold_ == cast->alignment_threshold_; } /** @@ -210,14 +203,13 @@ class aligned_resource_adaptor final : public device_memory_resource { */ std::size_t upstream_allocation_size(std::size_t bytes) const { - auto const aligned_size = - rmm::detail::align_up(bytes, rmm::detail::alignment_type{allocation_alignment_}); - return aligned_size + allocation_alignment_ - rmm::detail::CUDA_ALLOCATION_ALIGNMENT; + auto const aligned_size = rmm::detail::align_up(bytes, alignment_); + return aligned_size + alignment_ - rmm::detail::CUDA_ALLOCATION_ALIGNMENT; } Upstream* upstream_; ///< The upstream resource used for satisfying allocation requests std::unordered_map pointers_; ///< Map of aligned pointers to upstream pointers. 
- std::size_t allocation_alignment_; ///< The size used for allocation alignment + std::size_t alignment_; ///< The size used for allocation alignment std::size_t alignment_threshold_; ///< The size above which allocations should be aligned mutable std::mutex mtx_; ///< Mutex for exclusive lock. }; diff --git a/include/rmm/mr/device/device_memory_resource.hpp b/include/rmm/mr/device/device_memory_resource.hpp index 6105db207..e0e97b86d 100644 --- a/include/rmm/mr/device/device_memory_resource.hpp +++ b/include/rmm/mr/device/device_memory_resource.hpp @@ -179,7 +179,7 @@ class device_memory_resource { private: // All allocations are padded to a multiple of allocation_size_alignment bytes. - static constexpr auto allocation_size_alignment = rmm::detail::alignment_type{8}; + static constexpr auto allocation_size_alignment = std::size_t{8}; /** * @brief Allocates memory of size at least \p bytes. diff --git a/include/rmm/mr/host/new_delete_resource.hpp b/include/rmm/mr/host/new_delete_resource.hpp index 6028e0ead..694450798 100644 --- a/include/rmm/mr/host/new_delete_resource.hpp +++ b/include/rmm/mr/host/new_delete_resource.hpp @@ -22,8 +22,7 @@ #include #include -namespace rmm { -namespace mr { +namespace rmm::mr { /**---------------------------------------------------------------------------* * @brief A `host_memory_resource` that uses the global `operator new` and @@ -32,7 +31,7 @@ namespace mr { class new_delete_resource final : public host_memory_resource { public: new_delete_resource() = default; - ~new_delete_resource() = default; + ~new_delete_resource() override = default; new_delete_resource(new_delete_resource const&) = default; new_delete_resource(new_delete_resource&&) = default; new_delete_resource& operator=(new_delete_resource const&) = default; @@ -56,12 +55,11 @@ class new_delete_resource final : public host_memory_resource { std::size_t alignment = detail::RMM_DEFAULT_HOST_ALIGNMENT) override { // If the requested alignment isn't supported, use default - auto align = (detail::is_supported_alignment(rmm::detail::alignment_type{alignment})) - ? rmm::detail::alignment_type{alignment} - : detail::RMM_DEFAULT_HOST_ALIGNMENT; + alignment = + (detail::is_supported_alignment(alignment)) ? alignment : detail::RMM_DEFAULT_HOST_ALIGNMENT; return detail::aligned_allocate( - bytes, align, [](std::size_t size) { return ::operator new(size); }); + bytes, alignment, [](std::size_t size) { return ::operator new(size); }); } /**---------------------------------------------------------------------------* @@ -82,12 +80,11 @@ class new_delete_resource final : public host_memory_resource { *`p`. 
* @param stream Stream on which to perform deallocation *---------------------------------------------------------------------------**/ - void do_deallocate(void* p, + void do_deallocate(void* ptr, std::size_t bytes, std::size_t alignment = detail::RMM_DEFAULT_HOST_ALIGNMENT) override { - detail::aligned_deallocate(p, bytes, alignment, [](void* p) { ::operator delete(p); }); + detail::aligned_deallocate(ptr, bytes, alignment, [](void* ptr) { ::operator delete(ptr); }); } }; -} // namespace mr -} // namespace rmm +} // namespace rmm::mr From b149037eb05f7ccf8f512644417be1d6ed6947a7 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 10:48:49 +1000 Subject: [PATCH 34/72] tidy arena_memory_resource --- .../rmm/mr/device/arena_memory_resource.hpp | 62 +++++++++---------- 1 file changed, 30 insertions(+), 32 deletions(-) diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp index 28376142c..eab0a7bc7 100644 --- a/include/rmm/mr/device/arena_memory_resource.hpp +++ b/include/rmm/mr/device/arena_memory_resource.hpp @@ -25,8 +25,7 @@ #include #include -namespace rmm { -namespace mr { +namespace rmm::mr { /** * @brief A suballocator that emphasizes fragmentation avoidance and scalable concurrency support. @@ -92,9 +91,12 @@ class arena_memory_resource final : public device_memory_resource { { } + ~arena_memory_resource() override = default; // Disable copy (and move) semantics. arena_memory_resource(arena_memory_resource const&) = delete; arena_memory_resource& operator=(arena_memory_resource const&) = delete; + arena_memory_resource(arena_memory_resource&&) = delete; + arena_memory_resource& operator=(arena_memory_resource&&) = delete; /** * @brief Queries whether the resource supports use of non-null CUDA streams for @@ -130,44 +132,44 @@ class arena_memory_resource final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - if (bytes <= 0) return nullptr; + if (bytes <= 0) { return nullptr; } bytes = detail::arena::align_up(bytes); return get_arena(stream).allocate(bytes); } /** - * @brief Deallocate memory pointed to by `p`. + * @brief Deallocate memory pointed to by `ptr`. * - * @param p Pointer to be deallocated. + * @param ptr Pointer to be deallocated. * @param bytes The size in bytes of the allocation. This must be equal to the * value of `bytes` that was passed to the `allocate` call that returned `p`. * @param stream Stream on which to perform deallocation. */ - void do_deallocate(void* p, std::size_t bytes, cuda_stream_view stream) override + void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - if (p == nullptr || bytes <= 0) return; + if (ptr == nullptr || bytes <= 0) { return; } bytes = detail::arena::align_up(bytes); #ifdef RMM_POOL_TRACK_ALLOCATIONS - if (!get_arena(stream).deallocate(p, bytes, stream)) { - deallocate_from_other_arena(p, bytes, stream); + if (!get_arena(stream).deallocate(ptr, bytes, stream)) { + deallocate_from_other_arena(ptr, bytes, stream); } #else - get_arena(stream).deallocate(p, bytes, stream); + get_arena(stream).deallocate(ptr, bytes, stream); #endif } #ifdef RMM_POOL_TRACK_ALLOCATIONS /** - * @brief Deallocate memory pointed to by `p` that was allocated in a different arena. + * @brief Deallocate memory pointed to by `ptr` that was allocated in a different arena. * - * @param p Pointer to be deallocated. + * @param ptr Pointer to be deallocated. * @param bytes The size in bytes of the allocation. 
This must be equal to the * value of `bytes` that was passed to the `allocate` call that returned `p`. * @param stream Stream on which to perform deallocation. */ - void deallocate_from_other_arena(void* p, std::size_t bytes, cuda_stream_view stream) + void deallocate_from_other_arena(void* ptr, std::size_t bytes, cuda_stream_view stream) { stream.synchronize_no_throw(); @@ -178,19 +180,19 @@ class arena_memory_resource final : public device_memory_resource { for (auto& kv : thread_arenas_) { // If the arena does not belong to the current thread, try to deallocate from it, and return // if successful. - if (kv.first != id && kv.second->deallocate(p, bytes)) return; + if (kv.first != id && kv.second->deallocate(ptr, bytes)) return; } } else { for (auto& kv : stream_arenas_) { // If the arena does not belong to the current stream, try to deallocate from it, and return // if successful. - if (stream != kv.first && kv.second.deallocate(p, bytes)) return; + if (stream != kv.first && kv.second.deallocate(ptr, bytes)) return; } } // The thread that originally allocated the block has terminated, deallocate directly in the // global arena. - global_arena_.deallocate({p, bytes}); + global_arena_.deallocate({ptr, bytes}); } #endif @@ -202,11 +204,8 @@ class arena_memory_resource final : public device_memory_resource { */ arena& get_arena(cuda_stream_view stream) { - if (use_per_thread_arena(stream)) { - return get_thread_arena(); - } else { - return get_stream_arena(stream); - } + if (use_per_thread_arena(stream)) { return get_thread_arena(); } + return get_stream_arena(stream); } /** @@ -216,18 +215,18 @@ class arena_memory_resource final : public device_memory_resource { */ arena& get_thread_arena() { - auto const id = std::this_thread::get_id(); + auto const thread_id = std::this_thread::get_id(); { read_lock lock(mtx_); - auto const it = thread_arenas_.find(id); - if (it != thread_arenas_.end()) { return *it->second; } + auto const iter = thread_arenas_.find(thread_id); + if (iter != thread_arenas_.end()) { return *iter->second; } } { write_lock lock(mtx_); - auto a = std::make_shared(global_arena_); - thread_arenas_.emplace(id, a); - thread_local detail::arena::arena_cleaner cleaner{a}; - return *a; + auto thread_arena = std::make_shared(global_arena_); + thread_arenas_.emplace(thread_id, thread_arena); + thread_local detail::arena::arena_cleaner cleaner{thread_arena}; + return *thread_arena; } } @@ -241,8 +240,8 @@ class arena_memory_resource final : public device_memory_resource { RMM_LOGGING_ASSERT(!use_per_thread_arena(stream)); { read_lock lock(mtx_); - auto const it = stream_arenas_.find(stream.value()); - if (it != stream_arenas_.end()) { return it->second; } + auto const iter = stream_arenas_.find(stream.value()); + if (iter != stream_arenas_.end()) { return iter->second; } } { write_lock lock(mtx_); @@ -285,5 +284,4 @@ class arena_memory_resource final : public device_memory_resource { mutable std::shared_timed_mutex mtx_; }; -} // namespace mr -} // namespace rmm +} // namespace rmm::mr From 2d410e33075d6b515999b46026ceb5f9509e408e Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 10:56:26 +1000 Subject: [PATCH 35/72] tidy binning_memory_resource --- .../rmm/mr/device/binning_memory_resource.hpp | 40 +++++++++---------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/include/rmm/mr/device/binning_memory_resource.hpp b/include/rmm/mr/device/binning_memory_resource.hpp index 7b0d9f48f..46a7e204d 100644 --- a/include/rmm/mr/device/binning_memory_resource.hpp 
+++ b/include/rmm/mr/device/binning_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,8 +27,7 @@ #include #include -namespace rmm { -namespace mr { +namespace rmm::mr { /** * @brief Allocates memory from upstream resources associated with bin sizes. @@ -77,15 +76,16 @@ class binning_memory_resource final : public device_memory_resource { return upstream_resource; }()} { - for (auto i = min_size_exponent; i <= max_size_exponent; i++) + for (auto i = min_size_exponent; i <= max_size_exponent; i++) { add_bin(1 << i); + } } /** * @brief Destroy the binning_memory_resource and free all memory allocated from the upstream * resource. */ - ~binning_memory_resource() = default; + ~binning_memory_resource() override = default; binning_memory_resource() = delete; binning_memory_resource(binning_memory_resource const&) = delete; @@ -99,14 +99,14 @@ class binning_memory_resource final : public device_memory_resource { * * @returns true */ - bool supports_streams() const noexcept override { return true; } + [[nodiscard]] bool supports_streams() const noexcept override { return true; } /** * @brief Query whether the resource supports the get_mem_info API. * * @return bool true if the resource supports get_mem_info, false otherwise. */ - bool supports_get_mem_info() const noexcept override { return false; } + [[nodiscard]] bool supports_get_mem_info() const noexcept override { return false; } /** * @brief Get the upstream memory_resource object. @@ -136,15 +136,13 @@ class binning_memory_resource final : public device_memory_resource { allocation_size = rmm::detail::align_up(allocation_size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); - if (nullptr != bin_resource) + if (nullptr != bin_resource) { resource_bins_.insert({allocation_size, bin_resource}); - else { - // If the bin already exists, do nothing. - if (resource_bins_.count(allocation_size) == 0) { - owned_bin_resources_.push_back( - std::make_unique>(upstream_mr_, allocation_size)); - resource_bins_.insert({allocation_size, owned_bin_resources_.back().get()}); - } + } else if (resource_bins_.count(allocation_size) == 0) { // do nothing if bin already exists + + owned_bin_resources_.push_back( + std::make_unique>(upstream_mr_, allocation_size)); + resource_bins_.insert({allocation_size, owned_bin_resources_.back().get()}); } } @@ -175,7 +173,7 @@ class binning_memory_resource final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - if (bytes <= 0) return nullptr; + if (bytes <= 0) { return nullptr; } return get_resource(bytes)->allocate(bytes, stream); } @@ -189,10 +187,10 @@ class binning_memory_resource final : public device_memory_resource { * value of `bytes` that was passed to the `allocate` call that returned `p`. 
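To make the exponent-based constructor above concrete, a small sketch (not part of the diff); bin bounds and sizes are arbitrary.

#include <rmm/mr/device/binning_memory_resource.hpp>
#include <rmm/mr/device/cuda_memory_resource.hpp>

void binning_example()
{
  rmm::mr::cuda_memory_resource upstream;
  // Creates fixed-size bins of 2^18 .. 2^22 bytes (256 KiB to 4 MiB); requests larger than the
  // largest bin fall through to the upstream resource.
  rmm::mr::binning_memory_resource<rmm::mr::cuda_memory_resource> mr{&upstream, 18, 22};

  void* ptr = mr.allocate(1 << 20);  // served from the 1 MiB bin
  mr.deallocate(ptr, 1 << 20);
}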
* @param stream Stream on which to perform deallocation */ - void do_deallocate(void* p, std::size_t bytes, cuda_stream_view stream) override + void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { auto res = get_resource(bytes); - if (res != nullptr) res->deallocate(p, bytes, stream); + if (res != nullptr) { res->deallocate(ptr, bytes, stream); } } /** @@ -203,7 +201,8 @@ class binning_memory_resource final : public device_memory_resource { * @param stream the stream being executed on * @return std::pair with available and free memory for resource */ - std::pair do_get_mem_info(cuda_stream_view stream) const override + [[nodiscard]] std::pair do_get_mem_info( + cuda_stream_view stream) const override { return std::make_pair(0, 0); } @@ -215,5 +214,4 @@ class binning_memory_resource final : public device_memory_resource { std::map resource_bins_; }; -} // namespace mr -} // namespace rmm +} // namespace rmm::mr From 1811fe3b82f7bbcd65319ae82fa449e4b46f1ba9 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 11:04:14 +1000 Subject: [PATCH 36/72] tidy fixed_size_mr --- .../mr/device/fixed_size_memory_resource.hpp | 61 ++++++++++--------- 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/include/rmm/mr/device/fixed_size_memory_resource.hpp b/include/rmm/mr/device/fixed_size_memory_resource.hpp index fb87691e8..6ff02bcc3 100644 --- a/include/rmm/mr/device/fixed_size_memory_resource.hpp +++ b/include/rmm/mr/device/fixed_size_memory_resource.hpp @@ -33,9 +33,7 @@ #include #include -namespace rmm { - -namespace mr { +namespace rmm::mr { /** * @brief A `device_memory_resource` which allocates memory blocks of a single fixed size. @@ -97,14 +95,14 @@ class fixed_size_memory_resource * * @returns true */ - bool supports_streams() const noexcept override { return true; } + [[nodiscard]] bool supports_streams() const noexcept override { return true; } /** * @brief Query whether the resource supports the get_mem_info API. * * @return bool true if the resource supports get_mem_info, false otherwise. */ - bool supports_get_mem_info() const noexcept override { return false; } + [[nodiscard]] bool supports_get_mem_info() const noexcept override { return false; } /** * @brief Get the upstream memory_resource object. @@ -118,7 +116,7 @@ class fixed_size_memory_resource * * @return std::size_t size in bytes of allocated blocks. */ - std::size_t get_block_size() const noexcept { return block_size_; } + [[nodiscard]] std::size_t get_block_size() const noexcept { return block_size_; } protected: using free_list = detail::fixed_size_free_list; @@ -133,7 +131,7 @@ class fixed_size_memory_resource * @return std::size_t The (fixed) maximum size of a single allocation supported by this memory * resource */ - std::size_t get_maximum_allocation_size() const { return get_block_size(); } + [[nodiscard]] std::size_t get_maximum_allocation_size() const { return get_block_size(); } /** * @brief Allocate a block from upstream to supply the suballocation pool. 
@@ -160,49 +158,53 @@ class fixed_size_memory_resource */ free_list blocks_from_upstream(cuda_stream_view stream) { - void* p = upstream_mr_->allocate(upstream_chunk_size_, stream); - block_type b{p}; - upstream_blocks_.push_back(b); + void* ptr = upstream_mr_->allocate(upstream_chunk_size_, stream); + block_type block{ptr}; + upstream_blocks_.push_back(block); auto num_blocks = upstream_chunk_size_ / block_size_; - auto g = [p, this](int i) { return block_type{static_cast(p) + i * block_size_}; }; - auto first = thrust::make_transform_iterator(thrust::make_counting_iterator(std::size_t{0}), g); + auto block_gen = [ptr, this](int index) { + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + return block_type{static_cast(ptr) + index * block_size_}; + }; + auto first = + thrust::make_transform_iterator(thrust::make_counting_iterator(std::size_t{0}), block_gen); return free_list(first, first + num_blocks); } /** - * @brief Splits block `b` if necessary to return a pointer to memory of `size` bytes. + * @brief Splits block if necessary to return a pointer to memory of `size` bytes. * * If the block is split, the remainder is returned to the pool. * - * @param b The block to allocate from. + * @param block The block to allocate from. * @param size The size in bytes of the requested allocation. * @param stream_event The stream and associated event on which the allocation will be used. * @return A pair comprising the allocated pointer and any unallocated remainder of the input * block. */ - split_block allocate_from_block(block_type const& b, std::size_t size) + split_block allocate_from_block(block_type const& block, std::size_t size) { - return {b, block_type{nullptr}}; + return {block, block_type{nullptr}}; } /** - * @brief Finds, frees and returns the block associated with pointer `p`. + * @brief Finds, frees and returns the block associated with pointer. * - * @param p The pointer to the memory to free. + * @param ptr The pointer to the memory to free. * @param size The size of the memory to free. Must be equal to the original allocation size. * @param stream The stream-event pair for the stream on which the memory was last used. * @return The (now freed) block associated with `p`. The caller is expected to return the block * to the pool. 
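As a concrete illustration of the fixed-block behavior documented in `free_block` below, a hypothetical usage sketch; it assumes the class's existing `(upstream, block_size)` constructor, which this hunk does not show.

#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/fixed_size_memory_resource.hpp>

void fixed_size_example()
{
  rmm::mr::cuda_memory_resource upstream;
  // Every block handed out by this resource is exactly 1 MiB; larger requests are rejected.
  rmm::mr::fixed_size_memory_resource<rmm::mr::cuda_memory_resource> mr{&upstream, 1 << 20};

  void* ptr = mr.allocate(256);  // still consumes one full 1 MiB block
  mr.deallocate(ptr, 256);
}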
*/ - block_type free_block(void* p, std::size_t size) noexcept + block_type free_block(void* ptr, std::size_t size) noexcept { // Deallocating a fixed-size block just inserts it in the free list, which is // handled by the parent class RMM_LOGGING_ASSERT(rmm::detail::align_up(size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT) <= block_size_); - return block_type{p}; + return block_type{ptr}; } /** @@ -213,7 +215,8 @@ class fixed_size_memory_resource * @param stream the stream being executed on * @return std::pair with available and free memory for resource */ - std::pair do_get_mem_info(cuda_stream_view stream) const override + [[nodiscard]] std::pair do_get_mem_info( + cuda_stream_view stream) const override { return std::make_pair(0, 0); } @@ -226,8 +229,9 @@ class fixed_size_memory_resource { lock_guard lock(this->get_mutex()); - for (auto b : upstream_blocks_) - upstream_mr_->deallocate(b.pointer(), upstream_chunk_size_); + for (auto block : upstream_blocks_) { + upstream_mr_->deallocate(block.pointer(), upstream_chunk_size_); + } upstream_blocks_.clear(); } @@ -235,15 +239,14 @@ class fixed_size_memory_resource { lock_guard lock(this->get_mutex()); - std::size_t free, total; - std::tie(free, total) = upstream_mr_->get_mem_info(0); + auto const [free, total] = upstream_mr_->get_mem_info(0); std::cout << "GPU free memory: " << free << " total: " << total << "\n"; std::cout << "upstream_blocks: " << upstream_blocks_.size() << "\n"; std::size_t upstream_total{0}; - for (auto h : upstream_blocks_) { - h.print(); + for (auto blocks : upstream_blocks_) { + blocks.print(); upstream_total += upstream_chunk_size_; } std::cout << "total upstream: " << upstream_total << " B\n"; @@ -265,6 +268,7 @@ class fixed_size_memory_resource : std::make_pair(block_size_, blocks.size() * block_size_); } + private: Upstream* upstream_mr_; // The resource from which to allocate new blocks std::size_t const block_size_; // size of blocks this MR allocates @@ -274,5 +278,4 @@ class fixed_size_memory_resource std::vector upstream_blocks_; }; -} // namespace mr -} // namespace rmm +} // namespace rmm::mr From d23681035934ef8d3815a840546dde00bd0b66e7 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 11:15:51 +1000 Subject: [PATCH 37/72] tidy limiting_resource_adaptor --- .../mr/device/limiting_resource_adaptor.hpp | 79 +++++++++---------- 1 file changed, 36 insertions(+), 43 deletions(-) diff --git a/include/rmm/mr/device/limiting_resource_adaptor.hpp b/include/rmm/mr/device/limiting_resource_adaptor.hpp index 5002962d5..810228715 100644 --- a/include/rmm/mr/device/limiting_resource_adaptor.hpp +++ b/include/rmm/mr/device/limiting_resource_adaptor.hpp @@ -21,8 +21,7 @@ #include -namespace rmm { -namespace mr { +namespace rmm::mr { /** * @brief Resource that uses `Upstream` to allocate memory and limits the total * allocations possible. @@ -47,24 +46,23 @@ class limiting_resource_adaptor final : public device_memory_resource { * @param upstream The resource used for allocating/deallocating device memory * @param allocation_limit Maximum memory allowed for this allocator. 
*/ - limiting_resource_adaptor( - Upstream* upstream, - std::size_t allocation_limit, - std::size_t allocation_alignment = rmm::detail::CUDA_ALLOCATION_ALIGNMENT) + limiting_resource_adaptor(Upstream* upstream, + std::size_t allocation_limit, + std::size_t alignment = rmm::detail::CUDA_ALLOCATION_ALIGNMENT) : allocation_limit_{allocation_limit}, allocated_bytes_(0), - allocation_alignment_(allocation_alignment), + alignment_(alignment), upstream_{upstream} { RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); } limiting_resource_adaptor() = delete; - ~limiting_resource_adaptor() = default; + ~limiting_resource_adaptor() override = default; limiting_resource_adaptor(limiting_resource_adaptor const&) = delete; - limiting_resource_adaptor(limiting_resource_adaptor&&) = default; limiting_resource_adaptor& operator=(limiting_resource_adaptor const&) = delete; - limiting_resource_adaptor& operator=(limiting_resource_adaptor&&) = default; + limiting_resource_adaptor(limiting_resource_adaptor&&) noexcept = default; + limiting_resource_adaptor& operator=(limiting_resource_adaptor&&) noexcept = default; /** * @brief Return pointer to the upstream resource. @@ -79,14 +77,17 @@ class limiting_resource_adaptor final : public device_memory_resource { * @return true The upstream resource supports streams * @return false The upstream resource does not support streams. */ - bool supports_streams() const noexcept override { return upstream_->supports_streams(); } + [[nodiscard]] bool supports_streams() const noexcept override + { + return upstream_->supports_streams(); + } /** * @brief Query whether the resource supports the get_mem_info API. * * @return bool true if the upstream resource supports get_mem_info, false otherwise. */ - bool supports_get_mem_info() const noexcept override + [[nodiscard]] bool supports_get_mem_info() const noexcept override { return upstream_->supports_get_mem_info(); } @@ -100,7 +101,7 @@ class limiting_resource_adaptor final : public device_memory_resource { * @return std::size_t number of bytes that have been allocated through this * allocator. 
*/ - std::size_t get_allocated_bytes() const { return allocated_bytes_; } + [[nodiscard]] std::size_t get_allocated_bytes() const { return allocated_bytes_; } /** * @brief Query the maximum number of bytes that this allocator is allowed @@ -109,7 +110,7 @@ class limiting_resource_adaptor final : public device_memory_resource { * * @return std::size_t max number of bytes allowed for this allocator */ - std::size_t get_allocation_limit() const { return allocation_limit_; } + [[nodiscard]] std::size_t get_allocation_limit() const { return allocation_limit_; } private: /** @@ -127,32 +128,30 @@ class limiting_resource_adaptor final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - void* p = nullptr; + std::size_t proposed_size = rmm::detail::align_up(bytes, alignment_); + RMM_EXPECTS(proposed_size + allocated_bytes_ <= allocation_limit_, + rmm::bad_alloc, + "Exceeded memory limit"); - std::size_t proposed_size = rmm::detail::align_up(bytes, allocation_alignment_); - if (proposed_size + allocated_bytes_ <= allocation_limit_) { - p = upstream_->allocate(bytes, stream); - allocated_bytes_ += proposed_size; - } else { - throw rmm::bad_alloc{"Exceeded memory limit"}; - } + auto* const ptr = upstream_->allocate(bytes, stream); + allocated_bytes_ += proposed_size; - return p; + return ptr; } /** - * @brief Free allocation of size `bytes` pointed to by `p` + * @brief Free allocation of size `bytes` pointed to by `ptr` * * @throws Nothing. * - * @param p Pointer to be deallocated + * @param ptr Pointer to be deallocated * @param bytes Size of the allocation * @param stream Stream on which to perform the deallocation */ - void do_deallocate(void* p, std::size_t bytes, cuda_stream_view stream) override + void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - std::size_t allocated_size = rmm::detail::align_up(bytes, allocation_alignment_); - upstream_->deallocate(p, bytes, stream); + std::size_t allocated_size = rmm::detail::align_up(bytes, alignment_); + upstream_->deallocate(ptr, bytes, stream); allocated_bytes_ -= allocated_size; } @@ -165,18 +164,12 @@ class limiting_resource_adaptor final : public device_memory_resource { * @return true If the two resources are equivalent * @return false If the two resources are not equal */ - bool do_is_equal(device_memory_resource const& other) const noexcept override + [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override { - if (this == &other) - return true; - else { - limiting_resource_adaptor const* cast = - dynamic_cast const*>(&other); - if (cast != nullptr) - return upstream_->is_equal(*cast->get_upstream()); - else - return upstream_->is_equal(other); - } + if (this == &other) { return true; } + auto const* cast = dynamic_cast const*>(&other); + if (cast != nullptr) { return upstream_->is_equal(*cast->get_upstream()); } + return upstream_->is_equal(other); } /** @@ -187,7 +180,8 @@ class limiting_resource_adaptor final : public device_memory_resource { * @param stream Stream on which to get the mem info. 
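A short sketch of the adaptor in use (not part of the diff); the limit and allocation sizes are arbitrary.

#include <cstddef>
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/limiting_resource_adaptor.hpp>

void limiting_example()
{
  rmm::mr::cuda_memory_resource upstream;
  // Cap the adaptor at 1 GiB; an allocation that would push the aligned total past the cap
  // throws rmm::bad_alloc instead of reaching the upstream resource.
  rmm::mr::limiting_resource_adaptor<rmm::mr::cuda_memory_resource> mr{&upstream,
                                                                       std::size_t{1} << 30};

  void* ptr = mr.allocate(1 << 20);
  std::size_t used = mr.get_allocated_bytes();  // 1 MiB, rounded up to the adaptor's alignment
  mr.deallocate(ptr, 1 << 20);
  (void)used;
}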
* @return std::pair contaiing free_size and total_size of memory */ - std::pair do_get_mem_info(cuda_stream_view stream) const override + [[nodiscard]] std::pair do_get_mem_info( + cuda_stream_view stream) const override { return {allocation_limit_ - allocated_bytes_, allocation_limit_}; } @@ -199,7 +193,7 @@ class limiting_resource_adaptor final : public device_memory_resource { std::atomic allocated_bytes_; // todo: should be some way to ask the upstream... - std::size_t allocation_alignment_; + std::size_t alignment_; Upstream* upstream_; ///< The upstream resource used for satisfying ///< allocation requests @@ -220,5 +214,4 @@ limiting_resource_adaptor make_limiting_adaptor(Upstream* upstream, return limiting_resource_adaptor{upstream, allocation_limit}; } -} // namespace mr -} // namespace rmm +} // namespace rmm::mr From 5c21ec4a11a441f6041b7bc644d1e51a5927eb79 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 11:20:35 +1000 Subject: [PATCH 38/72] tidy logging_resource_adaptr --- .../mr/device/logging_resource_adaptor.hpp | 61 +++++++++---------- 1 file changed, 30 insertions(+), 31 deletions(-) diff --git a/include/rmm/mr/device/logging_resource_adaptor.hpp b/include/rmm/mr/device/logging_resource_adaptor.hpp index 1148afa88..0bb707a6c 100644 --- a/include/rmm/mr/device/logging_resource_adaptor.hpp +++ b/include/rmm/mr/device/logging_resource_adaptor.hpp @@ -28,8 +28,7 @@ #include #include -namespace rmm { -namespace mr { +namespace rmm::mr { /** * @brief Resource that uses `Upstream` to allocate memory and logs information * about the requested allocation/deallocations. @@ -113,11 +112,11 @@ class logging_resource_adaptor final : public device_memory_resource { } logging_resource_adaptor() = delete; - ~logging_resource_adaptor() = default; + ~logging_resource_adaptor() override = default; logging_resource_adaptor(logging_resource_adaptor const&) = delete; - logging_resource_adaptor(logging_resource_adaptor&&) = default; logging_resource_adaptor& operator=(logging_resource_adaptor const&) = delete; - logging_resource_adaptor& operator=(logging_resource_adaptor&&) = default; + logging_resource_adaptor(logging_resource_adaptor&&) noexcept = default; + logging_resource_adaptor& operator=(logging_resource_adaptor&&) noexcept = default; /** * @brief Return pointer to the upstream resource. @@ -132,14 +131,17 @@ class logging_resource_adaptor final : public device_memory_resource { * @return true The upstream resource supports streams * @return false The upstream resource does not support streams. */ - bool supports_streams() const noexcept override { return upstream_->supports_streams(); } + [[nodiscard]] bool supports_streams() const noexcept override + { + return upstream_->supports_streams(); + } /** * @brief Query whether the resource supports the get_mem_info API. * * @return bool true if the upstream resource supports get_mem_info, false otherwise. 
*/ - bool supports_get_mem_info() const noexcept override + [[nodiscard]] bool supports_get_mem_info() const noexcept override { return upstream_->supports_get_mem_info(); } @@ -154,7 +156,10 @@ class logging_resource_adaptor final : public device_memory_resource { * * @return CSV formatted header string of column names */ - std::string header() const { return std::string{"Thread,Time,Action,Pointer,Size,Stream"}; } + [[nodiscard]] std::string header() const + { + return std::string{"Thread,Time,Action,Pointer,Size,Stream"}; + } private: // make_logging_adaptor needs access to private get_default_filename @@ -172,7 +177,7 @@ class logging_resource_adaptor final : public device_memory_resource { */ static std::string get_default_filename() { - auto filename = std::getenv("RMM_LOG_FILE"); + auto* filename = std::getenv("RMM_LOG_FILE"); RMM_EXPECTS(filename != nullptr, "RMM logging requested without an explicit file name, but RMM_LOG_FILE is unset"); return std::string{filename}; @@ -210,13 +215,13 @@ class logging_resource_adaptor final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - auto const p = upstream_->allocate(bytes, stream); - logger_->info("allocate,{},{},{}", p, bytes, fmt::ptr(stream.value())); - return p; + auto const ptr = upstream_->allocate(bytes, stream); + logger_->info("allocate,{},{},{}", ptr, bytes, fmt::ptr(stream.value())); + return ptr; } /** - * @brief Free allocation of size `bytes` pointed to by `p` and log the + * @brief Free allocation of size `bytes` pointed to by `ptr` and log the * deallocation. * * Every invocation of `logging_resource_adaptor::do_deallocate` will write @@ -227,14 +232,14 @@ class logging_resource_adaptor final : public device_memory_resource { * * @throws Nothing. * - * @param p Pointer to be deallocated + * @param ptr Pointer to be deallocated * @param bytes Size of the allocation * @param stream Stream on which to perform the deallocation */ - void do_deallocate(void* p, std::size_t bytes, cuda_stream_view stream) override + void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - logger_->info("free,{},{},{}", p, bytes, fmt::ptr(stream.value())); - upstream_->deallocate(p, bytes, stream); + logger_->info("free,{},{},{}", ptr, bytes, fmt::ptr(stream.value())); + upstream_->deallocate(ptr, bytes, stream); } /** @@ -246,18 +251,12 @@ class logging_resource_adaptor final : public device_memory_resource { * @return true If the two resources are equivalent * @return false If the two resources are not equal */ - bool do_is_equal(device_memory_resource const& other) const noexcept override + [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override { - if (this == &other) - return true; - else { - logging_resource_adaptor const* cast = - dynamic_cast const*>(&other); - if (cast != nullptr) - return upstream_->is_equal(*cast->get_upstream()); - else - return upstream_->is_equal(other); - } + if (this == &other) { return true; } + auto const* cast = dynamic_cast const*>(&other); + if (cast != nullptr) { return upstream_->is_equal(*cast->get_upstream()); } + return upstream_->is_equal(other); } /** @@ -268,7 +267,8 @@ class logging_resource_adaptor final : public device_memory_resource { * @param stream Stream on which to get the mem info. 
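A hypothetical sketch of wiring up the adaptor (not part of the diff); it assumes the existing filename overload of `make_logging_adaptor`, and the file name is an arbitrary example.

#include <string>
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/logging_resource_adaptor.hpp>

void logging_example()
{
  rmm::mr::cuda_memory_resource upstream;
  // Every allocate/free is appended as a CSV row: Thread,Time,Action,Pointer,Size,Stream.
  auto log_mr = rmm::mr::make_logging_adaptor(&upstream, std::string{"rmm_log.csv"});

  void* ptr = log_mr.allocate(2048);
  log_mr.deallocate(ptr, 2048);
}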
* @return std::pair contaiing free_size and total_size of memory */ - std::pair do_get_mem_info(cuda_stream_view stream) const override + [[nodiscard]] std::pair do_get_mem_info( + cuda_stream_view stream) const override { return upstream_->get_mem_info(stream); } @@ -313,5 +313,4 @@ logging_resource_adaptor make_logging_adaptor(Upstream* upstream, return logging_resource_adaptor{upstream, stream, auto_flush}; } -} // namespace mr -} // namespace rmm +} // namespace rmm::mr From afc9e7642252f7d162eee6f9ac0e4e612906cd67 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 11:22:55 +1000 Subject: [PATCH 39/72] tidy managed_mr --- .../rmm/mr/device/managed_memory_resource.hpp | 35 +++++++++---------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/include/rmm/mr/device/managed_memory_resource.hpp b/include/rmm/mr/device/managed_memory_resource.hpp index ebce40bf5..3ed44a528 100644 --- a/include/rmm/mr/device/managed_memory_resource.hpp +++ b/include/rmm/mr/device/managed_memory_resource.hpp @@ -22,8 +22,7 @@ #include -namespace rmm { -namespace mr { +namespace rmm::mr { /** * @brief `device_memory_resource` derived class that uses * cudaMallocManaged/Free for allocation/deallocation. @@ -31,7 +30,7 @@ namespace mr { class managed_memory_resource final : public device_memory_resource { public: managed_memory_resource() = default; - ~managed_memory_resource() = default; + ~managed_memory_resource() override = default; managed_memory_resource(managed_memory_resource const&) = default; managed_memory_resource(managed_memory_resource&&) = default; managed_memory_resource& operator=(managed_memory_resource const&) = default; @@ -43,18 +42,18 @@ class managed_memory_resource final : public device_memory_resource { * * @returns false */ - bool supports_streams() const noexcept override { return false; } + [[nodiscard]] bool supports_streams() const noexcept override { return false; } /** * @brief Query whether the resource supports the get_mem_info API. * * @return bool true if the resource supports get_mem_info, false otherwise. */ - bool supports_get_mem_info() const noexcept override { return true; } + [[nodiscard]] bool supports_get_mem_info() const noexcept override { return true; } private: /** - * @brief Allocates memory of size at least \p bytes using cudaMallocManaged. + * @brief Allocates memory of size at least `bytes` using cudaMallocManaged. * * The returned pointer has at least 256B alignment. * @@ -65,29 +64,29 @@ class managed_memory_resource final : public device_memory_resource { * @param bytes The size, in bytes, of the allocation * @return void* Pointer to the newly allocated memory */ - void* do_allocate(std::size_t bytes, cuda_stream_view) override + void* do_allocate(std::size_t bytes, cuda_stream_view /*stream*/) override { // FIXME: Unlike cudaMalloc, cudaMallocManaged will throw an error for 0 // size allocations. if (bytes == 0) { return nullptr; } - void* p{nullptr}; - RMM_CUDA_TRY(cudaMallocManaged(&p, bytes), rmm::bad_alloc); - return p; + void* ptr{nullptr}; + RMM_CUDA_TRY(cudaMallocManaged(&ptr, bytes), rmm::bad_alloc); + return ptr; } /** - * @brief Deallocate memory pointed to by \p p. + * @brief Deallocate memory pointed to by `ptr`. * * @note Stream argument is ignored. * * @throws Nothing. 
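The managed resource needs no configuration; a minimal sketch with arbitrary sizes.

#include <rmm/mr/device/managed_memory_resource.hpp>

void managed_example()
{
  rmm::mr::managed_memory_resource mr;
  // cudaMallocManaged memory is addressable from both host and device code.
  void* ptr = mr.allocate(512);
  mr.deallocate(ptr, 512);
}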
* - * @param p Pointer to be deallocated + * @param ptr Pointer to be deallocated */ - void do_deallocate(void* p, std::size_t, cuda_stream_view) override + void do_deallocate(void* ptr, std::size_t /*bytes*/, cuda_stream_view /*stream*/) override { - RMM_ASSERT_CUDA_SUCCESS(cudaFree(p)); + RMM_ASSERT_CUDA_SUCCESS(cudaFree(ptr)); } /** @@ -102,7 +101,7 @@ class managed_memory_resource final : public device_memory_resource { * @return true If the two resources are equivalent * @return false If the two resources are not equal */ - bool do_is_equal(device_memory_resource const& other) const noexcept override + [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override { return dynamic_cast(&other) != nullptr; } @@ -115,7 +114,8 @@ class managed_memory_resource final : public device_memory_resource { * @param stream to execute on * @return std::pair contaiing free_size and total_size of memory */ - std::pair do_get_mem_info(cuda_stream_view stream) const override + [[nodiscard]] std::pair do_get_mem_info( + cuda_stream_view stream) const override { std::size_t free_size{}; std::size_t total_size{}; @@ -124,5 +124,4 @@ class managed_memory_resource final : public device_memory_resource { } }; -} // namespace mr -} // namespace rmm +} // namespace rmm::mr From 86fcffd721298d43ae9abd5ea9733290ed0d289e Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 11:36:37 +1000 Subject: [PATCH 40/72] tidy owning_wrapper and per_device_resource --- include/rmm/mr/device/owning_wrapper.hpp | 54 ++++++++++--------- include/rmm/mr/device/per_device_resource.hpp | 24 ++++----- 2 files changed, 39 insertions(+), 39 deletions(-) diff --git a/include/rmm/mr/device/owning_wrapper.hpp b/include/rmm/mr/device/owning_wrapper.hpp index 6abe950b0..7bcc3b2fa 100644 --- a/include/rmm/mr/device/owning_wrapper.hpp +++ b/include/rmm/mr/device/owning_wrapper.hpp @@ -22,21 +22,23 @@ #include #include -namespace rmm { -namespace mr { +namespace rmm::mr { namespace detail { /// Converts a tuple into a parameter pack template -auto make_resource_impl(UpstreamTuple const& t, std::index_sequence, Args&&... args) +auto make_resource_impl(UpstreamTuple const& upstreams, + std::index_sequence /*indices*/, + Args&&... args) { - return std::make_unique(std::get(t).get()..., std::forward(args)...); + return std::make_unique(std::get(upstreams).get()..., + std::forward(args)...); } template -auto make_resource(std::tuple...> const& t, Args&&... args) +auto make_resource(std::tuple...> const& upstreams, Args&&... args) { return make_resource_impl( - t, std::index_sequence_for{}, std::forward(args)...); + upstreams, std::index_sequence_for{}, std::forward(args)...); } } // namespace detail @@ -128,14 +130,20 @@ class owning_wrapper : public device_memory_resource { /** * @copydoc rmm::mr::device_memory_resource::supports_streams() */ - bool supports_streams() const noexcept override { return wrapped().supports_streams(); } + [[nodiscard]] bool supports_streams() const noexcept override + { + return wrapped().supports_streams(); + } /** * @brief Query whether the resource supports the get_mem_info API. * * @return true if the upstream resource supports get_mem_info, false otherwise. 
*/ - bool supports_get_mem_info() const noexcept override { return wrapped().supports_get_mem_info(); } + [[nodiscard]] bool supports_get_mem_info() const noexcept override + { + return wrapped().supports_get_mem_info(); + } private: /** @@ -156,17 +164,17 @@ class owning_wrapper : public device_memory_resource { /** * @brief Returns an allocation to the wrapped resource. * - * `p` must have been returned from a prior call to `do_allocate(bytes)`. + * `ptr` must have been returned from a prior call to `do_allocate(bytes)`. * * @throws Nothing. * - * @param p Pointer to the allocation to free. + * @param ptr Pointer to the allocation to free. * @param bytes Size of the allocation * @param stream Stream on which to deallocate the memory */ - void do_deallocate(void* p, std::size_t bytes, cuda_stream_view stream) override + void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - wrapped().deallocate(p, bytes, stream); + wrapped().deallocate(ptr, bytes, stream); } /** @@ -180,18 +188,12 @@ class owning_wrapper : public device_memory_resource { * @return true If the two resources are equal * @return false If the two resources are not equal */ - bool do_is_equal(device_memory_resource const& other) const noexcept override + [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override { - if (this == &other) { - return true; - } else { - auto casted = dynamic_cast const*>(&other); - if (nullptr != casted) { - return wrapped().is_equal(casted->wrapped()); - } else { - return wrapped().is_equal(other); - } - } + if (this == &other) { return true; } + auto casted = dynamic_cast const*>(&other); + if (nullptr != casted) { return wrapped().is_equal(casted->wrapped()); } + return wrapped().is_equal(other); } /** @@ -202,7 +204,8 @@ class owning_wrapper : public device_memory_resource { * @param stream Stream on which to get the mem info. * @return std::pair contaiing free_size and total_size of memory */ - std::pair do_get_mem_info(cuda_stream_view stream) const override + [[nodiscard]] std::pair do_get_mem_info( + cuda_stream_view stream) const override { return wrapped().get_mem_info(stream); } @@ -272,5 +275,4 @@ auto make_owning_wrapper(std::shared_ptr upstream, Args&&... args) std::forward(args)...); } -} // namespace mr -} // namespace rmm +} // namespace rmm::mr diff --git a/include/rmm/mr/device/per_device_resource.hpp b/include/rmm/mr/device/per_device_resource.hpp index 0f10b7f53..4ddbd874a 100644 --- a/include/rmm/mr/device/per_device_resource.hpp +++ b/include/rmm/mr/device/per_device_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -71,9 +71,7 @@ * @endcode */ -namespace rmm { - -namespace mr { +namespace rmm::mr { namespace detail { @@ -126,13 +124,14 @@ RMM_EXPORT inline auto& get_map() * @param id The id of the target device * @return Pointer to the current `device_memory_resource` for device `id` */ -inline device_memory_resource* get_per_device_resource(cuda_device_id id) +inline device_memory_resource* get_per_device_resource(cuda_device_id device_id) { std::lock_guard lock{detail::map_lock()}; auto& map = detail::get_map(); // If a resource was never set for `id`, set to the initial resource - auto const found = map.find(id.value()); - return (found == map.end()) ? 
(map[id.value()] = detail::initial_resource()) : found->second; + auto const found = map.find(device_id.value()); + return (found == map.end()) ? (map[device_id.value()] = detail::initial_resource()) + : found->second; } /** @@ -162,15 +161,15 @@ inline device_memory_resource* get_per_device_resource(cuda_device_id id) * for `id` * @return Pointer to the previous memory resource for `id` */ -inline device_memory_resource* set_per_device_resource(cuda_device_id id, +inline device_memory_resource* set_per_device_resource(cuda_device_id device_id, device_memory_resource* new_mr) { std::lock_guard lock{detail::map_lock()}; auto& map = detail::get_map(); - auto const old_itr = map.find(id.value()); + auto const old_itr = map.find(device_id.value()); // If a resource didn't previously exist for `id`, return pointer to initial_resource - auto old_mr = (old_itr == map.end()) ? detail::initial_resource() : old_itr->second; - map[id.value()] = (new_mr == nullptr) ? detail::initial_resource() : new_mr; + auto* old_mr = (old_itr == map.end()) ? detail::initial_resource() : old_itr->second; + map[device_id.value()] = (new_mr == nullptr) ? detail::initial_resource() : new_mr; return old_mr; } @@ -228,5 +227,4 @@ inline device_memory_resource* set_current_device_resource(device_memory_resourc { return set_per_device_resource(rmm::detail::current_device(), new_mr); } -} // namespace mr -} // namespace rmm +} // namespace rmm::mr From b808afd64e2998f170083ab8e8d73de9b2613058 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 11:38:35 +1000 Subject: [PATCH 41/72] tidy polymorphic_allocator --- .../rmm/mr/device/polymorphic_allocator.hpp | 76 +++++++------------ 1 file changed, 28 insertions(+), 48 deletions(-) diff --git a/include/rmm/mr/device/polymorphic_allocator.hpp b/include/rmm/mr/device/polymorphic_allocator.hpp index 4f97cf568..643d1b6fb 100644 --- a/include/rmm/mr/device/polymorphic_allocator.hpp +++ b/include/rmm/mr/device/polymorphic_allocator.hpp @@ -24,8 +24,7 @@ #include #include -namespace rmm { -namespace mr { +namespace rmm::mr { /** * @brief A stream ordered Allocator using a `rmm::mr::device_memory_resource` to satisfy @@ -45,7 +44,6 @@ template class polymorphic_allocator { public: using value_type = T; - /** * @brief Construct a `polymorphic_allocator` using the return value of * `rmm::mr::get_current_device_resource()` as the underlying memory resource. @@ -53,15 +51,6 @@ class polymorphic_allocator { */ polymorphic_allocator() = default; - /** - * @brief Construct a `polymorphic_allocator` using `other.resource()` as the underlying memory - * resource. - * - * @param other The `polymorphic_resource` whose `resource()` will be used as the underlying - * resource of the new `polymorphic_allocator`. - */ - polymorphic_allocator(polymorphic_allocator const& other) = default; - /** * @brief Construct a `polymorphic_allocator` using the provided memory resource. * @@ -84,30 +73,30 @@ class polymorphic_allocator { } /** - * @brief Allocates storage for `n` objects of type `T` using the underlying memory resource. + * @brief Allocates storage for `num` objects of type `T` using the underlying memory resource. 
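A sketch of the per-device API above in use, assuming a pool over the stock CUDA resource; the resources are static so they outlive their registration as the device default.

#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>

void per_device_example()
{
  static rmm::mr::cuda_memory_resource upstream;
  static rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> pool{&upstream};

  // Make the pool the default resource for the active device; keep the previous one to restore.
  auto* old_mr = rmm::mr::set_current_device_resource(&pool);

  void* ptr = rmm::mr::get_current_device_resource()->allocate(1024);
  rmm::mr::get_current_device_resource()->deallocate(ptr, 1024);

  rmm::mr::set_current_device_resource(old_mr);
}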
* - * @param n The number of objects to allocate storage for + * @param num The number of objects to allocate storage for * @param stream The stream on which to perform the allocation * @return Pointer to the allocated storage */ - value_type* allocate(std::size_t n, cuda_stream_view stream) + value_type* allocate(std::size_t num, cuda_stream_view stream) { - return static_cast(resource()->allocate(n * sizeof(T), stream)); + return static_cast(resource()->allocate(num * sizeof(T), stream)); } /** - * @brief Deallocates storage pointed to by `p`. + * @brief Deallocates storage pointed to by `ptr`. * - * `p` must have been allocated from a `rmm::mr::device_memory_resource` `r` that compares equal + * `ptr` must have been allocated from a `rmm::mr::device_memory_resource` `r` that compares equal * to `*resource()` using `r.allocate(n * sizeof(T))`. * - * @param p Pointer to memory to deallocate - * @param n Number of objects originally allocated + * @param ptr Pointer to memory to deallocate + * @param num Number of objects originally allocated * @param stream Stream on which to perform the deallocation */ - void deallocate(value_type* p, std::size_t n, cuda_stream_view stream) + void deallocate(value_type* ptr, std::size_t num, cuda_stream_view stream) { - resource()->deallocate(p, n * sizeof(T), stream); + resource()->deallocate(ptr, num * sizeof(T), stream); } /** @@ -115,7 +104,7 @@ class polymorphic_allocator { * * @return Pointer to the underlying resource. */ - device_memory_resource* resource() const noexcept { return mr_; } + [[nodiscard]] device_memory_resource* resource() const noexcept { return mr_; } private: device_memory_resource* mr_{ @@ -169,22 +158,14 @@ class stream_allocator_adaptor { * @note: The `stream` must not be destroyed before the `stream_allocator_adaptor`, otherwise * behavior is undefined. * - * @param a The stream ordered allocator to use as the underlying allocator + * @param allocator The stream ordered allocator to use as the underlying allocator * @param stream The stream used with the underlying allocator */ - stream_allocator_adaptor(Allocator const& a, cuda_stream_view stream) : alloc_{a}, stream_{stream} + stream_allocator_adaptor(Allocator const& allocator, cuda_stream_view stream) + : alloc_{allocator}, stream_{stream} { } - /** - * @brief Construct a `stream_allocator_adaptor` using `other.underlying_allocator()` and - * `other.stream()` as the underlying allocator and stream. - * - * @param other The other `stream_allocator_adaptor` whose underlying allocator and stream will be - * copied - */ - stream_allocator_adaptor(stream_allocator_adaptor const& other) = default; - /** * @brief Construct a `stream_allocator_adaptor` using `other.underlying_allocator()` and * `other.stream()` as the underlying allocator and stream. @@ -211,30 +192,30 @@ class stream_allocator_adaptor { }; /** - * @brief Allocates storage for `n` objects of type `T` using the underlying allocator on + * @brief Allocates storage for `num` objects of type `T` using the underlying allocator on * `stream()`. * - * @param n The number of objects to allocate storage for + * @param num The number of objects to allocate storage for * @return Pointer to the allocated storage */ - value_type* allocate(std::size_t n) { return alloc_.allocate(n, stream()); } + value_type* allocate(std::size_t num) { return alloc_.allocate(num, stream()); } /** - * @brief Deallocates storage pointed to by `p` using the underlying allocator on `stream()`. 
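A brief sketch tying the two classes above together (not part of the diff); the element counts and the stream are arbitrary.

#include <rmm/cuda_stream.hpp>
#include <rmm/mr/device/polymorphic_allocator.hpp>

void polymorphic_example()
{
  rmm::cuda_stream stream;

  rmm::mr::polymorphic_allocator<int> alloc;  // defaults to get_current_device_resource()
  int* ptr = alloc.allocate(100, stream.view());
  alloc.deallocate(ptr, 100, stream.view());

  // Bind the stream once so the adaptor satisfies the classic stream-less Allocator interface.
  auto adapted = rmm::mr::make_stream_allocator_adaptor(alloc, stream.view());
  int* ptr2 = adapted.allocate(100);
  adapted.deallocate(ptr2, 100);
}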
+ * @brief Deallocates storage pointed to by `ptr` using the underlying allocator on `stream()`. * - * `p` must have been allocated from by an allocator `a` that compares equal to + * `ptr` must have been allocated from by an allocator `a` that compares equal to * `underlying_allocator()` using `a.allocate(n)`. * - * @param p Pointer to memory to deallocate - * @param n Number of objects originally allocated + * @param ptr Pointer to memory to deallocate + * @param num Number of objects originally allocated */ - void deallocate(value_type* p, std::size_t n) { alloc_.deallocate(p, n, stream()); } + void deallocate(value_type* ptr, std::size_t num) { alloc_.deallocate(ptr, num, stream()); } /** * @brief Returns the underlying stream on which calls to the underlying allocator are made. * */ - cuda_stream_view stream() const noexcept { return stream_; } + [[nodiscard]] cuda_stream_view stream() const noexcept { return stream_; } /** * @brief Returns the underlying stream-ordered allocator @@ -266,14 +247,13 @@ bool operator!=(stream_allocator_adaptor const& lhs, stream_allocator_adaptor * @tparam Allocator Type of the stream-ordered allocator * @param allocator The allocator to use as the underlying allocator of the * `stream_allocator_adaptor` - * @param s The stream on which the `stream_allocator_adaptor` will perform (de)allocations + * @param stream The stream on which the `stream_allocator_adaptor` will perform (de)allocations * @return A `stream_allocator_adaptor` wrapping `allocator` and `s` */ template -auto make_stream_allocator_adaptor(Allocator const& allocator, cuda_stream_view s) +auto make_stream_allocator_adaptor(Allocator const& allocator, cuda_stream_view stream) { - return stream_allocator_adaptor{allocator, s}; + return stream_allocator_adaptor{allocator, stream}; } -} // namespace mr -} // namespace rmm +} // namespace rmm::mr From 9543e3fd720246c5e3901ee97af8174430fa6a95 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 11:53:31 +1000 Subject: [PATCH 42/72] tidy pool_mr --- .../rmm/mr/device/pool_memory_resource.hpp | 118 +++++++++--------- 1 file changed, 59 insertions(+), 59 deletions(-) diff --git a/include/rmm/mr/device/pool_memory_resource.hpp b/include/rmm/mr/device/pool_memory_resource.hpp index 7a2a5b9c7..e446a9332 100644 --- a/include/rmm/mr/device/pool_memory_resource.hpp +++ b/include/rmm/mr/device/pool_memory_resource.hpp @@ -41,8 +41,7 @@ #include #include -namespace rmm { -namespace mr { +namespace rmm::mr { /** * @brief A coalescing best-fit suballocator which uses a pool of memory allocated from @@ -114,14 +113,14 @@ class pool_memory_resource final * * @returns bool true. */ - bool supports_streams() const noexcept override { return true; } + [[nodiscard]] bool supports_streams() const noexcept override { return true; } /** * @brief Query whether the resource supports the get_mem_info API. * * @return bool false */ - bool supports_get_mem_info() const noexcept override { return false; } + [[nodiscard]] bool supports_get_mem_info() const noexcept override { return false; } /** * @brief Get the upstream memory_resource object. 
@@ -145,7 +144,7 @@ class pool_memory_resource final * * @return std::size_t The maximum size of a single allocation supported by this memory resource */ - std::size_t get_maximum_allocation_size() const + [[nodiscard]] std::size_t get_maximum_allocation_size() const { return std::numeric_limits::max(); } @@ -168,12 +167,14 @@ class pool_memory_resource final block_type try_to_expand(std::size_t try_size, std::size_t min_size, cuda_stream_view stream) { while (try_size >= min_size) { - auto b = block_from_upstream(try_size, stream); - if (b.has_value()) { - current_pool_size_ += b.value().size(); - return b.value(); + auto block = block_from_upstream(try_size, stream); + if (block.has_value()) { + current_pool_size_ += block.value().size(); + return block.value(); + } + if (try_size == min_size) { + break; // only try `size` once } - if (try_size == min_size) break; // only try `size` once try_size = std::max(min_size, try_size / 2); } RMM_LOG_ERROR("[A][Stream {}][Upstream {}B][FAILURE maximum pool size exceeded]", @@ -199,15 +200,13 @@ class pool_memory_resource final { auto const try_size = [&]() { if (not initial_size.has_value()) { - std::size_t free{}, total{}; - std::tie(free, total) = (get_upstream()->supports_get_mem_info()) - ? get_upstream()->get_mem_info(cuda_stream_legacy) - : rmm::detail::available_device_memory(); + auto const [free, total] = (get_upstream()->supports_get_mem_info()) + ? get_upstream()->get_mem_info(cuda_stream_legacy) + : rmm::detail::available_device_memory(); return rmm::detail::align_up(std::min(free, total / 2), rmm::detail::CUDA_ALLOCATION_ALIGNMENT); - } else { - return initial_size.value(); } + return initial_size.value(); }(); current_pool_size_ = 0; // try_to_expand will set this if it succeeds @@ -217,8 +216,8 @@ class pool_memory_resource final "Initial pool size exceeds the maximum pool size!"); if (try_size > 0) { - auto const b = try_to_expand(try_size, try_size, cuda_stream_legacy); - this->insert_block(b, cuda_stream_legacy); + auto const block = try_to_expand(try_size, try_size, cuda_stream_legacy); + this->insert_block(block, cuda_stream_legacy); } } @@ -252,7 +251,7 @@ class pool_memory_resource final * @param size The size of the minimum allocation immediately needed * @return std::size_t The computed size to grow the pool. */ - std::size_t size_to_grow(std::size_t size) const + [[nodiscard]] std::size_t size_to_grow(std::size_t size) const { if (maximum_pool_size_.has_value()) { auto const unaligned_remaining = maximum_pool_size_.value() - pool_size(); @@ -260,8 +259,8 @@ class pool_memory_resource final rmm::detail::align_up(unaligned_remaining, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); auto const aligned_size = rmm::detail::align_up(size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); return (aligned_size <= remaining) ? 
std::max(aligned_size, remaining / 2) : 0; - } else - return std::max(size, pool_size()); + } + return std::max(size, pool_size()); }; /** @@ -275,64 +274,66 @@ class pool_memory_resource final { RMM_LOG_DEBUG("[A][Stream {}][Upstream {}B]", fmt::ptr(stream.value()), size); - if (size == 0) return {}; + if (size == 0) { return {}; } try { - void* p = upstream_mr_->allocate(size, stream); + void* ptr = upstream_mr_->allocate(size, stream); return thrust::optional{ - *upstream_blocks_.emplace(reinterpret_cast(p), size, true).first}; + *upstream_blocks_.emplace(static_cast(ptr), size, true).first}; } catch (std::exception const& e) { return thrust::nullopt; } } /** - * @brief Splits block `b` if necessary to return a pointer to memory of `size` bytes. + * @brief Splits `block` if necessary to return a pointer to memory of `size` bytes. * * If the block is split, the remainder is returned to the pool. * - * @param b The block to allocate from. + * @param block The block to allocate from. * @param size The size in bytes of the requested allocation. * @param stream_event The stream and associated event on which the allocation will be used. * @return A pair comprising the allocated pointer and any unallocated remainder of the input * block. */ - split_block allocate_from_block(block_type const& b, std::size_t size) + split_block allocate_from_block(block_type const& block, std::size_t size) { - block_type const alloc{b.pointer(), size, b.is_head()}; + block_type const alloc{block.pointer(), size, block.is_head()}; #ifdef RMM_POOL_TRACK_ALLOCATIONS allocated_blocks_.insert(alloc); #endif - auto rest = - (b.size() > size) ? block_type{b.pointer() + size, b.size() - size, false} : block_type{}; + auto rest = (block.size() > size) + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + ? block_type{block.pointer() + size, block.size() - size, false} + : block_type{}; return {alloc, rest}; } /** - * @brief Finds, frees and returns the block associated with pointer `p`. + * @brief Finds, frees and returns the block associated with pointer `ptr`. * - * @param p The pointer to the memory to free. + * @param ptr The pointer to the memory to free. * @param size The size of the memory to free. Must be equal to the original allocation size. * @param stream The stream-event pair for the stream on which the memory was last used. * @return The (now freed) block associated with `p`. The caller is expected to return the block * to the pool. 
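For reference, a hypothetical construction of the pool (not shown in these hunks); it assumes the existing constructor taking an optional initial size, here 1 GiB.

#include <cstddef>
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>

void pool_example()
{
  rmm::mr::cuda_memory_resource upstream;
  // Grab 1 GiB up front and suballocate from it; the pool grows on demand, halving the
  // requested expansion on failure as in try_to_expand above.
  rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> pool{&upstream,
                                                                    std::size_t{1} << 30};

  void* ptr = pool.allocate(1 << 20);
  pool.deallocate(ptr, 1 << 20);
}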
*/ - block_type free_block(void* p, std::size_t size) noexcept + block_type free_block(void* ptr, std::size_t size) noexcept { #ifdef RMM_POOL_TRACK_ALLOCATIONS - if (p == nullptr) return block_type{}; - auto const i = allocated_blocks_.find(static_cast(p)); - RMM_LOGGING_ASSERT(i != allocated_blocks_.end()); + if (ptr == nullptr) return block_type{}; + auto const iter = allocated_blocks_.find(static_cast(ptr)); + RMM_LOGGING_ASSERT(iter != allocated_blocks_.end()); - auto block = *i; + auto block = *iter; RMM_LOGGING_ASSERT(block.size() == rmm::detail::align_up(size, allocation_alignment)); - allocated_blocks_.erase(i); + allocated_blocks_.erase(iter); return block; #else - auto const i = upstream_blocks_.find(static_cast(p)); - return block_type{static_cast(p), size, (i != upstream_blocks_.end())}; + auto const iter = upstream_blocks_.find(static_cast(ptr)); + return block_type{static_cast(ptr), size, (iter != upstream_blocks_.end())}; #endif } @@ -343,7 +344,7 @@ class pool_memory_resource final * * @return std::size_t The total size of the currently allocated pool. */ - std::size_t pool_size() const noexcept { return current_pool_size_; } + [[nodiscard]] std::size_t pool_size() const noexcept { return current_pool_size_; } /** * @brief Free all memory allocated from the upstream memory_resource. @@ -353,8 +354,9 @@ class pool_memory_resource final { lock_guard lock(this->get_mutex()); - for (auto b : upstream_blocks_) - upstream_mr_->deallocate(b.pointer(), b.size()); + for (auto block : upstream_blocks_) { + upstream_mr_->deallocate(block.pointer(), block.size()); + } upstream_blocks_.clear(); #ifdef RMM_POOL_TRACK_ALLOCATIONS allocated_blocks_.clear(); @@ -373,23 +375,22 @@ class pool_memory_resource final { lock_guard lock(this->get_mutex()); - std::size_t free, total; - std::tie(free, total) = upstream_mr_->get_mem_info(0); + auto const [free, total] = upstream_mr_->get_mem_info(0); std::cout << "GPU free memory: " << free << " total: " << total << "\n"; std::cout << "upstream_blocks: " << upstream_blocks_.size() << "\n"; std::size_t upstream_total{0}; - for (auto h : upstream_blocks_) { - h.print(); - upstream_total += h.size(); + for (auto blocks : upstream_blocks_) { + blocks.print(); + upstream_total += blocks.size(); } std::cout << "total upstream: " << upstream_total << " B\n"; #ifdef RMM_POOL_TRACK_ALLOCATIONS std::cout << "allocated_blocks: " << allocated_blocks_.size() << "\n"; - for (auto b : allocated_blocks_) - b.print(); + for (auto block : allocated_blocks_) + block.print(); #endif this->print_free_blocks(); @@ -407,9 +408,9 @@ class pool_memory_resource final { std::size_t largest{}; std::size_t total{}; - std::for_each(blocks.cbegin(), blocks.cend(), [&largest, &total](auto const& b) { - total += b.size(); - largest = std::max(largest, b.size()); + std::for_each(blocks.cbegin(), blocks.cend(), [&largest, &total](auto const& block) { + total += block.size(); + largest = std::max(largest, block.size()); }); return {largest, total}; } @@ -422,14 +423,14 @@ class pool_memory_resource final * @param stream to execute on * @return std::pair contaiing free_size and total_size of memory */ - std::pair do_get_mem_info(cuda_stream_view stream) const override + [[nodiscard]] std::pair do_get_mem_info( + cuda_stream_view stream) const override { - std::size_t free_size{}; - std::size_t total_size{}; // TODO implement this - return std::make_pair(free_size, total_size); + return {0, 0}; } + private: Upstream* upstream_mr_; // The "heap" to allocate the pool from std::size_t 
current_pool_size_{}; thrust::optional maximum_pool_size_{}; @@ -442,5 +443,4 @@ class pool_memory_resource final std::set> upstream_blocks_; }; // namespace mr -} // namespace mr -} // namespace rmm +} // namespace rmm::mr From e8e1eae80f7e77f96dd4d1552d13fb7e0265d430 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 11:56:22 +1000 Subject: [PATCH 43/72] tidy statistics_resource_adaptor --- .../mr/device/statistics_resource_adaptor.hpp | 45 +++++++++---------- 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/include/rmm/mr/device/statistics_resource_adaptor.hpp b/include/rmm/mr/device/statistics_resource_adaptor.hpp index bcc0bf10b..298ac8bc1 100644 --- a/include/rmm/mr/device/statistics_resource_adaptor.hpp +++ b/include/rmm/mr/device/statistics_resource_adaptor.hpp @@ -21,8 +21,7 @@ #include #include -namespace rmm { -namespace mr { +namespace rmm::mr { /** * @brief Resource that uses `Upstream` to allocate memory and tracks statistics * on memory allocations. @@ -52,17 +51,17 @@ class statistics_resource_adaptor final : public device_memory_resource { int64_t peak{0}; // Max value of `value` int64_t total{0}; // Sum of all added values - counter& operator+=(int64_t x) + counter& operator+=(int64_t val) { - value += x; - total += x; + value += val; + total += val; peak = std::max(value, peak); return *this; } - counter& operator-=(int64_t x) + counter& operator-=(int64_t val) { - value -= x; + value -= val; return *this; } }; @@ -81,11 +80,11 @@ class statistics_resource_adaptor final : public device_memory_resource { } statistics_resource_adaptor() = delete; - virtual ~statistics_resource_adaptor() = default; + ~statistics_resource_adaptor() override = default; statistics_resource_adaptor(statistics_resource_adaptor const&) = delete; - statistics_resource_adaptor(statistics_resource_adaptor&&) = default; statistics_resource_adaptor& operator=(statistics_resource_adaptor const&) = delete; - statistics_resource_adaptor& operator=(statistics_resource_adaptor&&) = default; + statistics_resource_adaptor(statistics_resource_adaptor&&) noexcept = default; + statistics_resource_adaptor& operator=(statistics_resource_adaptor&&) noexcept = default; /** * @brief Return pointer to the upstream resource. @@ -156,7 +155,7 @@ class statistics_resource_adaptor final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - void* p = upstream_->allocate(bytes, stream); + void* ptr = upstream_->allocate(bytes, stream); // increment the stats { @@ -167,21 +166,21 @@ class statistics_resource_adaptor final : public device_memory_resource { allocations_ += 1; } - return p; + return ptr; } /** - * @brief Free allocation of size `bytes` pointed to by `p` + * @brief Free allocation of size `bytes` pointed to by `ptr` * * @throws Nothing. 
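A small sketch of the counters in action; `get_bytes_counter()` is the adaptor's existing accessor and is not part of this hunk.

#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/statistics_resource_adaptor.hpp>

void statistics_example()
{
  rmm::mr::cuda_memory_resource upstream;
  rmm::mr::statistics_resource_adaptor<rmm::mr::cuda_memory_resource> stats_mr{&upstream};

  void* ptr = stats_mr.allocate(4096);
  stats_mr.deallocate(ptr, 4096);

  // value is the currently outstanding amount, peak the high-water mark, total the lifetime sum.
  auto bytes = stats_mr.get_bytes_counter();  // value == 0, peak == 4096, total == 4096 here
  (void)bytes;
}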
* - * @param p Pointer to be deallocated + * @param ptr Pointer to be deallocated * @param bytes Size of the allocation * @param stream Stream on which to perform the deallocation */ - void do_deallocate(void* p, std::size_t bytes, cuda_stream_view stream) override + void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - upstream_->deallocate(p, bytes, stream); + upstream_->deallocate(ptr, bytes, stream); { write_lock_t lock(mtx_); @@ -203,13 +202,10 @@ class statistics_resource_adaptor final : public device_memory_resource { */ bool do_is_equal(device_memory_resource const& other) const noexcept override { - if (this == &other) - return true; - else { - auto cast = dynamic_cast const*>(&other); - return cast != nullptr ? upstream_->is_equal(*cast->get_upstream()) - : upstream_->is_equal(other); - } + if (this == &other) { return true; } + auto cast = dynamic_cast const*>(&other); + return cast != nullptr ? upstream_->is_equal(*cast->get_upstream()) + : upstream_->is_equal(other); } /** @@ -244,5 +240,4 @@ statistics_resource_adaptor make_statistics_adaptor(Upstream* upstream return statistics_resource_adaptor{upstream}; } -} // namespace mr -} // namespace rmm +} // namespace rmm::mr From b26b9aaa61d479f36b83f5387f0b540ccf88b6d4 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 12:00:37 +1000 Subject: [PATCH 44/72] tidy thread_safe_resource_adaptor --- .../device/thread_safe_resource_adaptor.hpp | 30 ++++++++----------- 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/include/rmm/mr/device/thread_safe_resource_adaptor.hpp b/include/rmm/mr/device/thread_safe_resource_adaptor.hpp index 2675a4df2..b1f898c5e 100644 --- a/include/rmm/mr/device/thread_safe_resource_adaptor.hpp +++ b/include/rmm/mr/device/thread_safe_resource_adaptor.hpp @@ -22,8 +22,7 @@ #include #include -namespace rmm { -namespace mr { +namespace rmm::mr { /** * @brief Resource that adapts `Upstream` memory resource adaptor to be thread safe. * @@ -54,7 +53,7 @@ class thread_safe_resource_adaptor final : public device_memory_resource { } thread_safe_resource_adaptor() = delete; - ~thread_safe_resource_adaptor() = default; + ~thread_safe_resource_adaptor() override = default; thread_safe_resource_adaptor(thread_safe_resource_adaptor const&) = delete; thread_safe_resource_adaptor(thread_safe_resource_adaptor&&) = delete; thread_safe_resource_adaptor& operator=(thread_safe_resource_adaptor const&) = delete; @@ -98,19 +97,18 @@ class thread_safe_resource_adaptor final : public device_memory_resource { } /** - * @brief Free allocation of size `bytes` pointed to to by `p` and log the - * deallocation. + * @brief Free allocation of size `bytes` pointed to to by `ptr`.s * * @throws Nothing. 
* - * @param p Pointer to be deallocated + * @param ptr Pointer to be deallocated * @param bytes Size of the allocation * @param stream Stream on which to perform the deallocation */ - void do_deallocate(void* p, std::size_t bytes, cuda_stream_view stream) override + void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { lock_t lock(mtx); - upstream_->deallocate(p, bytes, stream); + upstream_->deallocate(ptr, bytes, stream); } /** @@ -124,15 +122,12 @@ class thread_safe_resource_adaptor final : public device_memory_resource { */ bool do_is_equal(device_memory_resource const& other) const noexcept override { - if (this == &other) - return true; - else { - auto thread_safe_other = dynamic_cast const*>(&other); - if (thread_safe_other != nullptr) - return upstream_->is_equal(*thread_safe_other->get_upstream()); - else - return upstream_->is_equal(other); + if (this == &other) { return true; } + auto thread_safe_other = dynamic_cast const*>(&other); + if (thread_safe_other != nullptr) { + return upstream_->is_equal(*thread_safe_other->get_upstream()); } + return upstream_->is_equal(other); } /** @@ -153,5 +148,4 @@ class thread_safe_resource_adaptor final : public device_memory_resource { Upstream* upstream_; ///< The upstream resource used for satisfying allocation requests }; -} // namespace mr -} // namespace rmm +} // namespace rmm::mr From 7fb12d2cd6f8bd00b1031a108f7f6875834bb52c Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 12:02:29 +1000 Subject: [PATCH 45/72] tidy thrust_allocator_adaptor --- .../mr/device/thrust_allocator_adaptor.hpp | 24 +++++++++---------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/include/rmm/mr/device/thrust_allocator_adaptor.hpp b/include/rmm/mr/device/thrust_allocator_adaptor.hpp index d841304a6..56e910801 100644 --- a/include/rmm/mr/device/thrust_allocator_adaptor.hpp +++ b/include/rmm/mr/device/thrust_allocator_adaptor.hpp @@ -22,8 +22,7 @@ #include #include -namespace rmm { -namespace mr { +namespace rmm::mr { /** * @brief An `allocator` compatible with Thrust containers and algorithms using * a `device_memory_resource` for memory (de)allocation. @@ -91,39 +90,38 @@ class thrust_allocator : public thrust::device_malloc_allocator { /** * @brief Allocate objects of type `T` * - * @param n The number of elements of type `T` to allocate + * @param num The number of elements of type `T` to allocate * @return pointer Pointer to the newly allocated storage */ - pointer allocate(size_type n) + pointer allocate(size_type num) { - return thrust::device_pointer_cast(static_cast(_mr->allocate(n * sizeof(T), _stream))); + return thrust::device_pointer_cast(static_cast(_mr->allocate(num * sizeof(T), _stream))); } /** * @brief Deallocates objects of type `T` * - * @param p Pointer returned by a previous call to `allocate` - * @param n number of elements, *must* be equal to the argument passed to the + * @param ptr Pointer returned by a previous call to `allocate` + * @param num number of elements, *must* be equal to the argument passed to the * prior `allocate` call that produced `p` */ - void deallocate(pointer p, size_type n) + void deallocate(pointer ptr, size_type num) { - return _mr->deallocate(thrust::raw_pointer_cast(p), n * sizeof(T), _stream); + return _mr->deallocate(thrust::raw_pointer_cast(ptr), num * sizeof(T), _stream); } /** * @brief Returns the device memory resource used by this allocator. 
*/ - device_memory_resource* resource() const noexcept { return _mr; } + [[nodiscard]] device_memory_resource* resource() const noexcept { return _mr; } /** * @brief Returns the stream used by this allocator. */ - cuda_stream_view stream() const noexcept { return _stream; } + [[nodiscard]] cuda_stream_view stream() const noexcept { return _stream; } private: cuda_stream_view _stream{}; device_memory_resource* _mr{rmm::mr::get_current_device_resource()}; }; -} // namespace mr -} // namespace rmm +} // namespace rmm::mr From 2c28eeea0cc03a66046de0ebabb8cbf5008ddc0e Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 12:05:59 +1000 Subject: [PATCH 46/72] tidy tracking_resource_adaptor --- .../mr/device/tracking_resource_adaptor.hpp | 49 +++++++++---------- 1 file changed, 22 insertions(+), 27 deletions(-) diff --git a/include/rmm/mr/device/tracking_resource_adaptor.hpp b/include/rmm/mr/device/tracking_resource_adaptor.hpp index 1a32a1c44..fbcb44898 100644 --- a/include/rmm/mr/device/tracking_resource_adaptor.hpp +++ b/include/rmm/mr/device/tracking_resource_adaptor.hpp @@ -25,8 +25,7 @@ #include #include -namespace rmm { -namespace mr { +namespace rmm::mr { /** * @brief Resource that uses `Upstream` to allocate memory and tracks allocations. * @@ -83,11 +82,11 @@ class tracking_resource_adaptor final : public device_memory_resource { } tracking_resource_adaptor() = delete; - virtual ~tracking_resource_adaptor() = default; + ~tracking_resource_adaptor() override = default; tracking_resource_adaptor(tracking_resource_adaptor const&) = delete; - tracking_resource_adaptor(tracking_resource_adaptor&&) = default; tracking_resource_adaptor& operator=(tracking_resource_adaptor const&) = delete; - tracking_resource_adaptor& operator=(tracking_resource_adaptor&&) = default; + tracking_resource_adaptor(tracking_resource_adaptor&&) noexcept = default; + tracking_resource_adaptor& operator=(tracking_resource_adaptor&&) noexcept = default; /** * @brief Return pointer to the upstream resource. @@ -154,10 +153,10 @@ class tracking_resource_adaptor final : public device_memory_resource { std::ostringstream oss; if (!allocations_.empty()) { - for (auto const& al : allocations_) { - oss << al.first << ": " << al.second.allocation_size << " B"; - if (al.second.strace != nullptr) { - oss << " : callstack:" << std::endl << *al.second.strace; + for (auto const& alloc : allocations_) { + oss << alloc.first << ": " << alloc.second.allocation_size << " B"; + if (alloc.second.strace != nullptr) { + oss << " : callstack:" << std::endl << *alloc.second.strace; } oss << std::endl; } @@ -193,34 +192,34 @@ class tracking_resource_adaptor final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - void* p = upstream_->allocate(bytes, stream); + void* ptr = upstream_->allocate(bytes, stream); // track it. { write_lock_t lock(mtx_); - allocations_.emplace(p, allocation_info{bytes, capture_stacks_}); + allocations_.emplace(ptr, allocation_info{bytes, capture_stacks_}); } allocated_bytes_ += bytes; - return p; + return ptr; } /** - * @brief Free allocation of size `bytes` pointed to by `p` + * @brief Free allocation of size `bytes` pointed to by `ptr` * * @throws Nothing. 
* - * @param p Pointer to be deallocated + * @param ptr Pointer to be deallocated * @param bytes Size of the allocation * @param stream Stream on which to perform the deallocation */ - void do_deallocate(void* p, std::size_t bytes, cuda_stream_view stream) override + void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - upstream_->deallocate(p, bytes, stream); + upstream_->deallocate(ptr, bytes, stream); { write_lock_t lock(mtx_); - const auto found = allocations_.find(p); + const auto found = allocations_.find(ptr); // Ensure the allocation is found and the number of bytes match if (found == allocations_.end()) { @@ -229,7 +228,7 @@ class tracking_resource_adaptor final : public device_memory_resource { RMM_LOG_ERROR( "Deallocating a pointer that was not tracked. Ptr: {:p} [{}B], Current Num. Allocations: " "{}", - fmt::ptr(p), + fmt::ptr(ptr), bytes, this->allocations_.size()); } else { @@ -261,13 +260,10 @@ class tracking_resource_adaptor final : public device_memory_resource { */ bool do_is_equal(device_memory_resource const& other) const noexcept override { - if (this == &other) - return true; - else { - auto cast = dynamic_cast const*>(&other); - return cast != nullptr ? upstream_->is_equal(*cast->get_upstream()) - : upstream_->is_equal(other); - } + if (this == &other) { return true; } + auto cast = dynamic_cast const*>(&other); + return cast != nullptr ? upstream_->is_equal(*cast->get_upstream()) + : upstream_->is_equal(other); } /** @@ -303,5 +299,4 @@ tracking_resource_adaptor make_tracking_adaptor(Upstream* upstream) return tracking_resource_adaptor{upstream}; } -} // namespace mr -} // namespace rmm +} // namespace rmm::mr From c0c71a956de79b62e67dd4f20f26e59a5c09abcc Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 12:19:43 +1000 Subject: [PATCH 47/72] tidy host_mrs --- .../rmm/mr/device/device_memory_resource.hpp | 4 +- include/rmm/mr/host/host_memory_resource.hpp | 133 +++++++++--------- include/rmm/mr/host/new_delete_resource.hpp | 47 +++---- .../rmm/mr/host/pinned_memory_resource.hpp | 55 ++++---- 4 files changed, 113 insertions(+), 126 deletions(-) diff --git a/include/rmm/mr/device/device_memory_resource.hpp b/include/rmm/mr/device/device_memory_resource.hpp index e0e97b86d..52aa8c79f 100644 --- a/include/rmm/mr/device/device_memory_resource.hpp +++ b/include/rmm/mr/device/device_memory_resource.hpp @@ -85,8 +85,8 @@ class device_memory_resource { virtual ~device_memory_resource() = default; device_memory_resource(device_memory_resource const&) = default; device_memory_resource& operator=(device_memory_resource const&) = default; - device_memory_resource(device_memory_resource&&) = default; - device_memory_resource& operator=(device_memory_resource&&) = default; + device_memory_resource(device_memory_resource&&) noexcept = default; + device_memory_resource& operator=(device_memory_resource&&) noexcept = default; /** * @brief Allocates memory of size at least \p bytes. diff --git a/include/rmm/mr/host/host_memory_resource.hpp b/include/rmm/mr/host/host_memory_resource.hpp index b799b46da..4edffc860 100644 --- a/include/rmm/mr/host/host_memory_resource.hpp +++ b/include/rmm/mr/host/host_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
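[Editor's aside — illustrative only, not part of the patch: the hunks below rewrite the documentation of the host_memory_resource interface (allocate / deallocate / is_equal). As a quick orientation, here is a minimal usage sketch against that interface using the concrete new_delete_resource touched later in this patch; the size and alignment values are arbitrary.]

    // Minimal sketch of the host_memory_resource interface documented below.
    #include <rmm/mr/host/new_delete_resource.hpp>

    #include <cstddef>

    int main()
    {
      rmm::mr::new_delete_resource mr;  // a concrete host_memory_resource
      std::size_t const bytes{256};
      void* ptr = mr.allocate(bytes, alignof(std::max_align_t));
      // Deallocation must use the same size and alignment that were passed to allocate().
      mr.deallocate(ptr, bytes, alignof(std::max_align_t));
      return 0;
    }
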
@@ -18,9 +18,9 @@ #include #include -namespace rmm { -namespace mr { -/**---------------------------------------------------------------------------* +namespace rmm::mr { + +/** * @brief Base class for host memory allocation. * * This is based on `std::pmr::memory_resource`: @@ -43,124 +43,121 @@ namespace mr { * base class' `allocate` function may log every allocation, no matter what * derived class implementation is used. * - *---------------------------------------------------------------------------**/ + */ class host_memory_resource { public: - virtual ~host_memory_resource() = default; + host_memory_resource() = default; + virtual ~host_memory_resource() = default; + host_memory_resource(host_memory_resource const&) = default; + host_memory_resource& operator=(host_memory_resource const&) = default; + host_memory_resource(host_memory_resource&&) noexcept = default; + host_memory_resource& operator=(host_memory_resource&&) noexcept = default; - /**---------------------------------------------------------------------------* + /** * @brief Allocates memory on the host of size at least `bytes` bytes. * - * The returned storage is aligned to the specified `alignment` if supported, - * and to `alignof(std::max_align_t)` otherwise. + * The returned storage is aligned to the specified `alignment` if supported, and to + * `alignof(std::max_align_t)` otherwise. * - * @throws std::bad_alloc When the requested `bytes` and `alignment` cannot be - * allocated. + * @throws std::bad_alloc When the requested `bytes` and `alignment` cannot be allocated. * * @param bytes The size of the allocation * @param alignment Alignment of the allocation * @return void* Pointer to the newly allocated memory - *---------------------------------------------------------------------------**/ + */ void* allocate(std::size_t bytes, std::size_t alignment = alignof(std::max_align_t)) { return do_allocate(bytes, alignment); } - /**---------------------------------------------------------------------------* - * @brief Deallocate memory pointed to by `p`. + /** + * @brief Deallocate memory pointed to by `ptr`. * - * `p` must have been returned by a prior call to `allocate(bytes,alignment)` - * on a `host_memory_resource` that compares equal to `*this`, and the storage - * it points to must not yet have been deallocated, otherwise behavior is - * undefined. + * `ptr` must have been returned by a prior call to `allocate(bytes,alignment)` on a + * `host_memory_resource` that compares equal to `*this`, and the storage it points to must not + * yet have been deallocated, otherwise behavior is undefined. * * @throws Nothing. * - * @param p Pointer to be deallocated - * @param bytes The size in bytes of the allocation. This must be equal to the - * value of `bytes` that was passed to the `allocate` call that returned `p`. - * @param alignment Alignment of the allocation. This must be equal to the - *value of `alignment` that was passed to the `allocate` call that returned - *`p`. + * @param ptr Pointer to be deallocated + * @param bytes The size in bytes of the allocation. This must be equal to the value of `bytes` + * that was passed to the `allocate` call that returned `ptr`. + * @param alignment Alignment of the allocation. This must be equal to the value of `alignment` + * that was passed to the `allocate` call that returned `ptr`. 
* @param stream Stream on which to perform deallocation - *---------------------------------------------------------------------------**/ - void deallocate(void* p, std::size_t bytes, std::size_t alignment = alignof(std::max_align_t)) + */ + void deallocate(void* ptr, std::size_t bytes, std::size_t alignment = alignof(std::max_align_t)) { - do_deallocate(p, bytes, alignment); + do_deallocate(ptr, bytes, alignment); } - /**---------------------------------------------------------------------------* + /** * @brief Compare this resource to another. * - * Two `host_memory_resource`s compare equal if and only if memory allocated - * from one `host_memory_resource` can be deallocated from the other and vice - * versa. + * Two `host_memory_resource`s compare equal if and only if memory allocated from one + * `host_memory_resource` can be deallocated from the other and vice versa. * - * By default, simply checks if \p *this and \p other refer to the same - * object, i.e., does not check if they are two objects of the same class. + * By default, simply checks if \p *this and \p other refer to the same object, i.e., does not + * check if they are two objects of the same class. * * @param other The other resource to compare to - * @returns If the two resources are equivalent - *---------------------------------------------------------------------------**/ - bool is_equal(host_memory_resource const& other) const noexcept { return do_is_equal(other); } + * @returns true if the two resources are equivalent + */ + [[nodiscard]] bool is_equal(host_memory_resource const& other) const noexcept + { + return do_is_equal(other); + } private: - /**---------------------------------------------------------------------------* + /** * @brief Allocates memory on the host of size at least `bytes` bytes. * - * The returned storage is aligned to the specified `alignment` if supported, - * and to `alignof(std::max_align_t)` otherwise. + * The returned storage is aligned to the specified `alignment` if supported, and to + * `alignof(std::max_align_t)` otherwise. * - * @throws std::bad_alloc When the requested `bytes` and `alignment` cannot be - * allocated. + * @throws std::bad_alloc When the requested `bytes` and `alignment` cannot be allocated. * * @param bytes The size of the allocation * @param alignment Alignment of the allocation * @return void* Pointer to the newly allocated memory - *---------------------------------------------------------------------------**/ + */ virtual void* do_allocate(std::size_t bytes, std::size_t alignment = alignof(std::max_align_t)) = 0; - /**---------------------------------------------------------------------------* - * @brief Deallocate memory pointed to by `p`. + /** + * @brief Deallocate memory pointed to by `ptr`. * - * `p` must have been returned by a prior call to `allocate(bytes,alignment)` - * on a `host_memory_resource` that compares equal to `*this`, and the storage - * it points to must not yet have been deallocated, otherwise behavior is - * undefined. + * `ptr` must have been returned by a prior call to `allocate(bytes,alignment)` on a + * `host_memory_resource` that compares equal to `*this`, and the storage it points to must not + * yet have been deallocated, otherwise behavior is undefined. * * @throws Nothing. * - * @param p Pointer to be deallocated - * @param bytes The size in bytes of the allocation. This must be equal to the - * value of `bytes` that was passed to the `allocate` call that returned `p`. - * @param alignment Alignment of the allocation. 
This must be equal to the - *value of `alignment` that was passed to the `allocate` call that returned - *`p`. - * @param stream Stream on which to perform deallocation - *---------------------------------------------------------------------------**/ - virtual void do_deallocate(void* p, + * @param ptr Pointer to be deallocated + * @param bytes The size in bytes of the allocation. This must be equal to the value of `bytes` + * that was passed to the `allocate` call that returned `ptr`. + * @param alignment Alignment of the allocation. This must be equal to the value of `alignment` + * that was passed to the `allocate` call that returned `ptr`. + */ + virtual void do_deallocate(void* ptr, std::size_t bytes, std::size_t alignment = alignof(std::max_align_t)) = 0; - /**---------------------------------------------------------------------------* + /** * @brief Compare this resource to another. * - * Two host_memory_resources compare equal if and only if memory allocated - * from one host_memory_resource can be deallocated from the other and vice - * versa. + * Two host_memory_resources compare equal if and only if memory allocated from one + * host_memory_resource can be deallocated from the other and vice versa. * - * By default, simply checks if \p *this and \p other refer to the same - * object, i.e., does not check if they are two objects of the same class. + * By default, simply checks if `*this` and `other` refer to the same object, i.e., does not check + * whether they are two objects of the same class. * * @param other The other resource to compare to * @return true If the two resources are equivalent - * @return false If the two resources are not equal - *---------------------------------------------------------------------------**/ - virtual bool do_is_equal(host_memory_resource const& other) const noexcept + */ + [[nodiscard]] virtual bool do_is_equal(host_memory_resource const& other) const noexcept { return this == &other; } }; -} // namespace mr -} // namespace rmm +} // namespace rmm::mr diff --git a/include/rmm/mr/host/new_delete_resource.hpp b/include/rmm/mr/host/new_delete_resource.hpp index 694450798..e30a6f41c 100644 --- a/include/rmm/mr/host/new_delete_resource.hpp +++ b/include/rmm/mr/host/new_delete_resource.hpp @@ -24,10 +24,10 @@ namespace rmm::mr { -/**---------------------------------------------------------------------------* - * @brief A `host_memory_resource` that uses the global `operator new` and - * `operator delete` to allocate host memory. - *---------------------------------------------------------------------------**/ +/** + * @brief A `host_memory_resource` that uses the global `operator new` and `operator delete` to + * allocate host memory. + */ class new_delete_resource final : public host_memory_resource { public: new_delete_resource() = default; @@ -38,19 +38,18 @@ class new_delete_resource final : public host_memory_resource { new_delete_resource& operator=(new_delete_resource&&) = default; private: - /**---------------------------------------------------------------------------* + /** * @brief Allocates memory on the host of size at least `bytes` bytes. * - * The returned storage is aligned to the specified `alignment` if supported, - * and to `alignof(std::max_align_t)` otherwise. + * The returned storage is aligned to the specified `alignment` if supported, and to + * `alignof(std::max_align_t)` otherwise. * - * @throws std::bad_alloc When the requested `bytes` and `alignment` cannot be - * allocated. 
+ * @throws std::bad_alloc When the requested `bytes` and `alignment` cannot be allocated. * * @param bytes The size of the allocation * @param alignment Alignment of the allocation - * @return void* Pointer to the newly allocated memory - *---------------------------------------------------------------------------**/ + * @return Pointer to the newly allocated memory + */ void* do_allocate(std::size_t bytes, std::size_t alignment = detail::RMM_DEFAULT_HOST_ALIGNMENT) override { @@ -62,24 +61,21 @@ class new_delete_resource final : public host_memory_resource { bytes, alignment, [](std::size_t size) { return ::operator new(size); }); } - /**---------------------------------------------------------------------------* - * @brief Deallocate memory pointed to by `p`. + /** + * @brief Deallocate memory pointed to by `ptr`. * - * `p` must have been returned by a prior call to `allocate(bytes,alignment)` - * on a `host_memory_resource` that compares equal to `*this`, and the storage - * it points to must not yet have been deallocated, otherwise behavior is - * undefined. + * `ptr` must have been returned by a prior call to `allocate(bytes,alignment)` on a + * `host_memory_resource` that compares equal to `*this`, and the storage it points to must not + * yet have been deallocated, otherwise behavior is undefined. * * @throws Nothing. * - * @param p Pointer to be deallocated - * @param bytes The size in bytes of the allocation. This must be equal to the - * value of `bytes` that was passed to the `allocate` call that returned `p`. - * @param alignment Alignment of the allocation. This must be equal to the - *value of `alignment` that was passed to the `allocate` call that returned - *`p`. - * @param stream Stream on which to perform deallocation - *---------------------------------------------------------------------------**/ + * @param ptr Pointer to be deallocated + * @param bytes The size in bytes of the allocation. This must be equal to the value of `bytes` + * that was passed to the `allocate` call that returned `ptr`. + * @param alignment Alignment of the allocation. This must be equal to the value of `alignment` + * that was passed to the `allocate` call that returned `ptr`. + */ void do_deallocate(void* ptr, std::size_t bytes, std::size_t alignment = detail::RMM_DEFAULT_HOST_ALIGNMENT) override @@ -87,4 +83,5 @@ class new_delete_resource final : public host_memory_resource { detail::aligned_deallocate(ptr, bytes, alignment, [](void* ptr) { ::operator delete(ptr); }); } }; + } // namespace rmm::mr diff --git a/include/rmm/mr/host/pinned_memory_resource.hpp b/include/rmm/mr/host/pinned_memory_resource.hpp index d00a5cffe..42be3644f 100644 --- a/include/rmm/mr/host/pinned_memory_resource.hpp +++ b/include/rmm/mr/host/pinned_memory_resource.hpp @@ -22,38 +22,36 @@ #include #include -namespace rmm { -namespace mr { +namespace rmm::mr { -/**---------------------------------------------------------------------------* +/* * @brief A `host_memory_resource` that uses `cudaMallocHost` to allocate * pinned/page-locked host memory. 
* * See https://devblogs.nvidia.com/how-optimize-data-transfers-cuda-cc/ - *---------------------------------------------------------------------------**/ + */ class pinned_memory_resource final : public host_memory_resource { public: pinned_memory_resource() = default; - ~pinned_memory_resource() = default; + ~pinned_memory_resource() override = default; pinned_memory_resource(pinned_memory_resource const&) = default; pinned_memory_resource(pinned_memory_resource&&) = default; pinned_memory_resource& operator=(pinned_memory_resource const&) = default; pinned_memory_resource& operator=(pinned_memory_resource&&) = default; private: - /**---------------------------------------------------------------------------* + /** * @brief Allocates pinned memory on the host of size at least `bytes` bytes. * - * The returned storage is aligned to the specified `alignment` if supported, - * and to `alignof(std::max_align_t)` otherwise. + * The returned storage is aligned to the specified `alignment` if supported, and to + * `alignof(std::max_align_t)` otherwise. * - * @throws std::bad_alloc When the requested `bytes` and `alignment` cannot be - * allocated. + * @throws std::bad_alloc When the requested `bytes` and `alignment` cannot be allocated. * * @param bytes The size of the allocation * @param alignment Alignment of the allocation * @return void* Pointer to the newly allocated memory - *---------------------------------------------------------------------------**/ + */ void* do_allocate(std::size_t bytes, std::size_t alignment = alignof(std::max_align_t)) override { // don't allocate anything if the user requested zero bytes @@ -71,33 +69,28 @@ class pinned_memory_resource final : public host_memory_resource { }); } - /**---------------------------------------------------------------------------* - * @brief Deallocate memory pointed to by `p`. + /** + * @brief Deallocate memory pointed to by `ptr`. * - * `p` must have been returned by a prior call to `allocate(bytes,alignment)` - * on a `host_memory_resource` that compares equal to `*this`, and the storage - * it points to must not yet have been deallocated, otherwise behavior is - * undefined. + * `ptr` must have been returned by a prior call to `allocate(bytes,alignment)` on a + * `host_memory_resource` that compares equal to `*this`, and the storage it points to must not + * yet have been deallocated, otherwise behavior is undefined. * * @throws Nothing. * - * @param p Pointer to be deallocated - * @param bytes The size in bytes of the allocation. This must be equal to the - * value of `bytes` that was passed to the `allocate` call that returned `p`. - * @param alignment Alignment of the allocation. This must be equal to the - *value of `alignment` that was passed to the `allocate` call that returned - *`p`. - * @param stream Stream on which to perform deallocation - *---------------------------------------------------------------------------**/ - void do_deallocate(void* p, + * @param ptr Pointer to be deallocated + * @param bytes The size in bytes of the allocation. This must be equal to the value of `bytes` + * that was passed to the `allocate` call that returned `ptr`. + * @param alignment Alignment of the allocation. This must be equal to the value of `alignment` + * that was passed to the `allocate` call that returned `ptr`. 
+ */ + void do_deallocate(void* ptr, std::size_t bytes, std::size_t alignment = alignof(std::max_align_t)) override { - (void)alignment; - if (nullptr == p) { return; } + if (nullptr == ptr) { return; } detail::aligned_deallocate( - p, bytes, alignment, [](void* p) { RMM_ASSERT_CUDA_SUCCESS(cudaFreeHost(p)); }); + ptr, bytes, alignment, [](void* ptr) { RMM_ASSERT_CUDA_SUCCESS(cudaFreeHost(ptr)); }); } }; -} // namespace mr -} // namespace rmm +} // namespace rmm::mr From 111a2f434b08ae90bf7e536e7b89035c6f74b32e Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 12:31:45 +1000 Subject: [PATCH 48/72] tidy aligned_mr_tests --- tests/mr/device/aligned_mr_tests.cpp | 49 +++++++++++++--------------- 1 file changed, 22 insertions(+), 27 deletions(-) diff --git a/tests/mr/device/aligned_mr_tests.cpp b/tests/mr/device/aligned_mr_tests.cpp index a39dcdbfd..f1ed561f4 100644 --- a/tests/mr/device/aligned_mr_tests.cpp +++ b/tests/mr/device/aligned_mr_tests.cpp @@ -14,6 +14,7 @@ * limitations under the License. */ +#include #include #include #include @@ -40,6 +41,12 @@ class mock_resource : public rmm::mr::device_memory_resource { using aligned_mock = rmm::mr::aligned_resource_adaptor; using aligned_real = rmm::mr::aligned_resource_adaptor; +void* int_to_address(std::size_t val) +{ + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast, performance-no-int-to-ptr) + return reinterpret_cast(val); +} + TEST(AlignedTest, ThrowOnNullUpstream) { auto construct_nullptr = []() { aligned_mock mr{nullptr}; }; @@ -87,8 +94,8 @@ TEST(AlignedTest, DefaultAllocationAlignmentPassthrough) aligned_mock mr{&mock}; cuda_stream_view stream; - auto const unaligned_address{123}; - void* const pointer = reinterpret_cast(unaligned_address); + void* const pointer = int_to_address(123); + // device_memory_resource aligns to 8. { auto const size{8}; @@ -111,8 +118,7 @@ TEST(AlignedTest, BelowAlignmentThresholdPassthrough) aligned_mock mr{&mock, alignment, threshold}; cuda_stream_view stream; - auto const unaligned_address1{123}; - void* const pointer = reinterpret_cast(unaligned_address1); + void* const pointer = int_to_address(123); // device_memory_resource aligns to 8. 
{ auto const size{8}; @@ -127,9 +133,8 @@ TEST(AlignedTest, BelowAlignmentThresholdPassthrough) } { - auto const unaligned_address2{456}; auto const size{65528}; - void* const pointer1 = reinterpret_cast(unaligned_address2); + void* const pointer1 = int_to_address(456); EXPECT_CALL(mock, do_allocate(size, stream)).WillOnce(Return(pointer1)); EXPECT_CALL(mock, do_deallocate(pointer1, size, stream)).Times(1); EXPECT_EQ(mr.allocate(size, stream), pointer1); @@ -145,8 +150,7 @@ TEST(AlignedTest, UpstreamAddressAlreadyAligned) aligned_mock mr{&mock, alignment, threshold}; cuda_stream_view stream; - auto const aligned_address{4096}; - void* const pointer = reinterpret_cast(aligned_address); + void* const pointer = int_to_address(4096); { auto const size{69376}; @@ -170,16 +174,14 @@ TEST(AlignedTest, AlignUpstreamAddress) cuda_stream_view stream; { - auto const address{256}; - void* const pointer = reinterpret_cast(address); + void* const pointer = int_to_address(256); auto const size{69376}; EXPECT_CALL(mock, do_allocate(size, stream)).WillOnce(Return(pointer)); EXPECT_CALL(mock, do_deallocate(pointer, size, stream)).Times(1); } { - auto const address{4096}; - void* const expected_pointer = reinterpret_cast(address); + void* const expected_pointer = int_to_address(4096); auto const size{65536}; EXPECT_EQ(mr.allocate(size, stream), expected_pointer); mr.deallocate(expected_pointer, size, stream); @@ -196,12 +198,9 @@ TEST(AlignedTest, AlignMultiple) cuda_stream_view stream; { - auto const address1{256}; - auto const address2{131584}; - auto const address3{263168}; - void* const pointer1 = reinterpret_cast(address1); - void* const pointer2 = reinterpret_cast(address2); - void* const pointer3 = reinterpret_cast(address3); + void* const pointer1 = int_to_address(256); + void* const pointer2 = int_to_address(131584); + void* const pointer3 = int_to_address(263168); auto const size1{69376}; auto const size2{77568}; auto const size3{81664}; @@ -214,12 +213,9 @@ TEST(AlignedTest, AlignMultiple) } { - auto const expected_address1{4096}; - auto const expected_address2{135168}; - auto const expected_address3{266240}; - void* const expected_pointer1 = reinterpret_cast(expected_address1); - void* const expected_pointer2 = reinterpret_cast(expected_address2); - void* const expected_pointer3 = reinterpret_cast(expected_address3); + void* const expected_pointer1 = int_to_address(4096); + void* const expected_pointer2 = int_to_address(135168); + void* const expected_pointer3 = int_to_address(266240); auto const size1{65536}; auto const size2{73728}; auto const size3{77800}; @@ -237,9 +233,8 @@ TEST(AlignedTest, AlignRealPointer) auto const alignment{4096}; auto const threshold{65536}; aligned_real mr{rmm::mr::get_current_device_resource(), alignment, threshold}; - void* alloc = mr.allocate(threshold); - auto const address = reinterpret_cast(alloc); - EXPECT_TRUE(address % alignment == 0); + void* alloc = mr.allocate(threshold); + EXPECT_TRUE(rmm::detail::is_pointer_aligned(alloc, alignment)); mr.deallocate(alloc, threshold); } From 70b85c29bec915d1d220fb4f08f65d478c80f08a Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 13:10:42 +1000 Subject: [PATCH 49/72] tidy device MR tests --- tests/mr/device/mr_multithreaded_tests.cpp | 2 +- tests/mr/device/mr_test.hpp | 11 ++- .../mr/device/polymorphic_allocator_tests.cpp | 12 ++- tests/mr/device/pool_mr_tests.cpp | 28 +++--- tests/mr/device/statistics_mr_tests.cpp | 60 ++++++----- .../device/stream_allocator_adaptor_tests.cpp | 25 ++--- 
tests/mr/device/thrust_allocator_tests.cu | 20 ++-- tests/mr/device/tracking_mr_tests.cpp | 99 ++++++++++--------- 8 files changed, 139 insertions(+), 118 deletions(-) diff --git a/tests/mr/device/mr_multithreaded_tests.cpp b/tests/mr/device/mr_multithreaded_tests.cpp index a80af38fe..4bacb208f 100644 --- a/tests/mr/device/mr_multithreaded_tests.cpp +++ b/tests/mr/device/mr_multithreaded_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/tests/mr/device/mr_test.hpp b/tests/mr/device/mr_test.hpp index 5609168b8..4bef2b54e 100644 --- a/tests/mr/device/mr_test.hpp +++ b/tests/mr/device/mr_test.hpp @@ -100,13 +100,14 @@ inline void test_allocate(rmm::mr::device_memory_resource* mr, inline void concurrent_allocations_are_different(rmm::mr::device_memory_resource* mr, cuda_stream_view stream) { - void* p1 = mr->allocate(8_B, stream); - void* p2 = mr->allocate(8_B, stream); + const auto size{8_B}; + void* ptr1 = mr->allocate(size, stream); + void* ptr2 = mr->allocate(size, stream); - EXPECT_NE(p1, p2); + EXPECT_NE(ptr1, ptr2); - mr->deallocate(p1, 8_B, stream); - mr->deallocate(p2, 8_B, stream); + mr->deallocate(ptr1, size, stream); + mr->deallocate(ptr2, size, stream); } inline void test_various_allocations(rmm::mr::device_memory_resource* mr, cuda_stream_view stream) diff --git a/tests/mr/device/polymorphic_allocator_tests.cpp b/tests/mr/device/polymorphic_allocator_tests.cpp index 319d0ca63..4c8346730 100644 --- a/tests/mr/device/polymorphic_allocator_tests.cpp +++ b/tests/mr/device/polymorphic_allocator_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
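[Editor's aside — illustrative only, not part of the patch: the polymorphic_allocator test changes below exercise stream-ordered allocate / deallocate. A minimal sketch of that usage pattern, assuming (as the adjacent tests do) that a default-constructed allocator uses the current device resource:]

    #include <rmm/cuda_stream_view.hpp>
    #include <rmm/mr/device/polymorphic_allocator.hpp>

    void example()
    {
      rmm::cuda_stream_view stream{};               // default stream
      rmm::mr::polymorphic_allocator<int> alloc{};  // assumed to default to get_current_device_resource()
      int* ptr = alloc.allocate(1000, stream);      // stream-ordered allocation of 1000 ints
      alloc.deallocate(ptr, 1000, stream);          // size must match the allocate() call
    }
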
@@ -42,7 +42,8 @@ TEST_F(allocator_test, custom_resource) EXPECT_EQ(allocator.resource(), &mr); } -void test_conversion(rmm::mr::polymorphic_allocator) {} +void test_conversion(rmm::mr::polymorphic_allocator /*unused*/) {} + TEST_F(allocator_test, implicit_conversion) { rmm::mr::cuda_memory_resource mr; @@ -106,9 +107,10 @@ TEST_F(allocator_test, rebind) TEST_F(allocator_test, allocate_deallocate) { rmm::mr::polymorphic_allocator allocator{}; - auto p = allocator.allocate(1000, stream); - EXPECT_NE(p, nullptr); - EXPECT_NO_THROW(allocator.deallocate(p, 1000, stream)); + const auto size{1000}; + auto* ptr = allocator.allocate(size, stream); + EXPECT_NE(ptr, nullptr); + EXPECT_NO_THROW(allocator.deallocate(ptr, size, stream)); } } // namespace diff --git a/tests/mr/device/pool_mr_tests.cpp b/tests/mr/device/pool_mr_tests.cpp index 7f31412c8..9f2020785 100644 --- a/tests/mr/device/pool_mr_tests.cpp +++ b/tests/mr/device/pool_mr_tests.cpp @@ -26,8 +26,7 @@ #include -namespace rmm { -namespace test { +namespace rmm::test { namespace { using cuda_mr = rmm::mr::cuda_memory_resource; using pool_mr = rmm::mr::pool_memory_resource; @@ -44,7 +43,9 @@ TEST(PoolTest, ThrowMaxLessThanInitial) // Make sure first argument is enough larger than the second that alignment rounding doesn't // make them equal auto max_less_than_initial = []() { - pool_mr mr{rmm::mr::get_current_device_resource(), 1024, 256}; + const auto initial{1024}; + const auto maximum{256}; + pool_mr mr{rmm::mr::get_current_device_resource(), initial, maximum}; }; EXPECT_THROW(max_less_than_initial(), rmm::logic_error); } @@ -54,8 +55,9 @@ TEST(PoolTest, AllocateNinetyPercent) auto allocate_ninety = []() { auto const [free, total] = rmm::detail::available_device_memory(); (void)total; - auto const ninety_percent_pool = rmm::detail::align_up(static_cast(free * 0.9), - rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + auto const ninety_percent_pool = + rmm::detail::align_up(static_cast(static_cast(free) * 0.9), + rmm::detail::CUDA_ALLOCATION_ALIGNMENT); pool_mr mr{rmm::mr::get_current_device_resource(), ninety_percent_pool}; }; EXPECT_NO_THROW(allocate_ninety()); @@ -67,10 +69,10 @@ TEST(PoolTest, TwoLargeBuffers) auto const [free, total] = rmm::detail::available_device_memory(); (void)total; pool_mr mr{rmm::mr::get_current_device_resource()}; - auto p1 = mr.allocate(free / 4); - auto p2 = mr.allocate(free / 4); - mr.deallocate(p1, free / 4); - mr.deallocate(p2, free / 4); + auto* ptr1 = mr.allocate(free / 4); + auto* ptr2 = mr.allocate(free / 4); + mr.deallocate(ptr1, free / 4); + mr.deallocate(ptr2, free / 4); }; EXPECT_NO_THROW(two_large()); } @@ -78,7 +80,8 @@ TEST(PoolTest, TwoLargeBuffers) TEST(PoolTest, ForceGrowth) { cuda_mr cuda; - limiting_mr limiter{&cuda, 6000}; + auto const max_size{6000}; + limiting_mr limiter{&cuda, max_size}; pool_mr mr{&limiter, 0}; EXPECT_NO_THROW(mr.allocate(1000)); EXPECT_NO_THROW(mr.allocate(4000)); @@ -89,7 +92,7 @@ TEST(PoolTest, ForceGrowth) TEST(PoolTest, DeletedStream) { pool_mr mr{rmm::mr::get_current_device_resource(), 0}; - cudaStream_t stream; // we don't use rmm::cuda_stream here to make destruction more explicit + cudaStream_t stream{}; // we don't use rmm::cuda_stream here to make destruction more explicit const int size = 10000; EXPECT_EQ(cudaSuccess, cudaStreamCreate(&stream)); EXPECT_NO_THROW(rmm::device_buffer buff(size, cuda_stream_view{stream}, &mr)); @@ -124,5 +127,4 @@ TEST(PoolTest, NonAlignedPoolSize) } } // namespace -} // namespace test -} // namespace rmm +} // namespace rmm::test diff 
--git a/tests/mr/device/statistics_mr_tests.cpp b/tests/mr/device/statistics_mr_tests.cpp index 83464bbe2..59c356b1e 100644 --- a/tests/mr/device/statistics_mr_tests.cpp +++ b/tests/mr/device/statistics_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,20 +14,24 @@ * limitations under the License. */ +#include "../../byte_literals.hpp" + #include #include #include #include -#include "mr_test.hpp" #include -namespace rmm { -namespace test { +namespace rmm::test { namespace { using statistics_adaptor = rmm::mr::statistics_resource_adaptor; +constexpr auto num_allocations{10}; +constexpr auto num_more_allocations{5}; +constexpr auto ten_MiB{10_MiB}; + TEST(StatisticsTest, ThrowOnNullUpstream) { auto construct_nullptr = []() { statistics_adaptor mr{nullptr}; }; @@ -51,11 +55,13 @@ TEST(StatisticsTest, AllFreed) { statistics_adaptor mr{rmm::mr::get_current_device_resource()}; std::vector allocations; - for (int i = 0; i < 10; ++i) { - allocations.push_back(mr.allocate(10_MiB)); + + allocations.reserve(num_allocations); + for (int i = 0; i < num_allocations; ++i) { + allocations.push_back(mr.allocate(ten_MiB)); } - for (auto p : allocations) { - mr.deallocate(p, 10_MiB); + for (auto* alloc : allocations) { + mr.deallocate(alloc, ten_MiB); } // Counter values should be 0 @@ -67,12 +73,13 @@ TEST(StatisticsTest, PeakAllocations) { statistics_adaptor mr{rmm::mr::get_current_device_resource()}; std::vector allocations; - for (std::size_t i = 0; i < 10; ++i) { - allocations.push_back(mr.allocate(10_MiB)); + + for (std::size_t i = 0; i < num_allocations; ++i) { + allocations.push_back(mr.allocate(ten_MiB)); } // Delete every other allocation for (auto&& it = allocations.begin(); it != allocations.end(); ++it) { - mr.deallocate(*it, 10_MiB); + mr.deallocate(*it, ten_MiB); it = allocations.erase(it); } @@ -92,13 +99,13 @@ TEST(StatisticsTest, PeakAllocations) EXPECT_EQ(current_alloc_counts.total, 10); // Add 10 more to increase the peak - for (std::size_t i = 0; i < 10; ++i) { - allocations.push_back(mr.allocate(10_MiB)); + for (std::size_t i = 0; i < num_allocations; ++i) { + allocations.push_back(mr.allocate(ten_MiB)); } // Deallocate all remaining - for (std::size_t i = 0; i < allocations.size(); ++i) { - mr.deallocate(allocations[i], 10_MiB); + for (auto& allocation : allocations) { + mr.deallocate(allocation, ten_MiB); } allocations.clear(); @@ -124,9 +131,9 @@ TEST(StatisticsTest, MultiTracking) rmm::mr::set_current_device_resource(&mr); std::vector> allocations; - for (std::size_t i = 0; i < 10; ++i) { + for (std::size_t i = 0; i < num_allocations; ++i) { allocations.emplace_back( - std::make_shared(10_MiB, rmm::cuda_stream_default)); + std::make_shared(ten_MiB, rmm::cuda_stream_default)); } EXPECT_EQ(mr.get_allocations_counter().value, 10); @@ -134,9 +141,9 @@ TEST(StatisticsTest, MultiTracking) statistics_adaptor inner_mr{rmm::mr::get_current_device_resource()}; rmm::mr::set_current_device_resource(&inner_mr); - for (std::size_t i = 0; i < 5; ++i) { + for (std::size_t i = 0; i < num_more_allocations; ++i) { allocations.emplace_back( - std::make_shared(10_MiB, rmm::cuda_stream_default)); + std::make_shared(ten_MiB, rmm::cuda_stream_default)); } // Check the allocated bytes for both MRs @@ -174,8 +181,8 @@ TEST(StatisticsTest, NegativeInnerTracking) // memory pointer 
statistics_adaptor mr{rmm::mr::get_current_device_resource()}; std::vector allocations; - for (std::size_t i = 0; i < 10; ++i) { - allocations.push_back(mr.allocate(10_MiB)); + for (std::size_t i = 0; i < num_allocations; ++i) { + allocations.push_back(mr.allocate(ten_MiB)); } EXPECT_EQ(mr.get_allocations_counter().value, 10); @@ -183,8 +190,8 @@ TEST(StatisticsTest, NegativeInnerTracking) statistics_adaptor inner_mr{&mr}; // Add more allocations - for (std::size_t i = 0; i < 5; ++i) { - allocations.push_back(inner_mr.allocate(10_MiB)); + for (std::size_t i = 0; i < num_more_allocations; ++i) { + allocations.push_back(inner_mr.allocate(ten_MiB)); } // Check the outstanding allocations @@ -199,8 +206,8 @@ TEST(StatisticsTest, NegativeInnerTracking) EXPECT_EQ(inner_mr.get_allocations_counter().value, 5); // Deallocate all allocations using the inner_mr - for (std::size_t i = 0; i < allocations.size(); ++i) { - inner_mr.deallocate(allocations[i], 10_MiB); + for (auto& allocation : allocations) { + inner_mr.deallocate(allocation, ten_MiB); } allocations.clear(); @@ -227,5 +234,4 @@ TEST(StatisticsTest, NegativeInnerTracking) } } // namespace -} // namespace test -} // namespace rmm +} // namespace rmm::test diff --git a/tests/mr/device/stream_allocator_adaptor_tests.cpp b/tests/mr/device/stream_allocator_adaptor_tests.cpp index 616b899f1..669fca5f5 100644 --- a/tests/mr/device/stream_allocator_adaptor_tests.cpp +++ b/tests/mr/device/stream_allocator_adaptor_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,15 +14,16 @@ * limitations under the License. */ -#include - -#include #include #include #include #include #include +#include + +#include + namespace { struct allocator_test : public ::testing::Test { @@ -34,7 +35,7 @@ TEST_F(allocator_test, factory) { using Adaptor = rmm::mr::stream_allocator_adaptor; auto adapted = rmm::mr::make_stream_allocator_adaptor(allocator, stream); - static_assert((std::is_same::value), ""); + static_assert((std::is_same::value)); EXPECT_EQ(adapted.underlying_allocator(), allocator); EXPECT_EQ(adapted.stream(), stream); } @@ -97,21 +98,21 @@ TEST_F(allocator_test, rebind) { auto adapted = rmm::mr::make_stream_allocator_adaptor(allocator, stream); using Rebound = std::allocator_traits::rebind_alloc; - static_assert((std::is_same::value_type, double>::value), ""); + static_assert((std::is_same::value_type, double>::value)); static_assert( std::is_same>>::value, - ""); + rmm::mr::stream_allocator_adaptor>>::value); - Rebound r{adapted}; + Rebound rebound{adapted}; } TEST_F(allocator_test, allocate_deallocate) { auto adapted = rmm::mr::make_stream_allocator_adaptor(allocator, stream); - auto p = adapted.allocate(1000); - EXPECT_NE(p, nullptr); - EXPECT_NO_THROW(adapted.deallocate(p, 1000)); + auto const size{1000}; + auto* ptr = adapted.allocate(size); + EXPECT_NE(ptr, nullptr); + EXPECT_NO_THROW(adapted.deallocate(ptr, size)); } } // namespace diff --git a/tests/mr/device/thrust_allocator_tests.cu b/tests/mr/device/thrust_allocator_tests.cu index 0092f21ba..eabdfe143 100644 --- a/tests/mr/device/thrust_allocator_tests.cu +++ b/tests/mr/device/thrust_allocator_tests.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,13 +14,14 @@ * limitations under the License. */ -#include +#include "mr_test.hpp" + #include #include -#include "mr_test.hpp" -namespace rmm { -namespace test { +#include + +namespace rmm::test { namespace { struct allocator_test : public mr_test { @@ -28,8 +29,9 @@ struct allocator_test : public mr_test { TEST_P(allocator_test, first) { - rmm::device_vector ints(100, 1); - EXPECT_EQ(100, thrust::reduce(ints.begin(), ints.end())); + auto const num_ints{100}; + rmm::device_vector ints(num_ints, 1); + EXPECT_EQ(num_ints, thrust::reduce(ints.begin(), ints.end())); } INSTANTIATE_TEST_CASE_P(ThrustAllocatorTests, @@ -39,6 +41,6 @@ INSTANTIATE_TEST_CASE_P(ThrustAllocatorTests, mr_factory{"Pool", &make_pool}, mr_factory{"Binning", &make_binning}), [](auto const& info) { return info.param.name; }); + } // namespace -} // namespace test -} // namespace rmm +} // namespace rmm::test diff --git a/tests/mr/device/tracking_mr_tests.cpp b/tests/mr/device/tracking_mr_tests.cpp index 8dc666f0c..5926cbf62 100644 --- a/tests/mr/device/tracking_mr_tests.cpp +++ b/tests/mr/device/tracking_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,20 +14,24 @@ * limitations under the License. */ +#include "../../byte_literals.hpp" + #include #include #include #include -#include "mr_test.hpp" #include -namespace rmm { -namespace test { +namespace rmm::test { namespace { using tracking_adaptor = rmm::mr::tracking_resource_adaptor; +constexpr auto num_allocations{10}; +constexpr auto num_more_allocations{5}; +constexpr auto ten_MiB{10_MiB}; + TEST(TrackingTest, ThrowOnNullUpstream) { auto construct_nullptr = []() { tracking_adaptor mr{nullptr}; }; @@ -45,11 +49,12 @@ TEST(TrackingTest, AllFreed) { tracking_adaptor mr{rmm::mr::get_current_device_resource()}; std::vector allocations; - for (int i = 0; i < 10; ++i) { - allocations.push_back(mr.allocate(10_MiB)); + allocations.reserve(num_allocations); + for (int i = 0; i < num_allocations; ++i) { + allocations.push_back(mr.allocate(ten_MiB)); } - for (auto p : allocations) { - mr.deallocate(p, 10_MiB); + for (auto* alloc : allocations) { + mr.deallocate(alloc, ten_MiB); } EXPECT_EQ(mr.get_outstanding_allocations().size(), 0); EXPECT_EQ(mr.get_allocated_bytes(), 0); @@ -59,16 +64,17 @@ TEST(TrackingTest, AllocationsLeftWithStacks) { tracking_adaptor mr{rmm::mr::get_current_device_resource(), true}; std::vector allocations; - for (int i = 0; i < 10; ++i) { - allocations.push_back(mr.allocate(10_MiB)); + allocations.reserve(num_allocations); + for (int i = 0; i < num_allocations; ++i) { + allocations.push_back(mr.allocate(ten_MiB)); } - for (int i = 0; i < 10; i += 2) { - mr.deallocate(allocations[i], 10_MiB); + for (int i = 0; i < num_allocations; i += 2) { + mr.deallocate(allocations[i], ten_MiB); } - EXPECT_EQ(mr.get_outstanding_allocations().size(), 5); - EXPECT_EQ(mr.get_allocated_bytes(), 50_MiB); + EXPECT_EQ(mr.get_outstanding_allocations().size(), num_allocations / 2); + EXPECT_EQ(mr.get_allocated_bytes(), ten_MiB * (num_allocations / 2)); auto const& outstanding_allocations = mr.get_outstanding_allocations(); - EXPECT_EQ(outstanding_allocations.size(), 5); + EXPECT_EQ(outstanding_allocations.size(), num_allocations 
/ 2); EXPECT_NE(outstanding_allocations.begin()->second.strace, nullptr); } @@ -76,16 +82,18 @@ TEST(TrackingTest, AllocationsLeftWithoutStacks) { tracking_adaptor mr{rmm::mr::get_current_device_resource()}; std::vector allocations; - for (int i = 0; i < 10; ++i) { - allocations.push_back(mr.allocate(10_MiB)); + allocations.reserve(num_allocations); + for (int i = 0; i < num_allocations; ++i) { + allocations.push_back(mr.allocate(ten_MiB)); } - for (int i = 0; i < 10; i += 2) { - mr.deallocate(allocations[i], 10_MiB); + + for (int i = 0; i < num_allocations; i += 2) { + mr.deallocate(allocations[i], ten_MiB); } - EXPECT_EQ(mr.get_outstanding_allocations().size(), 5); - EXPECT_EQ(mr.get_allocated_bytes(), 50_MiB); + EXPECT_EQ(mr.get_outstanding_allocations().size(), num_allocations / 2); + EXPECT_EQ(mr.get_allocated_bytes(), ten_MiB * (num_allocations / 2)); auto const& outstanding_allocations = mr.get_outstanding_allocations(); - EXPECT_EQ(outstanding_allocations.size(), 5); + EXPECT_EQ(outstanding_allocations.size(), num_allocations / 2); EXPECT_EQ(outstanding_allocations.begin()->second.strace, nullptr); } @@ -95,27 +103,27 @@ TEST(TrackingTest, MultiTracking) rmm::mr::set_current_device_resource(&mr); std::vector> allocations; - for (std::size_t i = 0; i < 10; ++i) { + for (std::size_t i = 0; i < num_allocations; ++i) { allocations.emplace_back( - std::make_shared(10_MiB, rmm::cuda_stream_default)); + std::make_shared(ten_MiB, rmm::cuda_stream_default)); } - EXPECT_EQ(mr.get_outstanding_allocations().size(), 10); + EXPECT_EQ(mr.get_outstanding_allocations().size(), num_allocations); tracking_adaptor inner_mr{rmm::mr::get_current_device_resource()}; rmm::mr::set_current_device_resource(&inner_mr); - for (std::size_t i = 0; i < 5; ++i) { + for (std::size_t i = 0; i < num_more_allocations; ++i) { allocations.emplace_back( - std::make_shared(10_MiB, rmm::cuda_stream_default)); + std::make_shared(ten_MiB, rmm::cuda_stream_default)); } // Check the allocated bytes for both MRs - EXPECT_EQ(mr.get_outstanding_allocations().size(), 15); - EXPECT_EQ(inner_mr.get_outstanding_allocations().size(), 5); + EXPECT_EQ(mr.get_outstanding_allocations().size(), num_allocations + num_more_allocations); + EXPECT_EQ(inner_mr.get_outstanding_allocations().size(), num_more_allocations); - EXPECT_EQ(mr.get_allocated_bytes(), 150_MiB); - EXPECT_EQ(inner_mr.get_allocated_bytes(), 50_MiB); + EXPECT_EQ(mr.get_allocated_bytes(), ten_MiB * (num_allocations + num_more_allocations)); + EXPECT_EQ(inner_mr.get_allocated_bytes(), ten_MiB * num_more_allocations); EXPECT_GT(mr.get_outstanding_allocations_str().size(), 0); @@ -140,26 +148,26 @@ TEST(TrackingTest, NegativeInnerTracking) // memory pointer tracking_adaptor mr{rmm::mr::get_current_device_resource()}; std::vector allocations; - for (std::size_t i = 0; i < 10; ++i) { - allocations.push_back(mr.allocate(10_MiB)); + for (std::size_t i = 0; i < num_allocations; ++i) { + allocations.push_back(mr.allocate(ten_MiB)); } - EXPECT_EQ(mr.get_outstanding_allocations().size(), 10); + EXPECT_EQ(mr.get_outstanding_allocations().size(), num_allocations); tracking_adaptor inner_mr{&mr}; // Add more allocations - for (std::size_t i = 0; i < 5; ++i) { - allocations.push_back(inner_mr.allocate(10_MiB)); + for (std::size_t i = 0; i < num_more_allocations; ++i) { + allocations.push_back(inner_mr.allocate(ten_MiB)); } // Check the outstanding allocations - EXPECT_EQ(mr.get_outstanding_allocations().size(), 15); - EXPECT_EQ(inner_mr.get_outstanding_allocations().size(), 5); + 
EXPECT_EQ(mr.get_outstanding_allocations().size(), num_allocations + num_more_allocations); + EXPECT_EQ(inner_mr.get_outstanding_allocations().size(), num_more_allocations); // Deallocate all allocations using the inner_mr - for (std::size_t i = 0; i < allocations.size(); ++i) { - inner_mr.deallocate(allocations[i], 10_MiB); + for (auto& allocation : allocations) { + inner_mr.deallocate(allocation, ten_MiB); } allocations.clear(); @@ -172,13 +180,13 @@ TEST(TrackingTest, DeallocWrongBytes) { tracking_adaptor mr{rmm::mr::get_current_device_resource()}; std::vector allocations; - for (std::size_t i = 0; i < 10; ++i) { - allocations.push_back(mr.allocate(10_MiB)); + for (std::size_t i = 0; i < num_allocations; ++i) { + allocations.push_back(mr.allocate(ten_MiB)); } // When deallocating, pass the wrong bytes to deallocate - for (std::size_t i = 0; i < allocations.size(); ++i) { - mr.deallocate(allocations[i], 5_MiB); + for (auto& allocation : allocations) { + mr.deallocate(allocation, ten_MiB / 2); } allocations.clear(); @@ -190,5 +198,4 @@ TEST(TrackingTest, DeallocWrongBytes) } } // namespace -} // namespace test -} // namespace rmm +} // namespace rmm::test From 7ac76e6c79c8e8a418bb391f7c774e60ddd8ce6f Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 13:28:35 +1000 Subject: [PATCH 50/72] tidy host mr tests --- tests/mr/host/mr_tests.cpp | 151 ++++++++++++++++++------------------- 1 file changed, 72 insertions(+), 79 deletions(-) diff --git a/tests/mr/host/mr_tests.cpp b/tests/mr/host/mr_tests.cpp index 442a70ca0..24f52a88a 100644 --- a/tests/mr/host/mr_tests.cpp +++ b/tests/mr/host/mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,36 +14,33 @@ * limitations under the License. */ -#include +#include "../../byte_literals.hpp" +#include #include #include #include #include + +#include + #include #include #include +namespace rmm::test { namespace { -inline bool is_aligned(void* p, std::size_t alignment = alignof(std::max_align_t)) -{ - return (0 == reinterpret_cast(p) % alignment); -} - -inline void expect_aligned(void* p, std::size_t alignment) +inline bool is_aligned(void* ptr, std::size_t alignment = alignof(std::max_align_t)) { - EXPECT_EQ(0, reinterpret_cast(p) % alignment); + return rmm::detail::is_pointer_aligned(ptr, alignment); } -/**---------------------------------------------------------------------------* - * @brief Returns if a pointer points to a device memory or managed memory - * allocation. - *---------------------------------------------------------------------------**/ -inline bool is_device_memory(void* p) +// Returns true if a pointer points to a device memory or managed memory allocation. +inline bool is_device_memory(void* ptr) { cudaPointerAttributes attributes{}; - if (cudaSuccess != cudaPointerGetAttributes(&attributes, p)) { return false; } + if (cudaSuccess != cudaPointerGetAttributes(&attributes, ptr)) { return false; } #if CUDART_VERSION < 10000 // memoryType is deprecated in CUDA 10 return attributes.memoryType == cudaMemoryTypeDevice; #else @@ -54,24 +51,23 @@ inline bool is_device_memory(void* p) /** * @brief Returns if a pointer `p` points to pinned host memory. 
*/ -inline bool is_pinned_memory(void* p) +inline bool is_pinned_memory(void* ptr) { cudaPointerAttributes attributes{}; - if (cudaSuccess != cudaPointerGetAttributes(&attributes, p)) { return false; } + if (cudaSuccess != cudaPointerGetAttributes(&attributes, ptr)) { return false; } return attributes.type == cudaMemoryTypeHost; } -static constexpr std::size_t size_word{4}; -static constexpr std::size_t size_kb{std::size_t{1} << 10}; -static constexpr std::size_t size_mb{std::size_t{1} << 20}; -static constexpr std::size_t size_gb{std::size_t{1} << 30}; -static constexpr std::size_t size_tb{std::size_t{1} << 40}; -static constexpr std::size_t size_pb{std::size_t{1} << 50}; +constexpr std::size_t size_word{4_B}; +constexpr std::size_t size_kb{1_KiB}; +constexpr std::size_t size_mb{1_MiB}; +constexpr std::size_t size_gb{1_GiB}; +constexpr std::size_t size_pb{1_PiB}; struct allocation { - void* p{nullptr}; + void* ptr{nullptr}; std::size_t size{0}; - allocation(void* _p, std::size_t _size) : p{_p}, size{_size} {} + allocation(void* ptr, std::size_t size) : ptr{ptr}, size{size} {} allocation() = default; }; } // namespace @@ -81,7 +77,6 @@ struct MRTest : public ::testing::Test { std::unique_ptr mr; MRTest() : mr{new MemoryResourceType} {} - ~MRTest() = default; }; using resources = ::testing::Types; @@ -92,56 +87,56 @@ TYPED_TEST(MRTest, SelfEquality) { EXPECT_TRUE(this->mr->is_equal(*this->mr)); } TYPED_TEST(MRTest, AllocateZeroBytes) { - void* p{nullptr}; - EXPECT_NO_THROW(p = this->mr->allocate(0)); - EXPECT_NO_THROW(this->mr->deallocate(p, 0)); + void* ptr{nullptr}; + EXPECT_NO_THROW(ptr = this->mr->allocate(0)); + EXPECT_NO_THROW(this->mr->deallocate(ptr, 0)); } TYPED_TEST(MRTest, AllocateWord) { - void* p{nullptr}; - EXPECT_NO_THROW(p = this->mr->allocate(size_word)); - EXPECT_NE(nullptr, p); - EXPECT_TRUE(is_aligned(p)); - EXPECT_FALSE(is_device_memory(p)); - EXPECT_NO_THROW(this->mr->deallocate(p, size_word)); + void* ptr{nullptr}; + EXPECT_NO_THROW(ptr = this->mr->allocate(size_word)); + EXPECT_NE(nullptr, ptr); + EXPECT_TRUE(is_aligned(ptr)); + EXPECT_FALSE(is_device_memory(ptr)); + EXPECT_NO_THROW(this->mr->deallocate(ptr, size_word)); } TYPED_TEST(MRTest, AllocateKB) { - void* p{nullptr}; - EXPECT_NO_THROW(p = this->mr->allocate(size_kb)); - EXPECT_NE(nullptr, p); - EXPECT_TRUE(is_aligned(p)); - EXPECT_FALSE(is_device_memory(p)); - EXPECT_NO_THROW(this->mr->deallocate(p, size_kb)); + void* ptr{nullptr}; + EXPECT_NO_THROW(ptr = this->mr->allocate(size_kb)); + EXPECT_NE(nullptr, ptr); + EXPECT_TRUE(is_aligned(ptr)); + EXPECT_FALSE(is_device_memory(ptr)); + EXPECT_NO_THROW(this->mr->deallocate(ptr, size_kb)); } TYPED_TEST(MRTest, AllocateMB) { - void* p{nullptr}; - EXPECT_NO_THROW(p = this->mr->allocate(size_mb)); - EXPECT_NE(nullptr, p); - EXPECT_TRUE(is_aligned(p)); - EXPECT_FALSE(is_device_memory(p)); - EXPECT_NO_THROW(this->mr->deallocate(p, size_mb)); + void* ptr{nullptr}; + EXPECT_NO_THROW(ptr = this->mr->allocate(size_mb)); + EXPECT_NE(nullptr, ptr); + EXPECT_TRUE(is_aligned(ptr)); + EXPECT_FALSE(is_device_memory(ptr)); + EXPECT_NO_THROW(this->mr->deallocate(ptr, size_mb)); } TYPED_TEST(MRTest, AllocateGB) { - void* p{nullptr}; - EXPECT_NO_THROW(p = this->mr->allocate(size_gb)); - EXPECT_NE(nullptr, p); - EXPECT_TRUE(is_aligned(p)); - EXPECT_FALSE(is_device_memory(p)); - EXPECT_NO_THROW(this->mr->deallocate(p, size_gb)); + void* ptr{nullptr}; + EXPECT_NO_THROW(ptr = this->mr->allocate(size_gb)); + EXPECT_NE(nullptr, ptr); + EXPECT_TRUE(is_aligned(ptr)); + 
EXPECT_FALSE(is_device_memory(ptr)); + EXPECT_NO_THROW(this->mr->deallocate(ptr, size_gb)); } TYPED_TEST(MRTest, AllocateTooMuch) { - void* p{nullptr}; - EXPECT_THROW(p = this->mr->allocate(size_pb), std::bad_alloc); - EXPECT_EQ(nullptr, p); + void* ptr{nullptr}; + EXPECT_THROW(ptr = this->mr->allocate(size_pb), std::bad_alloc); + EXPECT_EQ(nullptr, ptr); } TYPED_TEST(MRTest, RandomAllocations) @@ -156,16 +151,16 @@ TYPED_TEST(MRTest, RandomAllocations) // 100 allocations from [0,5MB) std::for_each( - allocations.begin(), allocations.end(), [&generator, &distribution, this](allocation& a) { - a.size = distribution(generator); - EXPECT_NO_THROW(a.p = this->mr->allocate(a.size)); - EXPECT_NE(nullptr, a.p); - EXPECT_TRUE(is_aligned(a.p)); + allocations.begin(), allocations.end(), [&generator, &distribution, this](allocation& alloc) { + alloc.size = distribution(generator); + EXPECT_NO_THROW(alloc.ptr = this->mr->allocate(alloc.size)); + EXPECT_NE(nullptr, alloc.ptr); + EXPECT_TRUE(is_aligned(alloc.ptr)); }); std::for_each( - allocations.begin(), allocations.end(), [generator, distribution, this](allocation& a) { - EXPECT_NO_THROW(this->mr->deallocate(a.p, a.size)); + allocations.begin(), allocations.end(), [generator, distribution, this](allocation& alloc) { + EXPECT_NO_THROW(this->mr->deallocate(alloc.ptr, alloc.size)); }); } @@ -189,30 +184,27 @@ TYPED_TEST(MRTest, MixedRandomAllocationFree) std::size_t allocation_size = size_distribution(generator); EXPECT_NO_THROW(allocations.emplace_back(this->mr->allocate(allocation_size), allocation_size)); auto new_allocation = allocations.back(); - EXPECT_NE(nullptr, new_allocation.p); - EXPECT_TRUE(is_aligned(new_allocation.p)); + EXPECT_NE(nullptr, new_allocation.ptr); + EXPECT_TRUE(is_aligned(new_allocation.ptr)); bool const free_front{free_distribution(generator) == free_distribution.max()}; if (free_front) { auto front = allocations.front(); - EXPECT_NO_THROW(this->mr->deallocate(front.p, front.size)); + EXPECT_NO_THROW(this->mr->deallocate(front.ptr, front.size)); allocations.pop_front(); } } // free any remaining allocations - for (auto a : allocations) { - EXPECT_NO_THROW(this->mr->deallocate(a.p, a.size)); + for (auto alloc : allocations) { + EXPECT_NO_THROW(this->mr->deallocate(alloc.ptr, alloc.size)); allocations.pop_front(); } } -static constexpr std::size_t MinTestedSize = 32; -static constexpr std::size_t MaxTestedSize = 8 * 1024; -static constexpr std::size_t TestedSizeStep = 1; -static constexpr std::size_t MinTestedAlignment = 16; -static constexpr std::size_t MaxTestedAlignment = 4 * 1024; -static constexpr std::size_t TestedAlignmentMultiplier = 2; +static constexpr std::size_t MinTestedAlignment{16}; +static constexpr std::size_t MaxTestedAlignment{4096}; +static constexpr std::size_t TestedAlignmentMultiplier{2}; static constexpr std::size_t NUM_TRIALS{100}; TYPED_TEST(MRTest, AlignmentTest) @@ -248,7 +240,7 @@ TYPED_TEST(MRTest, UnsupportedAlignmentTest) // alignment of `alignof(std::max_align_t)` auto const bad_alignment = alignment + 1; EXPECT_NO_THROW(ptr = this->mr->allocate(allocation_size, bad_alignment)); - expect_aligned(ptr, alignof(std::max_align_t)); + EXPECT_TRUE(is_aligned(ptr, alignof(std::max_align_t))); EXPECT_NO_THROW(this->mr->deallocate(ptr, allocation_size, bad_alignment)); } } @@ -257,8 +249,9 @@ TYPED_TEST(MRTest, UnsupportedAlignmentTest) TEST(PinnedResource, isPinned) { rmm::mr::pinned_memory_resource mr; - void* p{nullptr}; - EXPECT_NO_THROW(p = mr.allocate(100)); - EXPECT_TRUE(is_pinned_memory(p)); - 
EXPECT_NO_THROW(mr.deallocate(p, 100)); + void* ptr{nullptr}; + EXPECT_NO_THROW(ptr = mr.allocate(100)); + EXPECT_TRUE(is_pinned_memory(ptr)); + EXPECT_NO_THROW(mr.deallocate(ptr, 100)); } +} // namespace rmm::test From 9dcbae6a653f821af22b41e1e1d69df30774c6da Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 13:30:06 +1000 Subject: [PATCH 51/72] copyright --- tests/cuda_stream_pool_tests.cpp | 2 +- tests/device_uvector_tests.cpp | 2 +- tests/logger_tests.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/cuda_stream_pool_tests.cpp b/tests/cuda_stream_pool_tests.cpp index 3f1919600..4fddb2da6 100644 --- a/tests/cuda_stream_pool_tests.cpp +++ b/tests/cuda_stream_pool_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/tests/device_uvector_tests.cpp b/tests/device_uvector_tests.cpp index b3c06885b..09aa7e527 100644 --- a/tests/device_uvector_tests.cpp +++ b/tests/device_uvector_tests.cpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/tests/logger_tests.cpp b/tests/logger_tests.cpp index 498a96722..e0663e84b 100644 --- a/tests/logger_tests.cpp +++ b/tests/logger_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 1cfa4933715f58bf5a595ce6aec3c5dda200bf7d Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 16:25:44 +1000 Subject: [PATCH 52/72] tidy benchmarks and more --- .../cuda_stream_pool_bench.cpp | 22 ++-- .../device_uvector/device_uvector_bench.cu | 30 +++--- .../multi_stream_allocations_bench.cu | 29 ++--- .../random_allocations/random_allocations.cpp | 8 +- benchmarks/replay/replay.cpp | 102 ++++++++++-------- .../synchronization/synchronization.cpp | 5 +- .../synchronization/synchronization.hpp | 14 ++- benchmarks/utilities/log_parser.hpp | 86 +++++++++------ .../utilities/simulated_memory_resource.hpp | 43 ++++---- tests/cuda_stream_tests.cpp | 1 + tests/device_buffer_tests.cu | 31 ++++-- tests/mr/device/mr_multithreaded_tests.cpp | 59 +++++----- tests/mr/host/mr_tests.cpp | 7 +- 13 files changed, 248 insertions(+), 189 deletions(-) diff --git a/benchmarks/cuda_stream_pool/cuda_stream_pool_bench.cpp b/benchmarks/cuda_stream_pool/cuda_stream_pool_bench.cpp index a536077f9..6710ffe50 100644 --- a/benchmarks/cuda_stream_pool/cuda_stream_pool_bench.cpp +++ b/benchmarks/cuda_stream_pool/cuda_stream_pool_bench.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,36 +14,36 @@ * limitations under the License. 
*/ -#include - #include #include #include +#include + #include static void BM_StreamPoolGetStream(benchmark::State& state) { rmm::cuda_stream_pool stream_pool{}; - for (auto _ : state) { - auto s = stream_pool.get_stream(); - cudaStreamQuery(s.value()); + for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores) + auto stream = stream_pool.get_stream(); + cudaStreamQuery(stream.value()); } - state.SetItemsProcessed(state.iterations()); + state.SetItemsProcessed(static_cast(state.iterations())); } BENCHMARK(BM_StreamPoolGetStream)->Unit(benchmark::kMicrosecond); static void BM_CudaStreamClass(benchmark::State& state) { - for (auto _ : state) { - auto s = rmm::cuda_stream{}; - cudaStreamQuery(s.view().value()); + for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores) + auto stream = rmm::cuda_stream{}; + cudaStreamQuery(stream.view().value()); } - state.SetItemsProcessed(state.iterations()); + state.SetItemsProcessed(static_cast(state.iterations())); } BENCHMARK(BM_CudaStreamClass)->Unit(benchmark::kMicrosecond); diff --git a/benchmarks/device_uvector/device_uvector_bench.cu b/benchmarks/device_uvector/device_uvector_bench.cu index 01d81c55d..6665ccaa8 100644 --- a/benchmarks/device_uvector/device_uvector_bench.cu +++ b/benchmarks/device_uvector/device_uvector_bench.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,33 +14,39 @@ * limitations under the License. */ -#include - -#include #include #include #include #include #include +#include + +#include + static void BM_UvectorSizeConstruction(benchmark::State& state) { rmm::mr::cuda_memory_resource cuda_mr{}; rmm::mr::pool_memory_resource mr{&cuda_mr}; rmm::mr::set_current_device_resource(&mr); - for (auto _ : state) { + for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores) rmm::device_uvector vec(state.range(0), rmm::cuda_stream_view{}); cudaDeviceSynchronize(); } - state.SetItemsProcessed(state.iterations()); + state.SetItemsProcessed(static_cast(state.iterations())); rmm::mr::set_current_device_resource(nullptr); } + +const auto range_multiplier{10}; +const auto range_start{10'000}; +const auto range_end{1'000'000'000}; + BENCHMARK(BM_UvectorSizeConstruction) - ->RangeMultiplier(10) - ->Range(10'000, 1'000'000'000) + ->RangeMultiplier(range_multiplier) + ->Range(range_start, range_end) ->Unit(benchmark::kMicrosecond); static void BM_ThrustVectorSizeConstruction(benchmark::State& state) @@ -49,19 +55,19 @@ static void BM_ThrustVectorSizeConstruction(benchmark::State& state) rmm::mr::pool_memory_resource mr{&cuda_mr}; rmm::mr::set_current_device_resource(&mr); - for (auto _ : state) { + for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores) rmm::device_vector vec(state.range(0)); cudaDeviceSynchronize(); } - state.SetItemsProcessed(state.iterations()); + state.SetItemsProcessed(static_cast(state.iterations())); rmm::mr::set_current_device_resource(nullptr); } BENCHMARK(BM_ThrustVectorSizeConstruction) - ->RangeMultiplier(10) - ->Range(10'000, 1'000'000'000) + ->RangeMultiplier(range_multiplier) + ->Range(range_start, range_end) ->Unit(benchmark::kMicrosecond); BENCHMARK_MAIN(); diff --git a/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu b/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu index 7d0a8a17a..9b6210a2d 100644 --- 
a/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu +++ b/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu @@ -16,8 +16,6 @@ #include -#include - #include #include #include @@ -31,15 +29,18 @@ #include +#include + #include __global__ void compute_bound_kernel(int64_t* out) { clock_t clock_begin = clock64(); clock_t clock_current = clock_begin; + auto const million{1'000'000}; - if (threadIdx.x == 0) { - while (clock_current - clock_begin < 1000000) { + if (threadIdx.x == 0) { // NOLINT(readability-static-accessed-through-instance) + while (clock_current - clock_begin < million) { clock_current = clock64(); } } @@ -69,7 +70,7 @@ static void run_test(std::size_t num_kernels, } } -static void BM_MultiStreamAllocations(benchmark::State& state, MRFactoryFunc factory) +static void BM_MultiStreamAllocations(benchmark::State& state, MRFactoryFunc const& factory) { auto mr = factory(); @@ -77,18 +78,18 @@ static void BM_MultiStreamAllocations(benchmark::State& state, MRFactoryFunc fac auto num_streams = state.range(0); auto num_kernels = state.range(1); - auto do_prewarm = state.range(2); + bool do_prewarm = state.range(2) != 0; auto stream_pool = rmm::cuda_stream_pool(num_streams); if (do_prewarm) { run_prewarm(stream_pool, mr.get()); } - for (auto _ : state) { + for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores) run_test(num_kernels, stream_pool, mr.get()); cudaDeviceSynchronize(); } - state.SetItemsProcessed(state.iterations() * num_kernels); + state.SetItemsProcessed(static_cast(state.iterations() * num_kernels)); rmm::mr::set_current_device_resource(nullptr); } @@ -124,7 +125,7 @@ static void benchmark_range(benchmark::internal::Benchmark* b) ->Unit(benchmark::kMicrosecond); } -MRFactoryFunc get_mr_factory(std::string resource_name) +MRFactoryFunc get_mr_factory(std::string const& resource_name) { if (resource_name == "cuda") { return &make_cuda; } #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT @@ -139,7 +140,7 @@ MRFactoryFunc get_mr_factory(std::string resource_name) RMM_FAIL(); } -void declare_benchmark(std::string name) +void declare_benchmark(std::string const& name) { if (name == "cuda") { BENCHMARK_CAPTURE(BM_MultiStreamAllocations, cuda, &make_cuda) // @@ -176,7 +177,7 @@ void declare_benchmark(std::string name) std::cout << "Error: invalid memory_resource name: " << name << std::endl; } -void run_profile(std::string resource_name, int kernel_count, int stream_count, bool prewarm) +void run_profile(std::string const& resource_name, int kernel_count, int stream_count, bool prewarm) { auto mr_factory = get_mr_factory(resource_name); auto mr = mr_factory(); @@ -228,7 +229,11 @@ int main(int argc, char** argv) auto num_kernels = args["kernels"].as(); auto num_streams = args["streams"].as(); auto prewarm = args["warm"].as(); - run_profile(resource_name, num_kernels, num_streams, prewarm); + try { + run_profile(resource_name, num_kernels, num_streams, prewarm); + } catch (std::exception const& e) { + std::cout << "Exception caught: " << e.what() << std::endl; + } } else { auto resource_names = std::vector(); diff --git a/benchmarks/random_allocations/random_allocations.cpp b/benchmarks/random_allocations/random_allocations.cpp index b82a10c89..65abd56b5 100644 --- a/benchmarks/random_allocations/random_allocations.cpp +++ b/benchmarks/random_allocations/random_allocations.cpp @@ -197,7 +197,7 @@ static void BM_RandomAllocations(benchmark::State& state, MRFactoryFunc const& f std::size_t max_size = state.range(1); try { - for (auto _ : state) { 
+ for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores) uniform_random_allocations(*mr, num_allocations, max_size, max_usage); } } catch (std::exception const& e) { @@ -321,7 +321,11 @@ int main(int argc, char** argv) std::cout << "Profiling " << resource << " with " << num_allocations << " allocations of max " << max_size << "B\n"; - profile_random_allocations(funcs.at(resource), num_allocations, max_size); + try { + profile_random_allocations(funcs.at(resource), num_allocations, max_size); + } catch (std::exception const& e) { + std::cout << "Exception caught: " << e.what() << std::endl; + } std::cout << "Finished\n"; } else { diff --git a/benchmarks/replay/replay.cpp b/benchmarks/replay/replay.cpp index 6fbd5f2ab..a1355cdb2 100644 --- a/benchmarks/replay/replay.cpp +++ b/benchmarks/replay/replay.cpp @@ -14,7 +14,6 @@ * limitations under the License. */ -#include #include #include #include @@ -37,6 +36,7 @@ #include +#include #include #include #include @@ -45,7 +45,7 @@ #include /// MR factory functions -std::shared_ptr make_cuda(std::size_t = 0) +std::shared_ptr make_cuda(std::size_t /*unused*/ = 0) { return std::make_shared(); } @@ -75,7 +75,9 @@ inline auto make_binning(std::size_t simulated_size) { auto pool = make_pool(simulated_size); auto mr = rmm::mr::make_owning_wrapper(pool); - for (std::size_t i = 18; i <= 22; i++) { + const auto min_size_exp{18}; + const auto max_size_exp{22}; + for (std::size_t i = min_size_exp; i <= max_size_exp; i++) { mr->wrapped().add_bin(1 << i); } return mr; @@ -89,8 +91,8 @@ using MRFactoryFunc = std::function> const& events) : factory_{std::move(factory)}, simulated_size_{simulated_size}, - mr_{}, events_{events}, - allocation_map{events.size()}, - event_index{0} + allocation_map{events.size()} { } @@ -144,12 +144,14 @@ struct replay_benchmark { simulated_size_{other.simulated_size_}, mr_{std::move(other.mr_)}, events_{other.events_}, - allocation_map{events_.size()}, - event_index{0} + allocation_map{std::move(other.allocation_map)} { } + ~replay_benchmark() = default; replay_benchmark(replay_benchmark const&) = delete; + replay_benchmark& operator=(replay_benchmark const&) = delete; + replay_benchmark& operator=(replay_benchmark&& other) noexcept = delete; /// Add an allocation to the map (NOT thread safe) void set_allocation(uintptr_t ptr, allocation alloc) { allocation_map.insert({ptr, alloc}); } @@ -159,9 +161,9 @@ struct replay_benchmark { { auto iter = allocation_map.find(ptr); if (iter != allocation_map.end()) { - allocation a = iter->second; + allocation alloc = iter->second; allocation_map.erase(iter); - return a; + return alloc; } return allocation{}; } @@ -187,11 +189,12 @@ struct replay_benchmark { auto alloc = ptr_alloc.second; num_leaked++; total_leaked += alloc.size; - mr_->deallocate(alloc.p, alloc.size); + mr_->deallocate(alloc.ptr, alloc.size); } - if (num_leaked > 0) + if (num_leaked > 0) { std::cout << "LOG shows leak of " << num_leaked << " allocations of " << total_leaked << " total bytes\n"; + } allocation_map.clear(); mr_.reset(); } @@ -204,20 +207,20 @@ struct replay_benchmark { auto const& my_events = events_.at(state.thread_index); - for (auto _ : state) { - std::for_each(my_events.begin(), my_events.end(), [&state, this](auto e) { + for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores) + std::for_each(my_events.begin(), my_events.end(), [this](auto event) { // ensure correct ordering between threads std::unique_lock lock{event_mutex}; - if (event_index != e.index) { - cv.wait(lock, [&]() { 
return event_index == e.index; }); + if (event_index != event.index) { + cv.wait(lock, [&]() { return event_index == event.index; }); } - if (rmm::detail::action::ALLOCATE == e.act) { - auto p = mr_->allocate(e.size); - set_allocation(e.pointer, allocation{p, e.size}); + if (rmm::detail::action::ALLOCATE == event.act) { + auto ptr = mr_->allocate(event.size); + set_allocation(event.pointer, allocation{ptr, event.size}); } else { - auto a = remove_allocation(e.pointer); - mr_->deallocate(a.p, e.size); + auto alloc = remove_allocation(event.pointer); + mr_->deallocate(alloc.p, event.size); } event_index++; @@ -242,11 +245,11 @@ std::vector> parse_per_thread_events(std::string RMM_EXPECTS(std::all_of(all_events.begin(), all_events.end(), - [](auto const& e) { - cudaStream_t cs; - memcpy(&cs, &e.stream, sizeof(cudaStream_t)); - auto s = rmm::cuda_stream_view{cs}; - return s.is_default() or s.is_per_thread_default(); + [](auto const& event) { + cudaStream_t custream; + memcpy(&custream, &event.stream, sizeof(cudaStream_t)); + auto stream = rmm::cuda_stream_view{custream}; + return stream.is_default() or stream.is_per_thread_default(); }), "Non-default streams not currently supported."); @@ -294,28 +297,29 @@ void declare_benchmark(std::string const& name, std::vector> const& per_thread_events, std::size_t num_threads) { - if (name == "cuda") + if (name == "cuda") { benchmark::RegisterBenchmark("CUDA Resource", replay_benchmark(&make_cuda, simulated_size, per_thread_events)) ->Unit(benchmark::kMillisecond) - ->Threads(num_threads); - else if (name == "binning") + ->Threads(static_cast(num_threads)); + } else if (name == "binning") { benchmark::RegisterBenchmark("Binning Resource", replay_benchmark(&make_binning, simulated_size, per_thread_events)) ->Unit(benchmark::kMillisecond) - ->Threads(num_threads); - else if (name == "pool") + ->Threads(static_cast(num_threads)); + } else if (name == "pool") { benchmark::RegisterBenchmark("Pool Resource", replay_benchmark(&make_pool, simulated_size, per_thread_events)) ->Unit(benchmark::kMillisecond) - ->Threads(num_threads); - else if (name == "arena") + ->Threads(static_cast(num_threads)); + } else if (name == "arena") { benchmark::RegisterBenchmark("Arena Resource", replay_benchmark(&make_arena, simulated_size, per_thread_events)) ->Unit(benchmark::kMillisecond) - ->Threads(num_threads); - else + ->Threads(static_cast(num_threads)); + } else { std::cout << "Error: invalid memory_resource name: " << name << "\n"; + } } // Usage: REPLAY_BENCHMARK -f "path/to/log/file" @@ -355,14 +359,22 @@ int main(int argc, char** argv) auto filename = args["file"].as(); - auto per_thread_events = parse_per_thread_events(filename); + auto per_thread_events = [filename]() { + try { + auto events = parse_per_thread_events(filename); + return events; + } catch (std::exception const& e) { + std::cout << "Failed to parse events: " << e.what() << std::endl; + return std::vector>{}; + } + }(); #ifdef CUDA_API_PER_THREAD_DEFAULT_STREAM std::cout << "Using CUDA per-thread default stream.\n"; #endif auto const simulated_size = - static_cast(args["size"].as() * static_cast(1u << 30u)); + static_cast(args["size"].as() * static_cast(1U << 30U)); if (simulated_size != 0 && args["resource"].as() != "cuda") { std::cout << "Simulating GPU with memory size of " << simulated_size << " bytes.\n"; } @@ -375,11 +387,11 @@ int main(int argc, char** argv) [](std::size_t accum, auto const& events) { return accum + events.size(); }) << std::endl; - for (std::size_t t = 0; t < 
per_thread_events.size(); ++t) { - std::cout << "Thread " << t << ": " << per_thread_events[t].size() << " events\n"; + for (std::size_t thread = 0; thread < per_thread_events.size(); ++thread) { + std::cout << "Thread " << thread << ": " << per_thread_events[thread].size() << " events\n"; if (args["verbose"].as()) { - for (auto const& e : per_thread_events[t]) { - std::cout << e << std::endl; + for (auto const& event : per_thread_events[thread]) { + std::cout << event << std::endl; } } } @@ -396,8 +408,8 @@ int main(int argc, char** argv) std::array mrs{"pool", "arena", "binning", "cuda"}; std::for_each(std::cbegin(mrs), std::cend(mrs), - [&simulated_size, &per_thread_events, &num_threads](auto const& s) { - declare_benchmark(s, simulated_size, per_thread_events, num_threads); + [&simulated_size, &per_thread_events, &num_threads](auto const& mr) { + declare_benchmark(mr, simulated_size, per_thread_events, num_threads); }); } diff --git a/benchmarks/synchronization/synchronization.cpp b/benchmarks/synchronization/synchronization.cpp index 5db8c4a3e..9e048b285 100644 --- a/benchmarks/synchronization/synchronization.cpp +++ b/benchmarks/synchronization/synchronization.cpp @@ -59,9 +59,10 @@ cuda_event_timer::~cuda_event_timer() RMM_CUDA_ASSERT_OK(cudaEventRecord(stop, stream.value())); RMM_CUDA_ASSERT_OK(cudaEventSynchronize(stop)); - float milliseconds = 0.0f; + float milliseconds = 0.0F; RMM_CUDA_ASSERT_OK(cudaEventElapsedTime(&milliseconds, start, stop)); - p_state->SetIterationTime(milliseconds / (1000.0f)); + const auto to_milliseconds{1.0F / 1000}; + p_state->SetIterationTime(milliseconds * to_milliseconds); RMM_CUDA_ASSERT_OK(cudaEventDestroy(start)); RMM_CUDA_ASSERT_OK(cudaEventDestroy(stop)); } diff --git a/benchmarks/synchronization/synchronization.hpp b/benchmarks/synchronization/synchronization.hpp index 6c2298575..b0007d9b2 100644 --- a/benchmarks/synchronization/synchronization.hpp +++ b/benchmarks/synchronization/synchronization.hpp @@ -89,9 +89,15 @@ class cuda_event_timer { // will be set to the value given by `cudaEventElapsedTime`. ~cuda_event_timer(); + // disable copy and move + cuda_event_timer(cuda_event_timer const&) = delete; + cuda_event_timer& operator=(cuda_event_timer const&) = delete; + cuda_event_timer(cuda_event_timer&&) = delete; + cuda_event_timer& operator=(cuda_event_timer&&) = delete; + private: - cudaEvent_t start; - cudaEvent_t stop; - rmm::cuda_stream_view stream; - benchmark::State* p_state; + cudaEvent_t start{}; + cudaEvent_t stop{}; + rmm::cuda_stream_view stream{}; + benchmark::State* p_state{}; }; diff --git a/benchmarks/utilities/log_parser.hpp b/benchmarks/utilities/log_parser.hpp index f4bbdbbc8..db939e65f 100644 --- a/benchmarks/utilities/log_parser.hpp +++ b/benchmarks/utilities/log_parser.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -31,8 +31,7 @@ #include #include -namespace rmm { -namespace detail { +namespace rmm::detail { enum class action : bool { ALLOCATE, FREE }; @@ -43,41 +42,57 @@ enum class action : bool { ALLOCATE, FREE }; struct event { event() = default; event(event const&) = default; - event(action a, std::size_t s, void const* p) - : act{a}, size{s}, pointer{reinterpret_cast(p)} + event& operator=(event const&) = default; + event(event&&) noexcept = default; + event& operator=(event&&) noexcept = default; + ~event() = default; + event(action act, std::size_t size, void const* ptr) + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + : act{act}, size{size}, pointer{reinterpret_cast(ptr)} { } - event(action a, std::size_t s, uintptr_t p) : act{a}, size{s}, pointer{p} {} + // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) + event(action act, std::size_t size, uintptr_t ptr) : act{act}, size{size}, pointer{ptr} {} - event(std::size_t tid, action a, std::size_t sz, uintptr_t p, uintptr_t s, std::size_t i) - : act{a}, size{sz}, pointer{p}, thread_id{tid}, stream{s}, index{i} + event(std::size_t tid, + action act, + std::size_t size, // NOLINT(bugprone-easily-swappable-parameters) + uintptr_t ptr, + uintptr_t stream, + std::size_t index) + : act{act}, size{size}, pointer{ptr}, thread_id{tid}, stream{stream}, index{index} { } - event(std::size_t tid, action a, std::size_t sz, void* p, uintptr_t s, std::size_t i) - : event{tid, a, sz, reinterpret_cast(p), s, i} + event( + std::size_t tid, action act, std::size_t size, void* ptr, uintptr_t stream, std::size_t index) + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + : event{tid, act, size, reinterpret_cast(ptr), stream, index} { } - friend std::ostream& operator<<(std::ostream& os, event const& e); + friend std::ostream& operator<<(std::ostream& os, event const& evt); - action act{}; ///< Indicates if the event is an allocation or a free - std::size_t size{}; ///< The size of the memory allocated or freed - uintptr_t pointer{}; ///< The pointer returned from an allocation, or the - ///< pointer freed - std::size_t thread_id; ///< ID of the thread that initiated the event - uintptr_t stream; ///< Numeric representation of the CUDA stream on which the event occurred - std::size_t index; ///< Original ordering index of the event + action act{}; ///< Indicates if the event is an allocation or a free + std::size_t size{}; ///< The size of the memory allocated or freed + uintptr_t pointer{}; ///< The pointer returned from an allocation, or the + ///< pointer freed + std::size_t thread_id{}; ///< ID of the thread that initiated the event + uintptr_t stream{}; ///< Numeric representation of the CUDA stream on which the event occurred + std::size_t index{}; ///< Original ordering index of the event }; -inline std::ostream& operator<<(std::ostream& os, event const& e) +inline std::ostream& operator<<(std::ostream& os, event const& evt) { - auto act_string = (e.act == action::ALLOCATE) ? "allocate" : "free"; + const auto* act_string = (evt.act == action::ALLOCATE) ? 
"allocate" : "free"; - os << "Thread: " << e.thread_id << std::setw(9) << act_string - << " Size: " << std::setw(std::numeric_limits::digits10) << e.size << " Pointer: " - << "0x" << std::hex << e.pointer << std::dec << " Stream: " << e.stream; + const auto format_width{9}; + + os << "Thread: " << evt.thread_id << std::setw(format_width) << act_string + << " Size: " << std::setw(std::numeric_limits::digits10) << evt.size + << " Pointer: " + << "0x" << std::hex << evt.pointer << std::dec << " Stream: " << evt.stream; return os; } @@ -105,11 +120,12 @@ inline std::chrono::time_point parse_time(std::string int seconds = std::stoi(str_time.substr(previous, current - previous)); int microseconds = std::stoi(str_time.substr(current + 1, str_time.length())); - std::tm tm{seconds, minutes, hours, 1, 0, 1970, 0, 0, 0}; + auto const epoch_year{1970}; + std::tm time{seconds, minutes, hours, 1, 0, epoch_year, 0, 0, 0}; - auto tp = std::chrono::system_clock::from_time_t(std::mktime(&tm)); - tp += std::chrono::microseconds{microseconds}; - return tp; + auto timepoint = std::chrono::system_clock::from_time_t(std::mktime(&time)); + timepoint += std::chrono::microseconds{microseconds}; + return timepoint; } /** @@ -128,8 +144,9 @@ inline std::vector parse_csv(std::string const& filename) std::vector tids = csv.GetColumn("Thread"); std::vector actions = csv.GetColumn("Action"); - auto parse_pointer = [](std::string const& s, uintptr_t& ptr) { - ptr = std::stoll(s, nullptr, 16); + auto parse_pointer = [](std::string const& str, uintptr_t& ptr) { + auto const base{16}; + ptr = std::stoll(str, nullptr, base); }; std::vector pointers = csv.GetColumn("Pointer", parse_pointer); @@ -140,19 +157,18 @@ inline std::vector parse_csv(std::string const& filename) RMM_EXPECTS(std::all_of(std::begin(size_list), std::end(size_list), - [size = sizes.size()](auto i) { return i == size; }), + [size = sizes.size()](auto val) { return val == size; }), "Size mismatch in columns of parsed log."); std::vector events(sizes.size()); for (std::size_t i = 0; i < actions.size(); ++i) { - auto const& a = actions[i]; - RMM_EXPECTS((a == "allocate") or (a == "free"), "Invalid action string."); - auto act = (a == "allocate") ? action::ALLOCATE : action::FREE; + auto const& action = actions[i]; + RMM_EXPECTS((action == "allocate") or (action == "free"), "Invalid action string."); + auto act = (action == "allocate") ? action::ALLOCATE : action::FREE; events[i] = event{tids[i], act, sizes[i], pointers[i], streams[i], i}; } return events; } -} // namespace detail -} // namespace rmm +} // namespace rmm::detail diff --git a/benchmarks/utilities/simulated_memory_resource.hpp b/benchmarks/utilities/simulated_memory_resource.hpp index 67883ad5d..44ee4798c 100644 --- a/benchmarks/utilities/simulated_memory_resource.hpp +++ b/benchmarks/utilities/simulated_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,8 +20,7 @@ #include -namespace rmm { -namespace mr { +namespace rmm::mr { /** * @brief A device memory resource that simulates a fix-sized GPU. @@ -39,14 +38,18 @@ class simulated_memory_resource final : public device_memory_resource { * @param memory_size_bytes The size of the memory to simulate. 
*/ explicit simulated_memory_resource(std::size_t memory_size_bytes) - : begin_{reinterpret_cast(0x100)}, - end_{reinterpret_cast(begin_ + memory_size_bytes)} + : begin_{reinterpret_cast(0x100)}, // NOLINT + end_{reinterpret_cast(begin_ + memory_size_bytes)} // NOLINT { } + ~simulated_memory_resource() override = default; + // Disable copy (and move) semantics. simulated_memory_resource(simulated_memory_resource const&) = delete; simulated_memory_resource& operator=(simulated_memory_resource const&) = delete; + simulated_memory_resource(simulated_memory_resource&&) = delete; + simulated_memory_resource& operator=(simulated_memory_resource&&) = delete; /** * @brief Query whether the resource supports use of non-null CUDA streams for @@ -54,14 +57,14 @@ class simulated_memory_resource final : public device_memory_resource { * * @returns bool false */ - bool supports_streams() const noexcept override { return false; } + [[nodiscard]] bool supports_streams() const noexcept override { return false; } /** * @brief Query whether the resource supports the get_mem_info API. * * @return false */ - bool supports_get_mem_info() const noexcept override { return false; } + [[nodiscard]] bool supports_get_mem_info() const noexcept override { return false; } private: /** @@ -74,24 +77,25 @@ class simulated_memory_resource final : public device_memory_resource { * @param bytes The size, in bytes, of the allocation * @return void* Pointer to the newly allocated memory */ - void* do_allocate(std::size_t bytes, cuda_stream_view) override + void* do_allocate(std::size_t bytes, cuda_stream_view /*stream*/) override { + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) RMM_EXPECTS(begin_ + bytes <= end_, rmm::bad_alloc, "Simulated memory size exceeded"); - auto p = static_cast(begin_); - begin_ += bytes; - return p; + auto* ptr = static_cast(begin_); + begin_ += bytes; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) + return ptr; } /** - * @brief Deallocate memory pointed to by \p p. + * @brief Deallocate memory pointed to by `p`. * * @note This call is ignored. * * @throws Nothing. * - * @param p Pointer to be deallocated + * @param ptr Pointer to be deallocated */ - void do_deallocate(void* p, std::size_t, cuda_stream_view) override {} + void do_deallocate(void* ptr, std::size_t /*bytes*/, cuda_stream_view /*stream*/) override {} /** * @brief Get free and available memory for memory resource. @@ -99,14 +103,13 @@ class simulated_memory_resource final : public device_memory_resource { * @param stream to execute on. * @return std::pair containing free_size and total_size of memory. 
*/ - std::pair do_get_mem_info(cuda_stream_view stream) const override + [[nodiscard]] std::pair do_get_mem_info( + cuda_stream_view stream) const override { return std::make_pair(0, 0); } - private: - char* begin_; - char* end_; + char* begin_{}; + char* end_{}; }; -} // namespace mr -} // namespace rmm +} // namespace rmm::mr diff --git a/tests/cuda_stream_tests.cpp b/tests/cuda_stream_tests.cpp index 55e3185fe..96cae868e 100644 --- a/tests/cuda_stream_tests.cpp +++ b/tests/cuda_stream_tests.cpp @@ -47,6 +47,7 @@ TEST_F(CudaStreamTest, MoveConstructor) rmm::cuda_stream stream_a; auto const view_a = stream_a.view(); rmm::cuda_stream stream_b = std::move(stream_a); + // NOLINTNEXTLINE(bugprone-use-after-move) EXPECT_FALSE(stream_a.is_valid()); // Any other operations on stream_a are UB, may segfault EXPECT_EQ(stream_b, view_a); } diff --git a/tests/device_buffer_tests.cu b/tests/device_buffer_tests.cu index 63841a67e..ff71dfba1 100644 --- a/tests/device_buffer_tests.cu +++ b/tests/device_buffer_tests.cu @@ -154,6 +154,7 @@ TYPED_TEST(DeviceBufferTest, CopyConstructor) // Initialize buffer thrust::sequence(rmm::exec_policy(rmm::cuda_stream_default), static_cast(buff.data()), + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) static_cast(buff.data()) + buff.size(), 0); @@ -168,6 +169,7 @@ TYPED_TEST(DeviceBufferTest, CopyConstructor) EXPECT_TRUE(thrust::equal(rmm::exec_policy(rmm::cuda_stream_default), static_cast(buff.data()), + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) static_cast(buff.data()) + buff.size(), static_cast(buff_copy.data()))); @@ -179,6 +181,7 @@ TYPED_TEST(DeviceBufferTest, CopyConstructor) EXPECT_TRUE(thrust::equal(rmm::exec_policy(rmm::cuda_stream_default), static_cast(buff.data()), + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) static_cast(buff.data()) + buff.size(), static_cast(buff_copy.data()))); } @@ -193,6 +196,7 @@ TYPED_TEST(DeviceBufferTest, CopyCapacityLargerThanSize) thrust::sequence(rmm::exec_policy(rmm::cuda_stream_default), static_cast(buff.data()), + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) static_cast(buff.data()) + buff.size(), 0); rmm::device_buffer buff_copy(buff, rmm::cuda_stream_default); @@ -208,6 +212,7 @@ TYPED_TEST(DeviceBufferTest, CopyCapacityLargerThanSize) EXPECT_TRUE(thrust::equal(rmm::exec_policy(rmm::cuda_stream_default), static_cast(buff.data()), + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) static_cast(buff.data()) + buff.size(), static_cast(buff_copy.data()))); } @@ -218,6 +223,7 @@ TYPED_TEST(DeviceBufferTest, CopyConstructorExplicitMr) thrust::sequence(rmm::exec_policy(rmm::cuda_stream_default), static_cast(buff.data()), + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) static_cast(buff.data()) + buff.size(), 0); rmm::device_buffer buff_copy(buff, this->stream, &this->mr); @@ -231,6 +237,7 @@ TYPED_TEST(DeviceBufferTest, CopyConstructorExplicitMr) EXPECT_TRUE(thrust::equal(rmm::exec_policy(buff_copy.stream()), static_cast(buff.data()), + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) static_cast(buff.data()) + buff.size(), static_cast(buff_copy.data()))); } @@ -245,6 +252,7 @@ TYPED_TEST(DeviceBufferTest, CopyCapacityLargerThanSizeExplicitMr) thrust::sequence(rmm::exec_policy(rmm::cuda_stream_default), static_cast(buff.data()), + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) static_cast(buff.data()) + buff.size(), 0); rmm::device_buffer buff_copy(buff, this->stream, 
&this->mr); @@ -261,6 +269,7 @@ TYPED_TEST(DeviceBufferTest, CopyCapacityLargerThanSizeExplicitMr) EXPECT_TRUE(thrust::equal(rmm::exec_policy(buff_copy.stream()), static_cast(buff.data()), + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) static_cast(buff.data()) + buff.size(), static_cast(buff_copy.data()))); } @@ -284,11 +293,11 @@ TYPED_TEST(DeviceBufferTest, MoveConstructor) EXPECT_EQ(mr, buff_new.memory_resource()); // Original buffer should be empty - EXPECT_EQ(nullptr, buff.data()); - EXPECT_EQ(0, buff.size()); - EXPECT_EQ(0, buff.capacity()); - EXPECT_EQ(rmm::cuda_stream_default, buff.stream()); - EXPECT_NE(nullptr, buff.memory_resource()); + EXPECT_EQ(nullptr, buff.data()); // NOLINT(bugprone-use-after-move) + EXPECT_EQ(0, buff.size()); // NOLINT(bugprone-use-after-move) + EXPECT_EQ(0, buff.capacity()); // NOLINT(bugprone-use-after-move) + EXPECT_EQ(rmm::cuda_stream_default, buff.stream()); // NOLINT(bugprone-use-after-move) + EXPECT_NE(nullptr, buff.memory_resource()); // NOLINT(bugprone-use-after-move) } TYPED_TEST(DeviceBufferTest, MoveConstructorStream) @@ -312,11 +321,11 @@ TYPED_TEST(DeviceBufferTest, MoveConstructorStream) EXPECT_EQ(mr, buff_new.memory_resource()); // Original buffer should be empty - EXPECT_EQ(nullptr, buff.data()); - EXPECT_EQ(0, buff.size()); - EXPECT_EQ(0, buff.capacity()); - EXPECT_EQ(rmm::cuda_stream_view{}, buff.stream()); - EXPECT_NE(nullptr, buff.memory_resource()); + EXPECT_EQ(nullptr, buff.data()); // NOLINT(bugprone-use-after-move) + EXPECT_EQ(0, buff.size()); // NOLINT(bugprone-use-after-move) + EXPECT_EQ(0, buff.capacity()); // NOLINT(bugprone-use-after-move) + EXPECT_EQ(rmm::cuda_stream_view{}, buff.stream()); // NOLINT(bugprone-use-after-move) + EXPECT_NE(nullptr, buff.memory_resource()); // NOLINT(bugprone-use-after-move) } TYPED_TEST(DeviceBufferTest, MoveAssignmentToDefault) @@ -399,6 +408,7 @@ TYPED_TEST(DeviceBufferTest, ResizeSmaller) thrust::sequence(rmm::exec_policy(rmm::cuda_stream_default), static_cast(buff.data()), + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) static_cast(buff.data()) + buff.size(), 0); @@ -422,6 +432,7 @@ TYPED_TEST(DeviceBufferTest, ResizeSmaller) EXPECT_TRUE(thrust::equal(rmm::exec_policy(rmm::cuda_stream_default), static_cast(buff.data()), + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) static_cast(buff.data()) + buff.size(), static_cast(old_content.data()))); } diff --git a/tests/mr/device/mr_multithreaded_tests.cpp b/tests/mr/device/mr_multithreaded_tests.cpp index 4bacb208f..838035d9f 100644 --- a/tests/mr/device/mr_multithreaded_tests.cpp +++ b/tests/mr/device/mr_multithreaded_tests.cpp @@ -76,8 +76,7 @@ TEST(DefaultTest, CurrentDeviceResourceIsCUDA_mt) TEST(DefaultTest, GetCurrentDeviceResource_mt) { spawn([]() { - rmm::mr::device_memory_resource* mr{nullptr}; - EXPECT_NO_THROW(mr = rmm::mr::get_current_device_resource()); + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(); EXPECT_NE(nullptr, mr); EXPECT_TRUE(mr->is_equal(rmm::mr::cuda_memory_resource{})); }); @@ -87,8 +86,7 @@ TEST_P(mr_test_mt, SetCurrentDeviceResource_mt) { // single thread changes default resource, then multiple threads use it - rmm::mr::device_memory_resource* old{nullptr}; - EXPECT_NO_THROW(old = rmm::mr::set_current_device_resource(this->mr.get())); + rmm::mr::device_memory_resource* old = rmm::mr::set_current_device_resource(this->mr.get()); EXPECT_NE(nullptr, old); spawn([mr = this->mr.get()]() { @@ -97,7 +95,7 @@ TEST_P(mr_test_mt, 
SetCurrentDeviceResource_mt) }); // setting default resource w/ nullptr should reset to initial - EXPECT_NO_THROW(rmm::mr::set_current_device_resource(nullptr)); + rmm::mr::set_current_device_resource(nullptr); EXPECT_TRUE(old->is_equal(*rmm::mr::get_current_device_resource())); } @@ -109,25 +107,25 @@ TEST_P(mr_test_mt, SetCurrentDeviceResourcePerThread_mt) std::vector threads; threads.reserve(num_devices); for (int i = 0; i < num_devices; ++i) { - threads.emplace_back(std::thread{ - [mr = this->mr.get()](auto dev_id) { - RMM_CUDA_TRY(cudaSetDevice(dev_id)); - rmm::mr::device_memory_resource* old{}; - EXPECT_NO_THROW(old = rmm::mr::set_current_device_resource(mr)); - EXPECT_NE(nullptr, old); - // initial resource for this device should be CUDA mr - EXPECT_TRUE(old->is_equal(rmm::mr::cuda_memory_resource{})); - // get_current_device_resource should equal the resource we just set - EXPECT_EQ(mr, rmm::mr::get_current_device_resource()); - // Setting current dev resource to nullptr should reset to cuda MR and return the MR we - // previously set - EXPECT_NO_THROW(old = rmm::mr::set_current_device_resource(nullptr)); - EXPECT_NE(nullptr, old); - EXPECT_EQ(old, mr); - EXPECT_TRUE( - rmm::mr::get_current_device_resource()->is_equal(rmm::mr::cuda_memory_resource{})); - }, - i}); + threads.emplace_back(std::thread{[mr = this->mr.get()](auto dev_id) { + RMM_CUDA_TRY(cudaSetDevice(dev_id)); + rmm::mr::device_memory_resource* old = + rmm::mr::set_current_device_resource(mr); + EXPECT_NE(nullptr, old); + // initial resource for this device should be CUDA mr + EXPECT_TRUE(old->is_equal(rmm::mr::cuda_memory_resource{})); + // get_current_device_resource should equal the resource we + // just set + EXPECT_EQ(mr, rmm::mr::get_current_device_resource()); + // Setting current dev resource to nullptr should reset to + // cuda MR and return the MR we previously set + old = rmm::mr::set_current_device_resource(nullptr); + EXPECT_NE(nullptr, old); + EXPECT_EQ(old, mr); + EXPECT_TRUE(rmm::mr::get_current_device_resource()->is_equal( + rmm::mr::cuda_memory_resource{})); + }, + i}); } for (auto& thread : threads) { @@ -187,8 +185,7 @@ void allocate_loop(rmm::mr::device_memory_resource* mr, for (std::size_t i = 0; i < num_allocations; ++i) { std::size_t size = size_distribution(generator); - void* ptr{}; - EXPECT_NO_THROW(ptr = mr->allocate(size, stream)); + void* ptr = mr->allocate(size, stream); { std::lock_guard lock(mtx); allocations.emplace_back(ptr, size); @@ -208,7 +205,7 @@ void deallocate_loop(rmm::mr::device_memory_resource* mr, i++; allocation alloc = allocations.front(); allocations.pop_front(); - EXPECT_NO_THROW(mr->deallocate(alloc.ptr, alloc.size, stream)); + mr->deallocate(alloc.ptr, alloc.size, stream); } } @@ -250,11 +247,9 @@ TEST_P(mr_test_mt, AllocFreeDifferentThreadsSameStream) TEST_P(mr_test_mt, AllocFreeDifferentThreadsDifferentStream) { - EXPECT_NO_THROW([this]() { - rmm::cuda_stream streamB; - test_allocate_free_different_threads(this->mr.get(), this->stream, streamB); - streamB.synchronize(); - }()); + rmm::cuda_stream streamB; + test_allocate_free_different_threads(this->mr.get(), this->stream, streamB); + streamB.synchronize(); } } // namespace diff --git a/tests/mr/host/mr_tests.cpp b/tests/mr/host/mr_tests.cpp index 24f52a88a..1cd59f5a6 100644 --- a/tests/mr/host/mr_tests.cpp +++ b/tests/mr/host/mr_tests.cpp @@ -158,10 +158,9 @@ TYPED_TEST(MRTest, RandomAllocations) EXPECT_TRUE(is_aligned(alloc.ptr)); }); - std::for_each( - allocations.begin(), allocations.end(), [generator, 
distribution, this](allocation& alloc) { - EXPECT_NO_THROW(this->mr->deallocate(alloc.ptr, alloc.size)); - }); + std::for_each(allocations.begin(), allocations.end(), [this](allocation& alloc) { + EXPECT_NO_THROW(this->mr->deallocate(alloc.ptr, alloc.size)); + }); } TYPED_TEST(MRTest, MixedRandomAllocationFree) From 41790ffc71be393875fb0f788011311286845f58 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 16:27:13 +1000 Subject: [PATCH 53/72] p->ptr --- benchmarks/replay/replay.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/replay/replay.cpp b/benchmarks/replay/replay.cpp index a1355cdb2..4e9bbffcf 100644 --- a/benchmarks/replay/replay.cpp +++ b/benchmarks/replay/replay.cpp @@ -220,7 +220,7 @@ struct replay_benchmark { set_allocation(event.pointer, allocation{ptr, event.size}); } else { auto alloc = remove_allocation(event.pointer); - mr_->deallocate(alloc.p, event.size); + mr_->deallocate(alloc.ptr, event.size); } event_index++; From 514a4f180d0d4c47f15328e0d3fb5a8cf95083aa Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 8 Sep 2021 16:34:17 +1000 Subject: [PATCH 54/72] Revert threshold size type --- include/rmm/mr/device/cuda_async_memory_resource.hpp | 8 ++++---- tests/mr/device/cuda_async_mr_tests.cpp | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/rmm/mr/device/cuda_async_memory_resource.hpp b/include/rmm/mr/device/cuda_async_memory_resource.hpp index 19d52b16b..13eb99ec8 100644 --- a/include/rmm/mr/device/cuda_async_memory_resource.hpp +++ b/include/rmm/mr/device/cuda_async_memory_resource.hpp @@ -40,7 +40,6 @@ namespace rmm::mr { */ class cuda_async_memory_resource final : public device_memory_resource { public: - enum release_threshold_size_type : std::size_t {}; /** * @brief Constructs a cuda_async_memory_resource with the optionally specified initial pool size * and release threshold. @@ -55,8 +54,9 @@ class cuda_async_memory_resource final : public device_memory_resource { * @param release_threshold Optional release threshold size in bytes of the pool. If no value is * provided, the release threshold is set to the total amount of memory on the current device. 
*/ - cuda_async_memory_resource(thrust::optional initial_pool_size = {}, - thrust::optional release_threshold = {}) + // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) + cuda_async_memory_resource(thrust::optional initial_pool_size = {}, + thrust::optional release_threshold = {}) { #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT // Check if cudaMallocAsync Memory pool supported @@ -78,7 +78,7 @@ class cuda_async_memory_resource final : public device_memory_resource { auto const [free, total] = rmm::detail::available_device_memory(); // Need an l-value to take address to pass to cudaMemPoolSetAttribute - uint64_t threshold = release_threshold.value_or(release_threshold_size_type{total}); + uint64_t threshold = release_threshold.value_or(total); RMM_CUDA_TRY( cudaMemPoolSetAttribute(cuda_pool_handle_, cudaMemPoolAttrReleaseThreshold, &threshold)); diff --git a/tests/mr/device/cuda_async_mr_tests.cpp b/tests/mr/device/cuda_async_mr_tests.cpp index 5a507162c..4bf0c3d5b 100644 --- a/tests/mr/device/cuda_async_mr_tests.cpp +++ b/tests/mr/device/cuda_async_mr_tests.cpp @@ -47,7 +47,7 @@ TEST(PoolTest, ExplicitInitialPoolSize) TEST(PoolTest, ExplicitReleaseThreshold) { const auto pool_init_size{100}; - const auto pool_release_threshold = cuda_async_mr::release_threshold_size_type{1000}; + const auto pool_release_threshold{1000}; cuda_async_mr mr{pool_init_size, pool_release_threshold}; void* ptr = mr.allocate(pool_init_size); mr.deallocate(ptr, pool_init_size); From d3d2b08d9d3eddcfbb30760e7affaea438cd0c3b Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 9 Sep 2021 09:13:37 +1000 Subject: [PATCH 55/72] Disable readability-named-parameter --- .clang-tidy | 5 +++-- benchmarks/utilities/simulated_memory_resource.hpp | 4 ++-- include/rmm/mr/device/cuda_async_memory_resource.hpp | 4 ++-- include/rmm/mr/device/cuda_memory_resource.hpp | 7 +++---- include/rmm/mr/device/managed_memory_resource.hpp | 4 ++-- include/rmm/mr/device/owning_wrapper.hpp | 2 +- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index b76743aeb..263a2d2d6 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -8,6 +8,7 @@ Checks: 'clang-diagnostic-*, readability-*, llvm-*, -modernize-use-trailing-return-type, + -readability-named-parameter, -cppcoreguidelines-macro-usage' WarningsAsErrors: '' HeaderFilterRegex: '' @@ -54,8 +55,8 @@ CheckOptions: value: 'mr|os' - key: readability-identifier-length.IgnoredVariableNames value: 'mr|_' - - key: readability-function-cognitive-complexity.IgnoreMacros - value: '1' + #- key: readability-function-cognitive-complexity.IgnoreMacros + # value: '1' - key: bugprone-easily-swappable-parameters.IgnoredParameterNames value: 'alignment' ... 
diff --git a/benchmarks/utilities/simulated_memory_resource.hpp b/benchmarks/utilities/simulated_memory_resource.hpp index 44ee4798c..d8c7cf946 100644 --- a/benchmarks/utilities/simulated_memory_resource.hpp +++ b/benchmarks/utilities/simulated_memory_resource.hpp @@ -77,7 +77,7 @@ class simulated_memory_resource final : public device_memory_resource { * @param bytes The size, in bytes, of the allocation * @return void* Pointer to the newly allocated memory */ - void* do_allocate(std::size_t bytes, cuda_stream_view /*stream*/) override + void* do_allocate(std::size_t bytes, cuda_stream_view) override { // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) RMM_EXPECTS(begin_ + bytes <= end_, rmm::bad_alloc, "Simulated memory size exceeded"); @@ -95,7 +95,7 @@ class simulated_memory_resource final : public device_memory_resource { * * @param ptr Pointer to be deallocated */ - void do_deallocate(void* ptr, std::size_t /*bytes*/, cuda_stream_view /*stream*/) override {} + void do_deallocate(void* ptr, std::size_t, cuda_stream_view) override {} /** * @brief Get free and available memory for memory resource. diff --git a/include/rmm/mr/device/cuda_async_memory_resource.hpp b/include/rmm/mr/device/cuda_async_memory_resource.hpp index 13eb99ec8..9111a2da3 100644 --- a/include/rmm/mr/device/cuda_async_memory_resource.hpp +++ b/include/rmm/mr/device/cuda_async_memory_resource.hpp @@ -165,7 +165,7 @@ class cuda_async_memory_resource final : public device_memory_resource { * * @param p Pointer to be deallocated */ - void do_deallocate(void* ptr, std::size_t /*bytes*/, rmm::cuda_stream_view stream) override + void do_deallocate(void* ptr, std::size_t, rmm::cuda_stream_view stream) override { #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT if (ptr != nullptr) { RMM_ASSERT_CUDA_SUCCESS(cudaFreeAsync(ptr, stream.value())); } @@ -197,7 +197,7 @@ class cuda_async_memory_resource final : public device_memory_resource { * @return std::pair contaiing free_size and total_size of memory */ [[nodiscard]] std::pair do_get_mem_info( - rmm::cuda_stream_view /*stream*/) const override + rmm::cuda_stream_view) const override { return std::make_pair(0, 0); } diff --git a/include/rmm/mr/device/cuda_memory_resource.hpp b/include/rmm/mr/device/cuda_memory_resource.hpp index 59a729297..b5b3d87df 100644 --- a/include/rmm/mr/device/cuda_memory_resource.hpp +++ b/include/rmm/mr/device/cuda_memory_resource.hpp @@ -64,7 +64,7 @@ class cuda_memory_resource final : public device_memory_resource { * @param bytes The size, in bytes, of the allocation * @return void* Pointer to the newly allocated memory */ - void* do_allocate(std::size_t bytes, cuda_stream_view /*stream*/) override + void* do_allocate(std::size_t bytes, cuda_stream_view) override { void* ptr{nullptr}; RMM_CUDA_TRY(cudaMalloc(&ptr, bytes), rmm::bad_alloc); @@ -80,7 +80,7 @@ class cuda_memory_resource final : public device_memory_resource { * * @param p Pointer to be deallocated */ - void do_deallocate(void* ptr, std::size_t /*bytes*/, cuda_stream_view /*stream*/) override + void do_deallocate(void* ptr, std::size_t, cuda_stream_view) override { RMM_ASSERT_CUDA_SUCCESS(cudaFree(ptr)); } @@ -109,8 +109,7 @@ class cuda_memory_resource final : public device_memory_resource { * * @return std::pair contaiing free_size and total_size of memory */ - [[nodiscard]] std::pair do_get_mem_info( - cuda_stream_view /*stream*/) const override + [[nodiscard]] std::pair do_get_mem_info(cuda_stream_view) const override { std::size_t free_size{}; std::size_t total_size{}; diff --git 
a/include/rmm/mr/device/managed_memory_resource.hpp b/include/rmm/mr/device/managed_memory_resource.hpp index 3ed44a528..7cce644be 100644 --- a/include/rmm/mr/device/managed_memory_resource.hpp +++ b/include/rmm/mr/device/managed_memory_resource.hpp @@ -64,7 +64,7 @@ class managed_memory_resource final : public device_memory_resource { * @param bytes The size, in bytes, of the allocation * @return void* Pointer to the newly allocated memory */ - void* do_allocate(std::size_t bytes, cuda_stream_view /*stream*/) override + void* do_allocate(std::size_t bytes, cuda_stream_view) override { // FIXME: Unlike cudaMalloc, cudaMallocManaged will throw an error for 0 // size allocations. @@ -84,7 +84,7 @@ class managed_memory_resource final : public device_memory_resource { * * @param ptr Pointer to be deallocated */ - void do_deallocate(void* ptr, std::size_t /*bytes*/, cuda_stream_view /*stream*/) override + void do_deallocate(void* ptr, std::size_t, cuda_stream_view) override { RMM_ASSERT_CUDA_SUCCESS(cudaFree(ptr)); } diff --git a/include/rmm/mr/device/owning_wrapper.hpp b/include/rmm/mr/device/owning_wrapper.hpp index 7bcc3b2fa..17a5b4565 100644 --- a/include/rmm/mr/device/owning_wrapper.hpp +++ b/include/rmm/mr/device/owning_wrapper.hpp @@ -27,7 +27,7 @@ namespace detail { /// Converts a tuple into a parameter pack template auto make_resource_impl(UpstreamTuple const& upstreams, - std::index_sequence /*indices*/, + std::index_sequence, Args&&... args) { return std::make_unique(std::get(upstreams).get()..., From a859235112187616f0edc2f5c2fd91df87d3715c Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 9 Sep 2021 11:02:54 +1000 Subject: [PATCH 56/72] Use C++17 for all of Cython --- python/setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/setup.py b/python/setup.py index e05a73e4b..8101ba2ef 100644 --- a/python/setup.py +++ b/python/setup.py @@ -155,7 +155,7 @@ def get_cuda_version_from_header(cuda_include_dir): ], libraries=["cuda", "cudart"], language="c++", - extra_compile_args=["-std=c++14"], + extra_compile_args=["-std=c++17"], ) ], nthreads=nthreads, @@ -178,7 +178,7 @@ def get_cuda_version_from_header(cuda_include_dir): ], libraries=["cuda", "cudart"], language="c++", - extra_compile_args=["-std=c++14"], + extra_compile_args=["-std=c++17"], ) ], nthreads=nthreads, From 0ac66e10ac12fc29d76eb85a696b9cf0442b1629 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 9 Sep 2021 11:03:28 +1000 Subject: [PATCH 57/72] Avoid bounds check exception in `stack_trace` --- include/rmm/detail/stack_trace.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/rmm/detail/stack_trace.hpp b/include/rmm/detail/stack_trace.hpp index 2b83aa7cf..1f76af0a4 100644 --- a/include/rmm/detail/stack_trace.hpp +++ b/include/rmm/detail/stack_trace.hpp @@ -53,7 +53,7 @@ class stack_trace { const int MaxStackDepth = 64; std::array stack{}; auto const depth = backtrace(stack.begin(), MaxStackDepth); - stack_ptrs.insert(stack_ptrs.end(), stack.begin(), &stack.at(depth)); + stack_ptrs.insert(stack_ptrs.end(), stack.begin(), stack.begin() + depth); #endif // RMM_ENABLE_STACK_TRACES } From fe1d70de9fbdb4e3b3d1ef4f89cfba8bae94c2ca Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 9 Sep 2021 11:03:50 +1000 Subject: [PATCH 58/72] variable name --- .../multi_stream_allocations_bench.cu | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu 
b/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu index 9b6210a2d..dbcea2a45 100644 --- a/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu +++ b/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu @@ -117,9 +117,9 @@ inline auto make_binning() return mr; } -static void benchmark_range(benchmark::internal::Benchmark* b) +static void benchmark_range(benchmark::internal::Benchmark* bench) { - b // + bench // ->RangeMultiplier(2) ->Ranges({{1, 4}, {4, 4}, {false, true}}) ->Unit(benchmark::kMicrosecond); @@ -177,6 +177,7 @@ void declare_benchmark(std::string const& name) std::cout << "Error: invalid memory_resource name: " << name << std::endl; } +// NOLINTNEXTLINE(bugprone-easily-swappable-parameters) void run_profile(std::string const& resource_name, int kernel_count, int stream_count, bool prewarm) { auto mr_factory = get_mr_factory(resource_name); From fbf1c9c27c3148f87bc9bfae5392863a1cb324d8 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 9 Sep 2021 11:12:51 +1000 Subject: [PATCH 59/72] Revert magic numbers --- benchmarks/device_uvector/device_uvector_bench.cu | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/benchmarks/device_uvector/device_uvector_bench.cu b/benchmarks/device_uvector/device_uvector_bench.cu index 6665ccaa8..7e73451e6 100644 --- a/benchmarks/device_uvector/device_uvector_bench.cu +++ b/benchmarks/device_uvector/device_uvector_bench.cu @@ -40,13 +40,9 @@ static void BM_UvectorSizeConstruction(benchmark::State& state) rmm::mr::set_current_device_resource(nullptr); } -const auto range_multiplier{10}; -const auto range_start{10'000}; -const auto range_end{1'000'000'000}; - BENCHMARK(BM_UvectorSizeConstruction) - ->RangeMultiplier(range_multiplier) - ->Range(range_start, range_end) + ->RangeMultiplier(10) // NOLINT + ->Range(10'000, 1'000'000'000) // NOLINT ->Unit(benchmark::kMicrosecond); static void BM_ThrustVectorSizeConstruction(benchmark::State& state) @@ -66,8 +62,8 @@ static void BM_ThrustVectorSizeConstruction(benchmark::State& state) } BENCHMARK(BM_ThrustVectorSizeConstruction) - ->RangeMultiplier(range_multiplier) - ->Range(range_start, range_end) + ->RangeMultiplier(10) // NOLINT + ->Range(10'000, 1'000'000'000) // NOLINT ->Unit(benchmark::kMicrosecond); BENCHMARK_MAIN(); From 8d743a8e34d11a3c6e3fc140fc182b32e20eca65 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 14 Sep 2021 15:21:43 +1000 Subject: [PATCH 60/72] Fix merge problems --- include/rmm/logger.hpp | 13 +- .../rmm/mr/device/arena_memory_resource.hpp | 151 ++++---- include/rmm/mr/device/detail/arena.hpp | 350 +++++++++--------- .../rmm/mr/host/pinned_memory_resource.hpp | 6 +- 4 files changed, 270 insertions(+), 250 deletions(-) diff --git a/include/rmm/logger.hpp b/include/rmm/logger.hpp index b000b5ef0..43d4b6ea3 100644 --- a/include/rmm/logger.hpp +++ b/include/rmm/logger.hpp @@ -75,16 +75,17 @@ struct logger_wrapper { struct bytes { std::size_t value; - friend std::ostream& operator<<(std::ostream& os, bytes const& b) + friend std::ostream& operator<<(std::ostream& os, bytes const& value) { - std::string const units[] = {"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"}; - int i = 0; - auto size = static_cast(b.value); + static std::array const units{ + "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"}; + int index = 0; + auto size = static_cast(value.value); while (size > 1024) { size /= 1024; - i++; + index++; } - return os << size << ' ' << units[i]; + return os << size << ' ' 
<< units.at(index); } }; diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp index 28376142c..ce8737225 100644 --- a/include/rmm/mr/device/arena_memory_resource.hpp +++ b/include/rmm/mr/device/arena_memory_resource.hpp @@ -16,17 +16,20 @@ #pragma once #include +#include #include #include #include +#include +#include + #include #include #include -namespace rmm { -namespace mr { +namespace rmm::mr { /** * @brief A suballocator that emphasizes fragmentation avoidance and scalable concurrency support. @@ -87,14 +90,23 @@ class arena_memory_resource final : public device_memory_resource { */ explicit arena_memory_resource(Upstream* upstream_mr, std::size_t initial_size = global_arena::default_initial_size, - std::size_t maximum_size = global_arena::default_maximum_size) - : global_arena_{upstream_mr, initial_size, maximum_size} + std::size_t maximum_size = global_arena::default_maximum_size, + bool dump_log_on_failure = false) + : global_arena_{upstream_mr, initial_size, maximum_size}, + dump_log_on_failure_{dump_log_on_failure} { + if (dump_log_on_failure_) { + logger_ = spdlog::basic_logger_mt("arena_memory_dump", "rmm_arena_memory_dump.log"); + } } + ~arena_memory_resource() override = default; + // Disable copy (and move) semantics. arena_memory_resource(arena_memory_resource const&) = delete; arena_memory_resource& operator=(arena_memory_resource const&) = delete; + arena_memory_resource(arena_memory_resource&&) noexcept = delete; + arena_memory_resource& operator=(arena_memory_resource&&) noexcept = delete; /** * @brief Queries whether the resource supports use of non-null CUDA streams for @@ -130,69 +142,54 @@ class arena_memory_resource final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - if (bytes <= 0) return nullptr; + if (bytes <= 0) { return nullptr; } - bytes = detail::arena::align_up(bytes); - return get_arena(stream).allocate(bytes); + bytes = detail::arena::align_up(bytes); + auto& arena = get_arena(stream); + void* pointer = arena.allocate(bytes); + + if (pointer == nullptr) { + write_lock lock(mtx_); + defragment(); + pointer = arena.allocate(bytes); + if (pointer == nullptr) { + if (dump_log_on_failure_) { dump_memory_log(bytes); } + RMM_FAIL("Maximum pool size exceeded", rmm::bad_alloc); + } + } + + return pointer; } /** - * @brief Deallocate memory pointed to by `p`. + * @brief Deallocate memory pointed to by `ptr`. * - * @param p Pointer to be deallocated. + * @param ptr Pointer to be deallocated. * @param bytes The size in bytes of the allocation. This must be equal to the * value of `bytes` that was passed to the `allocate` call that returned `p`. * @param stream Stream on which to perform deallocation. */ - void do_deallocate(void* p, std::size_t bytes, cuda_stream_view stream) override + void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - if (p == nullptr || bytes <= 0) return; + if (ptr == nullptr || bytes <= 0) { return; } bytes = detail::arena::align_up(bytes); -#ifdef RMM_POOL_TRACK_ALLOCATIONS - if (!get_arena(stream).deallocate(p, bytes, stream)) { - deallocate_from_other_arena(p, bytes, stream); - } -#else - get_arena(stream).deallocate(p, bytes, stream); -#endif + get_arena(stream).deallocate(ptr, bytes, stream); } -#ifdef RMM_POOL_TRACK_ALLOCATIONS /** - * @brief Deallocate memory pointed to by `p` that was allocated in a different arena. - * - * @param p Pointer to be deallocated. 
- * @param bytes The size in bytes of the allocation. This must be equal to the - * value of `bytes` that was passed to the `allocate` call that returned `p`. - * @param stream Stream on which to perform deallocation. + * @brief Defragment memory by returning all free blocks to the global arena. */ - void deallocate_from_other_arena(void* p, std::size_t bytes, cuda_stream_view stream) + void defragment() { - stream.synchronize_no_throw(); - - read_lock lock(mtx_); - - if (use_per_thread_arena(stream)) { - auto const id = std::this_thread::get_id(); - for (auto& kv : thread_arenas_) { - // If the arena does not belong to the current thread, try to deallocate from it, and return - // if successful. - if (kv.first != id && kv.second->deallocate(p, bytes)) return; - } - } else { - for (auto& kv : stream_arenas_) { - // If the arena does not belong to the current stream, try to deallocate from it, and return - // if successful. - if (stream != kv.first && kv.second.deallocate(p, bytes)) return; - } + RMM_CUDA_TRY(cudaDeviceSynchronize()); + for (auto& thread_arena : thread_arenas_) { + thread_arena.second->clean(); + } + for (auto& stream_arena : stream_arenas_) { + stream_arena.second.clean(); } - - // The thread that originally allocated the block has terminated, deallocate directly in the - // global arena. - global_arena_.deallocate({p, bytes}); } -#endif /** * @brief Get the arena associated with the current thread or the given stream. @@ -202,11 +199,8 @@ class arena_memory_resource final : public device_memory_resource { */ arena& get_arena(cuda_stream_view stream) { - if (use_per_thread_arena(stream)) { - return get_thread_arena(); - } else { - return get_stream_arena(stream); - } + if (use_per_thread_arena(stream)) { return get_thread_arena(); } + return get_stream_arena(stream); } /** @@ -216,18 +210,18 @@ class arena_memory_resource final : public device_memory_resource { */ arena& get_thread_arena() { - auto const id = std::this_thread::get_id(); + auto const thread_id = std::this_thread::get_id(); { read_lock lock(mtx_); - auto const it = thread_arenas_.find(id); - if (it != thread_arenas_.end()) { return *it->second; } + auto const iter = thread_arenas_.find(thread_id); + if (iter != thread_arenas_.end()) { return *iter->second; } } { write_lock lock(mtx_); - auto a = std::make_shared(global_arena_); - thread_arenas_.emplace(id, a); - thread_local detail::arena::arena_cleaner cleaner{a}; - return *a; + auto thread_arena = std::make_shared(global_arena_); + thread_arenas_.emplace(thread_id, thread_arena); + thread_local detail::arena::arena_cleaner cleaner{thread_arena}; + return *thread_arena; } } @@ -241,8 +235,8 @@ class arena_memory_resource final : public device_memory_resource { RMM_LOGGING_ASSERT(!use_per_thread_arena(stream)); { read_lock lock(mtx_); - auto const it = stream_arenas_.find(stream.value()); - if (it != stream_arenas_.end()) { return it->second; } + auto const iter = stream_arenas_.find(stream.value()); + if (iter != stream_arenas_.end()) { return iter->second; } } { write_lock lock(mtx_); @@ -262,6 +256,32 @@ class arena_memory_resource final : public device_memory_resource { return std::make_pair(0, 0); } + /** + * Dump memory to log. 
+ * + * @param bytes the number of bytes requested for allocation + */ + void dump_memory_log(size_t bytes) + { + logger_->info("**************************************************"); + logger_->info("Ran out of memory trying to allocate {}.", rmm::detail::bytes{bytes}); + logger_->info("**************************************************"); + logger_->info("Global arena:"); + global_arena_.dump_memory_log(logger_); + logger_->info("Per-thread arenas:"); + for (auto const& thread_arena : thread_arenas_) { + logger_->info(" Thread {}:", thread_arena.first); + thread_arena.second->dump_memory_log(logger_); + } + if (!stream_arenas_.empty()) { + logger_->info("Per-stream arenas:"); + for (auto const& stream_arena : stream_arenas_) { + logger_->info(" Stream {}:", static_cast(stream_arena.first)); + stream_arena.second.dump_memory_log(logger_); + } + } + } + /** * @brief Should a per-thread arena be used given the CUDA stream. * @@ -281,9 +301,12 @@ class arena_memory_resource final : public device_memory_resource { /// Arenas for non-default streams, one per stream. /// Implementation note: for small sizes, map is more efficient than unordered_map. std::map stream_arenas_; + /// If true, dump memory information to log on allocation failure. + bool dump_log_on_failure_; + /// The logger for memory dump. + std::shared_ptr logger_{}; /// Mutex for read and write locks. mutable std::shared_timed_mutex mtx_; }; -} // namespace mr -} // namespace rmm +} // namespace rmm::mr diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index 7a449949c..6cfe94058 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -19,24 +19,26 @@ #include #include #include +#include #include +#include +#include + #include #include #include #include #include +#include #include #include -namespace rmm { -namespace mr { -namespace detail { -namespace arena { +namespace rmm::mr::detail::arena { /// Minimum size of a superblock (256 KiB). -constexpr std::size_t minimum_superblock_size = 1u << 18u; +constexpr std::size_t minimum_superblock_size = 1U << 18U; /** * @brief Represents a chunk of memory that can be allocated and deallocated. @@ -67,16 +69,16 @@ class block { block(void* pointer, std::size_t size) : pointer_(static_cast(pointer)), size_(size) {} /// Returns the underlying pointer. - void* pointer() const { return pointer_; } + [[nodiscard]] void* pointer() const { return pointer_; } /// Returns the size of the block. - std::size_t size() const { return size_; } + [[nodiscard]] std::size_t size() const { return size_; } /// Returns true if this block is valid (non-null), false otherwise. - bool is_valid() const { return pointer_ != nullptr; } + [[nodiscard]] bool is_valid() const { return pointer_ != nullptr; } /// Returns true if this block is a superblock, false otherwise. - bool is_superblock() const { return size_ >= minimum_superblock_size; } + [[nodiscard]] bool is_superblock() const { return size_ >= minimum_superblock_size; } /** * @brief Verifies whether this block can be merged to the beginning of block b. @@ -85,30 +87,32 @@ class block { * @return true Returns true if this block's `pointer` + `size` == `b.ptr`, and `not b.is_head`, false otherwise. 
*/ - bool is_contiguous_before(block const& b) const { return pointer_ + size_ == b.pointer_; } + [[nodiscard]] bool is_contiguous_before(block const& blk) const + { + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + return pointer_ + size_ == blk.pointer_; + } /** * @brief Is this block large enough to fit `sz` bytes? * - * @param sz The size in bytes to check for fit. + * @param size The size in bytes to check for fit. * @return true if this block is at least `sz` bytes. */ - bool fits(std::size_t sz) const { return size_ >= sz; } + [[nodiscard]] bool fits(std::size_t size) const { return size_ >= size; } /** * @brief Split this block into two by the given size. * - * @param sz The size in bytes of the first block. + * @param size The size in bytes of the first block. * @return std::pair A pair of blocks split by sz. */ - std::pair split(std::size_t sz) const + [[nodiscard]] std::pair split(std::size_t size) const { RMM_LOGGING_ASSERT(size_ >= sz); - if (size_ > sz) { - return {{pointer_, sz}, {pointer_ + sz, size_ - sz}}; - } else { - return {*this, {}}; - } + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + if (size_ > size) { return {{pointer_, size}, {pointer_ + size, size_ - size}}; } + return {*this, {}}; } /** @@ -119,29 +123,31 @@ class block { * @param b block to merge. * @return block The merged block. */ - block merge(block const& b) const + [[nodiscard]] block merge(block const& blk) const { RMM_LOGGING_ASSERT(is_contiguous_before(b)); - return {pointer_, size_ + b.size_}; + return {pointer_, size_ + blk.size_}; } /// Used by std::set to compare blocks. - bool operator<(block const& b) const { return pointer_ < b.pointer_; } + bool operator<(block const& blk) const { return pointer_ < blk.pointer_; } private: char* pointer_{}; ///< Raw memory pointer. std::size_t size_{}; ///< Size in bytes. }; +inline bool block_size_compare(block lhs, block rhs) { return lhs.size() < rhs.size(); } + /** * @brief Align up to the allocation alignment. * * @param[in] v value to align * @return Return the aligned value */ -constexpr std::size_t align_up(std::size_t v) noexcept +constexpr std::size_t align_up(std::size_t value) noexcept { - return rmm::detail::align_up(v, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + return rmm::detail::align_up(value, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); } /** @@ -150,9 +156,9 @@ constexpr std::size_t align_up(std::size_t v) noexcept * @param[in] v value to align * @return Return the aligned value */ -constexpr std::size_t align_down(std::size_t v) noexcept +constexpr std::size_t align_down(std::size_t value) noexcept { - return rmm::detail::align_down(v, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + return rmm::detail::align_down(value, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); } /** @@ -172,24 +178,20 @@ constexpr std::size_t align_down(std::size_t v) noexcept inline block first_fit(std::set& free_blocks, std::size_t size) { auto const iter = std::find_if( - free_blocks.cbegin(), free_blocks.cend(), [size](auto const& b) { return b.fits(size); }); - - if (iter == free_blocks.cend()) { - return {}; - } else { - // Remove the block from the free_list. - auto const b = *iter; - auto const i = free_blocks.erase(iter); - - if (b.size() > size) { - // Split the block and put the remainder back. 
- auto const split = b.split(size); - free_blocks.insert(i, split.second); - return split.first; - } else { - return b; - } + free_blocks.cbegin(), free_blocks.cend(), [size](auto const& blk) { return blk.fits(size); }); + + if (iter == free_blocks.cend()) { return {}; } + // Remove the block from the free_list. + auto const blk = *iter; + auto const next = free_blocks.erase(iter); + + if (blk.size() > size) { + // Split the block and put the remainder back. + auto const split = blk.split(size); + free_blocks.insert(next, split.second); + return split.first; } + return blk; } /** @@ -199,39 +201,47 @@ inline block first_fit(std::set& free_blocks, std::size_t size) * @param b The block to coalesce. * @return block The coalesced block. */ -inline block coalesce_block(std::set& free_blocks, block const& b) +inline block coalesce_block(std::set& free_blocks, block const& blk) { - if (!b.is_valid()) return b; + if (!blk.is_valid()) { return blk; } // Find the right place (in ascending address order) to insert the block. - auto const next = free_blocks.lower_bound(b); + auto const next = free_blocks.lower_bound(blk); auto const previous = next == free_blocks.cbegin() ? next : std::prev(next); // Coalesce with neighboring blocks. - bool const merge_prev = previous->is_contiguous_before(b); - bool const merge_next = next != free_blocks.cend() && b.is_contiguous_before(*next); + bool const merge_prev = previous->is_contiguous_before(blk); + bool const merge_next = next != free_blocks.cend() && blk.is_contiguous_before(*next); block merged{}; if (merge_prev && merge_next) { - merged = previous->merge(b).merge(*next); + merged = previous->merge(blk).merge(*next); free_blocks.erase(previous); - auto const i = free_blocks.erase(next); - free_blocks.insert(i, merged); + auto const iter = free_blocks.erase(next); + free_blocks.insert(iter, merged); } else if (merge_prev) { - merged = previous->merge(b); - auto const i = free_blocks.erase(previous); - free_blocks.insert(i, merged); + merged = previous->merge(blk); + auto const iter = free_blocks.erase(previous); + free_blocks.insert(iter, merged); } else if (merge_next) { - merged = b.merge(*next); - auto const i = free_blocks.erase(next); - free_blocks.insert(i, merged); + merged = blk.merge(*next); + auto const iter = free_blocks.erase(next); + free_blocks.insert(iter, merged); } else { - free_blocks.emplace(b); - merged = b; + free_blocks.emplace(blk); + merged = blk; } return merged; } +template +inline auto total_block_size(T const& blocks) +{ + return std::accumulate(blocks.cbegin(), blocks.cend(), std::size_t{}, [](auto lhs, auto rhs) { + return lhs + rhs.size(); + }); +} + /** * @brief The global arena for allocating memory from the upstream memory resource. * @@ -248,7 +258,7 @@ class global_arena final { /// The default maximum size for the global arena. static constexpr std::size_t default_maximum_size = std::numeric_limits::max(); /// Reserved memory that should not be allocated (64 MiB). - static constexpr std::size_t reserved_size = 1u << 26u; + static constexpr std::size_t reserved_size = 1U << 26U; /** * @brief Construct a global arena. 
@@ -275,7 +285,8 @@ class global_arena final { "Error, Maximum arena size required to be a multiple of 256 bytes"); if (initial_size == default_initial_size || maximum_size == default_maximum_size) { - std::size_t free{}, total{}; + std::size_t free{}; + std::size_t total{}; RMM_CUDA_TRY(cudaMemGetInfo(&free, &total)); if (initial_size == default_initial_size) { initial_size = align_up(std::min(free, total / 2)); @@ -290,8 +301,10 @@ class global_arena final { } // Disable copy (and move) semantics. - global_arena(const global_arena&) = delete; - global_arena& operator=(const global_arena&) = delete; + global_arena(global_arena const&) = delete; + global_arena& operator=(global_arena const&) = delete; + global_arena(global_arena&&) noexcept = delete; + global_arena& operator=(global_arena&&) noexcept = delete; /** * @brief Destroy the global arena and deallocate all memory it allocated using the upstream @@ -300,8 +313,8 @@ class global_arena final { ~global_arena() { lock_guard lock(mtx_); - for (auto const& b : upstream_blocks_) { - upstream_mr_->deallocate(b.pointer(), b.size()); + for (auto const& blk : upstream_blocks_) { + upstream_mr_->deallocate(blk.pointer(), blk.size()); } } @@ -320,16 +333,14 @@ class global_arena final { } /** - * @brief Deallocate memory pointed to by `p`. + * @brief Deallocate memory pointed to by `blk`. * - * @param p Pointer to be deallocated. - * @param bytes The size in bytes of the allocation. This must be equal to the value of `bytes` - * that was passed to the `allocate` call that returned `p`. + * @param blk Block to be deallocated. */ - void deallocate(block const& b) + void deallocate(block const& blk) { lock_guard lock(mtx_); - coalesce_block(free_blocks_, b); + coalesce_block(free_blocks_, blk); } /** @@ -340,11 +351,37 @@ class global_arena final { void deallocate(std::set const& free_blocks) { lock_guard lock(mtx_); - for (auto const& b : free_blocks) { - coalesce_block(free_blocks_, b); + for (auto const& blk : free_blocks) { + coalesce_block(free_blocks_, blk); } } + /** + * @brief Dump memory to log. + * + * @param logger the spdlog logger to use + */ + void dump_memory_log(std::shared_ptr const& logger) const + { + lock_guard lock(mtx_); + + logger->info(" Maximum size: {}", rmm::detail::bytes{maximum_size_}); + logger->info(" Current size: {}", rmm::detail::bytes{current_size_}); + + logger->info(" # free blocks: {}", free_blocks_.size()); + if (!free_blocks_.empty()) { + logger->info(" Total size of free blocks: {}", + rmm::detail::bytes{total_block_size(free_blocks_)}); + auto const largest_free = + *std::max_element(free_blocks_.begin(), free_blocks_.end(), block_size_compare); + logger->info(" Size of largest free block: {}", rmm::detail::bytes{largest_free.size()}); + } + + logger->info(" # upstream blocks={}", upstream_blocks_.size()); + logger->info(" Total size of upstream blocks: {}", + rmm::detail::bytes{total_block_size(upstream_blocks_)}); + } + private: using lock_guard = std::lock_guard; @@ -357,8 +394,8 @@ class global_arena final { block get_block(std::size_t size) { // Find the first-fit free block. - auto const b = first_fit(free_blocks_, size); - if (b.is_valid()) return b; + auto const blk = first_fit(free_blocks_, size); + if (blk.is_valid()) { return blk; } // No existing larger blocks available, so grow the arena. auto const upstream_block = expand_arena(size_to_grow(size)); @@ -372,13 +409,11 @@ class global_arena final { * This simply grows the global arena to the maximum size. 
* * @param size The number of bytes required. - * @return size The size for the arena to grow. + * @return size The size for the arena to grow, or 0 if no more memory. */ constexpr std::size_t size_to_grow(std::size_t size) const { - if (current_size_ + size > maximum_size_) { - RMM_FAIL("Maximum pool size exceeded", rmm::bad_alloc); - } + if (current_size_ + size > maximum_size_) { return 0; } return maximum_size_ - current_size_; } @@ -390,9 +425,12 @@ class global_arena final { */ block expand_arena(std::size_t size) { - upstream_blocks_.push_back({upstream_mr_->allocate(size), size}); - current_size_ += size; - return upstream_blocks_.back(); + if (size > 0) { + upstream_blocks_.push_back({upstream_mr_->allocate(size), size}); + current_size_ += size; + return upstream_blocks_.back(); + } + return {}; } /// The upstream resource to allocate memory from. @@ -428,9 +466,13 @@ class arena { */ explicit arena(global_arena& global_arena) : global_arena_{global_arena} {} + ~arena() = default; + // Disable copy (and move) semantics. - arena(const arena&) = delete; - arena& operator=(const arena&) = delete; + arena(arena const&) = delete; + arena& operator=(arena const&) = delete; + arena(arena&&) noexcept = delete; + arena& operator=(arena&&) noexcept = delete; /** * @brief Allocates memory of size at least `bytes`. @@ -443,101 +485,86 @@ class arena { void* allocate(std::size_t bytes) { lock_guard lock(mtx_); - auto const b = get_block(bytes); -#ifdef RMM_POOL_TRACK_ALLOCATIONS - allocated_blocks_.emplace(b.pointer(), b); -#endif - return b.pointer(); + auto const blk = get_block(bytes); + return blk.pointer(); } /** - * @brief Deallocate memory pointed to by `p`, and possibly return superblocks to upstream. + * @brief Deallocate memory pointed to by `ptr`, and possibly return superblocks to upstream. * - * @param p Pointer to be deallocated. + * @param ptr Pointer to be deallocated. * @param bytes The size in bytes of the allocation. This must be equal to the value of `bytes` * that was passed to the `allocate` call that returned `p`. * @param stream Stream on which to perform deallocation. - * @return true if the allocation is found, false otherwise. */ - bool deallocate(void* p, std::size_t bytes, cuda_stream_view stream) + void deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) { lock_guard lock(mtx_); -#ifdef RMM_POOL_TRACK_ALLOCATIONS - auto const b = free_block(p, bytes); -#else - block const b{p, bytes}; -#endif - if (b.is_valid()) { - auto const merged = coalesce_block(free_blocks_, b); - shrink_arena(merged, stream); - } - return b.is_valid(); + block const blk{ptr, bytes}; + auto const merged = coalesce_block(free_blocks_, blk); + shrink_arena(merged, stream); } -#ifdef RMM_POOL_TRACK_ALLOCATIONS /** - * @brief Deallocate memory pointed to by `p`, keeping all free superblocks. - * - * This is done when deallocating from another arena. Since we don't have access to the CUDA - * stream associated with this arena, we don't coalesce the freed block and return it directly to - * the global arena. - * - * @param p Pointer to be deallocated. - * @param bytes The size in bytes of the allocation. This must be equal to the value of `bytes` - * that was passed to the `allocate` call that returned `p`. - * @return true if the allocation is found, false otherwise. + * @brief Clean the arena and deallocate free blocks from the global arena. 
*/ - bool deallocate(void* p, std::size_t bytes) + void clean() { lock_guard lock(mtx_); - auto const b = free_block(p, bytes); - if (b.is_valid()) { global_arena_.deallocate(b); } - return b.is_valid(); + global_arena_.deallocate(free_blocks_); + free_blocks_.clear(); } -#endif /** - * @brief Clean the arena and deallocate free blocks from the global arena. + * Dump memory to log. * - * This is only needed when a per-thread arena is about to die. + * @param logger the spdlog logger to use */ - void clean() + void dump_memory_log(std::shared_ptr const& logger) const { lock_guard lock(mtx_); - global_arena_.deallocate(free_blocks_); - free_blocks_.clear(); -#ifdef RMM_POOL_TRACK_ALLOCATIONS - allocated_blocks_.clear(); -#endif + logger->info(" # free blocks: {}", free_blocks_.size()); + if (!free_blocks_.empty()) { + logger->info(" Total size of free blocks: {}", + rmm::detail::bytes{total_block_size(free_blocks_)}); + auto const largest_free = + *std::max_element(free_blocks_.begin(), free_blocks_.end(), block_size_compare); + logger->info(" Size of largest free block: {}", rmm::detail::bytes{largest_free.size()}); + } } private: using lock_guard = std::lock_guard; + /// Maximum number of free blocks to keep. + static constexpr int max_free_blocks = 16; /** * @brief Get an available memory block of at least `size` bytes. * * @param size The number of bytes to allocate. - * @return block A block of memory of at least `size` bytes. + * @return A block of memory of at least `size` bytes. */ block get_block(std::size_t size) { if (size < minimum_superblock_size) { // Find the first-fit free block. - auto const b = first_fit(free_blocks_, size); - if (b.is_valid()) { return b; } + auto const blk = first_fit(free_blocks_, size); + if (blk.is_valid()) { return blk; } } // No existing larger blocks available, so grow the arena and obtain a superblock. auto const superblock = expand_arena(size); - coalesce_block(free_blocks_, superblock); - return first_fit(free_blocks_, size); + if (superblock.is_valid()) { + coalesce_block(free_blocks_, superblock); + return first_fit(free_blocks_, size); + } + return superblock; } /** * @brief Allocate space from upstream to supply the arena and return a superblock. * - * @return block A superblock. + * @return A superblock. */ block expand_arena(std::size_t size) { @@ -545,55 +572,25 @@ class arena { return global_arena_.allocate(superblock_size); } -#ifdef RMM_POOL_TRACK_ALLOCATIONS - /** - * @brief Finds, frees and returns the block associated with pointer `p`. - * - * @param p The pointer to the memory to free. - * @param size The size of the memory to free. Must be equal to the original allocation size. - * @return The (now freed) block associated with `p`. The caller is expected to return the block - * to the arena. - */ - block free_block(void* p, std::size_t size) noexcept - { - auto const i = allocated_blocks_.find(p); - - // The pointer may be allocated in another arena. - if (i == allocated_blocks_.end()) { return {}; } - - auto const found = i->second; - RMM_LOGGING_ASSERT(found.size() == size); - allocated_blocks_.erase(i); - - return found; - } -#endif - /** * @brief Shrink this arena by returning free superblocks to upstream. * - * @param b The block that can be used to shrink the arena. + * @param blk The block that can be used to shrink the arena. * @param stream Stream on which to perform shrinking. 
*/ - void shrink_arena(block const& b, cuda_stream_view stream) + void shrink_arena(block const& blk, cuda_stream_view stream) { - // Don't shrink if b is not a superblock. - if (!b.is_superblock()) return; - - stream.synchronize_no_throw(); - - global_arena_.deallocate(b); - free_blocks_.erase(b); + if (blk.is_superblock() || free_blocks_.size() > max_free_blocks) { + stream.synchronize_no_throw(); + global_arena_.deallocate(blk); + free_blocks_.erase(blk); + } } /// The global arena to allocate superblocks from. global_arena& global_arena_; /// Free blocks. std::set free_blocks_; -#ifdef RMM_POOL_TRACK_ALLOCATIONS - //// Map of pointer address to allocated blocks. - std::unordered_map allocated_blocks_; -#endif /// Mutex for exclusive lock. mutable std::mutex mtx_; }; @@ -609,11 +606,13 @@ class arena { template class arena_cleaner { public: - explicit arena_cleaner(std::shared_ptr> const& a) : arena_(a) {} + explicit arena_cleaner(std::shared_ptr> const& arena) : arena_(arena) {} // Disable copy (and move) semantics. - arena_cleaner(const arena_cleaner&) = delete; - arena_cleaner& operator=(const arena_cleaner&) = delete; + arena_cleaner(arena_cleaner const&) = delete; + arena_cleaner& operator=(arena_cleaner const&) = delete; + arena_cleaner(arena_cleaner&&) noexcept = delete; + arena_cleaner& operator=(arena_cleaner&&) = delete; ~arena_cleaner() { @@ -628,7 +627,4 @@ class arena_cleaner { std::weak_ptr> arena_; }; -} // namespace arena -} // namespace detail -} // namespace mr -} // namespace rmm +} // namespace rmm::mr::detail::arena diff --git a/include/rmm/mr/host/pinned_memory_resource.hpp b/include/rmm/mr/host/pinned_memory_resource.hpp index 42be3644f..14b833684 100644 --- a/include/rmm/mr/host/pinned_memory_resource.hpp +++ b/include/rmm/mr/host/pinned_memory_resource.hpp @@ -62,10 +62,10 @@ class pinned_memory_resource final : public host_memory_resource { (detail::is_supported_alignment(alignment)) ? 
alignment : detail::RMM_DEFAULT_HOST_ALIGNMENT; return detail::aligned_allocate(bytes, alignment, [](std::size_t size) { - void* p{nullptr}; - auto status = cudaMallocHost(&p, size); + void* ptr{nullptr}; + auto status = cudaMallocHost(&ptr, size); if (cudaSuccess != status) { throw std::bad_alloc{}; } - return p; + return ptr; }); } From 397962b3d91aa56aed4eafe34214f7c706cfb0e0 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 14 Sep 2021 16:44:56 +1000 Subject: [PATCH 61/72] More tidy warnings found on command line --- .clang-tidy | 9 +- .../random_allocations/random_allocations.cpp | 110 +++++++++--------- include/rmm/device_uvector.hpp | 35 +++--- .../rmm/mr/device/binning_memory_resource.hpp | 4 +- include/rmm/mr/device/detail/free_list.hpp | 22 ++-- .../detail/stream_ordered_memory_resource.hpp | 74 ++++++------ .../mr/device/fixed_size_memory_resource.hpp | 2 +- include/rmm/mr/device/owning_wrapper.hpp | 4 +- .../rmm/mr/device/pool_memory_resource.hpp | 3 +- 9 files changed, 144 insertions(+), 119 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 263a2d2d6..04689c330 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -7,9 +7,10 @@ Checks: 'clang-diagnostic-*, performance-*, readability-*, llvm-*, + -cppcoreguidelines-macro-usage, + -llvm-header-guard, -modernize-use-trailing-return-type, - -readability-named-parameter, - -cppcoreguidelines-macro-usage' + -readability-named-parameter' WarningsAsErrors: '' HeaderFilterRegex: '' AnalyzeTemporaryDtors: false @@ -59,4 +60,8 @@ CheckOptions: # value: '1' - key: bugprone-easily-swappable-parameters.IgnoredParameterNames value: 'alignment' + - key: cppcoreguidelines-avoid-magic-numbers.IgnorePowersOf2IntegerValues + value: '1' + - key: readability-magic-numbers.IgnorePowersOf2IntegerValues + value: '1' ... 
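A minimal illustration (not part of the patch) of what the two IgnorePowersOf2IntegerValues entries added above change, assuming the documented behaviour of clang-tidy's magic-number checks; the helper functions below are hypothetical. Power-of-two literals stop being reported, while other literals still need the kind of NOLINT annotation used in the device_uvector benchmark earlier in this series.

#include <cstddef>

std::size_t scaled(std::size_t value)
{
  // Power-of-two literals are ignored by the magic-number checks
  // once IgnorePowersOf2IntegerValues is set to '1'.
  return value * 1024;
}

std::size_t padded(std::size_t value)
{
  // Non-power-of-two literals are still reported unless annotated.
  return value + 1000;  // NOLINT
}
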
diff --git a/benchmarks/random_allocations/random_allocations.cpp b/benchmarks/random_allocations/random_allocations.cpp index 65abd56b5..828561dd1 100644 --- a/benchmarks/random_allocations/random_allocations.cpp +++ b/benchmarks/random_allocations/random_allocations.cpp @@ -284,71 +284,73 @@ static void profile_random_allocations(MRFactoryFunc const& factory, int main(int argc, char** argv) { - // benchmark::Initialize will remove GBench command line arguments it - // recognizes and leave any remaining arguments - ::benchmark::Initialize(&argc, argv); - - // Parse for replay arguments: - cxxopts::Options options("RMM Random Allocations Benchmark", - "Benchmarks random allocations within a size range."); - - options.add_options()( - "p,profile", "Profiling mode: run once", cxxopts::value()->default_value("false")); - options.add_options()("r,resource", - "Type of device_memory_resource", - cxxopts::value()->default_value("pool")); - options.add_options()("n,numallocs", - "Number of allocations (default of 0 tests a range)", - cxxopts::value()->default_value("1000")); - options.add_options()("m,maxsize", - "Maximum allocation size (default of 0 tests a range)", - cxxopts::value()->default_value("4096")); - - auto args = options.parse(argc, argv); - num_allocations = args["numallocs"].as(); - max_size = args["maxsize"].as(); - - if (args.count("profile") > 0) { - std::map const funcs({{"arena", &make_arena}, - {"binning", &make_binning}, - {"cuda", &make_cuda}, + try { + // benchmark::Initialize will remove GBench command line arguments it + // recognizes and leave any remaining arguments + ::benchmark::Initialize(&argc, argv); + + // Parse for replay arguments: + cxxopts::Options options("RMM Random Allocations Benchmark", + "Benchmarks random allocations within a size range."); + + options.add_options()( + "p,profile", "Profiling mode: run once", cxxopts::value()->default_value("false")); + options.add_options()("r,resource", + "Type of device_memory_resource", + cxxopts::value()->default_value("pool")); + options.add_options()("n,numallocs", + "Number of allocations (default of 0 tests a range)", + cxxopts::value()->default_value("1000")); + options.add_options()("m,maxsize", + "Maximum allocation size (default of 0 tests a range)", + cxxopts::value()->default_value("4096")); + + auto args = options.parse(argc, argv); + num_allocations = args["numallocs"].as(); + max_size = args["maxsize"].as(); + + if (args.count("profile") > 0) { + std::map const funcs({{"arena", &make_arena}, + {"binning", &make_binning}, + {"cuda", &make_cuda}, #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT - {"cuda_async", &make_cuda_async}, + {"cuda_async", &make_cuda_async}, #endif - {"pool", &make_pool}}); - auto resource = args["resource"].as(); + {"pool", &make_pool}}); + auto resource = args["resource"].as(); - std::cout << "Profiling " << resource << " with " << num_allocations << " allocations of max " - << max_size << "B\n"; + std::cout << "Profiling " << resource << " with " << num_allocations << " allocations of max " + << max_size << "B\n"; - try { profile_random_allocations(funcs.at(resource), num_allocations, max_size); - } catch (std::exception const& e) { - std::cout << "Exception caught: " << e.what() << std::endl; - } - std::cout << "Finished\n"; - } else { - if (args.count("numallocs") == 0) { // if zero reset to -1 so we benchmark over a range - num_allocations = -1; - } - if (args.count("maxsize") == 0) { // if zero reset to -1 so we benchmark over a range - max_size = -1; - } - - if 
(args.count("resource") > 0) { - std::string mr_name = args["resource"].as(); - declare_benchmark(mr_name); + std::cout << "Finished\n"; } else { + if (args.count("numallocs") == 0) { // if zero reset to -1 so we benchmark over a range + num_allocations = -1; + } + if (args.count("maxsize") == 0) { // if zero reset to -1 so we benchmark over a range + max_size = -1; + } + + if (args.count("resource") > 0) { + std::string mr_name = args["resource"].as(); + declare_benchmark(mr_name); + } else { #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT - std::vector mrs{"pool", "binning", "arena", "cuda_async", "cuda"}; + std::vector mrs{"pool", "binning", "arena", "cuda_async", "cuda"}; #else - std::vector mrs{"pool", "binning", "arena", "cuda"}; + std::vector mrs{"pool", "binning", "arena", "cuda"}; #endif - std::for_each( - std::cbegin(mrs), std::cend(mrs), [](auto const& mr) { declare_benchmark(mr); }); + std::for_each( + std::cbegin(mrs), std::cend(mrs), [](auto const& mr) { declare_benchmark(mr); }); + } + ::benchmark::RunSpecifiedBenchmarks(); } - ::benchmark::RunSpecifiedBenchmarks(); + + } catch (std::exception const& e) { + std::cout << "Exception caught: " << e.what() << std::endl; } + return 0; } diff --git a/include/rmm/device_uvector.hpp b/include/rmm/device_uvector.hpp index f7f55a910..49d56a0de 100644 --- a/include/rmm/device_uvector.hpp +++ b/include/rmm/device_uvector.hpp @@ -147,7 +147,7 @@ class device_uvector { * @param element_index Index of the specified element. * @return T* Pointer to the desired element */ - pointer element_ptr(std::size_t element_index) noexcept + [[nodiscard]] pointer element_ptr(std::size_t element_index) noexcept { assert(element_index < size()); return data() + element_index; @@ -161,7 +161,7 @@ class device_uvector { * @param element_index Index of the specified element. * @return T* Pointer to the desired element */ - const_pointer element_ptr(std::size_t element_index) const noexcept + [[nodiscard]] const_pointer element_ptr(std::size_t element_index) const noexcept { assert(element_index < size()); return data() + element_index; @@ -323,7 +323,10 @@ class device_uvector { * @param stream The stream on which to perform the copy * @return The value of the first element */ - value_type front_element(cuda_stream_view stream) const { return element(0, stream); } + [[nodiscard]] value_type front_element(cuda_stream_view stream) const + { + return element(0, stream); + } /** * @brief Returns the last element. @@ -336,7 +339,10 @@ class device_uvector { * @param stream The stream on which to perform the copy * @return The value of the last element */ - value_type back_element(cuda_stream_view stream) const { return element(size() - 1, stream); } + [[nodiscard]] value_type back_element(cuda_stream_view stream) const + { + return element(size() - 1, stream); + } /** * @brief Resizes the vector to contain `new_size` elements. @@ -373,7 +379,7 @@ class device_uvector { * * @return The `device_buffer` used to store the vector elements */ - device_buffer release() noexcept { return std::move(_storage); } + [[nodiscard]] device_buffer release() noexcept { return std::move(_storage); } /** * @brief Returns the number of elements that can be held in currently allocated storage. @@ -394,7 +400,7 @@ class device_uvector { * * @return Raw pointer to element storage in device memory. 
*/ - pointer data() noexcept { return static_cast(_storage.data()); } + [[nodiscard]] pointer data() noexcept { return static_cast(_storage.data()); } /** * @brief Returns const pointer to underlying device storage. @@ -404,7 +410,10 @@ class device_uvector { * * @return const_pointer Raw const pointer to element storage in device memory. */ - const_pointer data() const noexcept { return static_cast(_storage.data()); } + [[nodiscard]] const_pointer data() const noexcept + { + return static_cast(_storage.data()); + } /** * @brief Returns an iterator to the first element. @@ -413,7 +422,7 @@ class device_uvector { * * @return Iterator to the first element. */ - iterator begin() noexcept { return data(); } + [[nodiscard]] iterator begin() noexcept { return data(); } /** * @brief Returns a const_iterator to the first element. @@ -422,7 +431,7 @@ class device_uvector { * * @return Immutable iterator to the first element. */ - const_iterator cbegin() const noexcept { return data(); } + [[nodiscard]] const_iterator cbegin() const noexcept { return data(); } /** * @brief Returns a const_iterator to the first element. @@ -431,7 +440,7 @@ class device_uvector { * * @return Immutable iterator to the first element. */ - const_iterator begin() const noexcept { return cbegin(); } + [[nodiscard]] const_iterator begin() const noexcept { return cbegin(); } /** * @brief Returns an iterator to the element following the last element of the vector. @@ -441,7 +450,7 @@ class device_uvector { * * @return Iterator to one past the last element. */ - iterator end() noexcept { return data() + size(); } + [[nodiscard]] iterator end() noexcept { return data() + size(); } /** * @brief Returns a const_iterator to the element following the last element of the vector. @@ -451,7 +460,7 @@ class device_uvector { * * @return Immutable iterator to one past the last element. */ - const_iterator cend() const noexcept { return data() + size(); } + [[nodiscard]] const_iterator cend() const noexcept { return data() + size(); } /** * @brief Returns an iterator to the element following the last element of the vector. @@ -461,7 +470,7 @@ class device_uvector { * * @return Immutable iterator to one past the last element. */ - const_iterator end() const noexcept { return cend(); } + [[nodiscard]] const_iterator end() const noexcept { return cend(); } /** * @brief Returns the number of elements in the vector. diff --git a/include/rmm/mr/device/binning_memory_resource.hpp b/include/rmm/mr/device/binning_memory_resource.hpp index 46a7e204d..5d4b156c8 100644 --- a/include/rmm/mr/device/binning_memory_resource.hpp +++ b/include/rmm/mr/device/binning_memory_resource.hpp @@ -69,7 +69,7 @@ class binning_memory_resource final : public device_memory_resource { * @param max_size_exponent The maximum base-2 exponent bin size. */ binning_memory_resource(Upstream* upstream_resource, - int8_t min_size_exponent, + int8_t min_size_exponent, // NOLINT(bugprone-easily-swappable-parameters) int8_t max_size_exponent) : upstream_mr_{[upstream_resource]() { RMM_EXPECTS(nullptr != upstream_resource, "Unexpected null upstream pointer."); @@ -113,7 +113,7 @@ class binning_memory_resource final : public device_memory_resource { * * @return UpstreamResource* the upstream memory resource. 
*/ - Upstream* get_upstream() const noexcept { return upstream_mr_; } + [[nodiscard]] Upstream* get_upstream() const noexcept { return upstream_mr_; } /** * @brief Add a bin allocator to this resource diff --git a/include/rmm/mr/device/detail/free_list.hpp b/include/rmm/mr/device/detail/free_list.hpp index f249c2f25..e561fe249 100644 --- a/include/rmm/mr/device/detail/free_list.hpp +++ b/include/rmm/mr/device/detail/free_list.hpp @@ -70,20 +70,26 @@ class free_list { using iterator = typename list_type::iterator; using const_iterator = typename list_type::const_iterator; - iterator begin() noexcept { return blocks.begin(); } /// beginning of the free list - const_iterator begin() const noexcept { return blocks.begin(); } /// beginning of the free list - const_iterator cbegin() const noexcept { return blocks.cbegin(); } /// beginning of the free list - - iterator end() noexcept { return blocks.end(); } /// end of the free list - const_iterator end() const noexcept { return blocks.end(); } /// end of the free list - const_iterator cend() const noexcept { return blocks.cend(); } /// end of the free list + /// beginning of the free list + [[nodiscard]] iterator begin() noexcept { return blocks.begin(); } + /// beginning of the free list + [[nodiscard]] const_iterator begin() const noexcept { return blocks.begin(); } + /// beginning of the free list + [[nodiscard]] const_iterator cbegin() const noexcept { return blocks.cbegin(); } + + /// end of the free list + [[nodiscard]] iterator end() noexcept { return blocks.end(); } + /// beginning of the free list + [[nodiscard]] const_iterator end() const noexcept { return blocks.end(); } + /// beginning of the free list + [[nodiscard]] const_iterator cend() const noexcept { return blocks.cend(); } /** * @brief The size of the free list in blocks. * * @return size_type The number of blocks in the free list. */ - size_type size() const noexcept { return blocks.size(); } + [[nodiscard]] size_type size() const noexcept { return blocks.size(); } /** * @brief checks whether the free_list is empty. diff --git a/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp b/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp index f9106e17d..2a726377d 100644 --- a/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp +++ b/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp @@ -46,8 +46,8 @@ namespace rmm::mr::detail { */ template struct crtp { - T& underlying() { return static_cast(*this); } - T const& underlying() const { return static_cast(*this); } + [[nodiscard]] T& underlying() { return static_cast(*this); } + [[nodiscard]] T const& underlying() const { return static_cast(*this); } }; /** @@ -288,7 +288,7 @@ class stream_ordered_memory_resource : public crtp, public device_ // instance ensures it is destroyed cleaned up only after all are finished with it. thread_local auto event_tls = std::make_shared(); default_stream_events.insert(event_tls); - return stream_event_pair{stream.value(), event_tls.get()->event}; + return stream_event_pair{stream.value(), event_tls->event}; } // We use cudaStreamLegacy as the event map key for the default stream for consistency between // PTDS and non-PTDS mode. 
In PTDS mode, the cudaStreamLegacy map key will only exist if the @@ -383,45 +383,47 @@ class stream_ordered_memory_resource : public crtp, public device_ free_list& blocks, bool merge_first) { - for (auto it = stream_free_blocks_.begin(), next_it = it; it != stream_free_blocks_.end(); - it = next_it) { - ++next_it; // Points to element after `it` to allow erasing `it` in the loop body - auto other_event = it->first.event; - if (other_event != stream_event.event) { - free_list& other_blocks = it->second; - - block_type const block = [&]() { - if (merge_first) { - merge_lists(stream_event, blocks, other_event, std::move(other_blocks)); - - RMM_LOG_DEBUG("[A][Stream {:p}][{}B][Merged stream {:p}]", - fmt::ptr(stream_event.stream), - size, - fmt::ptr(it->first.stream)); - - stream_free_blocks_.erase(it); - - block_type const block = - blocks.get_block(size); // get the best fit block in merged lists - if (block.is_valid()) { return allocate_and_insert_remainder(block, size, blocks); } - } else { - block_type const block = other_blocks.get_block(size); - if (block.is_valid()) { - // Since we found a block associated with a different stream, we have to insert a wait - // on the stream's associated event into the allocating stream. - RMM_CUDA_TRY(cudaStreamWaitEvent(stream_event.stream, other_event, 0)); - return allocate_and_insert_remainder(block, size, other_blocks); - } - } - return block_type{}; - }(); + auto find_block = [&](auto iter) { + auto other_event = iter->first.event; + auto& other_blocks = iter->second; + if (merge_first) { + merge_lists(stream_event, blocks, other_event, std::move(other_blocks)); + + RMM_LOG_DEBUG("[A][Stream {:p}][{}B][Merged stream {:p}]", + fmt::ptr(stream_event.stream), + size, + fmt::ptr(iter->first.stream)); + + stream_free_blocks_.erase(iter); + + block_type const block = blocks.get_block(size); // get the best fit block in merged lists + if (block.is_valid()) { return allocate_and_insert_remainder(block, size, blocks); } + } else { + block_type const block = other_blocks.get_block(size); + if (block.is_valid()) { + // Since we found a block associated with a different stream, we have to insert a wait + // on the stream's associated event into the allocating stream. + RMM_CUDA_TRY(cudaStreamWaitEvent(stream_event.stream, other_event, 0)); + return allocate_and_insert_remainder(block, size, other_blocks); + } + } + return block_type{}; + }; + + for (auto iter = stream_free_blocks_.begin(), next_iter = iter; + iter != stream_free_blocks_.end(); + iter = next_iter) { + ++next_iter; // Points to element after `iter` to allow erasing `iter` in the loop body + + if (iter->first.event != stream_event.event) { + block_type const block = find_block(iter); if (block.is_valid()) { RMM_LOG_DEBUG((merge_first) ? "[A][Stream {:p}][{}B][Found after merging stream {:p}]" : "[A][Stream {:p}][{}B][Taken from stream {:p}]", fmt::ptr(stream_event.stream), size, - fmt::ptr(it->first.stream)); + fmt::ptr(iter->first.stream)); return block; } } diff --git a/include/rmm/mr/device/fixed_size_memory_resource.hpp b/include/rmm/mr/device/fixed_size_memory_resource.hpp index 6ff02bcc3..4c29881fb 100644 --- a/include/rmm/mr/device/fixed_size_memory_resource.hpp +++ b/include/rmm/mr/device/fixed_size_memory_resource.hpp @@ -81,7 +81,7 @@ class fixed_size_memory_resource * @brief Destroy the `fixed_size_memory_resource` and free all memory allocated from upstream. 
* */ - ~fixed_size_memory_resource() { release(); } + ~fixed_size_memory_resource() override { release(); } fixed_size_memory_resource() = delete; fixed_size_memory_resource(fixed_size_memory_resource const&) = delete; diff --git a/include/rmm/mr/device/owning_wrapper.hpp b/include/rmm/mr/device/owning_wrapper.hpp index 17a5b4565..977ae0c11 100644 --- a/include/rmm/mr/device/owning_wrapper.hpp +++ b/include/rmm/mr/device/owning_wrapper.hpp @@ -119,13 +119,13 @@ class owning_wrapper : public device_memory_resource { * @brief Returns a constant reference to the wrapped resource. * */ - Resource const& wrapped() const noexcept { return *wrapped_; } + [[nodiscard]] Resource const& wrapped() const noexcept { return *wrapped_; } /** * @brief Returns reference to the wrapped resource. * */ - Resource& wrapped() noexcept { return *wrapped_; } + [[nodiscard]] Resource& wrapped() noexcept { return *wrapped_; } /** * @copydoc rmm::mr::device_memory_resource::supports_streams() diff --git a/include/rmm/mr/device/pool_memory_resource.hpp b/include/rmm/mr/device/pool_memory_resource.hpp index e446a9332..526852355 100644 --- a/include/rmm/mr/device/pool_memory_resource.hpp +++ b/include/rmm/mr/device/pool_memory_resource.hpp @@ -99,7 +99,7 @@ class pool_memory_resource final * @brief Destroy the `pool_memory_resource` and deallocate all memory it allocated using * the upstream resource. */ - ~pool_memory_resource() { release(); } + ~pool_memory_resource() override { release(); } pool_memory_resource() = delete; pool_memory_resource(pool_memory_resource const&) = delete; @@ -195,6 +195,7 @@ class pool_memory_resource final * @param initial_size The optional initial size for the pool * @param maximum_size The optional maximum size for the pool */ + // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) void initialize_pool(thrust::optional initial_size, thrust::optional maximum_size) { From 87959eccb742376680e94d8b70b477622857c92b Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 15 Sep 2021 12:48:18 +1000 Subject: [PATCH 62/72] Fix potential leak and exception warnings --- benchmarks/replay/replay.cpp | 179 ++++++++++++++++++----------------- 1 file changed, 94 insertions(+), 85 deletions(-) diff --git a/benchmarks/replay/replay.cpp b/benchmarks/replay/replay.cpp index 4e9bbffcf..aa8c077da 100644 --- a/benchmarks/replay/replay.cpp +++ b/benchmarks/replay/replay.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -45,7 +45,7 @@ #include /// MR factory functions -std::shared_ptr make_cuda(std::size_t /*unused*/ = 0) +std::shared_ptr make_cuda(std::size_t = 0) { return std::make_shared(); } @@ -57,18 +57,20 @@ std::shared_ptr make_simulated(std::size_t simu inline auto make_pool(std::size_t simulated_size) { - return simulated_size == 0 - ? rmm::mr::make_owning_wrapper(make_cuda()) - : rmm::mr::make_owning_wrapper( - make_simulated(simulated_size), simulated_size, simulated_size); + if (simulated_size > 0) { + return rmm::mr::make_owning_wrapper( + make_simulated(simulated_size), simulated_size, simulated_size); + } + return rmm::mr::make_owning_wrapper(make_cuda()); } inline auto make_arena(std::size_t simulated_size) { - return simulated_size == 0 - ? 
rmm::mr::make_owning_wrapper(make_cuda()) - : rmm::mr::make_owning_wrapper( - make_simulated(simulated_size), simulated_size, simulated_size); + if (simulated_size > 0) { + return rmm::mr::make_owning_wrapper( + make_simulated(simulated_size), simulated_size, simulated_size); + } + return rmm::mr::make_owning_wrapper(make_cuda()); } inline auto make_binning(std::size_t simulated_size) @@ -325,93 +327,100 @@ void declare_benchmark(std::string const& name, // Usage: REPLAY_BENCHMARK -f "path/to/log/file" int main(int argc, char** argv) { - // benchmark::Initialize will remove GBench command line arguments it - // recognizes and leave any remaining arguments - ::benchmark::Initialize(&argc, argv); - - // Parse for replay arguments: - auto args = [&argc, &argv]() { - cxxopts::Options options( - "RMM Replay Benchmark", - "Replays and benchmarks allocation activity captured from RMM logging."); - - options.add_options()("f,file", "Name of RMM log file.", cxxopts::value()); - options.add_options()("r,resource", - "Type of device_memory_resource", - cxxopts::value()->default_value("pool")); - options.add_options()("s,size", - "Size of simulated GPU memory in GiB. Not supported for the cuda memory " - "resource.", - cxxopts::value()->default_value("0")); - options.add_options()("v,verbose", - "Enable verbose printing of log events", - cxxopts::value()->default_value("false")); - - auto args = options.parse(argc, argv); - - if (args.count("file") == 0) { - std::cout << options.help() << std::endl; - exit(0); - } + try { + // benchmark::Initialize will remove GBench command line arguments it + // recognizes and leave any remaining arguments + ::benchmark::Initialize(&argc, argv); + + // Parse for replay arguments: + auto args = [&argc, &argv]() { + cxxopts::Options options( + "RMM Replay Benchmark", + "Replays and benchmarks allocation activity captured from RMM logging."); + + options.add_options()("f,file", "Name of RMM log file.", cxxopts::value()); + options.add_options()("r,resource", + "Type of device_memory_resource", + cxxopts::value()->default_value("pool")); + options.add_options()( + "s,size", + "Size of simulated GPU memory in GiB. 
Not supported for the cuda memory " + "resource.", + cxxopts::value()->default_value("0")); + options.add_options()("v,verbose", + "Enable verbose printing of log events", + cxxopts::value()->default_value("false")); + + auto args = options.parse(argc, argv); + + if (args.count("file") == 0) { + std::cout << options.help() << std::endl; + exit(0); + } - return args; - }(); + return args; + }(); - auto filename = args["file"].as(); + auto filename = args["file"].as(); - auto per_thread_events = [filename]() { - try { - auto events = parse_per_thread_events(filename); - return events; - } catch (std::exception const& e) { - std::cout << "Failed to parse events: " << e.what() << std::endl; - return std::vector>{}; - } - }(); + auto per_thread_events = [filename]() { + try { + auto events = parse_per_thread_events(filename); + return events; + } catch (std::exception const& e) { + std::cout << "Failed to parse events: " << e.what() << std::endl; + return std::vector>{}; + } + }(); #ifdef CUDA_API_PER_THREAD_DEFAULT_STREAM - std::cout << "Using CUDA per-thread default stream.\n"; + std::cout << "Using CUDA per-thread default stream.\n"; #endif - auto const simulated_size = - static_cast(args["size"].as() * static_cast(1U << 30U)); - if (simulated_size != 0 && args["resource"].as() != "cuda") { - std::cout << "Simulating GPU with memory size of " << simulated_size << " bytes.\n"; - } + auto const simulated_size = + static_cast(args["size"].as() * static_cast(1U << 30U)); + if (simulated_size != 0 && args["resource"].as() != "cuda") { + std::cout << "Simulating GPU with memory size of " << simulated_size << " bytes.\n"; + } - std::cout << "Total Events: " - << std::accumulate( - per_thread_events.begin(), - per_thread_events.end(), - 0, - [](std::size_t accum, auto const& events) { return accum + events.size(); }) - << std::endl; - - for (std::size_t thread = 0; thread < per_thread_events.size(); ++thread) { - std::cout << "Thread " << thread << ": " << per_thread_events[thread].size() << " events\n"; - if (args["verbose"].as()) { - for (auto const& event : per_thread_events[thread]) { - std::cout << event << std::endl; + std::cout << "Total Events: " + << std::accumulate( + per_thread_events.begin(), + per_thread_events.end(), + 0, + [](std::size_t accum, auto const& events) { return accum + events.size(); }) + << std::endl; + + for (std::size_t thread = 0; thread < per_thread_events.size(); ++thread) { + std::cout << "Thread " << thread << ": " << per_thread_events[thread].size() << " events\n"; + if (args["verbose"].as()) { + for (auto const& event : per_thread_events[thread]) { + std::cout << event << std::endl; + } } } - } - auto const num_threads = per_thread_events.size(); - - // Uncomment to enable / change default log level - // rmm::logger().set_level(spdlog::level::trace); + auto const num_threads = per_thread_events.size(); + + // Uncomment to enable / change default log level + // rmm::logger().set_level(spdlog::level::trace); + + if (args.count("resource") > 0) { + std::string mr_name = args["resource"].as(); + declare_benchmark(mr_name, simulated_size, per_thread_events, num_threads); + } else { + std::array mrs{"pool", "arena", "binning", "cuda"}; + std::for_each(std::cbegin(mrs), + std::cend(mrs), + [&simulated_size, &per_thread_events, &num_threads](auto const& mr) { + declare_benchmark(mr, simulated_size, per_thread_events, num_threads); + }); + } - if (args.count("resource") > 0) { - std::string mr_name = args["resource"].as(); - declare_benchmark(mr_name, simulated_size, 
per_thread_events, num_threads); - } else { - std::array mrs{"pool", "arena", "binning", "cuda"}; - std::for_each(std::cbegin(mrs), - std::cend(mrs), - [&simulated_size, &per_thread_events, &num_threads](auto const& mr) { - declare_benchmark(mr, simulated_size, per_thread_events, num_threads); - }); + ::benchmark::RunSpecifiedBenchmarks(); + } catch (std::exception const& e) { + std::cout << "Exception caught: " << e.what() << std::endl; } - ::benchmark::RunSpecifiedBenchmarks(); + return 0; } From cca3880da2da41c0f49d0a713750531410c5ed3e Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 15 Sep 2021 13:12:43 +1000 Subject: [PATCH 63/72] Fix uninitialized member clang-tidy warning --- include/rmm/mr/device/detail/free_list.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/rmm/mr/device/detail/free_list.hpp b/include/rmm/mr/device/detail/free_list.hpp index e561fe249..1e8623431 100644 --- a/include/rmm/mr/device/detail/free_list.hpp +++ b/include/rmm/mr/device/detail/free_list.hpp @@ -25,6 +25,9 @@ namespace rmm::mr::detail { struct block_base { void* ptr{}; ///< Raw memory pointer + block_base() = default; + block_base(void* ptr) : ptr{ptr} {}; + /// Returns the raw pointer for this block [[nodiscard]] inline void* pointer() const { return ptr; } /// Returns true if this block is valid (non-null), false otherwise From 496474fecf13c1eeb3d27cd06c08f24c188dc9f0 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 15 Sep 2021 13:34:36 +1000 Subject: [PATCH 64/72] nodiscard --- include/rmm/device_uvector.hpp | 2 +- tests/device_uvector_tests.cpp | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/include/rmm/device_uvector.hpp b/include/rmm/device_uvector.hpp index 49d56a0de..a52cb2ea8 100644 --- a/include/rmm/device_uvector.hpp +++ b/include/rmm/device_uvector.hpp @@ -301,7 +301,7 @@ class device_uvector { * @param stream The stream on which to perform the copy * @return The value of the specified element */ - value_type element(std::size_t element_index, cuda_stream_view stream) const + [[nodiscard]] value_type element(std::size_t element_index, cuda_stream_view stream) const { RMM_EXPECTS( element_index < size(), rmm::out_of_range, "Attempt to access out of bounds element."); diff --git a/tests/device_uvector_tests.cpp b/tests/device_uvector_tests.cpp index 09aa7e527..00ad66832 100644 --- a/tests/device_uvector_tests.cpp +++ b/tests/device_uvector_tests.cpp @@ -158,7 +158,9 @@ TYPED_TEST(TypedUVectorTest, OOBGetElement) { auto const size{12345}; rmm::device_uvector vec(size, this->stream()); - EXPECT_THROW(vec.element(vec.size() + 1, this->stream()), rmm::out_of_range); + // avoid error due to nodiscard function + auto foo = [&]() { return vec.element(vec.size() + 1, this->stream()); }; + EXPECT_THROW(foo(), rmm::out_of_range); } TYPED_TEST(TypedUVectorTest, GetSetElement) From 6f4d73926ecda54bc4d9cfee5bd55e426fca1625 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 15 Sep 2021 13:35:07 +1000 Subject: [PATCH 65/72] tidying --- .../multi_stream_allocations_bench.cu | 129 ++++++++++-------- 1 file changed, 69 insertions(+), 60 deletions(-) diff --git a/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu b/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu index dbcea2a45..5ed1b31f9 100644 --- a/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu +++ b/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu @@ -113,7 +113,10 @@ inline auto make_binning() auto pool = make_pool(); // 
Add a binning_memory_resource with fixed-size bins of sizes 256, 512, 1024, 2048 and 4096KiB // Larger allocations will use the pool resource - auto mr = rmm::mr::make_owning_wrapper(pool, 18, 22); + constexpr auto min_bin_pow2{18}; + constexpr auto max_bin_pow2{22}; + auto mr = rmm::mr::make_owning_wrapper( + pool, min_bin_pow2, max_bin_pow2); return mr; } @@ -191,69 +194,75 @@ void run_profile(std::string const& resource_name, int kernel_count, int stream_ int main(int argc, char** argv) { - ::benchmark::Initialize(&argc, argv); - - // Parse for replay arguments: - cxxopts::Options options( - "RMM Multi Stream Allocations Benchmark", - "Benchmarks interleaving temporary allocations with compute-bound kernels."); - - options.add_options()( // - "p,profile", - "Profiling mode: run once", - cxxopts::value()->default_value("false")); - - options.add_options()( // - "r,resource", - "Type of device_memory_resource", - cxxopts::value()->default_value("pool")); - - options.add_options()( // - "k,kernels", - "Number of kernels to run: (default: 8)", - cxxopts::value()->default_value("8")); - - options.add_options()( // - "s,streams", - "Number of streams in stream pool (default: 8)", - cxxopts::value()->default_value("8")); - - options.add_options()( // - "w,warm", - "Ensure each stream has enough memory to satisfy allocations.", - cxxopts::value()->default_value("false")); - - auto args = options.parse(argc, argv); - - if (args.count("profile") > 0) { - auto resource_name = args["resource"].as(); - auto num_kernels = args["kernels"].as(); - auto num_streams = args["streams"].as(); - auto prewarm = args["warm"].as(); - try { - run_profile(resource_name, num_kernels, num_streams, prewarm); - } catch (std::exception const& e) { - std::cout << "Exception caught: " << e.what() << std::endl; - } - } else { - auto resource_names = std::vector(); - - if (args.count("resource") > 0) { - resource_names.emplace_back(args["resource"].as()); + try { + ::benchmark::Initialize(&argc, argv); + + // Parse for replay arguments: + cxxopts::Options options( + "RMM Multi Stream Allocations Benchmark", + "Benchmarks interleaving temporary allocations with compute-bound kernels."); + + options.add_options()( // + "p,profile", + "Profiling mode: run once", + cxxopts::value()->default_value("false")); + + options.add_options()( // + "r,resource", + "Type of device_memory_resource", + cxxopts::value()->default_value("pool")); + + options.add_options()( // + "k,kernels", + "Number of kernels to run: (default: 8)", + cxxopts::value()->default_value("8")); + + options.add_options()( // + "s,streams", + "Number of streams in stream pool (default: 8)", + cxxopts::value()->default_value("8")); + + options.add_options()( // + "w,warm", + "Ensure each stream has enough memory to satisfy allocations.", + cxxopts::value()->default_value("false")); + + auto args = options.parse(argc, argv); + + if (args.count("profile") > 0) { + auto resource_name = args["resource"].as(); + auto num_kernels = args["kernels"].as(); + auto num_streams = args["streams"].as(); + auto prewarm = args["warm"].as(); + try { + run_profile(resource_name, num_kernels, num_streams, prewarm); + } catch (std::exception const& e) { + std::cout << "Exception caught: " << e.what() << std::endl; + } } else { - resource_names.emplace_back("cuda"); + auto resource_names = std::vector(); + + if (args.count("resource") > 0) { + resource_names.emplace_back(args["resource"].as()); + } else { + resource_names.emplace_back("cuda"); #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT - 
resource_names.emplace_back("cuda_async"); + resource_names.emplace_back("cuda_async"); #endif - resource_names.emplace_back("pool"); - resource_names.emplace_back("arena"); - resource_names.emplace_back("binning"); - } + resource_names.emplace_back("pool"); + resource_names.emplace_back("arena"); + resource_names.emplace_back("binning"); + } - for (auto& resource_name : resource_names) { - declare_benchmark(resource_name); - } + for (auto& resource_name : resource_names) { + declare_benchmark(resource_name); + } - ::benchmark::RunSpecifiedBenchmarks(); + ::benchmark::RunSpecifiedBenchmarks(); + } + } catch (std::exception const& e) { + std::cout << "Exception caught: " << e.what() << std::endl; } + + return 0; } From 8459c72bbbedee03d98474bb9697e06eafec6468 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 15 Sep 2021 14:39:04 +1000 Subject: [PATCH 66/72] nodiscard --- include/rmm/mr/device/limiting_resource_adaptor.hpp | 2 +- include/rmm/mr/device/polymorphic_allocator.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/rmm/mr/device/limiting_resource_adaptor.hpp b/include/rmm/mr/device/limiting_resource_adaptor.hpp index 810228715..c2a4dfe35 100644 --- a/include/rmm/mr/device/limiting_resource_adaptor.hpp +++ b/include/rmm/mr/device/limiting_resource_adaptor.hpp @@ -69,7 +69,7 @@ class limiting_resource_adaptor final : public device_memory_resource { * * @return Upstream* Pointer to the upstream resource. */ - Upstream* get_upstream() const noexcept { return upstream_; } + [[nodiscard]] Upstream* get_upstream() const noexcept { return upstream_; } /** * @brief Checks whether the upstream resource supports streams. diff --git a/include/rmm/mr/device/polymorphic_allocator.hpp b/include/rmm/mr/device/polymorphic_allocator.hpp index 643d1b6fb..5c87ef7f7 100644 --- a/include/rmm/mr/device/polymorphic_allocator.hpp +++ b/include/rmm/mr/device/polymorphic_allocator.hpp @@ -221,7 +221,7 @@ class stream_allocator_adaptor { * @brief Returns the underlying stream-ordered allocator * */ - Allocator underlying_allocator() const noexcept { return alloc_; } + [[nodiscard]] Allocator underlying_allocator() const noexcept { return alloc_; } private: Allocator alloc_; ///< Underlying allocator used for (de)allocation From 5b9159823df0c226b513a766b8bb01915bc16579 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 15 Sep 2021 14:39:16 +1000 Subject: [PATCH 67/72] nolints --- tests/cuda_stream_tests.cpp | 2 +- tests/device_buffer_tests.cu | 22 ++++++++++++---------- tests/device_scalar_tests.cpp | 5 +++-- tests/device_uvector_tests.cpp | 14 +++++++------- 4 files changed, 23 insertions(+), 20 deletions(-) diff --git a/tests/cuda_stream_tests.cpp b/tests/cuda_stream_tests.cpp index 96cae868e..f801226c6 100644 --- a/tests/cuda_stream_tests.cpp +++ b/tests/cuda_stream_tests.cpp @@ -47,7 +47,7 @@ TEST_F(CudaStreamTest, MoveConstructor) rmm::cuda_stream stream_a; auto const view_a = stream_a.view(); rmm::cuda_stream stream_b = std::move(stream_a); - // NOLINTNEXTLINE(bugprone-use-after-move) + // NOLINTNEXTLINE(bugprone-use-after-move, clang-analyzer-cplusplus.Move) EXPECT_FALSE(stream_a.is_valid()); // Any other operations on stream_a are UB, may segfault EXPECT_EQ(stream_b, view_a); } diff --git a/tests/device_buffer_tests.cu b/tests/device_buffer_tests.cu index ff71dfba1..2f8d34bad 100644 --- a/tests/device_buffer_tests.cu +++ b/tests/device_buffer_tests.cu @@ -293,9 +293,10 @@ TYPED_TEST(DeviceBufferTest, MoveConstructor) EXPECT_EQ(mr, buff_new.memory_resource()); // 
Original buffer should be empty - EXPECT_EQ(nullptr, buff.data()); // NOLINT(bugprone-use-after-move) - EXPECT_EQ(0, buff.size()); // NOLINT(bugprone-use-after-move) - EXPECT_EQ(0, buff.capacity()); // NOLINT(bugprone-use-after-move) + EXPECT_EQ(nullptr, + buff.data()); // NOLINT(bugprone-use-after-move, clang-analyzer-cplusplus.Move) + EXPECT_EQ(0, buff.size()); // NOLINT(bugprone-use-after-move) + EXPECT_EQ(0, buff.capacity()); // NOLINT(bugprone-use-after-move) EXPECT_EQ(rmm::cuda_stream_default, buff.stream()); // NOLINT(bugprone-use-after-move) EXPECT_NE(nullptr, buff.memory_resource()); // NOLINT(bugprone-use-after-move) } @@ -321,9 +322,10 @@ TYPED_TEST(DeviceBufferTest, MoveConstructorStream) EXPECT_EQ(mr, buff_new.memory_resource()); // Original buffer should be empty - EXPECT_EQ(nullptr, buff.data()); // NOLINT(bugprone-use-after-move) - EXPECT_EQ(0, buff.size()); // NOLINT(bugprone-use-after-move) - EXPECT_EQ(0, buff.capacity()); // NOLINT(bugprone-use-after-move) + EXPECT_EQ(nullptr, + buff.data()); // NOLINT(bugprone-use-after-move, clang-analyzer-cplusplus.Move) + EXPECT_EQ(0, buff.size()); // NOLINT(bugprone-use-after-move) + EXPECT_EQ(0, buff.capacity()); // NOLINT(bugprone-use-after-move) EXPECT_EQ(rmm::cuda_stream_view{}, buff.stream()); // NOLINT(bugprone-use-after-move) EXPECT_NE(nullptr, buff.memory_resource()); // NOLINT(bugprone-use-after-move) } @@ -349,7 +351,7 @@ TYPED_TEST(DeviceBufferTest, MoveAssignmentToDefault) EXPECT_EQ(mr, dest.memory_resource()); // `from` should be empty - EXPECT_EQ(nullptr, src.data()); + EXPECT_EQ(nullptr, src.data()); // NOLINT(bugprone-use-after-move,clang-analyzer-cplusplus.Move) EXPECT_EQ(0, src.size()); EXPECT_EQ(0, src.capacity()); EXPECT_EQ(rmm::cuda_stream_default, src.stream()); @@ -377,7 +379,7 @@ TYPED_TEST(DeviceBufferTest, MoveAssignment) EXPECT_EQ(mr, dest.memory_resource()); // `from` should be empty - EXPECT_EQ(nullptr, src.data()); + EXPECT_EQ(nullptr, src.data()); // NOLINT(bugprone-use-after-move,clang-analyzer-cplusplus.Move) EXPECT_EQ(0, src.size()); EXPECT_EQ(0, src.capacity()); EXPECT_EQ(rmm::cuda_stream_default, src.stream()); @@ -393,8 +395,8 @@ TYPED_TEST(DeviceBufferTest, SelfMoveAssignment) auto* mr = buff.memory_resource(); auto stream = buff.stream(); - buff = std::move(buff); // self-move-assignment shouldn't modify the buffer - EXPECT_NE(nullptr, buff.data()); + buff = std::move(buff); // self-move-assignment shouldn't modify the buffer + EXPECT_NE(nullptr, buff.data()); // NOLINT(bugprone-use-after-move,clang-analyzer-cplusplus.Move) EXPECT_EQ(ptr, buff.data()); EXPECT_EQ(size, buff.size()); EXPECT_EQ(capacity, buff.capacity()); diff --git a/tests/device_scalar_tests.cpp b/tests/device_scalar_tests.cpp index 65f963132..63c471094 100644 --- a/tests/device_scalar_tests.cpp +++ b/tests/device_scalar_tests.cpp @@ -93,13 +93,14 @@ TYPED_TEST(DeviceScalarTest, MoveCtor) EXPECT_NE(nullptr, scalar.data()); EXPECT_EQ(this->value, scalar.value(this->stream)); - auto original_pointer = scalar.data(); - auto original_value = scalar.value(this->stream); + auto* original_pointer = scalar.data(); + auto original_value = scalar.value(this->stream); rmm::device_scalar moved_to{std::move(scalar)}; EXPECT_NE(nullptr, moved_to.data()); EXPECT_EQ(moved_to.data(), original_pointer); EXPECT_EQ(moved_to.value(this->stream), original_value); + // NOLINTNEXTLINE(bugprone-use-after-move,clang-analyzer-cplusplus.Move) EXPECT_EQ(nullptr, scalar.data()); } diff --git a/tests/device_uvector_tests.cpp b/tests/device_uvector_tests.cpp 
index 00ad66832..dce2cbbf6 100644 --- a/tests/device_uvector_tests.cpp +++ b/tests/device_uvector_tests.cpp @@ -65,8 +65,8 @@ TYPED_TEST(TypedUVectorTest, ResizeSmaller) { auto const original_size{12345}; rmm::device_uvector vec(original_size, this->stream()); - auto original_data = vec.data(); - auto original_begin = vec.begin(); + auto* original_data = vec.data(); + auto* original_begin = vec.begin(); auto smaller_size = vec.size() - 1; vec.resize(smaller_size, this->stream()); @@ -86,8 +86,8 @@ TYPED_TEST(TypedUVectorTest, ResizeLarger) { auto const original_size{12345}; rmm::device_uvector vec(original_size, this->stream()); - auto original_data = vec.data(); - auto original_begin = vec.begin(); + auto* original_data = vec.data(); + auto* original_begin = vec.begin(); auto larger_size = vec.size() + 1; vec.resize(larger_size, this->stream()); @@ -97,8 +97,8 @@ TYPED_TEST(TypedUVectorTest, ResizeLarger) EXPECT_EQ(vec.size(), larger_size); EXPECT_EQ(vec.capacity(), larger_size); - auto larger_data = vec.data(); - auto larger_begin = vec.begin(); + auto* larger_data = vec.data(); + auto* larger_begin = vec.begin(); // shrink_to_fit shouldn't have any effect vec.shrink_to_fit(this->stream()); @@ -127,7 +127,7 @@ TYPED_TEST(TypedUVectorTest, Release) auto const original_size{12345}; rmm::device_uvector vec(original_size, this->stream()); - auto original_data = vec.data(); + auto* original_data = vec.data(); rmm::device_buffer storage = vec.release(); From 3bfaa55184c145b14b57b765925c8c63bc008e00 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 15 Sep 2021 14:39:31 +1000 Subject: [PATCH 68/72] nodiscard --- include/rmm/device_scalar.hpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/include/rmm/device_scalar.hpp b/include/rmm/device_scalar.hpp index ff8461599..099abc08e 100644 --- a/include/rmm/device_scalar.hpp +++ b/include/rmm/device_scalar.hpp @@ -147,7 +147,10 @@ class device_scalar { * @return T The value of the scalar. * @param stream CUDA stream on which to perform the copy and synchronize. */ - value_type value(cuda_stream_view stream) const { return _storage.front_element(stream); } + [[nodiscard]] value_type value(cuda_stream_view stream) const + { + return _storage.front_element(stream); + } /** * @brief Sets the value of the `device_scalar` to the value of `v`. @@ -222,7 +225,7 @@ class device_scalar { * streams (e.g. using `cudaStreamWaitEvent()` or `cudaStreamSynchronize()`), otherwise there may * be a race condition. */ - pointer data() noexcept { return static_cast(_storage.data()); } + [[nodiscard]] pointer data() noexcept { return static_cast(_storage.data()); } /** * @brief Returns const pointer to object in device memory. @@ -232,7 +235,10 @@ class device_scalar { * streams (e.g. using `cudaStreamWaitEvent()` or `cudaStreamSynchronize()`), otherwise there may * be a race condition. 
*/ - const_pointer data() const noexcept { return static_cast(_storage.data()); } + [[nodiscard]] const_pointer data() const noexcept + { + return static_cast(_storage.data()); + } private: rmm::device_uvector _storage; From b7ea38c9ce312140f0eb1ef66828afe285b044ce Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 15 Sep 2021 14:39:47 +1000 Subject: [PATCH 69/72] nodiscard and nolint --- .../rmm/mr/device/logging_resource_adaptor.hpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/include/rmm/mr/device/logging_resource_adaptor.hpp b/include/rmm/mr/device/logging_resource_adaptor.hpp index 0bb707a6c..26448f887 100644 --- a/include/rmm/mr/device/logging_resource_adaptor.hpp +++ b/include/rmm/mr/device/logging_resource_adaptor.hpp @@ -123,7 +123,7 @@ class logging_resource_adaptor final : public device_memory_resource { * * @return Upstream* Pointer to the upstream resource. */ - Upstream* get_upstream() const noexcept { return upstream_; } + [[nodiscard]] Upstream* get_upstream() const noexcept { return upstream_; } /** * @brief Checks whether the upstream resource supports streams. @@ -162,12 +162,6 @@ class logging_resource_adaptor final : public device_memory_resource { } private: - // make_logging_adaptor needs access to private get_default_filename - template - friend logging_resource_adaptor make_logging_adaptor(T* upstream, - std::string const& filename, - bool auto_flush); - /** * @brief Return the value of the environment variable RMM_LOG_FILE. * @@ -273,6 +267,13 @@ class logging_resource_adaptor final : public device_memory_resource { return upstream_->get_mem_info(stream); } + // make_logging_adaptor needs access to private get_default_filename + template + // NOLINTNEXTLINE(readability-redundant-declaration) + friend logging_resource_adaptor make_logging_adaptor(T* upstream, + std::string const& filename, + bool auto_flush); + std::shared_ptr logger_; ///< spdlog logger object Upstream* upstream_; ///< The upstream resource used for satisfying From 098e08ad42e9762a4d6d8bf0ae598bd372126809 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 15 Sep 2021 14:40:21 +1000 Subject: [PATCH 70/72] cmake style --- cmake/thirdparty/get_gtest.cmake | 1 - 1 file changed, 1 deletion(-) diff --git a/cmake/thirdparty/get_gtest.cmake b/cmake/thirdparty/get_gtest.cmake index 8473fdbf2..a515b4e5a 100644 --- a/cmake/thirdparty/get_gtest.cmake +++ b/cmake/thirdparty/get_gtest.cmake @@ -12,7 +12,6 @@ # the License. # ============================================================================= - function(find_and_configure_gtest) include(${rapids-cmake-dir}/cpm/gtest.cmake) rapids_cpm_gtest() From c1f98725aad95006e009d6451999b5d9c4b025a6 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 15 Sep 2021 14:55:01 +1000 Subject: [PATCH 71/72] cmake docstring --- cmake/thirdparty/get_gtest.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/cmake/thirdparty/get_gtest.cmake b/cmake/thirdparty/get_gtest.cmake index a515b4e5a..4d4daff44 100644 --- a/cmake/thirdparty/get_gtest.cmake +++ b/cmake/thirdparty/get_gtest.cmake @@ -12,6 +12,7 @@ # the License. 
# ============================================================================= +# Use CPM to find or clone gtest function(find_and_configure_gtest) include(${rapids-cmake-dir}/cpm/gtest.cmake) rapids_cpm_gtest() From ce3b3edcbb5538a2e7a0f41497b6f367fa5a5ae5 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 16 Sep 2021 09:59:11 +1000 Subject: [PATCH 72/72] Fix device_uvector::set_element_async for non-fundamental types. --- include/rmm/device_uvector.hpp | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/include/rmm/device_uvector.hpp b/include/rmm/device_uvector.hpp index a52cb2ea8..b4b12c824 100644 --- a/include/rmm/device_uvector.hpp +++ b/include/rmm/device_uvector.hpp @@ -209,15 +209,22 @@ class device_uvector { { RMM_EXPECTS( element_index < size(), rmm::out_of_range, "Attempt to access out of bounds element."); + if constexpr (std::is_same::value) { RMM_CUDA_TRY( cudaMemsetAsync(element_ptr(element_index), value, sizeof(value), stream.value())); - } else if (std::is_fundamental::value and value == value_type{0}) { - set_element_to_zero_async(element_index, stream); - } else { - RMM_CUDA_TRY(cudaMemcpyAsync( - element_ptr(element_index), &value, sizeof(value), cudaMemcpyDefault, stream.value())); + return; + } + + if constexpr (std::is_fundamental::value) { + if (value == value_type{0}) { + set_element_to_zero_async(element_index, stream); + return; + } } + + RMM_CUDA_TRY(cudaMemcpyAsync( + element_ptr(element_index), &value, sizeof(value), cudaMemcpyDefault, stream.value())); } // We delete the r-value reference overload to prevent asynchronously copying from a literal or @@ -379,7 +386,7 @@ class device_uvector { * * @return The `device_buffer` used to store the vector elements */ - [[nodiscard]] device_buffer release() noexcept { return std::move(_storage); } + device_buffer release() noexcept { return std::move(_storage); } /** * @brief Returns the number of elements that can be held in currently allocated storage.
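
Note on the final patch: it works because `if constexpr` discards the untaken branch at compile time, so the `value == value_type{0}` comparison is never instantiated when `value_type` is not a fundamental type. With the previous run-time `else if`, that comparison had to compile for every element type, which broke element types lacking a suitable `operator==` or construction from `0`. Below is a minimal, host-only sketch of the same dispatch pattern; the `set_value` helper and `pair_t` type are illustrative stand-ins, not part of RMM.

#include <cstring>
#include <type_traits>

// Illustrative element type: no operator== and not constructible from 0.
struct pair_t {
  int first;
  int second;
};

// Same shape as the fixed device_uvector::set_element_async dispatch:
// the fundamental-only zero-check branch is discarded at compile time
// for other types, so pair_t never has to support `value == pair_t{0}`.
template <typename T>
void set_value(T* dst, T const& value)
{
  if constexpr (std::is_fundamental<T>::value) {
    if (value == T{0}) {
      std::memset(dst, 0, sizeof(T));  // stands in for set_element_to_zero_async
      return;
    }
  }
  std::memcpy(dst, &value, sizeof(T));  // stands in for cudaMemcpyAsync
}

int main()
{
  int scalar{};
  set_value(&scalar, 42);          // fundamental: zero check is evaluated at run time
  pair_t pair{};
  set_value(&pair, pair_t{1, 2});  // non-fundamental: zero check never instantiated
  return 0;
}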