diff --git a/cpp/include/raft/core/device_container_policy.hpp b/cpp/include/raft/core/device_container_policy.hpp index eef981e56f..011de307db 100644 --- a/cpp/include/raft/core/device_container_policy.hpp +++ b/cpp/include/raft/core/device_container_policy.hpp @@ -164,10 +164,19 @@ class device_uvector_policy { public: auto create(raft::resources const& res, size_t n) -> container_type { - return container_type(n, resource::get_cuda_stream(res), resource::get_workspace_resource(res)); + if (mr_ == nullptr) { + // NB: not using the workspace resource by default! + // The workspace resource is for short-lived temporary allocations. + return container_type(n, resource::get_cuda_stream(res)); + } else { + return container_type(n, resource::get_cuda_stream(res), mr_); + } } - device_uvector_policy() = default; + constexpr device_uvector_policy() = default; + constexpr explicit device_uvector_policy(rmm::mr::device_memory_resource* mr) noexcept : mr_(mr) + { + } [[nodiscard]] constexpr auto access(container_type& c, size_t n) const noexcept -> reference { @@ -181,6 +190,9 @@ class device_uvector_policy { [[nodiscard]] auto make_accessor_policy() noexcept { return accessor_policy{}; } [[nodiscard]] auto make_accessor_policy() const noexcept { return const_accessor_policy{}; } + + private: + rmm::mr::device_memory_resource* mr_{nullptr}; }; } // namespace raft diff --git a/cpp/include/raft/core/device_mdarray.hpp b/cpp/include/raft/core/device_mdarray.hpp index 68273db15c..fe543c97dd 100644 --- a/cpp/include/raft/core/device_mdarray.hpp +++ b/cpp/include/raft/core/device_mdarray.hpp @@ -112,7 +112,7 @@ auto make_device_mdarray(raft::resources const& handle, using mdarray_t = device_mdarray; typename mdarray_t::mapping_type layout{exts}; - typename mdarray_t::container_policy_type policy{}; + typename mdarray_t::container_policy_type policy{mr}; return mdarray_t{handle, layout, policy}; } diff --git a/cpp/include/raft/core/device_resources.hpp b/cpp/include/raft/core/device_resources.hpp index c620a688b9..cf06920a8c 100644 --- a/cpp/include/raft/core/device_resources.hpp +++ b/cpp/include/raft/core/device_resources.hpp @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -60,12 +61,12 @@ namespace raft { class device_resources : public resources { public: device_resources(const device_resources& handle, - rmm::mr::device_memory_resource* workspace_resource) + std::shared_ptr workspace_resource, + std::optional allocation_limit = std::nullopt) : resources{handle} { // replace the resource factory for the workspace_resources - resources::add_resource_factory( - std::make_shared(workspace_resource)); + resource::set_workspace_resource(*this, workspace_resource, allocation_limit); } device_resources(const device_resources& handle) : resources{handle} {} @@ -80,10 +81,13 @@ class device_resources : public resources { * @param[in] stream_pool the stream pool used (which has default of nullptr if unspecified) * @param[in] workspace_resource an optional resource used by some functions for allocating * temporary workspaces. + * @param[in] allocation_limit the total amount of memory in bytes available to the temporary + * workspace resources. 
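+   *
+   * A minimal usage sketch (the pool resource and the 1 GiB limit below are illustrative choices,
+   * not defaults):
+   * @code{.cpp}
+   *   auto pool = std::make_shared<rmm::mr::pool_memory_resource<rmm::mr::device_memory_resource>>(
+   *     rmm::mr::get_current_device_resource());
+   *   raft::device_resources res(rmm::cuda_stream_per_thread, nullptr, pool, size_t{1} << 30);
+   * @endcode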
*/ device_resources(rmm::cuda_stream_view stream_view = rmm::cuda_stream_per_thread, std::shared_ptr stream_pool = {nullptr}, - rmm::mr::device_memory_resource* workspace_resource = nullptr) + std::shared_ptr workspace_resource = {nullptr}, + std::optional allocation_limit = std::nullopt) : resources{} { resources::add_resource_factory(std::make_shared()); @@ -91,8 +95,9 @@ class device_resources : public resources { std::make_shared(stream_view)); resources::add_resource_factory( std::make_shared(stream_pool)); - resources::add_resource_factory( - std::make_shared(workspace_resource)); + if (workspace_resource) { + resource::set_workspace_resource(*this, workspace_resource, allocation_limit); + } } /** Destroys all held-up resources */ @@ -255,4 +260,4 @@ class stream_syncer { } // namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/core/handle.hpp b/cpp/include/raft/core/handle.hpp index 2a6b5657e2..124ab8c315 100644 --- a/cpp/include/raft/core/handle.hpp +++ b/cpp/include/raft/core/handle.hpp @@ -32,7 +32,8 @@ namespace raft { */ class handle_t : public raft::device_resources { public: - handle_t(const handle_t& handle, rmm::mr::device_memory_resource* workspace_resource) + handle_t(const handle_t& handle, + std::shared_ptr workspace_resource) : device_resources(handle, workspace_resource) { } @@ -51,9 +52,9 @@ class handle_t : public raft::device_resources { * @param[in] workspace_resource an optional resource used by some functions for allocating * temporary workspaces. */ - handle_t(rmm::cuda_stream_view stream_view = rmm::cuda_stream_per_thread, - std::shared_ptr stream_pool = {nullptr}, - rmm::mr::device_memory_resource* workspace_resource = nullptr) + handle_t(rmm::cuda_stream_view stream_view = rmm::cuda_stream_per_thread, + std::shared_ptr stream_pool = {nullptr}, + std::shared_ptr workspace_resource = {nullptr}) : device_resources{stream_view, stream_pool, workspace_resource} { } diff --git a/cpp/include/raft/core/resource/detail/device_memory_resource.hpp b/cpp/include/raft/core/resource/detail/device_memory_resource.hpp new file mode 100644 index 0000000000..9d3f13689d --- /dev/null +++ b/cpp/include/raft/core/resource/detail/device_memory_resource.hpp @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include + +#include +#include +#include + +namespace raft::resource::detail { + +/** + * Warn a user of the calling algorithm if they use the default non-pooled memory allocator, + * as it may hurt the performance. + * + * This helper function is designed to produce the warning once for a given `user_name`. + * + * @param[in] res + * @param[in] user_name the name of the algorithm or any other identification. 
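+ *
+ * Intended call pattern (the algorithm name below is just an example):
+ * @code{.cpp}
+ *   raft::resource::detail::warn_non_pool_workspace(res, "raft::ivf_pq::search");
+ * @endcode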
+ * + */ +inline void warn_non_pool_workspace(resources const& res, std::string user_name) +{ + // Detect if the plain cuda memory resource is used for the workspace + if (rmm::mr::cuda_memory_resource{}.is_equal(*get_workspace_resource(res)->get_upstream())) { + static std::set notified_names{}; + static std::mutex mutex{}; + std::lock_guard guard(mutex); + auto [it, inserted] = notified_names.insert(std::move(user_name)); + if (inserted) { + RAFT_LOG_WARN( + "[%s] the default cuda resource is used for the raft workspace allocations. This may lead " + "to a significant slowdown for this algorithm. Consider using the default pool resource " + "(`raft::resource::set_workspace_to_pool_resource`) or set your own resource explicitly " + "(`raft::resource::set_workspace_resource`).", + it->c_str()); + } + } +} + +} // namespace raft::resource::detail diff --git a/cpp/include/raft/core/resource/device_memory_resource.hpp b/cpp/include/raft/core/resource/device_memory_resource.hpp index ebc41e0f8e..af684df747 100644 --- a/cpp/include/raft/core/resource/device_memory_resource.hpp +++ b/cpp/include/raft/core/resource/device_memory_resource.hpp @@ -15,24 +15,49 @@ */ #pragma once +#include #include #include +#include + #include +#include #include +#include + +#include +#include namespace raft::resource { -class device_memory_resource : public resource { +class limiting_memory_resource : public resource { public: - device_memory_resource(rmm::mr::device_memory_resource* mr_ = nullptr) : mr(mr_) + limiting_memory_resource(std::shared_ptr mr, + std::size_t allocation_limit, + std::optional alignment) + : upstream_(mr), mr_(make_adaptor(mr, allocation_limit, alignment)) { - if (mr_ == nullptr) { mr = rmm::mr::get_current_device_resource(); } } - void* get_resource() override { return mr; } - ~device_memory_resource() override {} + auto get_resource() -> void* override { return &mr_; } + + ~limiting_memory_resource() override = default; private: - rmm::mr::device_memory_resource* mr; + std::shared_ptr upstream_; + rmm::mr::limiting_resource_adaptor mr_; + + static inline auto make_adaptor(std::shared_ptr upstream, + std::size_t limit, + std::optional alignment) + -> rmm::mr::limiting_resource_adaptor + { + auto p = upstream.get(); + if (alignment.has_value()) { + return rmm::mr::limiting_resource_adaptor(p, limit, alignment.value()); + } else { + return rmm::mr::limiting_resource_adaptor(p, limit); + } + } }; /** @@ -41,36 +66,173 @@ class device_memory_resource : public resource { */ class workspace_resource_factory : public resource_factory { public: - workspace_resource_factory(rmm::mr::device_memory_resource* mr_ = nullptr) : mr(mr_) {} - resource_type get_resource_type() override { return resource_type::WORKSPACE_RESOURCE; } - resource* make_resource() override { return new device_memory_resource(mr); } + explicit workspace_resource_factory( + std::shared_ptr mr = {nullptr}, + std::optional allocation_limit = std::nullopt, + std::optional alignment = std::nullopt) + : allocation_limit_(allocation_limit.value_or(default_allocation_limit())), + alignment_(alignment), + mr_(mr ? mr : default_plain_resource()) + { + } + + auto get_resource_type() -> resource_type override { return resource_type::WORKSPACE_RESOURCE; } + auto make_resource() -> resource* override + { + return new limiting_memory_resource(mr_, allocation_limit_, alignment_); + } + + /** Construct a sensible default pool memory resource. 
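+   *
+   * A sketch of the intended use (the 4 GiB limit is an arbitrary example); this is roughly what
+   * `set_workspace_to_pool_resource` does for you:
+   * @code{.cpp}
+   *   auto limit = size_t{4} << 30;
+   *   raft::resource::set_workspace_resource(
+   *     res, workspace_resource_factory::default_pool_resource(limit), limit);
+   * @endcode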
*/ + static inline auto default_pool_resource(std::size_t limit) + -> std::shared_ptr + { + // Set the default granularity to 1 GiB + constexpr std::size_t kOneGb = 1024lu * 1024lu * 1024lu; + // The initial size of the pool. The choice of this value only affects the performance a little + // bit. Heuristics: + // 1) the pool shouldn't be too big from the beginning independently of the limit; + // 2) otherwise, set it to half the max size to avoid too many resize calls. + auto min_size = std::min(kOneGb, limit / 2lu); + // The pool is going to be placed behind the limiting resource adaptor. This means the user won't + // be able to allocate more than 'limit' bytes of memory anyway. At the same time, the pool + // itself may consume a little bit more memory than the 'limit' due to memory fragmentation. + // Therefore, we look for a compromise, such that: + // 1) 'limit' is accurate - the user should be more likely to run into the limiting + // resource adaptor bad_alloc error than into the pool bad_alloc error. + // 2) The pool doesn't grab too much memory on top of the 'limit'. + auto max_size = std::min(limit + kOneGb / 2lu, limit * 3lu / 2lu); + auto upstream = rmm::mr::get_current_device_resource(); + RAFT_LOG_DEBUG( + "Setting the workspace pool resource; memory limit = %zu, initial pool size = %zu, max pool " + "size = %zu.", + limit, + min_size, + max_size); + return std::make_shared>( + upstream, min_size, max_size); + } + + /** + * Get the global memory resource wrapped into an unmanaged shared_ptr (with no deleter). + * + * Note: the lifetime of the underlying `rmm::mr::get_current_device_resource()` is managed + * somewhere else, since it's passed by a raw pointer. Hence, this shared_ptr wrapper is not + * allowed to delete the pointer on destruction. + */ + static inline auto default_plain_resource() -> std::shared_ptr + { + return std::shared_ptr{rmm::mr::get_current_device_resource(), + void_op{}}; + } private: - rmm::mr::device_memory_resource* mr; + std::size_t allocation_limit_; + std::optional alignment_; + std::shared_ptr mr_; + + static inline auto default_allocation_limit() -> std::size_t + { + std::size_t free_size{}; + std::size_t total_size{}; + RAFT_CUDA_TRY(cudaMemGetInfo(&free_size, &total_size)); + // Note, the workspace does not claim all this memory from the start, so it's still usable by + // the main resource as well. + // This limit is merely a hint for algorithm internals to plan the batching accordingly. + return total_size / 2; + } }; /** * Load a temp workspace resource from a resources instance (and populate it on the res * if needed). + * * @param res raft resources object for managing resources * @return device memory resource object */ -inline rmm::mr::device_memory_resource* get_workspace_resource(resources const& res) +inline auto get_workspace_resource(resources const& res) + -> rmm::mr::limiting_resource_adaptor* { if (!res.has_resource_factory(resource_type::WORKSPACE_RESOURCE)) { res.add_resource_factory(std::make_shared()); } - return res.get_resource(resource_type::WORKSPACE_RESOURCE); + return res.get_resource>( + resource_type::WORKSPACE_RESOURCE); +}; + +/** Get the total size of the workspace resource. */ +inline auto get_workspace_total_bytes(resources const& res) -> size_t +{ + return get_workspace_resource(res)->get_allocation_limit(); +}; + +/** Get the already allocated size of the workspace resource.
*/ +inline auto get_workspace_used_bytes(resources const& res) -> size_t +{ + return get_workspace_resource(res)->get_allocated_bytes(); +}; + +/** Get the available size of the workspace resource. */ +inline auto get_workspace_free_bytes(resources const& res) -> size_t +{ + const auto* p = get_workspace_resource(res); + return p->get_allocation_limit() - p->get_allocated_bytes(); +}; + +/** + * Set a temporary workspace resource on a resources instance. + * + * @param res raft resources object for managing resources + * @param mr an optional RMM device_memory_resource + * @param allocation_limit + * the total amount of memory in bytes available to the temporary workspace resources. + * @param alignment optional alignment requirements passed to RMM allocations + * + */ +inline void set_workspace_resource(resources const& res, + std::shared_ptr mr = {nullptr}, + std::optional allocation_limit = std::nullopt, + std::optional alignment = std::nullopt) +{ + res.add_resource_factory( + std::make_shared(mr, allocation_limit, alignment)); +}; + +/** + * Set the temporary workspace resource to a pool on top of the global memory resource + * (`rmm::mr::get_current_device_resource()`). + * + * @param res raft resources object for managing resources + * @param allocation_limit + * the total amount of memory in bytes available to the temporary workspace resources; + * if not provided, the last used or default limit is used. + * + */ +inline void set_workspace_to_pool_resource( + resources const& res, std::optional allocation_limit = std::nullopt) +{ + if (!allocation_limit.has_value()) { allocation_limit = get_workspace_total_bytes(res); } + res.add_resource_factory(std::make_shared( + workspace_resource_factory::default_pool_resource(*allocation_limit), + allocation_limit, + std::nullopt)); }; /** - * Set a temp workspace resource on a resources instance. + * Set the temporary workspace resource to be the same as the global memory resource + * (`rmm::mr::get_current_device_resource()`). + * + * Note, the workspace resource is always limited; the limit here defines how much of the global + * memory resource can be consumed by the workspace allocations. * * @param res raft resources object for managing resources - * @param mr a valid rmm device_memory_resource + * @param allocation_limit + * the total amount of memory in bytes available to the temporary workspace resources.
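+ *
+ * For example (the 2 GiB cap is an illustrative value):
+ * @code{.cpp}
+ *   // Workspace allocations go straight to the global resource, but are capped at 2 GiB,
+ *   // so algorithm internals can plan batches via raft::resource::get_workspace_free_bytes(res).
+ *   raft::resource::set_workspace_to_global_resource(res, size_t{2} << 30);
+ * @endcode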
*/ -inline void set_workspace_resource(resources const& res, rmm::mr::device_memory_resource* mr) +inline void set_workspace_to_global_resource( + resources const& res, std::optional allocation_limit = std::nullopt) { - res.add_resource_factory(std::make_shared(mr)); + res.add_resource_factory(std::make_shared( + workspace_resource_factory::default_plain_resource(), allocation_limit, std::nullopt)); }; + } // namespace raft::resource diff --git a/cpp/include/raft/core/resources.hpp b/cpp/include/raft/core/resources.hpp index e0f51b61b4..d5bd176d50 100644 --- a/cpp/include/raft/core/resources.hpp +++ b/cpp/include/raft/core/resources.hpp @@ -95,6 +95,11 @@ class resources { RAFT_EXPECTS(rtype != resource::resource_type::LAST_KEY, "LAST_KEY is a placeholder and not a valid resource factory type."); factories_.at(rtype) = std::make_pair(rtype, factory); + // Clear the corresponding resource, so that on next `get_resource` the new factory is used + if (resources_.at(rtype).first != resource::resource_type::LAST_KEY) { + resources_.at(rtype) = std::make_pair(resource::resource_type::LAST_KEY, + std::make_shared()); + } } /** diff --git a/cpp/include/raft/neighbors/detail/ivf_flat_serialize.cuh b/cpp/include/raft/neighbors/detail/ivf_flat_serialize.cuh index b00d308827..61a6046273 100644 --- a/cpp/include/raft/neighbors/detail/ivf_flat_serialize.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_flat_serialize.cuh @@ -45,7 +45,7 @@ struct check_index_layout { "paste in the new size and consider updating the serialization logic"); }; -template struct check_index_layout), 296>; +template struct check_index_layout), 328>; /** * Save the index to file. diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh index 4a54d33a02..199cb74fbe 100644 --- a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -346,10 +347,10 @@ void train_per_subset(raft::resources const& handle, const float* trainset, // [n_rows, dim] const uint32_t* labels, // [n_rows] uint32_t kmeans_n_iters, - rmm::mr::device_memory_resource* managed_memory, - rmm::mr::device_memory_resource* device_memory) + rmm::mr::device_memory_resource* managed_memory) { - auto stream = resource::get_cuda_stream(handle); + auto stream = resource::get_cuda_stream(handle); + auto device_memory = resource::get_workspace_resource(handle); rmm::device_uvector pq_centers_tmp(index.pq_centers().size(), stream, device_memory); rmm::device_uvector sub_trainset(n_rows * size_t(index.pq_len()), stream, device_memory); @@ -392,10 +393,6 @@ void train_per_subset(raft::resources const& handle, index.pq_len(), stream); - // clone the handle and attached the device memory resource to it - const resources new_handle(handle); - resource::set_workspace_resource(new_handle, device_memory); - // train PQ codebook for this subspace auto sub_trainset_view = raft::make_device_matrix_view(sub_trainset.data(), n_rows, index.pq_len()); @@ -409,7 +406,7 @@ void train_per_subset(raft::resources const& handle, raft::cluster::kmeans_balanced_params kmeans_params; kmeans_params.n_iters = kmeans_n_iters; kmeans_params.metric = raft::distance::DistanceType::L2Expanded; - raft::cluster::kmeans_balanced::helpers::build_clusters(new_handle, + raft::cluster::kmeans_balanced::helpers::build_clusters(handle, kmeans_params, sub_trainset_view, centers_tmp_view, @@ -427,10 +424,10 @@ void 
train_per_cluster(raft::resources const& handle, const float* trainset, // [n_rows, dim] const uint32_t* labels, // [n_rows] uint32_t kmeans_n_iters, - rmm::mr::device_memory_resource* managed_memory, - rmm::mr::device_memory_resource* device_memory) + rmm::mr::device_memory_resource* managed_memory) { - auto stream = resource::get_cuda_stream(handle); + auto stream = resource::get_cuda_stream(handle); + auto device_memory = resource::get_workspace_resource(handle); rmm::device_uvector pq_centers_tmp(index.pq_centers().size(), stream, device_memory); rmm::device_uvector cluster_sizes(index.n_lists(), stream, managed_memory); @@ -474,10 +471,6 @@ void train_per_cluster(raft::resources const& handle, indices + cluster_offsets[l], device_memory); - // clone the handle and attached the device memory resource to it - const resources new_handle(handle); - resource::set_workspace_resource(new_handle, device_memory); - // limit the cluster size to bound the training time. // [sic] we interpret the data as pq_len-dimensional size_t big_enough = 256ul * std::max(index.pq_book_size(), index.pq_dim()); @@ -498,7 +491,7 @@ void train_per_cluster(raft::resources const& handle, raft::cluster::kmeans_balanced_params kmeans_params; kmeans_params.n_iters = kmeans_n_iters; kmeans_params.metric = raft::distance::DistanceType::L2Expanded; - raft::cluster::kmeans_balanced::helpers::build_clusters(new_handle, + raft::cluster::kmeans_balanced::helpers::build_clusters(handle, kmeans_params, rot_vectors_view, centers_tmp_view, @@ -1325,6 +1318,8 @@ void extend(raft::resources const& handle, { common::nvtx::range fun_scope( "ivf_pq::extend(%zu, %u)", size_t(n_rows), index->dim()); + + resource::detail::warn_non_pool_workspace(handle, "raft::ivf_pq::extend"); auto stream = resource::get_cuda_stream(handle); const auto n_clusters = index->n_lists(); @@ -1523,6 +1518,7 @@ auto build(raft::resources const& handle, { common::nvtx::range fun_scope( "ivf_pq::build(%zu, %u)", size_t(n_rows), dim); + resource::detail::warn_non_pool_workspace(handle, "raft::ivf_pq::build"); static_assert(std::is_same_v || std::is_same_v || std::is_same_v, "Unsupported data type"); @@ -1543,24 +1539,18 @@ auto build(raft::resources const& handle, size_t(n_rows) / std::max(params.kmeans_trainset_fraction * n_rows, index.n_lists())); size_t n_rows_train = n_rows / trainset_ratio; - rmm::mr::device_memory_resource* device_memory = nullptr; - auto pool_guard = raft::get_pool_memory_resource(device_memory, 1024 * 1024); - if (pool_guard) { RAFT_LOG_DEBUG("ivf_pq::build: using pool memory resource"); } - + auto* device_memory = resource::get_workspace_resource(handle); rmm::mr::managed_memory_resource managed_memory_upstream; rmm::mr::pool_memory_resource managed_memory( &managed_memory_upstream, 1024 * 1024); // If the trainset is small enough to comfortably fit into device memory, put it there. // Otherwise, use the managed memory. 
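+  // Heuristic: assume the scratch space needed alongside the float trainset is a small multiple
+  // of the raw trainset size (the exact factor below is a judgement call, not a precise bound).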
+ constexpr size_t kTolerableRatio = 4; rmm::mr::device_memory_resource* big_memory_resource = &managed_memory; - { - size_t free_mem, total_mem; - constexpr size_t kTolerableRatio = 4; - RAFT_CUDA_TRY(cudaMemGetInfo(&free_mem, &total_mem)); - if (sizeof(float) * n_rows_train * index.dim() * kTolerableRatio < free_mem) { - big_memory_resource = device_memory; - } + if (sizeof(float) * n_rows_train * index.dim() * kTolerableRatio < + resource::get_workspace_free_bytes(handle)) { + big_memory_resource = device_memory; } // Besides just sampling, we transform the input dataset into floats to make it easier @@ -1709,8 +1699,7 @@ auto build(raft::resources const& handle, trainset.data(), labels.data(), params.kmeans_n_iters, - &managed_memory, - device_memory); + &managed_memory); break; case codebook_gen::PER_CLUSTER: train_per_cluster(handle, @@ -1719,8 +1708,7 @@ auto build(raft::resources const& handle, trainset.data(), labels.data(), params.kmeans_n_iters, - &managed_memory, - device_memory); + &managed_memory); break; default: RAFT_FAIL("Unreachable code"); } diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_search.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_search.cuh index 82b1ac1542..298083d1e5 100644 --- a/cpp/include/raft/neighbors/detail/ivf_pq_search.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_pq_search.cuh @@ -31,6 +31,8 @@ #include #include #include +#include +#include #include #include #include @@ -429,10 +431,10 @@ void ivfpq_search_worker(raft::resources const& handle, float* distances, // [n_queries, topK] float scaling_factor, double preferred_shmem_carveout, - IvfSampleFilterT sample_filter, - rmm::mr::device_memory_resource* mr) + IvfSampleFilterT sample_filter) { auto stream = resource::get_cuda_stream(handle); + auto mr = resource::get_workspace_resource(handle); bool manage_local_topk = is_local_topk_feasible(topK, n_probes, n_queries); auto topk_len = manage_local_topk ? n_probes * topK : max_samples; @@ -677,6 +679,7 @@ struct ivfpq_search { * A heuristic for bounding the number of queries per batch, to improve GPU utilization. * (based on the number of SMs and the work size). * + * @param res is used to query the workspace size * @param k top-k * @param n_probes number of selected clusters per query * @param n_queries number of queries hoped to be processed at once. @@ -685,7 +688,8 @@ struct ivfpq_search { * * @return maximum recommended batch size. */ -inline auto get_max_batch_size(uint32_t k, +inline auto get_max_batch_size(raft::resources const& res, + uint32_t k, uint32_t n_probes, uint32_t n_queries, uint32_t max_samples) -> uint32_t @@ -704,11 +708,11 @@ inline auto get_max_batch_size(uint32_t k, auto ws_size = [k, n_probes, max_samples](uint32_t bs) -> uint64_t { return uint64_t(is_local_topk_feasible(k, n_probes, bs) ? k * n_probes : max_samples) * bs; }; - constexpr uint64_t kMaxWsSize = 1024 * 1024 * 1024; - if (ws_size(max_batch_size) > kMaxWsSize) { + auto max_ws_size = resource::get_workspace_free_bytes(res); + if (ws_size(max_batch_size) > max_ws_size) { uint32_t smaller_batch_size = bound_by_power_of_two(max_batch_size); // gradually reduce the batch size until we fit into the max size limit. 
- while (smaller_batch_size > 1 && ws_size(smaller_batch_size) > kMaxWsSize) { + while (smaller_batch_size > 1 && ws_size(smaller_batch_size) > max_ws_size) { smaller_batch_size >>= 1; } return smaller_batch_size; @@ -728,8 +732,7 @@ inline void search(raft::resources const& handle, uint32_t k, IdxT* neighbors, float* distances, - rmm::mr::device_memory_resource* mr = nullptr, - IvfSampleFilterT sample_filter = IvfSampleFilterT()) + IvfSampleFilterT sample_filter = IvfSampleFilterT()) { static_assert(std::is_same_v || std::is_same_v || std::is_same_v, "Unsupported element type."); @@ -739,6 +742,7 @@ inline void search(raft::resources const& handle, params.n_probes, k, index.dim()); + resource::detail::warn_non_pool_workspace(handle, "raft::ivf_pq::search"); RAFT_EXPECTS( params.internal_distance_dtype == CUDA_R_16F || params.internal_distance_dtype == CUDA_R_32F, @@ -775,15 +779,11 @@ inline void search(raft::resources const& handle, max_samples = ms; } - auto pool_guard = raft::get_pool_memory_resource(mr, n_queries * n_probes * k * 16); - if (pool_guard) { - RAFT_LOG_DEBUG("ivf_pq::search: using pool memory resource with initial size %zu bytes", - n_queries * n_probes * k * 16ull); - } + auto mr = resource::get_workspace_resource(handle); // Maximum number of query vectors to search at the same time. const auto max_queries = std::min(std::max(n_queries, 1), 4096); - auto max_batch_size = get_max_batch_size(k, n_probes, max_queries, max_samples); + auto max_batch_size = get_max_batch_size(handle, k, n_probes, max_queries, max_samples); rmm::device_uvector float_queries(max_queries * dim_ext, stream, mr); rmm::device_uvector rot_queries(max_queries * index.rot_dim(), stream, mr); @@ -845,8 +845,7 @@ inline void search(raft::resources const& handle, distances + uint64_t(k) * (offset_q + offset_b), utils::config::kDivisor / utils::config::kDivisor, params.preferred_shmem_carveout, - sample_filter, - mr); + sample_filter); } } } diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_serialize.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_serialize.cuh index ff5bd8ef89..f01035cad3 100644 --- a/cpp/include/raft/neighbors/detail/ivf_pq_serialize.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_pq_serialize.cuh @@ -48,7 +48,7 @@ struct check_index_layout { }; // TODO: Recompute this and come back to it. 
-template struct check_index_layout), 448>; +template struct check_index_layout), 480>; /** * Write the index to an output stream diff --git a/cpp/include/raft/neighbors/ivf_pq-ext.cuh b/cpp/include/raft/neighbors/ivf_pq-ext.cuh index 1595f55d8c..fcfe837e2d 100644 --- a/cpp/include/raft/neighbors/ivf_pq-ext.cuh +++ b/cpp/include/raft/neighbors/ivf_pq-ext.cuh @@ -92,8 +92,7 @@ void search_with_filtering(raft::resources const& handle, uint32_t k, IdxT* neighbors, float* distances, - rmm::mr::device_memory_resource* mr = nullptr, - IvfSampleFilterT sample_filter = IvfSampleFilterT()) RAFT_EXPLICIT; + IvfSampleFilterT sample_filter = IvfSampleFilterT{}) RAFT_EXPLICIT; template void search(raft::resources const& handle, @@ -103,8 +102,34 @@ void search(raft::resources const& handle, uint32_t n_queries, uint32_t k, IdxT* neighbors, - float* distances, - rmm::mr::device_memory_resource* mr = nullptr) RAFT_EXPLICIT; + float* distances) RAFT_EXPLICIT; + +template +[[deprecated( + "Drop the `mr` argument and use `raft::resource::set_workspace_resource` instead")]] void +search_with_filtering(raft::resources const& handle, + const raft::neighbors::ivf_pq::search_params& params, + const index& idx, + const T* queries, + uint32_t n_queries, + uint32_t k, + IdxT* neighbors, + float* distances, + rmm::mr::device_memory_resource* mr, + IvfSampleFilterT sample_filter = IvfSampleFilterT{}) RAFT_EXPLICIT; + +template +[[deprecated( + "Drop the `mr` argument and use `raft::resource::set_workspace_resource` instead")]] void +search(raft::resources const& handle, + const raft::neighbors::ivf_pq::search_params& params, + const index& idx, + const T* queries, + uint32_t n_queries, + uint32_t k, + IdxT* neighbors, + float* distances, + rmm::mr::device_memory_resource* mr) RAFT_EXPLICIT; } // namespace raft::neighbors::ivf_pq @@ -182,7 +207,17 @@ instantiate_raft_neighbors_ivf_pq_extend(uint8_t, int64_t); uint32_t k, \ IdxT* neighbors, \ float* distances, \ - rmm::mr::device_memory_resource* mr) + rmm::mr::device_memory_resource* mr); \ + \ + extern template void raft::neighbors::ivf_pq::search( \ + raft::resources const& handle, \ + const raft::neighbors::ivf_pq::search_params& params, \ + const raft::neighbors::ivf_pq::index& idx, \ + const T* queries, \ + uint32_t n_queries, \ + uint32_t k, \ + IdxT* neighbors, \ + float* distances) instantiate_raft_neighbors_ivf_pq_search(float, int64_t); instantiate_raft_neighbors_ivf_pq_search(int8_t, int64_t); diff --git a/cpp/include/raft/neighbors/ivf_pq-inl.cuh b/cpp/include/raft/neighbors/ivf_pq-inl.cuh index ad9d95f790..ccf8717486 100644 --- a/cpp/include/raft/neighbors/ivf_pq-inl.cuh +++ b/cpp/include/raft/neighbors/ivf_pq-inl.cuh @@ -16,17 +16,18 @@ #pragma once -#include #include #include #include #include #include +#include #include -#include -#include +#include + +#include // shared_ptr namespace raft::neighbors::ivf_pq { @@ -165,7 +166,7 @@ void search_with_filtering(raft::resources const& handle, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, - IvfSampleFilterT sample_filter = IvfSampleFilterT()) + IvfSampleFilterT sample_filter = IvfSampleFilterT{}) { RAFT_EXPECTS( queries.extent(0) == neighbors.extent(0) && queries.extent(0) == distances.extent(0), @@ -186,7 +187,6 @@ void search_with_filtering(raft::resources const& handle, k, neighbors.data_handle(), distances.data_handle(), - resource::get_workspace_resource(handle), sample_filter); } @@ -229,7 +229,7 @@ void search(raft::resources const& handle, 
queries, neighbors, distances, - raft::neighbors::filtering::none_ivf_sample_filter()); + raft::neighbors::filtering::none_ivf_sample_filter{}); } /** @} */ // end group ivf_pq @@ -353,20 +353,17 @@ void extend(raft::resources const& handle, * eliminate entirely allocations happening within `search`: * @code{.cpp} * ... - * // Create a pooling memory resource with a pre-defined initial size. - * rmm::mr::pool_memory_resource mr( - * rmm::mr::get_current_device_resource(), 1024 * 1024); * // use default search parameters * ivf_pq::search_params search_params; * filtering::none_ivf_sample_filter filter; * // Use the same allocator across multiple searches to reduce the number of * // cuda memory allocations * ivf_pq::search_with_filtering( - * handle, search_params, index, queries1, N1, K, out_inds1, out_dists1, &mr, filter); + * handle, search_params, index, queries1, N1, K, out_inds1, out_dists1, filter); * ivf_pq::search_with_filtering( - * handle, search_params, index, queries2, N2, K, out_inds2, out_dists2, &mr, filter); + * handle, search_params, index, queries2, N2, K, out_inds2, out_dists2, filter); * ivf_pq::search_with_filtering( - * handle, search_params, index, queries3, N3, K, out_inds3, out_dists3, &mr, filter); + * handle, search_params, index, queries3, N3, K, out_inds3, out_dists3, filter); * ... * @endcode * The exact size of the temporary buffer depends on multiple factors and is an implementation * @@ -385,8 +382,6 @@ void extend(raft::resources const& handle, * @param[out] neighbors a device pointer to the indices of the neighbors in the source dataset * [n_queries, k] * @param[out] distances a device pointer to the distances to the selected neighbors [n_queries, k] - * @param[in] mr an optional memory resource to use across the searches (you can provide a large - * enough memory pool here to avoid memory allocations within search). * @param[in] sample_filter a filter the greenlights samples for a given query */ template @@ -398,11 +393,41 @@ void search_with_filtering(raft::resources const& handle, uint32_t k, IdxT* neighbors, float* distances, - rmm::mr::device_memory_resource* mr = nullptr, - IvfSampleFilterT sample_filter = IvfSampleFilterT()) + IvfSampleFilterT sample_filter = IvfSampleFilterT{}) { - detail::search( - handle, params, idx, queries, n_queries, k, neighbors, distances, mr, sample_filter); + detail::search(handle, params, idx, queries, n_queries, k, neighbors, distances, sample_filter); +} + +/** + * This function is deprecated and will be removed in a future release. + * Please drop the `mr` argument and use `raft::resource::set_workspace_resource` instead.
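+ *
+ * A migration sketch (assuming `mr` outlives the handle; `raft::void_op` serves as a no-op
+ * deleter so the shared_ptr does not take ownership of `mr`):
+ * @code{.cpp}
+ *   raft::resource::set_workspace_resource(
+ *     handle, std::shared_ptr<rmm::mr::device_memory_resource>{mr, raft::void_op{}});
+ *   ivf_pq::search_with_filtering(
+ *     handle, params, idx, queries, n_queries, k, neighbors, distances, sample_filter);
+ * @endcode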
*/ +template +[[deprecated( + "Drop the `mr` argument and use `raft::resource::set_workspace_resource` instead")]] void +search_with_filtering(raft::resources const& handle, + const search_params& params, + const index& idx, + const T* queries, + uint32_t n_queries, + uint32_t k, + IdxT* neighbors, + float* distances, + rmm::mr::device_memory_resource* mr, + IvfSampleFilterT sample_filter = IvfSampleFilterT{}) +{ + if (mr != nullptr) { + // Shallow copy of the resource with the automatic lifespan: + // change the workspace resource temporarily + raft::resources res_local(handle); + resource::set_workspace_resource( + res_local, std::shared_ptr{mr, void_op{}}); + return search_with_filtering( + res_local, params, idx, queries, n_queries, k, neighbors, distances, sample_filter); + } else { + return search_with_filtering( + handle, params, idx, queries, n_queries, k, neighbors, distances, sample_filter); + } } /** @@ -444,8 +469,6 @@ void search_with_filtering(raft::resources const& handle, * @param[out] neighbors a device pointer to the indices of the neighbors in the source dataset * [n_queries, k] * @param[out] distances a device pointer to the distances to the selected neighbors [n_queries, k] - * @param[in] mr an optional memory resource to use across the searches (you can provide a large - * enough memory pool here to avoid memory allocations within search). */ template void search(raft::resources const& handle, @@ -455,10 +478,46 @@ void search(raft::resources const& handle, uint32_t n_queries, uint32_t k, IdxT* neighbors, - float* distances, - rmm::mr::device_memory_resource* mr = nullptr) + float* distances) +{ + return search_with_filtering(handle, + params, + idx, + queries, + n_queries, + k, + neighbors, + distances, + raft::neighbors::filtering::none_ivf_sample_filter{}); +} + +/** + * This function is deprecated and will be removed in a future release. + * Please drop the `mr` argument and use `raft::resource::set_workspace_resource` instead.
*/ +template +[[deprecated( + "Drop the `mr` argument and use `raft::resource::set_workspace_resource` instead")]] void +search(raft::resources const& handle, + const search_params& params, + const index& idx, + const T* queries, + uint32_t n_queries, + uint32_t k, + IdxT* neighbors, + float* distances, + rmm::mr::device_memory_resource* mr) { - detail::search(handle, params, idx, queries, n_queries, k, neighbors, distances, mr); + return search_with_filtering(handle, + params, + idx, + queries, + n_queries, + k, + neighbors, + distances, + mr, + raft::neighbors::filtering::none_ivf_sample_filter{}); } } // namespace raft::neighbors::ivf_pq diff --git a/cpp/test/core/handle.cpp b/cpp/test/core/handle.cpp index 8c5e023df3..a1ad4385a7 100644 --- a/cpp/test/core/handle.cpp +++ b/cpp/test/core/handle.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -274,39 +275,61 @@ TEST(Raft, WorkspaceResource) { raft::handle_t handle; - ASSERT_TRUE(dynamic_cast*>( - resource::get_workspace_resource(handle)) == nullptr); - ASSERT_EQ(rmm::mr::get_current_device_resource(), resource::get_workspace_resource(handle)); + // The returned resource is always a limiting adaptor + auto* orig_mr = resource::get_workspace_resource(handle)->get_upstream(); - auto pool_mr = new rmm::mr::pool_memory_resource(rmm::mr::get_current_device_resource()); - std::shared_ptr pool = {nullptr}; - raft::handle_t handle2(rmm::cuda_stream_per_thread, pool, pool_mr); + // Let's create a pooled resource + auto pool_mr = std::shared_ptr{ + new rmm::mr::pool_memory_resource(rmm::mr::get_current_device_resource())}; - ASSERT_TRUE(dynamic_cast*>( - resource::get_workspace_resource(handle2)) != nullptr); - ASSERT_EQ(pool_mr, resource::get_workspace_resource(handle2)); + // A tiny workspace of 1MB + size_t max_size = 1024 * 1024; - delete pool_mr; -} - -TEST(Raft, WorkspaceResourceCopy) -{ - auto stream_pool = std::make_shared(10); + // Replace the resource + resource::set_workspace_resource(handle, pool_mr, max_size); + auto new_mr = resource::get_workspace_resource(handle); - handle_t handle(rmm::cuda_stream_per_thread, stream_pool); + // By this point, the orig_mr likely points to a non-existent resource; don't dereference! + ASSERT_NE(orig_mr, new_mr); + ASSERT_EQ(pool_mr.get(), new_mr->get_upstream()); + // We can safely reset pool_mr, because the shared_ptr to the pool memory stays in the resource + pool_mr.reset(); - auto pool_mr = new rmm::mr::pool_memory_resource(rmm::mr::get_current_device_resource()); + auto stream = resource::get_cuda_stream(handle); + rmm::device_buffer buf(max_size / 2, stream, new_mr); - handle_t copied_handle(handle, pool_mr); + // Note, the underlying pool allocator likely uses more space than reported here + ASSERT_EQ(max_size, resource::get_workspace_total_bytes(handle)); + ASSERT_EQ(buf.size(), resource::get_workspace_used_bytes(handle)); + ASSERT_EQ(max_size - buf.size(), resource::get_workspace_free_bytes(handle)); - assert_handles_equal(handle, copied_handle); + // this should throw, because we partially used the space.
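+  // (it is the limiting adaptor that rejects the request; the pool underneath could still grow)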
+ ASSERT_THROW((rmm::device_buffer{max_size, stream, new_mr}), rmm::bad_alloc); +} - // Assert the workspace_resources are what we expect - ASSERT_TRUE(dynamic_cast*>( - resource::get_workspace_resource(handle)) == nullptr); +TEST(Raft, WorkspaceResourceCopy) +{ + raft::handle_t res; + auto orig_mr = resource::get_workspace_resource(res); + auto orig_size = resource::get_workspace_total_bytes(res); - ASSERT_TRUE(dynamic_cast*>( - resource::get_workspace_resource(copied_handle)) != nullptr); + { + // create a new handle in the inner scope and update the workspace resource for it. + raft::resources tmp_res(res); + resource::set_workspace_resource( + tmp_res, + std::shared_ptr{ + new rmm::mr::pool_memory_resource(rmm::mr::get_current_device_resource())}, + orig_size * 2); + + ASSERT_EQ(orig_mr, resource::get_workspace_resource(res)); + ASSERT_EQ(orig_size, resource::get_workspace_total_bytes(res)); + + ASSERT_NE(orig_mr, resource::get_workspace_resource(tmp_res)); + ASSERT_NE(orig_size, resource::get_workspace_total_bytes(tmp_res)); + } + ASSERT_EQ(orig_mr, resource::get_workspace_resource(res)); + ASSERT_EQ(orig_size, resource::get_workspace_total_bytes(res)); } TEST(Raft, HandleCopy) diff --git a/cpp/test/util/device_atomics.cu b/cpp/test/util/device_atomics.cu index 355cb0d4dd..56f798b617 100644 --- a/cpp/test/util/device_atomics.cu +++ b/cpp/test/util/device_atomics.cu @@ -60,7 +60,7 @@ TEST(Raft, AtomicIncWarp) // Check that count is correct and that each thread index is contained in the // array exactly once. - ASSERT_EQ(num_elts, counter.value(s)); + ASSERT_EQ(num_elts, counter.value(s)); // NB: accessing the counter synchronizes `s` std::sort(out_host.begin(), out_host.end()); for (int i = 0; i < num_elts; ++i) { ASSERT_EQ(i, out_host[i]);