From f7ba6ab47ac994e6a1363119c01eee5dd6304181 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Tue, 11 Jun 2024 17:47:19 -0700 Subject: [PATCH] Pinned vector factory that uses the global pool (#15895) closes https://github.com/rapidsai/cudf/issues/15612 Expanded the set of vector factories to cover pinned vectors. The functions return `cudf::detail::host_vector`, which uses a type-erased allocator, allowing us to utilize the runtime-configurable global pinned (previously host) resource. The `pinned_host_vector` type has been removed as it can only support non-pooled pinned allocations. Its use is now replaced with `cudf::detail::host_vector`. Moved the global host (now pinned) resource out of cuIO and changed the type to host_device. User-specified resources are now required to allocate device-accessible memory. The name has been changed to pinned to reflect the new requirement. Authors: - Vukasin Milovanovic (https://github.com/vuule) Approvers: - Alessandro Bellina (https://github.com/abellina) - Yunsong Wang (https://github.com/PointKernel) - Mark Harris (https://github.com/harrism) - David Wendt (https://github.com/davidwendt) URL: https://github.com/rapidsai/cudf/pull/15895 --- cpp/CMakeLists.txt | 1 + cpp/benchmarks/fixture/nvbench_fixture.hpp | 13 +- cpp/benchmarks/io/cuio_common.cpp | 12 + cpp/benchmarks/io/cuio_common.hpp | 4 +- .../io/parquet/parquet_reader_multithread.cpp | 2 +- cpp/benchmarks/io/text/multibyte_split.cpp | 10 +- .../{rmm_host_vector.hpp => host_vector.hpp} | 18 +- .../detail/utilities/pinned_host_vector.hpp | 216 ------------------ .../detail/utilities/vector_factories.hpp | 38 ++- cpp/include/cudf/io/memory_resource.hpp | 65 ------ cpp/include/cudf/utilities/pinned_memory.hpp | 58 +++++ cpp/src/io/csv/reader_impl.cu | 1 + cpp/src/io/orc/reader_impl_chunking.cu | 1 + cpp/src/io/orc/writer_impl.cu | 5 +- cpp/src/io/parquet/reader_impl_helpers.cpp | 2 + cpp/src/io/parquet/writer_impl.cu | 3 +- cpp/src/io/text/bgzip_data_chunk_source.cu | 16 +- .../io/text/data_chunk_source_factories.cpp | 51 ++--- cpp/src/io/utilities/config_utils.cpp | 214 +---------------- cpp/src/io/utilities/hostdevice_vector.hpp | 9 +- cpp/src/utilities/pinned_memory.cpp | 216 ++++++++++++++++++ cpp/tests/CMakeLists.txt | 5 +- cpp/tests/io/json_test.cpp | 6 +- .../utilities_tests/io_utilities_tests.cpp | 45 ---- .../utilities_tests/pinned_memory_tests.cpp | 65 ++++++ .../java/ai/rapids/cudf/PinnedMemoryPool.java | 12 +- java/src/main/java/ai/rapids/cudf/Rmm.java | 2 +- java/src/main/native/src/RmmJni.cpp | 34 +-- 28 files changed, 487 insertions(+), 637 deletions(-) rename cpp/include/cudf/detail/utilities/{rmm_host_vector.hpp => host_vector.hpp} (93%) delete mode 100644 cpp/include/cudf/detail/utilities/pinned_host_vector.hpp delete mode 100644 cpp/include/cudf/io/memory_resource.hpp create mode 100644 cpp/include/cudf/utilities/pinned_memory.hpp create mode 100644 cpp/src/utilities/pinned_memory.cpp create mode 100644 cpp/tests/utilities_tests/pinned_memory_tests.cpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index ca85996b990..aab0a9b2d49 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -664,6 +664,7 @@ add_library( src/utilities/default_stream.cpp src/utilities/linked_column.cpp src/utilities/logger.cpp + src/utilities/pinned_memory.cpp src/utilities/stacktrace.cpp src/utilities/stream_pool.cpp src/utilities/traits.cpp diff --git a/cpp/benchmarks/fixture/nvbench_fixture.hpp 
b/cpp/benchmarks/fixture/nvbench_fixture.hpp index ebcbcb17e98..df1492690bb 100644 --- a/cpp/benchmarks/fixture/nvbench_fixture.hpp +++ b/cpp/benchmarks/fixture/nvbench_fixture.hpp @@ -15,8 +15,8 @@ */ #pragma once -#include #include +#include #include #include @@ -81,17 +81,18 @@ struct nvbench_base_fixture { "\nExpecting: cuda, pool, async, arena, managed, or managed_pool"); } - inline rmm::host_async_resource_ref make_cuio_host_pinned() + inline rmm::host_device_async_resource_ref make_cuio_host_pinned() { static std::shared_ptr mr = std::make_shared(); return *mr; } - inline rmm::host_async_resource_ref create_cuio_host_memory_resource(std::string const& mode) + inline rmm::host_device_async_resource_ref create_cuio_host_memory_resource( + std::string const& mode) { if (mode == "pinned") return make_cuio_host_pinned(); - if (mode == "pinned_pool") return cudf::io::get_host_memory_resource(); + if (mode == "pinned_pool") return cudf::get_pinned_memory_resource(); CUDF_FAIL("Unknown cuio_host_mem parameter: " + mode + "\nExpecting: pinned or pinned_pool"); } @@ -112,14 +113,14 @@ struct nvbench_base_fixture { rmm::mr::set_current_device_resource(mr.get()); std::cout << "RMM memory resource = " << rmm_mode << "\n"; - cudf::io::set_host_memory_resource(create_cuio_host_memory_resource(cuio_host_mode)); + cudf::set_pinned_memory_resource(create_cuio_host_memory_resource(cuio_host_mode)); std::cout << "CUIO host memory resource = " << cuio_host_mode << "\n"; } ~nvbench_base_fixture() { // Ensure the the pool is freed before the CUDA context is destroyed: - cudf::io::set_host_memory_resource(this->make_cuio_host_pinned()); + cudf::set_pinned_memory_resource(this->make_cuio_host_pinned()); } std::shared_ptr mr; diff --git a/cpp/benchmarks/io/cuio_common.cpp b/cpp/benchmarks/io/cuio_common.cpp index 37ced8ea703..645994f3f0d 100644 --- a/cpp/benchmarks/io/cuio_common.cpp +++ b/cpp/benchmarks/io/cuio_common.cpp @@ -19,6 +19,9 @@ #include #include +#include +#include + #include #include @@ -28,6 +31,14 @@ temp_directory const cuio_source_sink_pair::tmpdir{"cudf_gbench"}; +// Don't use cudf's pinned pool for the source data +rmm::host_async_resource_ref pinned_memory_resource() +{ + static rmm::mr::pinned_host_memory_resource mr = rmm::mr::pinned_host_memory_resource{}; + + return mr; +} + std::string random_file_in_dir(std::string const& dir_path) { // `mkstemp` modifies the template in place @@ -41,6 +52,7 @@ std::string random_file_in_dir(std::string const& dir_path) cuio_source_sink_pair::cuio_source_sink_pair(io_type type) : type{type}, + pinned_buffer({pinned_memory_resource(), cudf::get_default_stream()}), d_buffer{0, cudf::get_default_stream()}, file_name{random_file_in_dir(tmpdir.path())}, void_sink{cudf::io::data_sink::create()} diff --git a/cpp/benchmarks/io/cuio_common.hpp b/cpp/benchmarks/io/cuio_common.hpp index d4f39a5f243..64d6021cf50 100644 --- a/cpp/benchmarks/io/cuio_common.hpp +++ b/cpp/benchmarks/io/cuio_common.hpp @@ -18,7 +18,7 @@ #include -#include +#include #include #include @@ -79,7 +79,7 @@ class cuio_source_sink_pair { io_type const type; std::vector h_buffer; - cudf::detail::pinned_host_vector pinned_buffer; + cudf::detail::host_vector pinned_buffer; rmm::device_uvector d_buffer; std::string const file_name; std::unique_ptr void_sink; diff --git a/cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp b/cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp index a67d1932951..b4c8ed78ed8 100644 --- a/cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp +++ 
b/cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp @@ -20,9 +20,9 @@ #include #include -#include #include #include +#include #include #include diff --git a/cpp/benchmarks/io/text/multibyte_split.cpp b/cpp/benchmarks/io/text/multibyte_split.cpp index b5d855d8881..67705863d41 100644 --- a/cpp/benchmarks/io/text/multibyte_split.cpp +++ b/cpp/benchmarks/io/text/multibyte_split.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,7 +22,6 @@ #include #include -#include #include #include #include @@ -132,9 +131,10 @@ static void bench_multibyte_split(nvbench::state& state, auto const delim_factor = static_cast(delim_percent) / 100; std::unique_ptr datasource; - auto device_input = create_random_input(file_size_approx, delim_factor, 0.05, delim); - auto host_input = std::vector{}; - auto host_pinned_input = cudf::detail::pinned_host_vector{}; + auto device_input = create_random_input(file_size_approx, delim_factor, 0.05, delim); + auto host_input = std::vector{}; + auto host_pinned_input = + cudf::detail::make_pinned_vector_async(0, cudf::get_default_stream()); if (source_type != data_chunk_source_type::device && source_type != data_chunk_source_type::host_pinned) { diff --git a/cpp/include/cudf/detail/utilities/rmm_host_vector.hpp b/cpp/include/cudf/detail/utilities/host_vector.hpp similarity index 93% rename from cpp/include/cudf/detail/utilities/rmm_host_vector.hpp rename to cpp/include/cudf/detail/utilities/host_vector.hpp index 6901a19473e..6a115177ab5 100644 --- a/cpp/include/cudf/detail/utilities/rmm_host_vector.hpp +++ b/cpp/include/cudf/detail/utilities/host_vector.hpp @@ -19,6 +19,7 @@ #include #include +#include #include #include @@ -32,8 +33,6 @@ namespace cudf::detail { /*! \p rmm_host_allocator is a CUDA-specific host memory allocator * that employs \c a `rmm::host_async_resource_ref` for allocation. * - * This implementation is ported from pinned_host_vector in cudf. - * * \see https://en.cppreference.com/w/cpp/memory/allocator */ template @@ -42,8 +41,6 @@ class rmm_host_allocator; /*! \p rmm_host_allocator is a CUDA-specific host memory allocator * that employs \c an `cudf::host_async_resource_ref` for allocation. * - * This implementation is ported from pinned_host_vector in cudf. - * * \see https://en.cppreference.com/w/cpp/memory/allocator */ template <> @@ -70,8 +67,7 @@ class rmm_host_allocator { * The \p rmm_host_allocator provides an interface for host memory allocation through the user * provided \c `rmm::host_async_resource_ref`. The \p rmm_host_allocator does not take ownership of * this reference and therefore it is the user's responsibility to ensure its lifetime for the - * duration of the lifetime of the \p rmm_host_allocator. This implementation is ported from - * pinned_host_vector in cudf. + * duration of the lifetime of the \p rmm_host_allocator. 
* * \see https://en.cppreference.com/w/cpp/memory/allocator */ @@ -121,8 +117,12 @@ class rmm_host_allocator { inline pointer allocate(size_type cnt) { if (cnt > this->max_size()) { throw std::bad_alloc(); } // end if - return static_cast( - mr.allocate_async(cnt * sizeof(value_type), rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream)); + auto const result = + mr.allocate_async(cnt * sizeof(value_type), rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream); + // Synchronize to ensure the memory is allocated before thrust::host_vector initialization + // TODO: replace thrust::host_vector with a type that does not require synchronization + stream.synchronize(); + return static_cast(result); } /** @@ -182,6 +182,6 @@ class rmm_host_allocator { * @brief A vector class with rmm host memory allocator */ template -using rmm_host_vector = thrust::host_vector>; +using host_vector = thrust::host_vector>; } // namespace cudf::detail diff --git a/cpp/include/cudf/detail/utilities/pinned_host_vector.hpp b/cpp/include/cudf/detail/utilities/pinned_host_vector.hpp deleted file mode 100644 index c22b6a6ba15..00000000000 --- a/cpp/include/cudf/detail/utilities/pinned_host_vector.hpp +++ /dev/null @@ -1,216 +0,0 @@ -/* - * Copyright (c) 2008-2024, NVIDIA CORPORATION - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -#include - -#include -#include -#include // for bad_alloc - -namespace cudf::detail { - -/*! \p pinned_allocator is a CUDA-specific host memory allocator - * that employs \c cudaMallocHost for allocation. - * - * This implementation is ported from the experimental/pinned_allocator - * that Thrust used to provide. - * - * \see https://en.cppreference.com/w/cpp/memory/allocator - */ -template -class pinned_allocator; - -/*! \p pinned_allocator is a CUDA-specific host memory allocator - * that employs \c cudaMallocHost for allocation. - * - * This implementation is ported from the experimental/pinned_allocator - * that Thrust used to provide. - * - * \see https://en.cppreference.com/w/cpp/memory/allocator - */ -template <> -class pinned_allocator { - public: - using value_type = void; ///< The type of the elements in the allocator - using pointer = void*; ///< The type returned by address() / allocate() - using const_pointer = void const*; ///< The type returned by address() - using size_type = std::size_t; ///< The type used for the size of the allocation - using difference_type = std::ptrdiff_t; ///< The type of the distance between two pointers - - /** - * @brief converts a `pinned_allocator` to `pinned_allocator` - */ - template - struct rebind { - using other = pinned_allocator; ///< The rebound type - }; -}; - -/*! \p pinned_allocator is a CUDA-specific host memory allocator - * that employs \c cudaMallocHost for allocation. - * - * This implementation is ported from the experimental/pinned_allocator - * that Thrust used to provide. 
- * - * \see https://en.cppreference.com/w/cpp/memory/allocator - */ -template -class pinned_allocator { - public: - using value_type = T; ///< The type of the elements in the allocator - using pointer = T*; ///< The type returned by address() / allocate() - using const_pointer = T const*; ///< The type returned by address() - using reference = T&; ///< The parameter type for address() - using const_reference = T const&; ///< The parameter type for address() - using size_type = std::size_t; ///< The type used for the size of the allocation - using difference_type = std::ptrdiff_t; ///< The type of the distance between two pointers - - /** - * @brief converts a `pinned_allocator` to `pinned_allocator` - */ - template - struct rebind { - using other = pinned_allocator; ///< The rebound type - }; - - /** - * @brief pinned_allocator's null constructor does nothing. - */ - __host__ __device__ inline pinned_allocator() {} - - /** - * @brief pinned_allocator's null destructor does nothing. - */ - __host__ __device__ inline ~pinned_allocator() {} - - /** - * @brief pinned_allocator's copy constructor does nothing. - */ - __host__ __device__ inline pinned_allocator(pinned_allocator const&) {} - - /** - * @brief pinned_allocator's copy constructor does nothing. - * - * This version of pinned_allocator's copy constructor - * is templated on the \c value_type of the pinned_allocator - * to copy from. It is provided merely for convenience; it - * does nothing. - */ - template - __host__ __device__ inline pinned_allocator(pinned_allocator const&) - { - } - - /** - * @brief This method returns the address of a \c reference of - * interest. - * - * @param r The \c reference of interest. - * @return \c r's address. - */ - __host__ __device__ inline pointer address(reference r) { return &r; } - - /** - * @brief This method returns the address of a \c const_reference - * of interest. - * - * @param r The \c const_reference of interest. - * @return \c r's address. - */ - __host__ __device__ inline const_pointer address(const_reference r) { return &r; } - - /** - * @brief This method allocates storage for objects in pinned host - * memory. - * - * @param cnt The number of objects to allocate. - * @return a \c pointer to the newly allocated objects. - * @note The second parameter to this function is meant as a - * hint pointer to a nearby memory location, but is - * not used by this allocator. - * @note This method does not invoke \p value_type's constructor. - * It is the responsibility of the caller to initialize the - * objects at the returned \c pointer. - */ - __host__ inline pointer allocate(size_type cnt, const_pointer /*hint*/ = 0) - { - if (cnt > this->max_size()) { throw std::bad_alloc(); } // end if - - pointer result(0); - CUDF_CUDA_TRY(cudaMallocHost(reinterpret_cast(&result), cnt * sizeof(value_type))); - return result; - } - - /** - * @brief This method deallocates pinned host memory previously allocated - * with this \c pinned_allocator. - * - * @param p A \c pointer to the previously allocated memory. - * @note The second parameter is the number of objects previously allocated - * but is ignored by this allocator. - * @note This method does not invoke \p value_type's destructor. - * It is the responsibility of the caller to destroy - * the objects stored at \p p. 
- */ - __host__ inline void deallocate(pointer p, size_type /*cnt*/) - { - auto dealloc_worked = cudaFreeHost(p); - (void)dealloc_worked; - assert(dealloc_worked == cudaSuccess); - } - - /** - * @brief This method returns the maximum size of the \c cnt parameter - * accepted by the \p allocate() method. - * - * @return The maximum number of objects that may be allocated - * by a single call to \p allocate(). - */ - inline size_type max_size() const { return (std::numeric_limits::max)() / sizeof(T); } - - /** - * @brief This method tests this \p pinned_allocator for equality to - * another. - * - * @param x The other \p pinned_allocator of interest. - * @return This method always returns \c true. - */ - __host__ __device__ inline bool operator==(pinned_allocator const& x) const { return true; } - - /** - * @brief This method tests this \p pinned_allocator for inequality - * to another. - * - * @param x The other \p pinned_allocator of interest. - * @return This method always returns \c false. - */ - __host__ __device__ inline bool operator!=(pinned_allocator const& x) const - { - return !operator==(x); - } -}; - -/** - * @brief A vector class with pinned host memory allocator - */ -template -using pinned_host_vector = thrust::host_vector>; - -} // namespace cudf::detail diff --git a/cpp/include/cudf/detail/utilities/vector_factories.hpp b/cpp/include/cudf/detail/utilities/vector_factories.hpp index 293a4096c57..20cb55bb1c7 100644 --- a/cpp/include/cudf/detail/utilities/vector_factories.hpp +++ b/cpp/include/cudf/detail/utilities/vector_factories.hpp @@ -21,8 +21,10 @@ * @file vector_factories.hpp */ +#include #include #include +#include #include #include @@ -380,7 +382,7 @@ thrust::host_vector make_host_vector_async(device_span v, rmm::cuda_ * @brief Asynchronously construct a `std::vector` containing a copy of data from a device * container * - * @note This function synchronizes `stream`. + * @note This function does not synchronize `stream`. * * @tparam Container The type of the container to copy from * @tparam T The type of the data to copy @@ -439,6 +441,40 @@ thrust::host_vector make_host_vector_sync( return make_host_vector_sync(device_span{c}, stream); } +/** + * @brief Asynchronously construct a pinned `cudf::detail::host_vector` of the given size + * + * @note This function may not synchronize `stream`. + * + * @tparam T The type of the vector data + * @param size The number of elements in the created vector + * @param stream The stream on which to allocate memory + * @return A host_vector of the given size + */ +template +host_vector make_pinned_vector_async(size_t size, rmm::cuda_stream_view stream) +{ + return host_vector(size, {cudf::get_pinned_memory_resource(), stream}); +} + +/** + * @brief Synchronously construct a pinned `cudf::detail::host_vector` of the given size + * + * @note This function synchronizes `stream`. 
+ * + * @tparam T The type of the vector data + * @param size The number of elements in the created vector + * @param stream The stream on which to allocate memory + * @return A host_vector of the given size + */ +template +host_vector make_pinned_vector_sync(size_t size, rmm::cuda_stream_view stream) +{ + auto result = make_pinned_vector_async(size, stream); + stream.synchronize(); + return result; +} + } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/io/memory_resource.hpp b/cpp/include/cudf/io/memory_resource.hpp deleted file mode 100644 index a36e220ae7b..00000000000 --- a/cpp/include/cudf/io/memory_resource.hpp +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -#include - -namespace cudf::io { - -/** - * @brief Set the rmm resource to be used for host memory allocations by - * cudf::detail::hostdevice_vector - * - * hostdevice_vector is a utility class that uses a pair of host and device-side buffers for - * bouncing state between the cpu and the gpu. The resource set with this function (typically a - * pinned memory allocator) is what it uses to allocate space for it's host-side buffer. - * - * @param mr The rmm resource to be used for host-side allocations - * @return The previous resource that was in use - */ -rmm::host_async_resource_ref set_host_memory_resource(rmm::host_async_resource_ref mr); - -/** - * @brief Get the rmm resource being used for host memory allocations by - * cudf::detail::hostdevice_vector - * - * @return The rmm resource used for host-side allocations - */ -rmm::host_async_resource_ref get_host_memory_resource(); - -/** - * @brief Options to configure the default host memory resource - */ -struct host_mr_options { - std::optional pool_size; ///< The size of the pool to use for the default host memory - ///< resource. If not set, the default pool size is used. -}; - -/** - * @brief Configure the size of the default host memory resource. - * - * @throws cudf::logic_error if called after the default host memory resource has been created - * - * @param opts Options to configure the default host memory resource - * @return True if this call successfully configured the host memory resource, false if a - * a resource was already configured. - */ -bool config_default_host_memory_resource(host_mr_options const& opts); - -} // namespace cudf::io diff --git a/cpp/include/cudf/utilities/pinned_memory.hpp b/cpp/include/cudf/utilities/pinned_memory.hpp new file mode 100644 index 00000000000..b423eab6d38 --- /dev/null +++ b/cpp/include/cudf/utilities/pinned_memory.hpp @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include + +namespace cudf { + +/** + * @brief Set the rmm resource to be used for pinned memory allocations. + * + * @param mr The rmm resource to be used for pinned allocations + * @return The previous resource that was in use + */ +rmm::host_device_async_resource_ref set_pinned_memory_resource( + rmm::host_device_async_resource_ref mr); + +/** + * @brief Get the rmm resource being used for pinned memory allocations. + * + * @return The rmm resource used for pinned allocations + */ +rmm::host_device_async_resource_ref get_pinned_memory_resource(); + +/** + * @brief Options to configure the default pinned memory resource + */ +struct pinned_mr_options { + std::optional pool_size; ///< The size of the pool to use for the default pinned memory + ///< resource. If not set, the default pool size is used. +}; + +/** + * @brief Configure the size of the default pinned memory resource. + * + * @param opts Options to configure the default pinned memory resource + * @return True if this call successfully configured the pinned memory resource, false if a + * a resource was already configured. + */ +bool config_default_pinned_memory_resource(pinned_mr_options const& opts); + +} // namespace cudf diff --git a/cpp/src/io/csv/reader_impl.cu b/cpp/src/io/csv/reader_impl.cu index 5dee0c17a33..05faded651d 100644 --- a/cpp/src/io/csv/reader_impl.cu +++ b/cpp/src/io/csv/reader_impl.cu @@ -27,6 +27,7 @@ #include "io/utilities/parsing_utils.cuh" #include +#include #include #include #include diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 5034aa14a95..43301826003 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -22,6 +22,7 @@ #include #include +#include #include #include diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index 344e216cdc8..e9e031a407a 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include @@ -2339,7 +2338,7 @@ auto convert_table_to_orc_data(table_view const& input, std::move(streams), std::move(stripes), std::move(stripe_dicts.views), - cudf::detail::pinned_host_vector()}; + cudf::detail::make_pinned_vector_async(0, stream)}; } // Allocate intermediate output stream buffer @@ -2407,7 +2406,7 @@ auto convert_table_to_orc_data(table_view const& input, return max_stream_size; }(); - cudf::detail::pinned_host_vector bounce_buffer(max_out_stream_size); + auto bounce_buffer = cudf::detail::make_pinned_vector_async(max_out_stream_size, stream); auto intermediate_stats = gather_statistic_blobs(stats_freq, orc_table, segmentation, stream); diff --git a/cpp/src/io/parquet/reader_impl_helpers.cpp b/cpp/src/io/parquet/reader_impl_helpers.cpp index eb653c6b9ac..9de8a9e2719 100644 --- a/cpp/src/io/parquet/reader_impl_helpers.cpp +++ b/cpp/src/io/parquet/reader_impl_helpers.cpp @@ -23,6 +23,8 @@ #include "ipc/Message_generated.h" #include "ipc/Schema_generated.h" +#include + #include #include diff --git 
a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 1dfced94f5b..6d466748c17 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -36,7 +36,6 @@ #include #include #include -#include #include #include #include @@ -2278,7 +2277,7 @@ auto convert_table_to_parquet_data(table_input_metadata& table_meta, } auto bounce_buffer = - cudf::detail::pinned_host_vector(all_device_write ? 0 : max_write_size); + cudf::detail::make_pinned_vector_async(all_device_write ? 0 : max_write_size, stream); return std::tuple{std::move(agg_meta), std::move(pages), diff --git a/cpp/src/io/text/bgzip_data_chunk_source.cu b/cpp/src/io/text/bgzip_data_chunk_source.cu index faa09e586ab..0e3ce779089 100644 --- a/cpp/src/io/text/bgzip_data_chunk_source.cu +++ b/cpp/src/io/text/bgzip_data_chunk_source.cu @@ -19,8 +19,9 @@ #include "io/utilities/config_utils.hpp" #include +#include #include -#include +#include #include #include #include @@ -66,7 +67,7 @@ struct bgzip_nvcomp_transform_functor { class bgzip_data_chunk_reader : public data_chunk_reader { private: template - static void copy_to_device(cudf::detail::pinned_host_vector const& host, + static void copy_to_device(cudf::detail::host_vector const& host, rmm::device_uvector& device, rmm::cuda_stream_view stream) { @@ -84,9 +85,9 @@ class bgzip_data_chunk_reader : public data_chunk_reader { 1 << 16; // 64k offset allocation, resized on demand cudaEvent_t event; - cudf::detail::pinned_host_vector h_compressed_blocks; - cudf::detail::pinned_host_vector h_compressed_offsets; - cudf::detail::pinned_host_vector h_decompressed_offsets; + cudf::detail::host_vector h_compressed_blocks; + cudf::detail::host_vector h_compressed_offsets; + cudf::detail::host_vector h_decompressed_offsets; rmm::device_uvector d_compressed_blocks; rmm::device_uvector d_decompressed_blocks; rmm::device_uvector d_compressed_offsets; @@ -103,7 +104,10 @@ class bgzip_data_chunk_reader : public data_chunk_reader { bool is_decompressed{}; decompression_blocks(rmm::cuda_stream_view init_stream) - : d_compressed_blocks(0, init_stream), + : h_compressed_blocks{cudf::detail::make_pinned_vector_async(0, init_stream)}, + h_compressed_offsets{cudf::detail::make_pinned_vector_async(0, init_stream)}, + h_decompressed_offsets{cudf::detail::make_pinned_vector_async(0, init_stream)}, + d_compressed_blocks(0, init_stream), d_decompressed_blocks(0, init_stream), d_compressed_offsets(0, init_stream), d_decompressed_offsets(0, init_stream), diff --git a/cpp/src/io/text/data_chunk_source_factories.cpp b/cpp/src/io/text/data_chunk_source_factories.cpp index 9d1d0498ace..596ca3458c8 100644 --- a/cpp/src/io/text/data_chunk_source_factories.cpp +++ b/cpp/src/io/text/data_chunk_source_factories.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,10 +14,12 @@ * limitations under the License. */ +#include "cudf/utilities/default_stream.hpp" #include "io/text/device_data_chunks.hpp" #include -#include +#include +#include #include #include @@ -31,8 +33,15 @@ namespace cudf::io::text { namespace { struct host_ticket { - cudaEvent_t event; - cudf::detail::pinned_host_vector buffer; + cudaEvent_t event{}; // tracks the completion of the last device-to-host copy. 
+ cudf::detail::host_vector buffer; + + host_ticket() : buffer{cudf::detail::make_pinned_vector_sync(0, cudf::get_default_stream())} + { + cudaEventCreate(&event); + } + + ~host_ticket() { cudaEventDestroy(event); } }; /** @@ -43,20 +52,7 @@ class datasource_chunk_reader : public data_chunk_reader { constexpr static int num_tickets = 2; public: - datasource_chunk_reader(datasource* source) : _source(source) - { - // create an event to track the completion of the last device-to-host copy. - for (auto& ticket : _tickets) { - CUDF_CUDA_TRY(cudaEventCreate(&(ticket.event))); - } - } - - ~datasource_chunk_reader() override - { - for (auto& ticket : _tickets) { - CUDF_CUDA_TRY(cudaEventDestroy(ticket.event)); - } - } + datasource_chunk_reader(datasource* source) : _source(source) {} void skip_bytes(std::size_t size) override { @@ -84,7 +80,9 @@ class datasource_chunk_reader : public data_chunk_reader { CUDF_CUDA_TRY(cudaEventSynchronize(h_ticket.event)); // resize the host buffer as necessary to contain the requested number of bytes - if (h_ticket.buffer.size() < read_size) { h_ticket.buffer.resize(read_size); } + if (h_ticket.buffer.size() < read_size) { + h_ticket.buffer = cudf::detail::make_pinned_vector_sync(read_size, stream); + } _source->host_read(_offset, read_size, reinterpret_cast(h_ticket.buffer.data())); @@ -120,17 +118,6 @@ class istream_data_chunk_reader : public data_chunk_reader { istream_data_chunk_reader(std::unique_ptr datastream) : _datastream(std::move(datastream)) { - // create an event to track the completion of the last device-to-host copy. - for (auto& ticket : _tickets) { - CUDF_CUDA_TRY(cudaEventCreate(&(ticket.event))); - } - } - - ~istream_data_chunk_reader() override - { - for (auto& ticket : _tickets) { - CUDF_CUDA_TRY(cudaEventDestroy(ticket.event)); - } } void skip_bytes(std::size_t size) override { _datastream->ignore(size); }; @@ -148,7 +135,9 @@ class istream_data_chunk_reader : public data_chunk_reader { CUDF_CUDA_TRY(cudaEventSynchronize(h_ticket.event)); // resize the host buffer as necessary to contain the requested number of bytes - if (h_ticket.buffer.size() < read_size) { h_ticket.buffer.resize(read_size); } + if (h_ticket.buffer.size() < read_size) { + h_ticket.buffer = cudf::detail::make_pinned_vector_sync(read_size, stream); + } // read data from the host istream in to the pinned host memory buffer _datastream->read(h_ticket.buffer.data(), read_size); diff --git a/cpp/src/io/utilities/config_utils.cpp b/cpp/src/io/utilities/config_utils.cpp index dad1135e766..20ac89b4d53 100644 --- a/cpp/src/io/utilities/config_utils.cpp +++ b/cpp/src/io/utilities/config_utils.cpp @@ -16,22 +16,12 @@ #include "config_utils.hpp" -#include -#include #include -#include - -#include -#include -#include -#include #include #include -namespace cudf::io { - -namespace detail { +namespace cudf::io::detail { namespace cufile_integration { @@ -90,204 +80,4 @@ bool is_stable_enabled() { return is_all_enabled() or get_env_policy() == usage_ } // namespace nvcomp_integration -} // namespace detail - -namespace { -class fixed_pinned_pool_memory_resource { - using upstream_mr = rmm::mr::pinned_host_memory_resource; - using host_pooled_mr = rmm::mr::pool_memory_resource; - - private: - upstream_mr upstream_mr_{}; - size_t pool_size_{0}; - // Raw pointer to avoid a segfault when the pool is destroyed on exit - host_pooled_mr* pool_{nullptr}; - void* pool_begin_{nullptr}; - void* pool_end_{nullptr}; - cuda::stream_ref 
stream_{cudf::detail::global_cuda_stream_pool().get_stream().value()}; - - public: - fixed_pinned_pool_memory_resource(size_t size) - : pool_size_{size}, pool_{new host_pooled_mr(upstream_mr_, size, size)} - { - if (pool_size_ == 0) { return; } - - // Allocate full size from the pinned pool to figure out the beginning and end address - pool_begin_ = pool_->allocate_async(pool_size_, stream_); - pool_end_ = static_cast(static_cast(pool_begin_) + pool_size_); - pool_->deallocate_async(pool_begin_, pool_size_, stream_); - } - - void* do_allocate_async(std::size_t bytes, std::size_t alignment, cuda::stream_ref stream) - { - if (bytes <= pool_size_) { - try { - return pool_->allocate_async(bytes, alignment, stream); - } catch (...) { - // If the pool is exhausted, fall back to the upstream memory resource - } - } - - return upstream_mr_.allocate_async(bytes, alignment, stream); - } - - void do_deallocate_async(void* ptr, - std::size_t bytes, - std::size_t alignment, - cuda::stream_ref stream) noexcept - { - if (bytes <= pool_size_ && ptr >= pool_begin_ && ptr <= pool_end_) { - pool_->deallocate_async(ptr, bytes, alignment, stream); - } else { - upstream_mr_.deallocate_async(ptr, bytes, alignment, stream); - } - } - - void* allocate_async(std::size_t bytes, cuda::stream_ref stream) - { - return do_allocate_async(bytes, rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream); - } - - void* allocate_async(std::size_t bytes, std::size_t alignment, cuda::stream_ref stream) - { - return do_allocate_async(bytes, alignment, stream); - } - - void* allocate(std::size_t bytes, std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) - { - auto const result = do_allocate_async(bytes, alignment, stream_); - stream_.wait(); - return result; - } - - void deallocate_async(void* ptr, std::size_t bytes, cuda::stream_ref stream) noexcept - { - return do_deallocate_async(ptr, bytes, rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream); - } - - void deallocate_async(void* ptr, - std::size_t bytes, - std::size_t alignment, - cuda::stream_ref stream) noexcept - { - return do_deallocate_async(ptr, bytes, alignment, stream); - } - - void deallocate(void* ptr, - std::size_t bytes, - std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) noexcept - { - deallocate_async(ptr, bytes, alignment, stream_); - stream_.wait(); - } - - bool operator==(fixed_pinned_pool_memory_resource const& other) const - { - return pool_ == other.pool_ and stream_ == other.stream_; - } - - bool operator!=(fixed_pinned_pool_memory_resource const& other) const - { - return !operator==(other); - } - - [[maybe_unused]] friend void get_property(fixed_pinned_pool_memory_resource const&, - cuda::mr::device_accessible) noexcept - { - } - - [[maybe_unused]] friend void get_property(fixed_pinned_pool_memory_resource const&, - cuda::mr::host_accessible) noexcept - { - } -}; - -static_assert(cuda::mr::resource_with, - ""); - -} // namespace - -CUDF_EXPORT rmm::host_async_resource_ref& make_default_pinned_mr(std::optional config_size) -{ - static fixed_pinned_pool_memory_resource mr = [config_size]() { - auto const size = [&config_size]() -> size_t { - if (auto const env_val = getenv("LIBCUDF_PINNED_POOL_SIZE"); env_val != nullptr) { - return std::atol(env_val); - } - - if (config_size.has_value()) { return *config_size; } - - size_t free{}, total{}; - CUDF_CUDA_TRY(cudaMemGetInfo(&free, &total)); - // 0.5% of the total device memory, capped at 100MB - return std::min(total / 200, size_t{100} * 1024 * 1024); - }(); - - // rmm requires the pool size to be a multiple of 256 bytes - 
auto const aligned_size = (size + 255) & ~255; - CUDF_LOG_INFO("Pinned pool size = {}", aligned_size); - - // make the pool with max size equal to the initial size - return fixed_pinned_pool_memory_resource{aligned_size}; - }(); - - static rmm::host_async_resource_ref mr_ref{mr}; - return mr_ref; -} - -CUDF_EXPORT std::mutex& host_mr_mutex() -{ - static std::mutex map_lock; - return map_lock; -} - -// Must be called with the host_mr_mutex mutex held -CUDF_EXPORT rmm::host_async_resource_ref& make_host_mr(std::optional const& opts, - bool* did_configure = nullptr) -{ - static rmm::host_async_resource_ref* mr_ref = nullptr; - bool configured = false; - if (mr_ref == nullptr) { - configured = true; - mr_ref = &make_default_pinned_mr(opts ? opts->pool_size : std::nullopt); - } - - // If the user passed an out param to detect whether this call configured a resource - // set the result - if (did_configure != nullptr) { *did_configure = configured; } - - return *mr_ref; -} - -// Must be called with the host_mr_mutex mutex held -CUDF_EXPORT rmm::host_async_resource_ref& host_mr() -{ - static rmm::host_async_resource_ref mr_ref = make_host_mr(std::nullopt); - return mr_ref; -} - -rmm::host_async_resource_ref set_host_memory_resource(rmm::host_async_resource_ref mr) -{ - std::scoped_lock lock{host_mr_mutex()}; - auto last_mr = host_mr(); - host_mr() = mr; - return last_mr; -} - -rmm::host_async_resource_ref get_host_memory_resource() -{ - std::scoped_lock lock{host_mr_mutex()}; - return host_mr(); -} - -bool config_default_host_memory_resource(host_mr_options const& opts) -{ - std::scoped_lock lock{host_mr_mutex()}; - auto did_configure = false; - make_host_mr(opts, &did_configure); - return did_configure; -} - -} // namespace cudf::io +} // namespace cudf::io::detail diff --git a/cpp/src/io/utilities/hostdevice_vector.hpp b/cpp/src/io/utilities/hostdevice_vector.hpp index 0883ac3609f..1ae27a2f4ae 100644 --- a/cpp/src/io/utilities/hostdevice_vector.hpp +++ b/cpp/src/io/utilities/hostdevice_vector.hpp @@ -16,11 +16,10 @@ #pragma once -#include "config_utils.hpp" #include "hostdevice_span.hpp" -#include -#include +#include +#include #include #include #include @@ -53,7 +52,7 @@ class hostdevice_vector { } explicit hostdevice_vector(size_t initial_size, size_t max_size, rmm::cuda_stream_view stream) - : h_data({cudf::io::get_host_memory_resource(), stream}), d_data(max_size, stream) + : h_data{make_pinned_vector_async(0, stream)}, d_data(max_size, stream) { CUDF_EXPECTS(initial_size <= max_size, "initial_size cannot be larger than max_size"); @@ -173,7 +172,7 @@ class hostdevice_vector { } private: - cudf::detail::rmm_host_vector h_data; + cudf::detail::host_vector h_data; rmm::device_uvector d_data; }; diff --git a/cpp/src/utilities/pinned_memory.cpp b/cpp/src/utilities/pinned_memory.cpp new file mode 100644 index 00000000000..5d2e3ac332a --- /dev/null +++ b/cpp/src/utilities/pinned_memory.cpp @@ -0,0 +1,216 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace cudf { + +namespace { +class fixed_pinned_pool_memory_resource { + using upstream_mr = rmm::mr::pinned_host_memory_resource; + using host_pooled_mr = rmm::mr::pool_memory_resource; + + private: + upstream_mr upstream_mr_{}; + size_t pool_size_{0}; + // Raw pointer to avoid a segfault when the pool is destroyed on exit + host_pooled_mr* pool_{nullptr}; + void* pool_begin_{nullptr}; + void* pool_end_{nullptr}; + cuda::stream_ref stream_{cudf::detail::global_cuda_stream_pool().get_stream().value()}; + + public: + fixed_pinned_pool_memory_resource(size_t size) + : pool_size_{size}, pool_{new host_pooled_mr(upstream_mr_, size, size)} + { + if (pool_size_ == 0) { return; } + + // Allocate full size from the pinned pool to figure out the beginning and end address + pool_begin_ = pool_->allocate_async(pool_size_, stream_); + pool_end_ = static_cast(static_cast(pool_begin_) + pool_size_); + pool_->deallocate_async(pool_begin_, pool_size_, stream_); + } + + void* allocate_async(std::size_t bytes, std::size_t alignment, cuda::stream_ref stream) + { + if (bytes <= pool_size_) { + try { + return pool_->allocate_async(bytes, alignment, stream); + } catch (...) { + // If the pool is exhausted, fall back to the upstream memory resource + } + } + + return upstream_mr_.allocate_async(bytes, alignment, stream); + } + + void* allocate_async(std::size_t bytes, cuda::stream_ref stream) + { + return allocate_async(bytes, rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream); + } + + void* allocate(std::size_t bytes, std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) + { + auto const result = allocate_async(bytes, alignment, stream_); + stream_.wait(); + return result; + } + + void deallocate_async(void* ptr, + std::size_t bytes, + std::size_t alignment, + cuda::stream_ref stream) noexcept + { + if (bytes <= pool_size_ && ptr >= pool_begin_ && ptr < pool_end_) { + pool_->deallocate_async(ptr, bytes, alignment, stream); + } else { + upstream_mr_.deallocate_async(ptr, bytes, alignment, stream); + } + } + + void deallocate_async(void* ptr, std::size_t bytes, cuda::stream_ref stream) noexcept + { + return deallocate_async(ptr, bytes, rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream); + } + + void deallocate(void* ptr, + std::size_t bytes, + std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) noexcept + { + deallocate_async(ptr, bytes, alignment, stream_); + stream_.wait(); + } + + bool operator==(fixed_pinned_pool_memory_resource const& other) const + { + return pool_ == other.pool_ and stream_ == other.stream_; + } + + bool operator!=(fixed_pinned_pool_memory_resource const& other) const + { + return !operator==(other); + } + + friend void get_property(fixed_pinned_pool_memory_resource const&, + cuda::mr::device_accessible) noexcept + { + } + + friend void get_property(fixed_pinned_pool_memory_resource const&, + cuda::mr::host_accessible) noexcept + { + } +}; + +static_assert(cuda::mr::resource_with, + "Pinned pool mr must be accessible from both host and device"); + +CUDF_EXPORT rmm::host_device_async_resource_ref& make_default_pinned_mr( + std::optional config_size) +{ + static fixed_pinned_pool_memory_resource mr = [config_size]() { + auto const size = [&config_size]() -> size_t { + if (auto const env_val = getenv("LIBCUDF_PINNED_POOL_SIZE"); env_val != nullptr) { + return std::atol(env_val); + } + + if 
(config_size.has_value()) { return *config_size; } + + auto const total = rmm::available_device_memory().second; + // 0.5% of the total device memory, capped at 100MB + return std::min(total / 200, size_t{100} * 1024 * 1024); + }(); + + // rmm requires the pool size to be a multiple of 256 bytes + auto const aligned_size = rmm::align_up(size, rmm::RMM_DEFAULT_HOST_ALIGNMENT); + CUDF_LOG_INFO("Pinned pool size = {}", aligned_size); + + // make the pool with max size equal to the initial size + return fixed_pinned_pool_memory_resource{aligned_size}; + }(); + + static rmm::host_device_async_resource_ref mr_ref{mr}; + return mr_ref; +} + +CUDF_EXPORT std::mutex& host_mr_mutex() +{ + static std::mutex map_lock; + return map_lock; +} + +// Must be called with the host_mr_mutex mutex held +CUDF_EXPORT rmm::host_device_async_resource_ref& make_host_mr( + std::optional const& opts, bool* did_configure = nullptr) +{ + static rmm::host_device_async_resource_ref* mr_ref = nullptr; + bool configured = false; + if (mr_ref == nullptr) { + configured = true; + mr_ref = &make_default_pinned_mr(opts ? opts->pool_size : std::nullopt); + } + + // If the user passed an out param to detect whether this call configured a resource + // set the result + if (did_configure != nullptr) { *did_configure = configured; } + + return *mr_ref; +} + +// Must be called with the host_mr_mutex mutex held +CUDF_EXPORT rmm::host_device_async_resource_ref& host_mr() +{ + static rmm::host_device_async_resource_ref mr_ref = make_host_mr(std::nullopt); + return mr_ref; +} + +} // namespace + +rmm::host_device_async_resource_ref set_pinned_memory_resource( + rmm::host_device_async_resource_ref mr) +{ + std::scoped_lock lock{host_mr_mutex()}; + auto last_mr = host_mr(); + host_mr() = mr; + return last_mr; +} + +rmm::host_device_async_resource_ref get_pinned_memory_resource() +{ + std::scoped_lock lock{host_mr_mutex()}; + return host_mr(); +} + +bool config_default_pinned_memory_resource(pinned_mr_options const& opts) +{ + std::scoped_lock lock{host_mr_mutex()}; + auto did_configure = false; + make_host_mr(opts, &did_configure); + return did_configure; +} + +} // namespace cudf diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 826f879ddc0..f6d762cc2ec 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -380,15 +380,16 @@ ConfigureTest( # * utilities tests ------------------------------------------------------------------------------- ConfigureTest( UTILITIES_TEST - utilities_tests/type_list_tests.cpp utilities_tests/column_debug_tests.cpp utilities_tests/column_utilities_tests.cpp utilities_tests/column_wrapper_tests.cpp + utilities_tests/default_stream_tests.cpp utilities_tests/io_utilities_tests.cpp utilities_tests/lists_column_wrapper_tests.cpp utilities_tests/logger_tests.cpp - utilities_tests/default_stream_tests.cpp + utilities_tests/pinned_memory_tests.cpp utilities_tests/type_check_tests.cpp + utilities_tests/type_list_tests.cpp ) # ################################################################################################## diff --git a/cpp/tests/io/json_test.cpp b/cpp/tests/io/json_test.cpp index 57aa2721756..4c01a1fb87b 100644 --- a/cpp/tests/io/json_test.cpp +++ b/cpp/tests/io/json_test.cpp @@ -28,13 +28,13 @@ #include #include #include -#include #include #include #include #include #include #include +#include #include @@ -2068,7 +2068,7 @@ TEST_F(JsonReaderTest, JSONLinesRecoveringSync) size_t{128} * 1024 * 1024}; // Set new resource - auto last_mr = 
cudf::io::set_host_memory_resource(mr); + auto last_mr = cudf::set_pinned_memory_resource(mr); /** * @brief Spark has the specific need to ignore extra characters that come after the first record @@ -2158,7 +2158,7 @@ TEST_F(JsonReaderTest, JSONLinesRecoveringSync) float64_wrapper{c_data.cbegin(), c_data.cend(), c_validity.cbegin()}); // Restore original memory source - cudf::io::set_host_memory_resource(last_mr); + cudf::set_pinned_memory_resource(last_mr); } TEST_F(JsonReaderTest, MixedTypes) diff --git a/cpp/tests/utilities_tests/io_utilities_tests.cpp b/cpp/tests/utilities_tests/io_utilities_tests.cpp index e5a153bf781..9ed8f18f5cc 100644 --- a/cpp/tests/utilities_tests/io_utilities_tests.cpp +++ b/cpp/tests/utilities_tests/io_utilities_tests.cpp @@ -16,14 +16,6 @@ #include #include -#include - -#include -#include - -#include -#include -#include #include @@ -32,43 +24,6 @@ using cudf::io::detail::base64_encode; class IoUtilitiesTest : public cudf::test::BaseFixture {}; -TEST(IoUtilitiesTest, HostMemoryGetAndSet) -{ - // Global environment for temporary files - auto const temp_env = static_cast( - ::testing::AddGlobalTestEnvironment(new cudf::test::TempDirTestEnvironment)); - - // pinned/pooled host memory resource - using host_pooled_mr = rmm::mr::pool_memory_resource; - host_pooled_mr mr(std::make_shared().get(), - size_t{128} * 1024 * 1024); - - // set new resource - auto last_mr = cudf::io::get_host_memory_resource(); - cudf::io::set_host_memory_resource(mr); - - constexpr int num_rows = 32 * 1024; - auto valids = - cudf::detail::make_counting_transform_iterator(0, [&](int index) { return index % 2; }); - auto values = thrust::make_counting_iterator(0); - - cudf::test::fixed_width_column_wrapper col(values, values + num_rows, valids); - - cudf::table_view expected({col}); - auto filepath = temp_env->get_temp_filepath("IoUtilsMemTest.parquet"); - cudf::io::parquet_writer_options out_args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); - cudf::io::write_parquet(out_args); - - cudf::io::parquet_reader_options const read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto const result = cudf::io::read_parquet(read_opts); - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, expected); - - // reset memory resource back - cudf::io::set_host_memory_resource(last_mr); -} - TEST(IoUtilitiesTest, Base64EncodeAndDecode) { // a vector of lorem ipsum strings diff --git a/cpp/tests/utilities_tests/pinned_memory_tests.cpp b/cpp/tests/utilities_tests/pinned_memory_tests.cpp new file mode 100644 index 00000000000..df9103640f4 --- /dev/null +++ b/cpp/tests/utilities_tests/pinned_memory_tests.cpp @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include +#include + +#include +#include +#include + +class PinnedMemoryTest : public cudf::test::BaseFixture {}; + +TEST(PinnedMemoryTest, MemoryResourceGetAndSet) +{ + // Global environment for temporary files + auto const temp_env = static_cast( + ::testing::AddGlobalTestEnvironment(new cudf::test::TempDirTestEnvironment)); + + // pinned/pooled host memory resource + using host_pooled_mr = rmm::mr::pool_memory_resource; + host_pooled_mr mr(std::make_shared().get(), + 4 * 1024 * 1024); + + // set new resource + auto last_mr = cudf::get_pinned_memory_resource(); + cudf::set_pinned_memory_resource(mr); + + constexpr int num_rows = 32 * 1024; + auto valids = + cudf::detail::make_counting_transform_iterator(0, [&](int index) { return index % 2; }); + auto values = thrust::make_counting_iterator(0); + + cudf::test::fixed_width_column_wrapper col(values, values + num_rows, valids); + + cudf::table_view expected({col}); + auto filepath = temp_env->get_temp_filepath("MemoryResourceGetAndSetTest.parquet"); + cudf::io::parquet_writer_options out_args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_parquet(out_args); + + cudf::io::parquet_reader_options const read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto const result = cudf::io::read_parquet(read_opts); + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, expected); + + // reset memory resource back + cudf::set_pinned_memory_resource(last_mr); +} diff --git a/java/src/main/java/ai/rapids/cudf/PinnedMemoryPool.java b/java/src/main/java/ai/rapids/cudf/PinnedMemoryPool.java index 83b801db7fb..df0d9dc7c3e 100644 --- a/java/src/main/java/ai/rapids/cudf/PinnedMemoryPool.java +++ b/java/src/main/java/ai/rapids/cudf/PinnedMemoryPool.java @@ -128,9 +128,9 @@ public static synchronized void initialize(long poolSize, int gpuId) { * * @param poolSize size of the pool to initialize. 
* @param gpuId gpu id to set to get memory pool from, -1 means to use default - * @param setCuioHostMemoryResource true if this pinned pool should be used by cuIO for host memory + * @param setCudfPinnedPoolMemoryResource true if this pinned pool should be used by cuDF for pinned memory */ - public static synchronized void initialize(long poolSize, int gpuId, boolean setCuioHostMemoryResource) { + public static synchronized void initialize(long poolSize, int gpuId, boolean setCudfPinnedPoolMemoryResource) { if (isInitialized()) { throw new IllegalStateException("Can only initialize the pool once."); } @@ -139,7 +139,7 @@ public static synchronized void initialize(long poolSize, int gpuId, boolean set t.setDaemon(true); return t; }); - initFuture = initService.submit(() -> new PinnedMemoryPool(poolSize, gpuId, setCuioHostMemoryResource)); + initFuture = initService.submit(() -> new PinnedMemoryPool(poolSize, gpuId, setCudfPinnedPoolMemoryResource)); initService.shutdown(); } @@ -216,15 +216,15 @@ public static long getTotalPoolSizeBytes() { return 0; } - private PinnedMemoryPool(long poolSize, int gpuId, boolean setCuioHostMemoryResource) { + private PinnedMemoryPool(long poolSize, int gpuId, boolean setCudfPinnedPoolMemoryResource) { if (gpuId > -1) { // set the gpu device to use Cuda.setDevice(gpuId); Cuda.freeZero(); } this.poolHandle = Rmm.newPinnedPoolMemoryResource(poolSize, poolSize); - if (setCuioHostMemoryResource) { - Rmm.setCuioPinnedPoolMemoryResource(this.poolHandle); + if (setCudfPinnedPoolMemoryResource) { + Rmm.setCudfPinnedPoolMemoryResource(this.poolHandle); } this.poolSize = poolSize; } diff --git a/java/src/main/java/ai/rapids/cudf/Rmm.java b/java/src/main/java/ai/rapids/cudf/Rmm.java index 4dee1b7aa24..ed029c918e4 100755 --- a/java/src/main/java/ai/rapids/cudf/Rmm.java +++ b/java/src/main/java/ai/rapids/cudf/Rmm.java @@ -597,7 +597,7 @@ static native long newEventHandlerResourceAdaptor(long handle, long trackerHandl public static native long newPinnedPoolMemoryResource(long initSize, long maxSize); - public static native long setCuioPinnedPoolMemoryResource(long poolPtr); + public static native long setCudfPinnedPoolMemoryResource(long poolPtr); public static native void releasePinnedPoolMemoryResource(long poolPtr); diff --git a/java/src/main/native/src/RmmJni.cpp b/java/src/main/native/src/RmmJni.cpp index fa78f6ca4e2..8bd0f7793b4 100644 --- a/java/src/main/native/src/RmmJni.cpp +++ b/java/src/main/native/src/RmmJni.cpp @@ -16,7 +16,7 @@ #include "cudf_jni_apis.hpp" -#include +#include #include #include @@ -395,15 +395,17 @@ class java_debug_event_handler_memory_resource final : public java_event_handler } }; -inline auto& prior_cuio_host_mr() +inline auto& prior_cudf_pinned_mr() { - static rmm::host_async_resource_ref _prior_cuio_host_mr = cudf::io::get_host_memory_resource(); - return _prior_cuio_host_mr; + static rmm::host_device_async_resource_ref _prior_cudf_pinned_mr = + cudf::get_pinned_memory_resource(); + return _prior_cudf_pinned_mr; } /** * This is a pinned fallback memory resource that will try to allocate `pool` - * and if that fails, attempt to allocate from the prior resource used by cuIO `prior_cuio_host_mr`. + * and if that fails, attempt to allocate from the prior resource used by cuDF + * `prior_cudf_pinned_mr`. 
* * We detect whether a pointer to free is inside of the pool by checking its address (see * constructor) @@ -433,7 +435,7 @@ class pinned_fallback_host_memory_resource { /** * @brief Allocates pinned host memory of size at least \p bytes bytes from either the - * _pool argument provided, or prior_cuio_host_mr. + * _pool argument provided, or prior_cudf_pinned_mr. * * @throws rmm::bad_alloc if the requested allocation could not be fulfilled due to any other * reason. @@ -450,7 +452,7 @@ class pinned_fallback_host_memory_resource { return _pool->allocate(bytes, alignment); } catch (const std::exception& unused) { // try to allocate using the underlying pinned resource - return prior_cuio_host_mr().allocate(bytes, alignment); + return prior_cudf_pinned_mr().allocate(bytes, alignment); } // we should not reached here return nullptr; @@ -459,7 +461,7 @@ class pinned_fallback_host_memory_resource { /** * @brief Deallocate memory pointed to by \p ptr of size \p bytes bytes. We attempt * to deallocate from _pool, if ptr is detected to be in the pool address range, - * otherwise we deallocate from `prior_cuio_host_mr`. + * otherwise we deallocate from `prior_cudf_pinned_mr`. * * @param ptr Pointer to be deallocated. * @param bytes Size of the allocation. @@ -472,7 +474,7 @@ class pinned_fallback_host_memory_resource { if (ptr >= pool_begin_ && ptr <= pool_end_) { _pool->deallocate(ptr, bytes, alignment); } else { - prior_cuio_host_mr().deallocate(ptr, bytes, alignment); + prior_cudf_pinned_mr().deallocate(ptr, bytes, alignment); } } @@ -1025,7 +1027,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Rmm_newPinnedPoolMemoryResource(JNIE CATCH_STD(env, 0) } -JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_setCuioPinnedPoolMemoryResource(JNIEnv* env, +JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_setCudfPinnedPoolMemoryResource(JNIEnv* env, jclass clazz, jlong pool_ptr) { @@ -1035,7 +1037,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_setCuioPinnedPoolMemoryResource(J // create a pinned fallback pool that will allocate pinned memory // if the regular pinned pool is exhausted pinned_fallback_mr.reset(new pinned_fallback_host_memory_resource(pool)); - prior_cuio_host_mr() = cudf::io::set_host_memory_resource(*pinned_fallback_mr); + prior_cudf_pinned_mr() = cudf::set_pinned_memory_resource(*pinned_fallback_mr); } CATCH_STD(env, ) } @@ -1047,8 +1049,8 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_releasePinnedPoolMemoryResource(J try { cudf::jni::auto_set_device(env); // set the cuio host memory resource to what it was before, or the same - // if we didn't overwrite it with setCuioPinnedPoolMemoryResource - cudf::io::set_host_memory_resource(prior_cuio_host_mr()); + // if we didn't overwrite it with setCudfPinnedPoolMemoryResource + cudf::set_pinned_memory_resource(prior_cudf_pinned_mr()); pinned_fallback_mr.reset(); delete reinterpret_cast(pool_ptr); } @@ -1088,7 +1090,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Rmm_allocFromFallbackPinnedPool(JNIE jlong size) { cudf::jni::auto_set_device(env); - void* ret = cudf::io::get_host_memory_resource().allocate(size); + void* ret = cudf::get_pinned_memory_resource().allocate(size); return reinterpret_cast(ret); } @@ -1101,7 +1103,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_freeFromFallbackPinnedPool(JNIEnv try { cudf::jni::auto_set_device(env); void* cptr = reinterpret_cast(ptr); - cudf::io::get_host_memory_resource().deallocate(cptr, size); + cudf::get_pinned_memory_resource().deallocate(cptr, size); } CATCH_STD(env, ) } @@ -1112,7 +1114,7 
@@ JNIEXPORT jboolean JNICALL Java_ai_rapids_cudf_Rmm_configureDefaultCudfPinnedPoo { try { cudf::jni::auto_set_device(env); - return cudf::io::config_default_host_memory_resource(cudf::io::host_mr_options{size}); + return cudf::config_default_pinned_memory_resource(cudf::pinned_mr_options{size}); } CATCH_STD(env, false) }
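Usage note (not part of the patch): the snippet below is a minimal sketch of how the relocated pinned-memory API and the new vector factories from this change fit together. It assumes a libcudf build that contains this patch; the 32 MiB pool size and the byte element type are illustrative values only, and the make_pinned_vector_* factories live in the cudf::detail namespace, so they are intended for in-tree callers (cuIO, benchmarks, tests) rather than the public API.

#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/pinned_memory.hpp>

#include <rmm/mr/pinned_host_memory_resource.hpp>

#include <cstdint>
#include <iostream>

int main()
{
  // Optionally size the default pinned pool before its first use; returns false
  // if a resource was already configured (e.g. by an earlier pinned allocation).
  bool const configured =
    cudf::config_default_pinned_memory_resource(cudf::pinned_mr_options{32 * 1024 * 1024});
  std::cout << "configured default pinned pool: " << std::boolalpha << configured << "\n";

  auto const stream = cudf::get_default_stream();

  // Pinned host staging buffers now come from the factories instead of pinned_host_vector;
  // the _sync variant synchronizes the stream, so host access is valid on return.
  auto staging = cudf::detail::make_pinned_vector_sync<std::uint8_t>(4096, stream);
  staging[0]   = 42;

  // Swap in a plain (non-pooled) pinned resource; it must be device-accessible,
  // which is why the setter now takes a host_device_async_resource_ref.
  static rmm::mr::pinned_host_memory_resource plain_pinned{};
  auto const previous = cudf::set_pinned_memory_resource(plain_pinned);

  // ... exercise cuIO readers/writers or benchmarks here ...

  // Restore whatever resource was installed before.
  cudf::set_pinned_memory_resource(previous);
  return 0;
}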