From 388042f0d89aed671bb0c36ffdb87f6caa9be8a9 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Mon, 9 Sep 2024 20:39:26 -0700 Subject: [PATCH 01/13] [WIP] Reorganize Cython to separate C++ bindings and make Cython classes public --- python/rmm/CMakeLists.txt | 4 +- python/rmm/rmm/cpp/CMakeLists.txt | 35 + python/rmm/rmm/cpp/__init__.pxd | 13 + python/rmm/rmm/cpp/_torch_allocator.cpp | 64 + python/rmm/rmm/cpp/cpp_cuda_stream.pxd | 28 + python/rmm/rmm/cpp/cpp_cuda_stream_pool.pxd | 23 + python/rmm/rmm/cpp/cpp_cuda_stream_view.pxd | 32 + python/rmm/rmm/cpp/cpp_device_buffer.pxd | 58 + python/rmm/rmm/cpp/cpp_device_uvector.pxd | 39 + python/rmm/rmm/cpp/cpp_logger.pyx | 66 + python/rmm/rmm/cpp/cpp_memory_resource.pxd | 34 + python/rmm/rmm/cpp/cpp_memory_resource.pyx | 215 +++ .../rmm/rmm/cpp/cpp_per_device_resource.pxd | 36 + python/rmm/rmm/python/CMakeLists.txt | 28 + python/rmm/rmm/python/__init__.pxd | 13 + python/rmm/rmm/python/__init__.py | 15 + python/rmm/rmm/python/cuda_stream.pyx | 36 + python/rmm/rmm/python/device_buffer.pxd | 71 + python/rmm/rmm/python/device_buffer.pyx | 559 ++++++++ python/rmm/rmm/python/helper.pxd | 16 + python/rmm/rmm/python/helper.pyx | 78 ++ python/rmm/rmm/python/lib.pxd | 17 + python/rmm/rmm/python/lib.pyx | 13 + python/rmm/rmm/python/logger.pyx | 208 +++ python/rmm/rmm/python/memory_resource.pxd | 83 ++ python/rmm/rmm/python/memory_resource.pyx | 1170 +++++++++++++++++ 26 files changed, 2953 insertions(+), 1 deletion(-) create mode 100644 python/rmm/rmm/cpp/CMakeLists.txt create mode 100644 python/rmm/rmm/cpp/__init__.pxd create mode 100644 python/rmm/rmm/cpp/_torch_allocator.cpp create mode 100644 python/rmm/rmm/cpp/cpp_cuda_stream.pxd create mode 100644 python/rmm/rmm/cpp/cpp_cuda_stream_pool.pxd create mode 100644 python/rmm/rmm/cpp/cpp_cuda_stream_view.pxd create mode 100644 python/rmm/rmm/cpp/cpp_device_buffer.pxd create mode 100644 python/rmm/rmm/cpp/cpp_device_uvector.pxd create mode 100644 python/rmm/rmm/cpp/cpp_logger.pyx create mode 100644 python/rmm/rmm/cpp/cpp_memory_resource.pxd create mode 100644 python/rmm/rmm/cpp/cpp_memory_resource.pyx create mode 100644 python/rmm/rmm/cpp/cpp_per_device_resource.pxd create mode 100644 python/rmm/rmm/python/CMakeLists.txt create mode 100644 python/rmm/rmm/python/__init__.pxd create mode 100644 python/rmm/rmm/python/__init__.py create mode 100644 python/rmm/rmm/python/cuda_stream.pyx create mode 100644 python/rmm/rmm/python/device_buffer.pxd create mode 100644 python/rmm/rmm/python/device_buffer.pyx create mode 100644 python/rmm/rmm/python/helper.pxd create mode 100644 python/rmm/rmm/python/helper.pyx create mode 100644 python/rmm/rmm/python/lib.pxd create mode 100644 python/rmm/rmm/python/lib.pyx create mode 100644 python/rmm/rmm/python/logger.pyx create mode 100644 python/rmm/rmm/python/memory_resource.pxd create mode 100644 python/rmm/rmm/python/memory_resource.pyx diff --git a/python/rmm/CMakeLists.txt b/python/rmm/CMakeLists.txt index 6c2515102..4c4e603ef 100644 --- a/python/rmm/CMakeLists.txt +++ b/python/rmm/CMakeLists.txt @@ -30,4 +30,6 @@ rapids_cython_init() add_compile_definitions("SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${RMM_LOGGING_LEVEL}") add_subdirectory(rmm/_cuda) -add_subdirectory(rmm/_lib) +# add_subdirectory(rmm/_lib) +add_subdirectory(rmm/cpp) +add_subdirectory(rmm/python) diff --git a/python/rmm/rmm/cpp/CMakeLists.txt b/python/rmm/rmm/cpp/CMakeLists.txt new file mode 100644 index 000000000..8ac924df4 --- /dev/null +++ b/python/rmm/rmm/cpp/CMakeLists.txt @@ -0,0 +1,35 @@ +# 
============================================================================= +# Copyright (c) 2022-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +set(cython_sources cpp_logger.pyx cpp_memory_resource.pyx) +set(linked_libraries rmm::rmm) + +# Build all of the Cython targets +rapids_cython_create_modules(SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" + CXX) + +# mark all symbols in these Cython targets "hidden" by default, so they won't collide with symbols +# loaded from other DSOs +foreach(_cython_target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + set_target_properties(${_cython_target} PROPERTIES C_VISIBILITY_PRESET hidden + CXX_VISIBILITY_PRESET hidden) +endforeach() + +add_library(_torch_allocator SHARED _torch_allocator.cpp) +# Want the output to be called _torch_allocator.so +set_target_properties(_torch_allocator PROPERTIES PREFIX "" SUFFIX ".so") +target_link_libraries(_torch_allocator PRIVATE rmm::rmm) +cmake_path(RELATIVE_PATH CMAKE_CURRENT_SOURCE_DIR BASE_DIRECTORY "${PROJECT_SOURCE_DIR}" + OUTPUT_VARIABLE _torch_allocator_location) +install(TARGETS _torch_allocator DESTINATION "${_torch_allocator_location}") diff --git a/python/rmm/rmm/cpp/__init__.pxd b/python/rmm/rmm/cpp/__init__.pxd new file mode 100644 index 000000000..46753baa3 --- /dev/null +++ b/python/rmm/rmm/cpp/__init__.pxd @@ -0,0 +1,13 @@ +# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/python/rmm/rmm/cpp/_torch_allocator.cpp b/python/rmm/rmm/cpp/_torch_allocator.cpp new file mode 100644 index 000000000..bfe94c2d0 --- /dev/null +++ b/python/rmm/rmm/cpp/_torch_allocator.cpp @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include <rmm/cuda_device.hpp>
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/mr/per_device_resource.hpp>
+
+#include <cstddef>
+
+// These signatures must match those required by CUDAPluggableAllocator in
+// github.com/pytorch/pytorch/blob/main/torch/csrc/cuda/CUDAPluggableAllocator.h
+// Since the loading is done at runtime via dlopen, no error checking
+// can be performed for mismatching signatures.
+
+/**
+ * @brief Allocate memory of at least \p size bytes.
+ *
+ * @throws rmm::bad_alloc When the requested allocation cannot be satisfied.
+ *
+ * @param size The number of bytes to allocate
+ * @param device The device whose memory resource one should use
+ * @param stream CUDA stream to perform allocation on
+ * @return Pointer to the newly allocated memory
+ */
+extern "C" void* allocate(std::size_t size, int device, void* stream)
+{
+  rmm::cuda_device_id const device_id{device};
+  rmm::cuda_set_device_raii with_device{device_id};
+  auto mr = rmm::mr::get_per_device_resource_ref(device_id);
+  return mr.allocate_async(
+    size, rmm::CUDA_ALLOCATION_ALIGNMENT, rmm::cuda_stream_view{static_cast<cudaStream_t>(stream)});
+}
+
+/**
+ * @brief Deallocate memory pointed to by \p ptr.
+ *
+ * @param ptr Pointer to be deallocated
+ * @param size The number of bytes in the allocation
+ * @param device The device whose memory resource one should use
+ * @param stream CUDA stream to perform deallocation on
+ */
+extern "C" void deallocate(void* ptr, std::size_t size, int device, void* stream)
+{
+  rmm::cuda_device_id const device_id{device};
+  rmm::cuda_set_device_raii with_device{device_id};
+  auto mr = rmm::mr::get_per_device_resource_ref(device_id);
+  mr.deallocate_async(ptr,
+                      size,
+                      rmm::CUDA_ALLOCATION_ALIGNMENT,
+                      rmm::cuda_stream_view{static_cast<cudaStream_t>(stream)});
+}
diff --git a/python/rmm/rmm/cpp/cpp_cuda_stream.pxd b/python/rmm/rmm/cpp/cpp_cuda_stream.pxd
new file mode 100644
index 000000000..16b66ee2b
--- /dev/null
+++ b/python/rmm/rmm/cpp/cpp_cuda_stream.pxd
@@ -0,0 +1,28 @@
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from cuda.ccudart cimport cudaStream_t
+from libcpp cimport bool
+
+from rmm.cpp.cpp_cuda_stream_view cimport cuda_stream_view
+
+
+cdef extern from "rmm/cuda_stream.hpp" namespace "rmm" nogil:
+    cdef cppclass cuda_stream:
+        cuda_stream() except +
+        bool is_valid() except +
+        cudaStream_t value() except +
+        cuda_stream_view view() except +
+        void synchronize() except +
+        void synchronize_no_throw()
diff --git a/python/rmm/rmm/cpp/cpp_cuda_stream_pool.pxd b/python/rmm/rmm/cpp/cpp_cuda_stream_pool.pxd
new file mode 100644
index 000000000..553b38514
--- /dev/null
+++ b/python/rmm/rmm/cpp/cpp_cuda_stream_pool.pxd
@@ -0,0 +1,23 @@
+# Copyright (c) 2021-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from rmm.cpp.cpp_cuda_stream_view cimport cuda_stream_view + + +cdef extern from "rmm/cuda_stream_pool.hpp" namespace "rmm" nogil: + cdef cppclass cuda_stream_pool: + cuda_stream_pool(size_t pool_size) + cuda_stream_view get_stream() + cuda_stream_view get_stream(size_t stream_id) except + + size_t get_pool_size() diff --git a/python/rmm/rmm/cpp/cpp_cuda_stream_view.pxd b/python/rmm/rmm/cpp/cpp_cuda_stream_view.pxd new file mode 100644 index 000000000..bf0d33c24 --- /dev/null +++ b/python/rmm/rmm/cpp/cpp_cuda_stream_view.pxd @@ -0,0 +1,32 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from cuda.ccudart cimport cudaStream_t +from libcpp cimport bool + + +cdef extern from "rmm/cuda_stream_view.hpp" namespace "rmm" nogil: + cdef cppclass cuda_stream_view: + cuda_stream_view() + cuda_stream_view(cudaStream_t) + cudaStream_t value() + bool is_default() + bool is_per_thread_default() + void synchronize() except + + + cdef bool operator==(cuda_stream_view const, cuda_stream_view const) + + const cuda_stream_view cuda_stream_default + const cuda_stream_view cuda_stream_legacy + const cuda_stream_view cuda_stream_per_thread diff --git a/python/rmm/rmm/cpp/cpp_device_buffer.pxd b/python/rmm/rmm/cpp/cpp_device_buffer.pxd new file mode 100644 index 000000000..1aa7634cf --- /dev/null +++ b/python/rmm/rmm/cpp/cpp_device_buffer.pxd @@ -0,0 +1,58 @@ +# Copyright (c) 2019-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+from rmm.cpp.cpp_cuda_stream_view cimport cuda_stream_view
+from rmm.cpp.cpp_memory_resource cimport device_memory_resource
+
+
+cdef extern from "rmm/mr/device/per_device_resource.hpp" namespace "rmm" nogil:
+    cdef cppclass cuda_device_id:
+        ctypedef int value_type
+        cuda_device_id()
+        cuda_device_id(value_type id)
+        value_type value()
+
+    cdef cuda_device_id get_current_cuda_device()
+
+cdef extern from "rmm/prefetch.hpp" namespace "rmm" nogil:
+    cdef void prefetch(const void* ptr,
+                       size_t bytes,
+                       cuda_device_id device,
+                       cuda_stream_view stream) except +
+
+cdef extern from "rmm/device_buffer.hpp" namespace "rmm" nogil:
+    cdef cppclass device_buffer:
+        device_buffer()
+        device_buffer(
+            size_t size,
+            cuda_stream_view stream,
+            device_memory_resource *
+        ) except +
+        device_buffer(
+            const void* source_data,
+            size_t size,
+            cuda_stream_view stream,
+            device_memory_resource *
+        ) except +
+        device_buffer(
+            const device_buffer buf,
+            cuda_stream_view stream,
+            device_memory_resource *
+        ) except +
+        void reserve(size_t new_capacity, cuda_stream_view stream) except +
+        void resize(size_t new_size, cuda_stream_view stream) except +
+        void shrink_to_fit(cuda_stream_view stream) except +
+        void* data()
+        size_t size()
+        size_t capacity()
diff --git a/python/rmm/rmm/cpp/cpp_device_uvector.pxd b/python/rmm/rmm/cpp/cpp_device_uvector.pxd
new file mode 100644
index 000000000..2cb647e3c
--- /dev/null
+++ b/python/rmm/rmm/cpp/cpp_device_uvector.pxd
@@ -0,0 +1,39 @@
+# Copyright (c) 2021-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from rmm.cpp.cpp_cuda_stream_view cimport cuda_stream_view
+from rmm.cpp.cpp_device_buffer cimport device_buffer
+from rmm.cpp.cpp_memory_resource cimport device_memory_resource
+
+
+cdef extern from "rmm/device_uvector.hpp" namespace "rmm" nogil:
+    cdef cppclass device_uvector[T]:
+        device_uvector(size_t size, cuda_stream_view stream) except +
+        T* element_ptr(size_t index)
+        void set_element(size_t element_index, const T& v, cuda_stream_view s)
+        void set_element_async(
+            size_t element_index,
+            const T& v,
+            cuda_stream_view s
+        ) except +
+        T front_element(cuda_stream_view s) except +
+        T back_element(cuda_stream_view s) except +
+        void reserve(size_t new_capacity, cuda_stream_view stream) except +
+        void resize(size_t new_size, cuda_stream_view stream) except +
+        void shrink_to_fit(cuda_stream_view stream) except +
+        device_buffer release()
+        size_t capacity()
+        T* data()
+        size_t size()
+        device_memory_resource* memory_resource()
diff --git a/python/rmm/rmm/cpp/cpp_logger.pyx b/python/rmm/rmm/cpp/cpp_logger.pyx
new file mode 100644
index 000000000..241a748c3
--- /dev/null
+++ b/python/rmm/rmm/cpp/cpp_logger.pyx
@@ -0,0 +1,66 @@
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from libcpp cimport bool + + +cdef extern from "spdlog/common.h" namespace "spdlog::level" nogil: + cpdef enum logging_level "spdlog::level::level_enum": + """ + The debug logging level for RMM. + + Debug logging prints messages to a log file. See + `Debug Logging `_ + for more information. + + Valid levels, in decreasing order of verbosity, are TRACE, DEBUG, + INFO, WARN, ERR, CRITICAL, and OFF. Default is INFO. + + Examples + -------- + >>> import rmm + >>> rmm.logging_level.DEBUG + + >>> rmm.logging_level.DEBUG.value + 1 + >>> rmm.logging_level.DEBUG.name + 'DEBUG' + + See Also + -------- + set_logging_level : Set the debug logging level + get_logging_level : Get the current debug logging level + """ + TRACE "spdlog::level::trace" + DEBUG "spdlog::level::debug" + INFO "spdlog::level::info" + WARN "spdlog::level::warn" + ERR "spdlog::level::err" + CRITICAL "spdlog::level::critical" + OFF "spdlog::level::off" + + +cdef extern from "spdlog/spdlog.h" namespace "spdlog" nogil: + cdef cppclass spdlog_logger "spdlog::logger": + spdlog_logger() except + + void set_level(logging_level level) + logging_level level() + void flush() except + + void flush_on(logging_level level) + logging_level flush_level() + bool should_log(logging_level msg_level) + + +cdef extern from "rmm/logger.hpp" namespace "rmm" nogil: + cdef spdlog_logger& logger() except + diff --git a/python/rmm/rmm/cpp/cpp_memory_resource.pxd b/python/rmm/rmm/cpp/cpp_memory_resource.pxd new file mode 100644 index 000000000..cef5d4737 --- /dev/null +++ b/python/rmm/rmm/cpp/cpp_memory_resource.pxd @@ -0,0 +1,34 @@ +# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from libcpp.pair cimport pair + +from rmm.cpp.cpp_cuda_stream_view cimport cuda_stream_view + + +cdef extern from "rmm/mr/device/device_memory_resource.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass device_memory_resource: + void* allocate(size_t bytes) except + + void* allocate(size_t bytes, cuda_stream_view stream) except + + void deallocate(void* ptr, size_t bytes) except + + void deallocate( + void* ptr, + size_t bytes, + cuda_stream_view stream + ) except + + +cdef extern from "rmm/cuda_device.hpp" namespace "rmm" nogil: + size_t percent_of_free_device_memory(int percent) except + + pair[size_t, size_t] available_device_memory() except + diff --git a/python/rmm/rmm/cpp/cpp_memory_resource.pyx b/python/rmm/rmm/cpp/cpp_memory_resource.pyx new file mode 100644 index 000000000..50e201517 --- /dev/null +++ b/python/rmm/rmm/cpp/cpp_memory_resource.pyx @@ -0,0 +1,215 @@ +# Copyright (c) 2020-2024, NVIDIA CORPORATION. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This import is needed for Cython typing in translate_python_except_to_cpp +# See https://github.com/cython/cython/issues/5589 +from builtins import BaseException + +from libc.stddef cimport size_t +from libc.stdint cimport int8_t, int64_t +from libcpp cimport bool +from libcpp.optional cimport optional +from libcpp.pair cimport pair +from libcpp.string cimport string + +from rmm.cpp.cpp_cuda_stream_view cimport cuda_stream_view +from rmm.cpp.cpp_memory_resource cimport device_memory_resource + +# Transparent handle of a C++ exception +ctypedef pair[int, string] CppExcept + +cdef CppExcept translate_python_except_to_cpp(err: BaseException) noexcept: + """Translate a Python exception into a C++ exception handle + + The returned exception handle can then be thrown by `throw_cpp_except()`, + which MUST be done without holding the GIL. + + This is useful when C++ calls a Python function and needs to catch or + propagate exceptions. + """ + if isinstance(err, MemoryError): + return CppExcept(0, str.encode(str(err))) + return CppExcept(-1, str.encode(str(err))) + +# Implementation of `throw_cpp_except()`, which throws a given `CppExcept`. +# This function MUST be called without the GIL otherwise the thrown C++ +# exception are translated back into a Python exception. 
+cdef extern from *: + """ + #include + #include + + void throw_cpp_except(std::pair res) { + switch(res.first) { + case 0: + throw rmm::out_of_memory(res.second); + default: + throw std::runtime_error(res.second); + } + } + """ + void throw_cpp_except(CppExcept) nogil + + +# NOTE: Keep extern declarations in .pyx file as much as possible to avoid +# leaking dependencies when importing RMM Cython .pxd files +cdef extern from "rmm/mr/device/cuda_memory_resource.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass cuda_memory_resource(device_memory_resource): + cuda_memory_resource() except + + +cdef extern from "rmm/mr/device/managed_memory_resource.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass managed_memory_resource(device_memory_resource): + managed_memory_resource() except + + +cdef extern from "rmm/mr/device/system_memory_resource.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass system_memory_resource(device_memory_resource): + system_memory_resource() except + + +cdef extern from "rmm/mr/device/sam_headroom_memory_resource.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass sam_headroom_memory_resource(device_memory_resource): + sam_headroom_memory_resource(size_t headroom) except + + +cdef extern from "rmm/mr/device/cuda_async_memory_resource.hpp" \ + namespace "rmm::mr" nogil: + + cdef cppclass cuda_async_memory_resource(device_memory_resource): + cuda_async_memory_resource( + optional[size_t] initial_pool_size, + optional[size_t] release_threshold, + optional[allocation_handle_type] export_handle_type) except + + +# TODO: when we adopt Cython 3.0 use enum class +cdef extern from "rmm/mr/device/cuda_async_memory_resource.hpp" \ + namespace \ + "rmm::mr::cuda_async_memory_resource::allocation_handle_type" \ + nogil: + enum allocation_handle_type \ + "rmm::mr::cuda_async_memory_resource::allocation_handle_type": + none + posix_file_descriptor + win32 + win32_kmt + + +cdef extern from "rmm/mr/device/pool_memory_resource.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass pool_memory_resource[Upstream](device_memory_resource): + pool_memory_resource( + Upstream* upstream_mr, + size_t initial_pool_size, + optional[size_t] maximum_pool_size) except + + size_t pool_size() + +cdef extern from "rmm/mr/device/fixed_size_memory_resource.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass fixed_size_memory_resource[Upstream](device_memory_resource): + fixed_size_memory_resource( + Upstream* upstream_mr, + size_t block_size, + size_t block_to_preallocate) except + + +cdef extern from "rmm/mr/device/callback_memory_resource.hpp" \ + namespace "rmm::mr" nogil: + ctypedef void* (*allocate_callback_t)(size_t, cuda_stream_view, void*) + ctypedef void (*deallocate_callback_t)(void*, size_t, cuda_stream_view, void*) + + cdef cppclass callback_memory_resource(device_memory_resource): + callback_memory_resource( + allocate_callback_t allocate_callback, + deallocate_callback_t deallocate_callback, + void* allocate_callback_arg, + void* deallocate_callback_arg + ) except + + +cdef extern from "rmm/mr/device/binning_memory_resource.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass binning_memory_resource[Upstream](device_memory_resource): + binning_memory_resource(Upstream* upstream_mr) except + + binning_memory_resource( + Upstream* upstream_mr, + int8_t min_size_exponent, + int8_t max_size_exponent) except + + + void add_bin(size_t allocation_size) except + + void add_bin( + size_t allocation_size, + device_memory_resource* bin_resource) except + + +cdef extern from 
"rmm/mr/device/limiting_resource_adaptor.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass limiting_resource_adaptor[Upstream](device_memory_resource): + limiting_resource_adaptor( + Upstream* upstream_mr, + size_t allocation_limit) except + + + size_t get_allocated_bytes() except + + size_t get_allocation_limit() except + + +cdef extern from "rmm/mr/device/logging_resource_adaptor.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass logging_resource_adaptor[Upstream](device_memory_resource): + logging_resource_adaptor( + Upstream* upstream_mr, + string filename) except + + + void flush() except + + +cdef extern from "rmm/mr/device/statistics_resource_adaptor.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass statistics_resource_adaptor[Upstream](device_memory_resource): + struct counter: + counter() + + int64_t value + int64_t peak + int64_t total + + statistics_resource_adaptor(Upstream* upstream_mr) except + + + counter get_bytes_counter() except + + counter get_allocations_counter() except + + pair[counter, counter] pop_counters() except + + pair[counter, counter] push_counters() except + + +cdef extern from "rmm/mr/device/tracking_resource_adaptor.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass tracking_resource_adaptor[Upstream](device_memory_resource): + tracking_resource_adaptor( + Upstream* upstream_mr, + bool capture_stacks) except + + + size_t get_allocated_bytes() except + + string get_outstanding_allocations_str() except + + void log_outstanding_allocations() except + + +cdef extern from "rmm/mr/device/failure_callback_resource_adaptor.hpp" \ + namespace "rmm::mr" nogil: + ctypedef bool (*failure_callback_t)(size_t, void*) + cdef cppclass failure_callback_resource_adaptor[Upstream]( + device_memory_resource + ): + failure_callback_resource_adaptor( + Upstream* upstream_mr, + failure_callback_t callback, + void* callback_arg + ) except + + +cdef extern from "rmm/mr/device/prefetch_resource_adaptor.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass prefetch_resource_adaptor[Upstream](device_memory_resource): + prefetch_resource_adaptor(Upstream* upstream_mr) except + diff --git a/python/rmm/rmm/cpp/cpp_per_device_resource.pxd b/python/rmm/rmm/cpp/cpp_per_device_resource.pxd new file mode 100644 index 000000000..59f651687 --- /dev/null +++ b/python/rmm/rmm/cpp/cpp_per_device_resource.pxd @@ -0,0 +1,36 @@ +# Copyright (c) 2019-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from rmm.cpp.cpp_memory_resource cimport device_memory_resource + + +cdef extern from "rmm/mr/device/per_device_resource.hpp" namespace "rmm" nogil: + cdef cppclass cuda_device_id: + ctypedef int value_type + + cuda_device_id(value_type id) + + value_type value() + +cdef extern from "rmm/mr/device/per_device_resource.hpp" \ + namespace "rmm::mr" nogil: + cdef device_memory_resource* set_current_device_resource( + device_memory_resource* new_mr + ) + cdef device_memory_resource* get_current_device_resource() + cdef device_memory_resource* set_per_device_resource( + cuda_device_id id, device_memory_resource* new_mr + ) + cdef device_memory_resource* get_per_device_resource ( + cuda_device_id id + ) diff --git a/python/rmm/rmm/python/CMakeLists.txt b/python/rmm/rmm/python/CMakeLists.txt new file mode 100644 index 000000000..558c0ce9a --- /dev/null +++ b/python/rmm/rmm/python/CMakeLists.txt @@ -0,0 +1,28 @@ +# ============================================================================= +# Copyright (c) 2022-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +set(cython_sources device_buffer.pyx lib.pyx logger.pyx memory_resource.pyx cuda_stream.pyx + helper.pyx) +set(linked_libraries rmm::rmm) + +# Build all of the Cython targets +rapids_cython_create_modules(SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" + CXX) + +# mark all symbols in these Cython targets "hidden" by default, so they won't collide with symbols +# loaded from other DSOs +foreach(_cython_target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + set_target_properties(${_cython_target} PROPERTIES C_VISIBILITY_PRESET hidden + CXX_VISIBILITY_PRESET hidden) +endforeach() diff --git a/python/rmm/rmm/python/__init__.pxd b/python/rmm/rmm/python/__init__.pxd new file mode 100644 index 000000000..46753baa3 --- /dev/null +++ b/python/rmm/rmm/python/__init__.pxd @@ -0,0 +1,13 @@ +# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/python/rmm/rmm/python/__init__.py b/python/rmm/rmm/python/__init__.py new file mode 100644 index 000000000..0b8672ef6 --- /dev/null +++ b/python/rmm/rmm/python/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) 2019-2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .device_buffer import DeviceBuffer diff --git a/python/rmm/rmm/python/cuda_stream.pyx b/python/rmm/rmm/python/cuda_stream.pyx new file mode 100644 index 000000000..f1529111d --- /dev/null +++ b/python/rmm/rmm/python/cuda_stream.pyx @@ -0,0 +1,36 @@ +# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +cimport cython +from cuda.ccudart cimport cudaStream_t +from libcpp cimport bool + +from rmm.cpp.cpp_cuda_stream cimport cuda_stream + + +@cython.final +cdef class CudaStream: + """ + Wrapper around a CUDA stream with RAII semantics. + When a CudaStream instance is GC'd, the underlying + CUDA stream is destroyed. + """ + def __cinit__(self): + self.c_obj.reset(new cuda_stream()) + + cdef cudaStream_t value(self) except * nogil: + return self.c_obj.get()[0].value() + + cdef bool is_valid(self) except * nogil: + return self.c_obj.get()[0].is_valid() diff --git a/python/rmm/rmm/python/device_buffer.pxd b/python/rmm/rmm/python/device_buffer.pxd new file mode 100644 index 000000000..d8b56d2fb --- /dev/null +++ b/python/rmm/rmm/python/device_buffer.pxd @@ -0,0 +1,71 @@ +# Copyright (c) 2019-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from libc.stdint cimport uintptr_t +from libcpp.memory cimport unique_ptr + +from rmm._cuda.stream cimport Stream +from rmm.cpp.cpp_device_buffer cimport device_buffer +from rmm.python.memory_resource cimport DeviceMemoryResource + + +cdef class DeviceBuffer: + cdef unique_ptr[device_buffer] c_obj + + # Holds a reference to the DeviceMemoryResource used for allocation. + # Ensures the MR does not get destroyed before this DeviceBuffer. `mr` is + # needed for deallocation + cdef DeviceMemoryResource mr + + # Holds a reference to the stream used by the underlying `device_buffer`. 
+ # Ensures the stream does not get destroyed before this DeviceBuffer + cdef Stream stream + + @staticmethod + cdef DeviceBuffer c_from_unique_ptr( + unique_ptr[device_buffer] ptr, + Stream stream=*, + DeviceMemoryResource mr=*, + ) + + @staticmethod + cdef DeviceBuffer c_to_device(const unsigned char[::1] b, + Stream stream=*) except * + cpdef copy_to_host(self, ary=*, Stream stream=*) + cpdef copy_from_host(self, ary, Stream stream=*) + cpdef copy_from_device(self, cuda_ary, Stream stream=*) + cpdef bytes tobytes(self, Stream stream=*) + + cdef size_t c_size(self) except * + cpdef void reserve(self, size_t new_capacity, Stream stream=*) except * + cpdef void resize(self, size_t new_size, Stream stream=*) except * + cpdef size_t capacity(self) except * + cdef void* c_data(self) except * + + cdef device_buffer c_release(self) except * + +cpdef DeviceBuffer to_device(const unsigned char[::1] b, + Stream stream=*) +cpdef void copy_ptr_to_host(uintptr_t db, + unsigned char[::1] hb, + Stream stream=*) except * + +cpdef void copy_host_to_ptr(const unsigned char[::1] hb, + uintptr_t db, + Stream stream=*) except * + +cpdef void copy_device_to_ptr(uintptr_t d_src, + uintptr_t d_dst, + size_t count, + Stream stream=*) except * diff --git a/python/rmm/rmm/python/device_buffer.pyx b/python/rmm/rmm/python/device_buffer.pyx new file mode 100644 index 000000000..1d564ff33 --- /dev/null +++ b/python/rmm/rmm/python/device_buffer.pyx @@ -0,0 +1,559 @@ +# Copyright (c) 2019-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np + +cimport cython +from cpython.bytes cimport PyBytes_FromStringAndSize +from libc.stdint cimport uintptr_t +from libcpp.memory cimport unique_ptr +from libcpp.utility cimport move + +from rmm._cuda.stream cimport Stream + +from rmm._cuda.stream import DEFAULT_STREAM + +cimport cuda.ccudart as ccudart +from cuda.ccudart cimport ( + cudaError, + cudaError_t, + cudaMemcpyAsync, + cudaMemcpyKind, + cudaStream_t, +) + +from rmm.cpp.cpp_cuda_stream_view cimport cuda_stream_view +from rmm.cpp.cpp_device_buffer cimport ( + cuda_device_id, + device_buffer, + get_current_cuda_device, + prefetch, +) +from rmm.python.memory_resource cimport ( + DeviceMemoryResource, + device_memory_resource, + get_current_device_resource, +) + + +# The DeviceMemoryResource attribute could be released prematurely +# by the gc if the DeviceBuffer is in a reference cycle. Removing +# the tp_clear function with the no_gc_clear decoration prevents that. +# See https://github.com/rapidsai/rmm/pull/931 for details. 
+@cython.no_gc_clear +cdef class DeviceBuffer: + + def __cinit__(self, *, + uintptr_t ptr=0, + size_t size=0, + Stream stream=DEFAULT_STREAM, + DeviceMemoryResource mr=None): + """Construct a ``DeviceBuffer`` with optional size and data pointer + + Parameters + ---------- + ptr : int + pointer to some data on host or device to copy over + size : int + size of the buffer to allocate + (and possibly size of data to copy) + stream : optional + CUDA stream to use for construction and/or copying, + defaults to the CUDA default stream. A reference to the + stream is stored internally to ensure it doesn't go out of + scope while the DeviceBuffer is in use. Destroying the + underlying stream while the DeviceBuffer is in use will + result in undefined behavior. + mr : optional + DeviceMemoryResource for the allocation, if not provided + defaults to the current device resource. + + Note + ---- + If the pointer passed is non-null and ``stream`` is the default stream, + it is synchronized after the copy. However if a non-default ``stream`` + is provided, this function is fully asynchronous. + + Examples + -------- + >>> import rmm + >>> db = rmm.DeviceBuffer(size=5) + """ + cdef const void* c_ptr + cdef device_memory_resource * mr_ptr + # Save a reference to the MR and stream used for allocation + self.mr = get_current_device_resource() if mr is None else mr + self.stream = stream + + mr_ptr = self.mr.get_mr() + with nogil: + c_ptr = ptr + + if c_ptr == NULL or size == 0: + self.c_obj.reset(new device_buffer(size, stream.view(), mr_ptr)) + else: + self.c_obj.reset(new device_buffer(c_ptr, size, stream.view(), mr_ptr)) + + if stream.c_is_default(): + stream.c_synchronize() + + def __len__(self): + return self.size + + def __sizeof__(self): + return self.size + + def __bytes__(self): + return self.tobytes() + + @property + def nbytes(self): + """Gets the size of the buffer in bytes.""" + return self.size + + @property + def ptr(self): + """Gets a pointer to the underlying data.""" + return int(self.c_data()) + + @property + def size(self): + """Gets the size of the buffer in bytes.""" + return int(self.c_size()) + + def __reduce__(self): + return to_device, (self.copy_to_host(),) + + @property + def __cuda_array_interface__(self): + cdef dict intf = { + "data": (self.ptr, False), + "shape": (self.size,), + "strides": None, + "typestr": "|u1", + "version": 0 + } + return intf + + def prefetch(self, device=None, stream=None): + """Prefetch buffer data to the specified device on the specified stream. + + Assumes the storage for this DeviceBuffer is CUDA managed memory + (unified memory). If it is not, this function is a no-op. + + Parameters + ---------- + device : optional + The CUDA device to which to prefetch the memory for this buffer. + Defaults to the current CUDA device. To prefetch to the CPU, pass + :py:attr:`~cuda.cudart.cudaCpuDeviceId` as the device. + stream : optional + CUDA stream to use for prefetching. Defaults to self.stream + """ + cdef cuda_device_id dev = (get_current_cuda_device() + if device is None + else cuda_device_id(device)) + cdef Stream strm = self.stream if stream is None else stream + with nogil: + prefetch(self.c_obj.get()[0].data(), + self.c_obj.get()[0].size(), + dev, + strm.view()) + + def copy(self): + """Returns a copy of DeviceBuffer. 
+ + Returns + ------- + A deep copy of existing ``DeviceBuffer`` + + Examples + -------- + >>> import rmm + >>> db = rmm.DeviceBuffer.to_device(b"abc") + >>> db_copy = db.copy() + >>> db.copy_to_host() + array([97, 98, 99], dtype=uint8) + >>> db_copy.copy_to_host() + array([97, 98, 99], dtype=uint8) + >>> assert db is not db_copy + >>> assert db.ptr != db_copy.ptr + """ + ret = DeviceBuffer(ptr=self.ptr, size=self.size, stream=self.stream) + ret.mr = self.mr + return ret + + def __copy__(self): + return self.copy() + + @staticmethod + cdef DeviceBuffer c_from_unique_ptr( + unique_ptr[device_buffer] ptr, + Stream stream=DEFAULT_STREAM, + DeviceMemoryResource mr=None, + ): + cdef DeviceBuffer buf = DeviceBuffer.__new__(DeviceBuffer) + if stream.c_is_default(): + stream.c_synchronize() + buf.c_obj = move(ptr) + buf.mr = get_current_device_resource() if mr is None else mr + buf.stream = stream + return buf + + @staticmethod + cdef DeviceBuffer c_to_device(const unsigned char[::1] b, + Stream stream=DEFAULT_STREAM) except *: + """Calls ``to_device`` function on arguments provided""" + return to_device(b, stream) + + @staticmethod + def to_device(const unsigned char[::1] b, + Stream stream=DEFAULT_STREAM): + """Calls ``to_device`` function on arguments provided.""" + return to_device(b, stream) + + cpdef copy_to_host(self, ary=None, Stream stream=DEFAULT_STREAM): + """Copy from a ``DeviceBuffer`` to a buffer on host. + + Parameters + ---------- + ary : ``bytes``-like buffer to write into + stream : CUDA stream to use for copying, default the default stream + + Examples + -------- + >>> import rmm + >>> db = rmm.DeviceBuffer.to_device(b"abc") + >>> hb = bytearray(db.nbytes) + >>> db.copy_to_host(hb) + >>> print(hb) + bytearray(b'abc') + >>> hb = db.copy_to_host() + >>> print(hb) + bytearray(b'abc') + """ + cdef const device_buffer* dbp = self.c_obj.get() + cdef size_t s = dbp.size() + + cdef unsigned char[::1] hb = ary + if hb is None: + # NumPy leverages huge pages under-the-hood, + # which speeds up the copy from device to host. + hb = ary = np.empty((s,), dtype="u1") + elif len(hb) < s: + raise ValueError( + "Argument `ary` is too small. Need space for %i bytes." % s + ) + + copy_ptr_to_host(dbp.data(), hb[:s], stream) + + return ary + + cpdef copy_from_host(self, ary, Stream stream=DEFAULT_STREAM): + """Copy from a buffer on host to ``self`` + + Parameters + ---------- + ary : ``bytes``-like buffer to copy from + stream : CUDA stream to use for copying, default the default stream + + Examples + -------- + >>> import rmm + >>> db = rmm.DeviceBuffer(size=10) + >>> hb = b"abcdef" + >>> db.copy_from_host(hb) + >>> hb = db.copy_to_host() + >>> print(hb) + array([97, 98, 99, 0, 0, 0, 0, 0, 0, 0], dtype=uint8) + """ + cdef device_buffer* dbp = self.c_obj.get() + + cdef const unsigned char[::1] hb = ary + cdef size_t s = len(hb) + if s > self.size: + raise ValueError( + "Argument `ary` is too large. Need space for %i bytes." 
% s + ) + + copy_host_to_ptr(hb[:s], dbp.data(), stream) + + cpdef copy_from_device(self, cuda_ary, + Stream stream=DEFAULT_STREAM): + """Copy from a buffer on host to ``self`` + + Parameters + ---------- + cuda_ary : object to copy from that has ``__cuda_array_interface__`` + stream : CUDA stream to use for copying, default the default stream + + Examples + -------- + >>> import rmm + >>> db = rmm.DeviceBuffer(size=5) + >>> db2 = rmm.DeviceBuffer.to_device(b"abc") + >>> db.copy_from_device(db2) + >>> hb = db.copy_to_host() + >>> print(hb) + array([97, 98, 99, 0, 0], dtype=uint8) + """ + if not hasattr(cuda_ary, "__cuda_array_interface__"): + raise ValueError( + "Expected object to support `__cuda_array_interface__` " + "protocol" + ) + + cuda_ary_interface = cuda_ary.__cuda_array_interface__ + shape = cuda_ary_interface["shape"] + strides = cuda_ary_interface.get("strides") + dtype = np.dtype(cuda_ary_interface["typestr"]) + + if len(shape) > 1: + raise ValueError( + "Only 1-D contiguous arrays are supported, got {}-D " + "array".format(str(len(shape))) + ) + + if strides is not None: + if strides[0] != dtype.itemsize: + raise ValueError( + "Only 1-D contiguous arrays are supported, got a " + "non-contiguous array" + ) + + cdef uintptr_t src_ptr = cuda_ary_interface["data"][0] + cdef size_t s = shape[0] * dtype.itemsize + if s > self.size: + raise ValueError( + "Argument `hb` is too large. Need space for %i bytes." % s + ) + + cdef device_buffer* dbp = self.c_obj.get() + + copy_device_to_ptr( + src_ptr, + dbp.data(), + s, + stream + ) + + cpdef bytes tobytes(self, Stream stream=DEFAULT_STREAM): + cdef const device_buffer* dbp = self.c_obj.get() + cdef size_t s = dbp.size() + + cdef bytes b = PyBytes_FromStringAndSize(NULL, s) + cdef unsigned char* p = b + cdef unsigned char[::1] mv = (p)[:s] + self.copy_to_host(mv, stream) + + return b + + cdef size_t c_size(self) except *: + return self.c_obj.get()[0].size() + + cpdef void reserve(self, + size_t new_capacity, + Stream stream=DEFAULT_STREAM) except *: + self.c_obj.get()[0].reserve(new_capacity, stream.view()) + + cpdef void resize(self, + size_t new_size, + Stream stream=DEFAULT_STREAM) except *: + self.c_obj.get()[0].resize(new_size, stream.view()) + + cpdef size_t capacity(self) except *: + return self.c_obj.get()[0].capacity() + + cdef void* c_data(self) except *: + return self.c_obj.get()[0].data() + + cdef device_buffer c_release(self) except *: + """ + Releases ownership of the data held by this DeviceBuffer. + """ + return move(cython.operator.dereference(self.c_obj)) + + +@cython.boundscheck(False) +cpdef DeviceBuffer to_device(const unsigned char[::1] b, + Stream stream=DEFAULT_STREAM): + """Return a new ``DeviceBuffer`` with a copy of the data. 
+ + Parameters + ---------- + b : ``bytes``-like data on host to copy to device + stream : CUDA stream to use for copying, default the default stream + + Returns + ------- + ``DeviceBuffer`` with copy of data from host + + Examples + -------- + >>> import rmm + >>> db = rmm._lib.device_buffer.to_device(b"abc") + >>> print(bytes(db)) + b'abc' + """ + + if b is None: + raise TypeError( + "Argument 'b' has incorrect type" + " (expected bytes-like, got NoneType)" + ) + + cdef uintptr_t p = &b[0] + cdef size_t s = len(b) + return DeviceBuffer(ptr=p, size=s, stream=stream) + + +@cython.boundscheck(False) +cdef void _copy_async(const void* src, + void* dst, + size_t count, + ccudart.cudaMemcpyKind kind, + cuda_stream_view stream) except * nogil: + """ + Asynchronously copy data between host and/or device pointers. + + This is a convenience wrapper around cudaMemcpyAsync that + checks for errors. Only used for internal implementation. + + Parameters + ---------- + src : pointer to ``bytes``-like host buffer or device data to copy from + dst : pointer to ``bytes``-like host buffer or device data to copy into + count : the size in bytes to copy + kind : the kind of copy to perform + stream : CUDA stream to use for copying, default the default stream + """ + cdef cudaError_t err = cudaMemcpyAsync(dst, src, count, kind, + stream) + + if err != cudaError.cudaSuccess: + raise RuntimeError(f"Memcpy failed with error: {err}") + + +@cython.boundscheck(False) +cpdef void copy_ptr_to_host(uintptr_t db, + unsigned char[::1] hb, + Stream stream=DEFAULT_STREAM) except *: + """Copy from a device pointer to a buffer on host + + Parameters + ---------- + db : pointer to data on device to copy + hb : ``bytes``-like buffer to write into + stream : CUDA stream to use for copying, default the default stream + + Note + ---- + If ``stream`` is the default stream, it is synchronized after the copy. + However if a non-default ``stream`` is provided, this function is fully + asynchronous. + + Examples + -------- + >>> import rmm + >>> db = rmm.DeviceBuffer.to_device(b"abc") + >>> hb = bytearray(db.nbytes) + >>> rmm._lib.device_buffer.copy_ptr_to_host(db.ptr, hb) + >>> print(hb) + bytearray(b'abc') + """ + + if hb is None: + raise TypeError( + "Argument `hb` has incorrect type" + " (expected bytes-like, got NoneType)" + ) + + with nogil: + _copy_async(db, &hb[0], len(hb), + cudaMemcpyKind.cudaMemcpyDeviceToHost, stream.view()) + + if stream.c_is_default(): + stream.c_synchronize() + + +@cython.boundscheck(False) +cpdef void copy_host_to_ptr(const unsigned char[::1] hb, + uintptr_t db, + Stream stream=DEFAULT_STREAM) except *: + """Copy from a host pointer to a device pointer + + Parameters + ---------- + hb : ``bytes``-like host buffer to copy + db : pointer to data on device to write into + stream : CUDA stream to use for copying, default the default stream + + Note + ---- + If ``stream`` is the default stream, it is synchronized after the copy. + However if a non-default ``stream`` is provided, this function is fully + asynchronous. 
+ + Examples + -------- + >>> import rmm + >>> db = rmm.DeviceBuffer(size=10) + >>> hb = b"abc" + >>> rmm._lib.device_buffer.copy_host_to_ptr(hb, db.ptr) + >>> hb = db.copy_to_host() + >>> print(hb) + array([97, 98, 99, 0, 0, 0, 0, 0, 0, 0], dtype=uint8) + """ + + if hb is None: + raise TypeError( + "Argument `hb` has incorrect type" + " (expected bytes-like, got NoneType)" + ) + + with nogil: + _copy_async(&hb[0], db, len(hb), + cudaMemcpyKind.cudaMemcpyHostToDevice, stream.view()) + + if stream.c_is_default(): + stream.c_synchronize() + + +@cython.boundscheck(False) +cpdef void copy_device_to_ptr(uintptr_t d_src, + uintptr_t d_dst, + size_t count, + Stream stream=DEFAULT_STREAM) except *: + """Copy from a device pointer to a device pointer + + Parameters + ---------- + d_src : pointer to data on device to copy from + d_dst : pointer to data on device to write into + count : the size in bytes to copy + stream : CUDA stream to use for copying, default the default stream + + Examples + -------- + >>> import rmm + >>> db = rmm.DeviceBuffer(size=5) + >>> db2 = rmm.DeviceBuffer.to_device(b"abc") + >>> rmm._lib.device_buffer.copy_device_to_ptr(db2.ptr, db.ptr, db2.size) + >>> hb = db.copy_to_host() + >>> hb + array([97, 98, 99, 0, 0], dtype=uint8) + """ + + with nogil: + _copy_async(d_src, d_dst, count, + cudaMemcpyKind.cudaMemcpyDeviceToDevice, stream.view()) diff --git a/python/rmm/rmm/python/helper.pxd b/python/rmm/rmm/python/helper.pxd new file mode 100644 index 000000000..8ca151c00 --- /dev/null +++ b/python/rmm/rmm/python/helper.pxd @@ -0,0 +1,16 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +cdef object parse_bytes(object s) except * diff --git a/python/rmm/rmm/python/helper.pyx b/python/rmm/rmm/python/helper.pyx new file mode 100644 index 000000000..d442ee341 --- /dev/null +++ b/python/rmm/rmm/python/helper.pyx @@ -0,0 +1,78 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Helper functions for rmm""" + +import re + + +cdef dict BYTE_SIZES = { + 'b': 1, + '': 1, + 'kb': 1000, + 'mb': 1000**2, + 'gb': 1000**3, + 'tb': 1000**4, + 'pb': 1000**5, + 'kib': 1024, + 'mib': 1024**2, + 'gib': 1024**3, + 'tib': 1024**4, + 'pib': 1024**5, +} + + +pattern = re.compile(r"^([0-9]+(?:\.[0-9]*)?)[\t ]*((?i:(?:[kmgtp]i?)?b))?$") + +cdef object parse_bytes(object s): + """Parse a string or integer into a number of bytes. + + Parameters + ---------- + s : int | str + Size in bytes. 
If an integer is provided, it is returned as-is. + A string is parsed as a floating point number with an (optional, + case-insensitive) byte-specifier, both SI prefixes (kb, mb, ..., pb) + and binary prefixes (kib, mib, ..., pib) are supported. + + Returns + ------- + Requested size in bytes as an integer. + + Raises + ------ + ValueError + If it is not possible to parse the input as a byte specification. + """ + cdef str suffix + cdef double n + cdef int multiplier + + if isinstance(s, int): + return s + + match = pattern.match(s) + + if match is None: + raise ValueError(f"Could not parse {s} as a byte specification") + + n = float(match.group(1)) + + suffix = match.group(2) + if suffix is None: + suffix = "" + + multiplier = BYTE_SIZES[suffix.lower()] + + return int(n*multiplier) diff --git a/python/rmm/rmm/python/lib.pxd b/python/rmm/rmm/python/lib.pxd new file mode 100644 index 000000000..b61e0d569 --- /dev/null +++ b/python/rmm/rmm/python/lib.pxd @@ -0,0 +1,17 @@ +# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from libcpp.utility cimport pair + +ctypedef pair[const char*, unsigned int] caller_pair diff --git a/python/rmm/rmm/python/lib.pyx b/python/rmm/rmm/python/lib.pyx new file mode 100644 index 000000000..46753baa3 --- /dev/null +++ b/python/rmm/rmm/python/lib.pyx @@ -0,0 +1,13 @@ +# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/python/rmm/rmm/python/logger.pyx b/python/rmm/rmm/python/logger.pyx new file mode 100644 index 000000000..2997ff633 --- /dev/null +++ b/python/rmm/rmm/python/logger.pyx @@ -0,0 +1,208 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/python/rmm/rmm/python/logger.pyx b/python/rmm/rmm/python/logger.pyx
new file mode 100644
index 000000000..2997ff633
--- /dev/null
+++ b/python/rmm/rmm/python/logger.pyx
@@ -0,0 +1,208 @@
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import warnings
+from rmm.cpp.cpp_logging cimport logging_level, logger
+
+
+def _validate_level_type(level):
+    if not isinstance(level, logging_level):
+        raise TypeError("level must be an instance of the logging_level enum")
+
+
+def should_log(level):
+    """
+    Check if a message at the given level would be logged.
+
+    A message at the given level would be logged if the current debug logging
+    level is set to a level that is at least as verbose as the given level,
+    *and* the RMM module is compiled for a logging level at least as verbose.
+    If these conditions are not both met, this function will return False.
+
+    Debug logging prints messages to a log file. See
+    `Debug Logging `_
+    for more information.
+
+    Parameters
+    ----------
+    level : logging_level
+        The debug logging level. Valid values are instances of the
+        ``logging_level`` enum.
+
+    Returns
+    -------
+    should_log : bool
+        True if a message at the given level would be logged, False otherwise.
+
+    Raises
+    ------
+    TypeError
+        If the logging level is not an instance of the ``logging_level`` enum.
+    """
+    _validate_level_type(level)
+    return logger().should_log(level)
+
+
+def set_logging_level(level):
+    """
+    Set the debug logging level.
+
+    Debug logging prints messages to a log file. See
+    `Debug Logging `_
+    for more information.
+
+    Parameters
+    ----------
+    level : logging_level
+        The debug logging level. Valid values are instances of the
+        ``logging_level`` enum.
+
+    Raises
+    ------
+    TypeError
+        If the logging level is not an instance of the ``logging_level`` enum.
+
+    See Also
+    --------
+    get_logging_level : Get the current debug logging level.
+
+    Examples
+    --------
+    >>> import rmm
+    >>> rmm.set_logging_level(rmm.logging_level.WARN) # set logging level to warn
+    """
+    _validate_level_type(level)
+    logger().set_level(level)
+
+    if not should_log(level):
+        warnings.warn(f"RMM will not log logging_level.{level.name}. This "
+                      "may be because the C++ library is compiled for a "
+                      "less-verbose logging level.")
+
+
+def get_logging_level():
+    """
+    Get the current debug logging level.
+
+    Debug logging prints messages to a log file. See
+    `Debug Logging `_
+    for more information.
+
+    Returns
+    -------
+    level : logging_level
+        The current debug logging level, an instance of the ``logging_level``
+        enum.
+
+    See Also
+    --------
+    set_logging_level : Set the debug logging level.
+
+    Examples
+    --------
+    >>> import rmm
+    >>> rmm.get_logging_level() # get current logging level
+
+    """
+    return logging_level(logger().level())
+
+
+def flush_logger():
+    """
+    Flush the debug logger. This will cause any buffered log messages to
+    be written to the log file.
+
+    Debug logging prints messages to a log file. See
+    `Debug Logging `_
+    for more information.
+
+    See Also
+    --------
+    set_flush_level : Set the flush level for the debug logger.
+    get_flush_level : Get the current debug logging flush level.
+
+    Examples
+    --------
+    >>> import rmm
+    >>> rmm.flush_logger() # flush the logger
+    """
+    logger().flush()
+
+
+def set_flush_level(level):
+    """
+    Set the flush level for the debug logger. Messages of this level or higher
+    will automatically flush to the file.
+
+    Debug logging prints messages to a log file. See
+    `Debug Logging `_
+    for more information.
+
+    Parameters
+    ----------
+    level : logging_level
+        The debug logging level. Valid values are instances of the
+        ``logging_level`` enum.
+
+    Raises
+    ------
+    TypeError
+        If the logging level is not an instance of the ``logging_level`` enum.
+
+    See Also
+    --------
+    get_flush_level : Get the current debug logging flush level.
+    flush_logger : Flush the logger.
+
+    Examples
+    --------
+    >>> import rmm
+    >>> rmm.set_flush_level(rmm.logging_level.WARN) # set flush level to warn
+    """
+    _validate_level_type(level)
+    logger().flush_on(level)
+
+    if not should_log(level):
+        warnings.warn(f"RMM will not log logging_level.{level.name}. This "
+                      "may be because the C++ library is compiled for a "
+                      "less-verbose logging level.")
+
+
+def get_flush_level():
+    """
+    Get the current debug logging flush level for the RMM logger. Messages of
+    this level or higher will automatically flush to the file.
+
+    Debug logging prints messages to a log file. See
+    `Debug Logging `_
+    for more information.
+
+    Returns
+    -------
+    logging_level
+        The current flush level, an instance of the ``logging_level``
+        enum.
+
+    See Also
+    --------
+    set_flush_level : Set the flush level for the logger.
+    flush_logger : Flush the logger.
+
+    Examples
+    --------
+    >>> import rmm
+    >>> rmm.get_flush_level() # get current flush level
+
+    """
+    return logging_level(logger().flush_level())
diff --git a/python/rmm/rmm/python/memory_resource.pxd b/python/rmm/rmm/python/memory_resource.pxd
new file mode 100644
index 000000000..0adda4eaf
--- /dev/null
+++ b/python/rmm/rmm/python/memory_resource.pxd
@@ -0,0 +1,83 @@
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
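# Illustrative usage sketch for the debug-logging helpers defined in logger.pyx
# above (editorial, not part of the patch). It assumes these functions are
# re-exported at the package top level, as the docstring examples suggest, and
# that the C++ library was compiled with a sufficiently verbose logging level.
import rmm

rmm.set_logging_level(rmm.logging_level.DEBUG)   # raise run-time verbosity
if rmm.should_log(rmm.logging_level.DEBUG):
    print("DEBUG messages will be written to the RMM log file")

rmm.set_flush_level(rmm.logging_level.WARN)      # auto-flush WARN and above
rmm.flush_logger()                               # write out anything buffered
print(rmm.get_logging_level(), rmm.get_flush_level())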
+ +from libcpp.memory cimport shared_ptr + +from rmm.cpp.cpp_memory_resource cimport device_memory_resource + + +cdef class DeviceMemoryResource: + cdef shared_ptr[device_memory_resource] c_obj + cdef device_memory_resource* get_mr(self) noexcept nogil + +cdef class UpstreamResourceAdaptor(DeviceMemoryResource): + cdef readonly DeviceMemoryResource upstream_mr + + cpdef DeviceMemoryResource get_upstream(self) + +cdef class CudaMemoryResource(DeviceMemoryResource): + pass + +cdef class ManagedMemoryResource(DeviceMemoryResource): + pass + +cdef class SystemMemoryResource(DeviceMemoryResource): + pass + +cdef class SamHeadroomMemoryResource(DeviceMemoryResource): + pass + +cdef class CudaAsyncMemoryResource(DeviceMemoryResource): + pass + +cdef class PoolMemoryResource(UpstreamResourceAdaptor): + pass + +cdef class FixedSizeMemoryResource(UpstreamResourceAdaptor): + pass + +cdef class BinningMemoryResource(UpstreamResourceAdaptor): + + cdef readonly list _bin_mrs + + cpdef add_bin( + self, + size_t allocation_size, + DeviceMemoryResource bin_resource=*) + +cdef class CallbackMemoryResource(DeviceMemoryResource): + cdef object _allocate_func + cdef object _deallocate_func + +cdef class LimitingResourceAdaptor(UpstreamResourceAdaptor): + pass + +cdef class LoggingResourceAdaptor(UpstreamResourceAdaptor): + cdef object _log_file_name + cpdef get_file_name(self) + cpdef flush(self) + +cdef class StatisticsResourceAdaptor(UpstreamResourceAdaptor): + pass + +cdef class TrackingResourceAdaptor(UpstreamResourceAdaptor): + pass + +cdef class FailureCallbackResourceAdaptor(UpstreamResourceAdaptor): + cdef object _callback + +cdef class PrefetchResourceAdaptor(UpstreamResourceAdaptor): + pass + +cpdef DeviceMemoryResource get_current_device_resource() diff --git a/python/rmm/rmm/python/memory_resource.pyx b/python/rmm/rmm/python/memory_resource.pyx new file mode 100644 index 000000000..46613667c --- /dev/null +++ b/python/rmm/rmm/python/memory_resource.pyx @@ -0,0 +1,1170 @@ +# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
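# Illustrative sketch of how the classes declared in memory_resource.pxd above
# compose (editorial, not part of the patch): upstream-tracking adaptors wrap a
# base resource, and the result is installed as the current device resource.
# The sizes and the choice of adaptors are arbitrary examples.
import rmm

base = rmm.mr.CudaMemoryResource()
pool = rmm.mr.PoolMemoryResource(base, initial_pool_size="64MiB")
stats = rmm.mr.StatisticsResourceAdaptor(pool)
rmm.mr.set_current_device_resource(stats)

buf = rmm.DeviceBuffer(size=1024)             # allocated through stats -> pool -> cuda
print(stats.allocation_counts.current_bytes)  # non-zero while buf is alive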
+ +import os +import warnings +# This import is needed for Cython typing in translate_python_except_to_cpp +# See https://github.com/cython/cython/issues/5589 +from builtins import BaseException +from collections import defaultdict + +cimport cython +from cython.operator cimport dereference as deref +from libc.stddef cimport size_t +from libc.stdint cimport int8_t, uintptr_t +from libcpp cimport bool +from libcpp.memory cimport make_unique, unique_ptr +from libcpp.optional cimport optional +from libcpp.pair cimport pair + +from cuda.cudart import cudaError_t + +from rmm._cuda.gpu import CUDARuntimeError, getDevice, setDevice + +from rmm._cuda.stream cimport Stream + +from rmm._cuda.stream import DEFAULT_STREAM + +from rmm.cpp.cpp_cuda_stream_view cimport cuda_stream_view +from rmm.cpp.cpp_per_device_resource cimport ( + cuda_device_id, + set_per_device_resource as cpp_set_per_device_resource, +) +from rmm.python.helper cimport parse_bytes +from rmm.python.memory_resource cimport ( + available_device_memory as c_available_device_memory, + percent_of_free_device_memory as c_percent_of_free_device_memory, +) + +from rmm.statistics import Statistics + +from rmm.cpp.cpp_memory_resource cimport ( + CppExcept, + allocate_callback_t, + allocation_handle_type, + binning_memory_resource, + callback_memory_resource, + cuda_async_memory_resource, + cuda_memory_resource, + deallocate_callback_t, + device_memory_resource, + failure_callback_resource_adaptor, + failure_callback_t, + fixed_size_memory_resource, + limiting_resource_adaptor, + logging_resource_adaptor, + managed_memory_resource, + pool_memory_resource, + posix_file_descriptor, + prefetch_resource_adaptor, + sam_headroom_memory_resource, + statistics_resource_adaptor, + system_memory_resource, + throw_cpp_except, + tracking_resource_adaptor, + translate_python_except_to_cpp, +) + + +cdef class DeviceMemoryResource: + + cdef device_memory_resource* get_mr(self) noexcept nogil: + """Get the underlying C++ memory resource object.""" + return self.c_obj.get() + + def allocate(self, size_t nbytes, Stream stream=DEFAULT_STREAM): + """Allocate ``nbytes`` bytes of memory. + + Parameters + ---------- + nbytes : size_t + The size of the allocation in bytes + stream : Stream + Optional stream for the allocation + """ + return self.c_obj.get().allocate(nbytes, stream.view()) + + def deallocate(self, uintptr_t ptr, size_t nbytes, Stream stream=DEFAULT_STREAM): + """Deallocate memory pointed to by ``ptr`` of size ``nbytes``. + + Parameters + ---------- + ptr : uintptr_t + Pointer to be deallocated + nbytes : size_t + Size of the allocation in bytes + stream : Stream + Optional stream for the deallocation + """ + self.c_obj.get().deallocate((ptr), nbytes, stream.view()) + + +# See the note about `no_gc_clear` in `device_buffer.pyx`. +@cython.no_gc_clear +cdef class UpstreamResourceAdaptor(DeviceMemoryResource): + """Parent class for all memory resources that track an upstream. + + Upstream resource tracking requires maintaining a reference to the upstream + mr so that it is kept alive and may be accessed by any downstream resource + adaptors. 
+ """ + + def __cinit__(self, DeviceMemoryResource upstream_mr, *args, **kwargs): + + if (upstream_mr is None): + raise Exception("Argument `upstream_mr` must not be None") + + self.upstream_mr = upstream_mr + + def __dealloc__(self): + # Must cleanup the base MR before any upstream MR + self.c_obj.reset() + + cpdef DeviceMemoryResource get_upstream(self): + return self.upstream_mr + + +cdef class CudaMemoryResource(DeviceMemoryResource): + def __cinit__(self): + self.c_obj.reset( + new cuda_memory_resource() + ) + + def __init__(self): + """ + Memory resource that uses ``cudaMalloc``/``cudaFree`` for + allocation/deallocation. + """ + pass + + +cdef class CudaAsyncMemoryResource(DeviceMemoryResource): + """ + Memory resource that uses ``cudaMallocAsync``/``cudaFreeAsync`` for + allocation/deallocation. + + Parameters + ---------- + initial_pool_size : int | str, optional + Initial pool size in bytes. By default, half the available memory + on the device is used. A string argument is parsed using `parse_bytes`. + release_threshold: int, optional + Release threshold in bytes. If the pool size grows beyond this + value, unused memory held by the pool will be released at the + next synchronization point. + enable_ipc: bool, optional + If True, enables export of POSIX file descriptor handles for the memory + allocated by this resource so that it can be used with CUDA IPC. + """ + def __cinit__( + self, + initial_pool_size=None, + release_threshold=None, + enable_ipc=False + ): + cdef optional[size_t] c_initial_pool_size = ( + optional[size_t]() + if initial_pool_size is None + else optional[size_t]( parse_bytes(initial_pool_size)) + ) + + cdef optional[size_t] c_release_threshold = ( + optional[size_t]() + if release_threshold is None + else optional[size_t]( release_threshold) + ) + + # If IPC memory handles are not supported, the constructor below will + # raise an error from C++. + cdef optional[allocation_handle_type] c_export_handle_type = ( + optional[allocation_handle_type]( + posix_file_descriptor + ) + if enable_ipc + else optional[allocation_handle_type]() + ) + + self.c_obj.reset( + new cuda_async_memory_resource( + c_initial_pool_size, + c_release_threshold, + c_export_handle_type + ) + ) + + +cdef class ManagedMemoryResource(DeviceMemoryResource): + def __cinit__(self): + self.c_obj.reset( + new managed_memory_resource() + ) + + def __init__(self): + """ + Memory resource that uses ``cudaMallocManaged``/``cudaFree`` for + allocation/deallocation. + """ + pass + + +cdef class SystemMemoryResource(DeviceMemoryResource): + def __cinit__(self): + self.c_obj.reset( + new system_memory_resource() + ) + + def __init__(self): + """ + Memory resource that uses ``malloc``/``free`` for + allocation/deallocation. + """ + pass + + +cdef class SamHeadroomMemoryResource(DeviceMemoryResource): + def __cinit__( + self, + size_t headroom + ): + self.c_obj.reset(new sam_headroom_memory_resource(headroom)) + + def __init__( + self, + size_t headroom + ): + """ + Memory resource that uses ``malloc``/``free`` for + allocation/deallocation. 
+ + Parameters + ---------- + headroom : size_t + Size of the reserved GPU memory as headroom + """ + pass + + +cdef class PoolMemoryResource(UpstreamResourceAdaptor): + + def __cinit__( + self, + DeviceMemoryResource upstream_mr, + initial_pool_size=None, + maximum_pool_size=None + ): + cdef size_t c_initial_pool_size + cdef optional[size_t] c_maximum_pool_size + c_initial_pool_size = ( + c_percent_of_free_device_memory(50) if + initial_pool_size is None + else parse_bytes(initial_pool_size) + ) + c_maximum_pool_size = ( + optional[size_t]() if + maximum_pool_size is None + else optional[size_t]( parse_bytes(maximum_pool_size)) + ) + self.c_obj.reset( + new pool_memory_resource[device_memory_resource]( + upstream_mr.get_mr(), + c_initial_pool_size, + c_maximum_pool_size + ) + ) + + def __init__( + self, + DeviceMemoryResource upstream_mr, + object initial_pool_size=None, + object maximum_pool_size=None + ): + """ + Coalescing best-fit suballocator which uses a pool of memory allocated + from an upstream memory resource. + + Parameters + ---------- + upstream_mr : DeviceMemoryResource + The DeviceMemoryResource from which to allocate blocks for the + pool. + initial_pool_size : int | str, optional + Initial pool size in bytes. By default, half the available memory + on the device is used. + maximum_pool_size : int | str, optional + Maximum size in bytes, that the pool can grow to. + """ + pass + + def pool_size(self): + cdef pool_memory_resource[device_memory_resource]* c_mr = ( + (self.get_mr()) + ) + return c_mr.pool_size() + +cdef class FixedSizeMemoryResource(UpstreamResourceAdaptor): + def __cinit__( + self, + DeviceMemoryResource upstream_mr, + size_t block_size=1<<20, + size_t blocks_to_preallocate=128 + ): + self.c_obj.reset( + new fixed_size_memory_resource[device_memory_resource]( + upstream_mr.get_mr(), + block_size, + blocks_to_preallocate + ) + ) + + def __init__( + self, + DeviceMemoryResource upstream_mr, + size_t block_size=1<<20, + size_t blocks_to_preallocate=128 + ): + """ + Memory resource which allocates memory blocks of a single fixed size. + + Parameters + ---------- + upstream_mr : DeviceMemoryResource + The DeviceMemoryResource from which to allocate blocks for the + pool. + block_size : int, optional + The size of blocks to allocate (default is 1MiB). + blocks_to_preallocate : int, optional + The number of blocks to allocate to initialize the pool. + + Notes + ----- + Supports only allocations of size smaller than the configured + block_size. + """ + pass + + +cdef class BinningMemoryResource(UpstreamResourceAdaptor): + def __cinit__( + self, + DeviceMemoryResource upstream_mr, + int8_t min_size_exponent=-1, + int8_t max_size_exponent=-1, + ): + + self._bin_mrs = [] + + if (min_size_exponent == -1 or max_size_exponent == -1): + self.c_obj.reset( + new binning_memory_resource[device_memory_resource]( + upstream_mr.get_mr() + ) + ) + else: + self.c_obj.reset( + new binning_memory_resource[device_memory_resource]( + upstream_mr.get_mr(), + min_size_exponent, + max_size_exponent + ) + ) + + def __dealloc__(self): + + # Must cleanup the base MR before any upstream or referenced Bins + self.c_obj.reset() + + def __init__( + self, + DeviceMemoryResource upstream_mr, + int8_t min_size_exponent=-1, + int8_t max_size_exponent=-1, + ): + """ + Allocates memory from a set of specified "bin" sizes based on a + specified allocation size. 
+ + If min_size_exponent and max_size_exponent are specified, initializes + with one or more FixedSizeMemoryResource bins in the range + ``[2**min_size_exponent, 2**max_size_exponent]``. + + Call :py:meth:`~.add_bin` to add additional bin allocators. + + Parameters + ---------- + upstream_mr : DeviceMemoryResource + The memory resource to use for allocations larger than any of the + bins. + min_size_exponent : size_t + The base-2 exponent of the minimum size FixedSizeMemoryResource + bin to create. + max_size_exponent : size_t + The base-2 exponent of the maximum size FixedSizeMemoryResource + bin to create. + """ + pass + + cpdef add_bin( + self, + size_t allocation_size, + DeviceMemoryResource bin_resource=None + ): + """ + Adds a bin of the specified maximum allocation size to this memory + resource. If specified, uses bin_resource for allocation for this bin. + If not specified, creates and uses a FixedSizeMemoryResource for + allocation for this bin. + + Allocations smaller than allocation_size and larger than the next + smaller bin size will use this fixed-size memory resource. + + Parameters + ---------- + allocation_size : size_t + The maximum allocation size in bytes for the created bin + bin_resource : DeviceMemoryResource + The resource to use for this bin (optional) + """ + if bin_resource is None: + (( + self.c_obj.get()))[0].add_bin(allocation_size) + else: + # Save the ref to the new bin resource to ensure its lifetime + self._bin_mrs.append(bin_resource) + + (( + self.c_obj.get()))[0].add_bin( + allocation_size, + bin_resource.get_mr()) + + @property + def bin_mrs(self) -> list: + """Get the list of binned memory resources.""" + return self._bin_mrs + + +cdef void* _allocate_callback_wrapper( + size_t nbytes, + cuda_stream_view stream, + void* ctx + # Note that this function is specifically designed to rethrow Python + # exceptions as C++ exceptions when called as a callback from C++, so it is + # noexcept from Cython's perspective. +) noexcept nogil: + cdef CppExcept err + with gil: + try: + return ((ctx)( + nbytes, + Stream._from_cudaStream_t(stream.value()) + )) + except BaseException as e: + err = translate_python_except_to_cpp(e) + throw_cpp_except(err) + +cdef void _deallocate_callback_wrapper( + void* ptr, + size_t nbytes, + cuda_stream_view stream, + void* ctx +) except * with gil: + (ctx)((ptr), nbytes, Stream._from_cudaStream_t(stream.value())) + + +cdef class CallbackMemoryResource(DeviceMemoryResource): + """ + A memory resource that uses the user-provided callables to do + memory allocation and deallocation. + + ``CallbackMemoryResource`` should really only be used for + debugging memory issues, as there is a significant performance + penalty associated with using a Python function for each memory + allocation and deallocation. + + Parameters + ---------- + allocate_func: callable + The allocation function must accept two arguments. An integer + representing the number of bytes to allocate and a Stream on + which to perform the allocation, and return an integer + representing the pointer to the allocated memory. + deallocate_func: callable + The deallocation function must accept three arguments. an integer + representing the pointer to the memory to free, a second + integer representing the number of bytes to free, and a Stream + on which to perform the deallocation. + + Examples + -------- + >>> import rmm + >>> base_mr = rmm.mr.CudaMemoryResource() + >>> def allocate_func(size, stream): + ... print(f"Allocating {size} bytes") + ... 
return base_mr.allocate(size, stream) + ... + >>> def deallocate_func(ptr, size, stream): + ... print(f"Deallocating {size} bytes") + ... return base_mr.deallocate(ptr, size, stream) + ... + >>> rmm.mr.set_current_device_resource( + rmm.mr.CallbackMemoryResource(allocate_func, deallocate_func) + ) + >>> dbuf = rmm.DeviceBuffer(size=256) + Allocating 256 bytes + >>> del dbuf + Deallocating 256 bytes + """ + def __init__( + self, + allocate_func, + deallocate_func, + ): + self._allocate_func = allocate_func + self._deallocate_func = deallocate_func + self.c_obj.reset( + new callback_memory_resource( + (_allocate_callback_wrapper), + (_deallocate_callback_wrapper), + (allocate_func), + (deallocate_func) + ) + ) + + +def _append_id(filename, id): + """ + Append ".dev" onto a filename before the extension + + Example: _append_id("hello.txt", 1) returns "hello.dev1.txt" + + Parameters + ---------- + filename : string + The filename, possibly with extension + id : int + The ID to append + """ + name, ext = os.path.splitext(filename) + return f"{name}.dev{id}{ext}" + + +cdef class LimitingResourceAdaptor(UpstreamResourceAdaptor): + + def __cinit__( + self, + DeviceMemoryResource upstream_mr, + size_t allocation_limit + ): + self.c_obj.reset( + new limiting_resource_adaptor[device_memory_resource]( + upstream_mr.get_mr(), + allocation_limit + ) + ) + + def __init__( + self, + DeviceMemoryResource upstream_mr, + size_t allocation_limit + ): + """ + Memory resource that limits the total allocation amount possible + performed by an upstream memory resource. + + Parameters + ---------- + upstream_mr : DeviceMemoryResource + The upstream memory resource. + allocation_limit : size_t + Maximum memory allowed for this allocator. + """ + pass + + def get_allocated_bytes(self) -> size_t: + """ + Query the number of bytes that have been allocated. Note that this can + not be used to know how large of an allocation is possible due to both + possible fragmentation and also internal page sizes and alignment that + is not tracked by this allocator. + """ + return (( + self.c_obj.get()) + )[0].get_allocated_bytes() + + def get_allocation_limit(self) -> size_t: + """ + Query the maximum number of bytes that this allocator is allowed to + allocate. This is the limit on the allocator and not a representation + of the underlying device. The device may not be able to support this + limit. + """ + return (( + self.c_obj.get()) + )[0].get_allocation_limit() + + +cdef class LoggingResourceAdaptor(UpstreamResourceAdaptor): + def __cinit__( + self, + DeviceMemoryResource upstream_mr, + object log_file_name=None + ): + if log_file_name is None: + log_file_name = os.getenv("RMM_LOG_FILE") + if not log_file_name: + raise ValueError( + "RMM log file must be specified either using " + "log_file_name= argument or RMM_LOG_FILE " + "environment variable" + ) + + # Append the device ID before the file extension + log_file_name = _append_id( + log_file_name, getDevice() + ) + log_file_name = os.path.abspath(log_file_name) + self._log_file_name = log_file_name + + self.c_obj.reset( + new logging_resource_adaptor[device_memory_resource]( + upstream_mr.get_mr(), + log_file_name.encode() + ) + ) + + def __init__( + self, + DeviceMemoryResource upstream_mr, + object log_file_name=None + ): + """ + Memory resource that logs information about allocations/deallocations + performed by an upstream memory resource. + + Parameters + ---------- + upstream : DeviceMemoryResource + The upstream memory resource. 
+ log_file_name : str + Path to the file to which logs are written. + """ + pass + + cpdef flush(self): + (( + self.get_mr()))[0].flush() + + cpdef get_file_name(self): + return self._log_file_name + + def __dealloc__(self): + self.c_obj.reset() + +cdef class StatisticsResourceAdaptor(UpstreamResourceAdaptor): + + def __cinit__( + self, + DeviceMemoryResource upstream_mr + ): + self.c_obj.reset( + new statistics_resource_adaptor[device_memory_resource]( + upstream_mr.get_mr() + ) + ) + + def __init__( + self, + DeviceMemoryResource upstream_mr + ): + """ + Memory resource that tracks the current, peak and total + allocations/deallocations performed by an upstream memory resource. + Includes the ability to query these statistics at any time. + + A stack of counters is maintained. Use :meth:`push_counters` and + :meth:`pop_counters` to track statistics at different nesting levels. + + Parameters + ---------- + upstream : DeviceMemoryResource + The upstream memory resource. + """ + pass + + @property + def allocation_counts(self) -> Statistics: + """ + Gets the current, peak, and total allocated bytes and number of + allocations. + + The dictionary keys are ``current_bytes``, ``current_count``, + ``peak_bytes``, ``peak_count``, ``total_bytes``, and ``total_count``. + + Returns: + dict: Dictionary containing allocation counts and bytes. + """ + cdef statistics_resource_adaptor[device_memory_resource]* mr = \ + self.c_obj.get() + + counts = deref(mr).get_allocations_counter() + byte_counts = deref(mr).get_bytes_counter() + return Statistics( + current_bytes=byte_counts.value, + current_count=counts.value, + peak_bytes=byte_counts.peak, + peak_count=counts.peak, + total_bytes=byte_counts.total, + total_count=counts.total, + ) + + def pop_counters(self) -> Statistics: + """ + Pop a counter pair (bytes and allocations) from the stack + + Returns + ------- + The popped statistics + """ + cdef statistics_resource_adaptor[device_memory_resource]* mr = \ + self.c_obj.get() + + bytes_and_allocs = deref(mr).pop_counters() + return Statistics( + current_bytes=bytes_and_allocs.first.value, + current_count=bytes_and_allocs.second.value, + peak_bytes=bytes_and_allocs.first.peak, + peak_count=bytes_and_allocs.second.peak, + total_bytes=bytes_and_allocs.first.total, + total_count=bytes_and_allocs.second.total, + ) + + def push_counters(self) -> Statistics: + """ + Push a new counter pair (bytes and allocations) on the stack + + Returns + ------- + The statistics _before_ the push + """ + + cdef statistics_resource_adaptor[device_memory_resource]* mr = \ + self.c_obj.get() + + bytes_and_allocs = deref(mr).push_counters() + return Statistics( + current_bytes=bytes_and_allocs.first.value, + current_count=bytes_and_allocs.second.value, + peak_bytes=bytes_and_allocs.first.peak, + peak_count=bytes_and_allocs.second.peak, + total_bytes=bytes_and_allocs.first.total, + total_count=bytes_and_allocs.second.total, + ) + +cdef class TrackingResourceAdaptor(UpstreamResourceAdaptor): + + def __cinit__( + self, + DeviceMemoryResource upstream_mr, + bool capture_stacks=False + ): + self.c_obj.reset( + new tracking_resource_adaptor[device_memory_resource]( + upstream_mr.get_mr(), + capture_stacks + ) + ) + + def __init__( + self, + DeviceMemoryResource upstream_mr, + bool capture_stacks=False + ): + """ + Memory resource that logs tracks allocations/deallocations + performed by an upstream memory resource. Includes the ability to + query all outstanding allocations with the stack trace, if desired. 
+ + Parameters + ---------- + upstream : DeviceMemoryResource + The upstream memory resource. + capture_stacks : bool + Whether or not to capture the stack trace with each allocation. + """ + pass + + def get_allocated_bytes(self) -> size_t: + """ + Query the number of bytes that have been allocated. Note that this can + not be used to know how large of an allocation is possible due to both + possible fragmentation and also internal page sizes and alignment that + is not tracked by this allocator. + """ + return (( + self.c_obj.get()) + )[0].get_allocated_bytes() + + def get_outstanding_allocations_str(self) -> str: + """ + Returns a string containing information about the current outstanding + allocations. For each allocation, the address, size and optional + stack trace are shown. + """ + + return (( + self.c_obj.get()) + )[0].get_outstanding_allocations_str().decode('UTF-8') + + def log_outstanding_allocations(self): + """ + Logs the output of `get_outstanding_allocations_str` to the current + RMM log file if enabled. + """ + + (( + self.c_obj.get()))[0].log_outstanding_allocations() + + +# Note that this function is specifically designed to rethrow Python exceptions +# as C++ exceptions when called as a callback from C++, so it is noexcept from +# Cython's perspective. +cdef bool _oom_callback_function(size_t bytes, void *callback_arg) noexcept nogil: + cdef CppExcept err + with gil: + try: + return (callback_arg)(bytes) + except BaseException as e: + err = translate_python_except_to_cpp(e) + throw_cpp_except(err) + + +cdef class FailureCallbackResourceAdaptor(UpstreamResourceAdaptor): + + def __cinit__( + self, + DeviceMemoryResource upstream_mr, + object callback, + ): + self._callback = callback + self.c_obj.reset( + new failure_callback_resource_adaptor[device_memory_resource]( + upstream_mr.get_mr(), + _oom_callback_function, + callback + ) + ) + + def __init__( + self, + DeviceMemoryResource upstream_mr, + object callback, + ): + """ + Memory resource that call callback when memory allocation fails. + + Parameters + ---------- + upstream : DeviceMemoryResource + The upstream memory resource. + callback : callable + Function called when memory allocation fails. + """ + pass + +cdef class PrefetchResourceAdaptor(UpstreamResourceAdaptor): + + def __cinit__( + self, + DeviceMemoryResource upstream_mr + ): + self.c_obj.reset( + new prefetch_resource_adaptor[device_memory_resource]( + upstream_mr.get_mr() + ) + ) + + def __init__( + self, + DeviceMemoryResource upstream_mr + ): + """ + Memory resource that prefetches all allocations. + + Parameters + ---------- + upstream : DeviceMemoryResource + The upstream memory resource. 
+ """ + pass + + +# Global per-device memory resources; dict of int:DeviceMemoryResource +cdef _per_device_mrs = defaultdict(CudaMemoryResource) + + +cpdef void _initialize( + bool pool_allocator=False, + bool managed_memory=False, + object initial_pool_size=None, + object maximum_pool_size=None, + object devices=0, + bool logging=False, + object log_file_name=None, +) except *: + """ + Initializes RMM library using the options passed + """ + if managed_memory: + upstream = ManagedMemoryResource + else: + upstream = CudaMemoryResource + + if pool_allocator: + typ = PoolMemoryResource + args = (upstream(),) + kwargs = dict( + initial_pool_size=None if initial_pool_size is None + else parse_bytes(initial_pool_size), + maximum_pool_size=None if maximum_pool_size is None + else parse_bytes(maximum_pool_size) + ) + else: + typ = upstream + args = () + kwargs = {} + + cdef DeviceMemoryResource mr + cdef int original_device + + # Save the current device so we can reset it + try: + original_device = getDevice() + except CUDARuntimeError as e: + if e.status == cudaError_t.cudaErrorNoDevice: + warnings.warn(e.msg) + else: + raise e + else: + # reset any previously specified per device resources + global _per_device_mrs + _per_device_mrs.clear() + + if devices is None: + devices = [0] + elif isinstance(devices, int): + devices = [devices] + + # create a memory resource per specified device + for device in devices: + setDevice(device) + + if logging: + mr = LoggingResourceAdaptor( + typ(*args, **kwargs), + log_file_name + ) + else: + mr = typ(*args, **kwargs) + + set_per_device_resource(device, mr) + + # reset CUDA device to original + setDevice(original_device) + + +cpdef get_per_device_resource(int device): + """ + Get the default memory resource for the specified device. + + If the returned memory resource is used when a different device is the + active CUDA device, behavior is undefined. + + Parameters + ---------- + device : int + The ID of the device for which to get the memory resource. + """ + global _per_device_mrs + return _per_device_mrs[device] + + +cpdef set_per_device_resource(int device, DeviceMemoryResource mr): + """ + Set the default memory resource for the specified device. + + Parameters + ---------- + device : int + The ID of the device for which to get the memory resource. + mr : DeviceMemoryResource + The memory resource to set. Must have been created while device was + the active CUDA device. + """ + global _per_device_mrs + _per_device_mrs[device] = mr + + # Since cuda_device_id does not have a default constructor, it must be heap + # allocated + cdef unique_ptr[cuda_device_id] device_id = \ + make_unique[cuda_device_id](device) + + cpp_set_per_device_resource(deref(device_id), mr.get_mr()) + + +cpdef set_current_device_resource(DeviceMemoryResource mr): + """ + Set the default memory resource for the current device. + + Parameters + ---------- + mr : DeviceMemoryResource + The memory resource to set. Must have been created while the current + device is the active CUDA device. + """ + set_per_device_resource(getDevice(), mr) + + +cpdef get_per_device_resource_type(int device): + """ + Get the memory resource type used for RMM device allocations on the + specified device. + + Parameters + ---------- + device : int + The device ID + """ + return type(get_per_device_resource(device)) + + +cpdef DeviceMemoryResource get_current_device_resource(): + """ + Get the memory resource used for RMM device allocations on the current + device. 
+ + If the returned memory resource is used when a different device is the + active CUDA device, behavior is undefined. + """ + return get_per_device_resource(getDevice()) + + +cpdef get_current_device_resource_type(): + """ + Get the memory resource type used for RMM device allocations on the + current device. + """ + return type(get_current_device_resource()) + + +cpdef is_initialized(): + """ + Check whether RMM is initialized + """ + global _per_device_mrs + cdef DeviceMemoryResource each_mr + return all( + [each_mr.get_mr() is not NULL + for each_mr in _per_device_mrs.values()] + ) + + +cpdef _flush_logs(): + """ + Flush the logs of all currently initialized LoggingResourceAdaptor + memory resources + """ + global _per_device_mrs + cdef DeviceMemoryResource each_mr + for each_mr in _per_device_mrs.values(): + if isinstance(each_mr, LoggingResourceAdaptor): + each_mr.flush() + + +def enable_logging(log_file_name=None): + """ + Enable logging of run-time events for all devices. + + Parameters + ---------- + log_file_name: str, optional + Name of the log file. If not specified, the environment variable + RMM_LOG_FILE is used. A ValueError is thrown if neither is available. + A separate log file is produced for each device, + and the suffix `".dev{id}"` is automatically added to the log file + name. + + Notes + ----- + Note that if you use the environment variable CUDA_VISIBLE_DEVICES + with logging enabled, the suffix may not be what you expect. For + example, if you set CUDA_VISIBLE_DEVICES=1, the log file produced + will still have suffix `0`. Similarly, if you set + CUDA_VISIBLE_DEVICES=1,0 and use devices 0 and 1, the log file + with suffix `0` will correspond to the GPU with device ID `1`. + Use `rmm.get_log_filenames()` to get the log file names + corresponding to each device. + """ + global _per_device_mrs + + devices = [0] if not _per_device_mrs.keys() else _per_device_mrs.keys() + + for device in devices: + each_mr = _per_device_mrs[device] + if not isinstance(each_mr, LoggingResourceAdaptor): + set_per_device_resource( + device, + LoggingResourceAdaptor(each_mr, log_file_name) + ) + + +def disable_logging(): + """ + Disable logging if it was enabled previously using + `rmm.initialize()` or `rmm.enable_logging()`. + """ + global _per_device_mrs + for i, each_mr in _per_device_mrs.items(): + if isinstance(each_mr, LoggingResourceAdaptor): + set_per_device_resource(i, each_mr.get_upstream()) + + +def get_log_filenames(): + """ + Returns the log filename (or `None` if not writing logs) + for each device in use. + + Examples + -------- + >>> import rmm + >>> rmm.reinitialize(devices=[0, 1], logging=True, log_file_name="rmm.log") + >>> rmm.get_log_filenames() + {0: '/home/user/workspace/rapids/rmm/python/rmm.dev0.log', + 1: '/home/user/workspace/rapids/rmm/python/rmm.dev1.log'} + """ + global _per_device_mrs + + return { + i: each_mr.get_file_name() + if isinstance(each_mr, LoggingResourceAdaptor) + else None + for i, each_mr in _per_device_mrs.items() + } + + +def available_device_memory(): + """ + Returns a tuple of free and total device memory memory. 
+ """ + cdef pair[size_t, size_t] res + res = c_available_device_memory() + return (res.first, res.second) From 3ed73225f9d95b340949b06f5e1c326c715e77e3 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Tue, 10 Sep 2024 21:27:39 -0700 Subject: [PATCH 02/13] seperate out the rest of the cython --- python/rmm/rmm/__init__.py | 10 ++--- python/rmm/rmm/_cuda/stream.pxd | 4 +- python/rmm/rmm/_cuda/stream.pyx | 4 +- python/rmm/rmm/_lib/__init__.py | 4 +- .../rmm/rmm/_lib/tests/test_device_buffer.pyx | 7 ++-- python/rmm/rmm/allocators/cupy.py | 4 +- python/rmm/rmm/allocators/numba.py | 4 +- python/rmm/rmm/allocators/torch.py | 4 +- python/rmm/rmm/cpp/CMakeLists.txt | 2 +- .../rmm/{python/lib.pyx => cpp/__init__.py} | 0 .../rmm/cpp/{cpp_logger.pyx => _logger.pxd} | 0 python/rmm/rmm/cpp/_logger.pyx | 15 ++++++++ python/rmm/rmm/cpp/cpp_memory_resource.pxd | 34 ----------------- .../{cpp_cuda_stream.pxd => cuda_stream.pxd} | 2 +- ...a_stream_pool.pxd => cuda_stream_pool.pxd} | 2 +- ...a_stream_view.pxd => cuda_stream_view.pxd} | 0 ...pp_device_buffer.pxd => device_buffer.pxd} | 4 +- ..._device_uvector.pxd => device_uvector.pxd} | 0 python/rmm/rmm/{python => cpp}/lib.pxd | 2 +- python/rmm/rmm/cpp/lib.pyx | 13 +++++++ ...emory_resource.pyx => memory_resource.pxd} | 23 +++++++++-- ...e_resource.pxd => per_device_resource.pxd} | 2 +- python/rmm/rmm/mr.py | 2 +- python/rmm/rmm/python/CMakeLists.txt | 3 +- python/rmm/rmm/python/cuda_stream.pxd | 27 +++++++++++++ python/rmm/rmm/python/cuda_stream.pyx | 2 +- python/rmm/rmm/python/device_buffer.pxd | 2 +- python/rmm/rmm/python/device_buffer.pyx | 14 +++---- python/rmm/rmm/python/logger.pyx | 5 ++- python/rmm/rmm/python/memory_resource.pxd | 2 +- python/rmm/rmm/python/memory_resource.pyx | 12 +++--- python/rmm/rmm/python/tests/__init__.py | 0 .../rmm/python/tests/test_device_buffer.pyx | 38 +++++++++++++++++++ python/rmm/rmm/tests/test_cython.py | 4 +- python/rmm/rmm/tests/test_rmm.py | 2 +- 35 files changed, 165 insertions(+), 88 deletions(-) rename python/rmm/rmm/{python/lib.pyx => cpp/__init__.py} (100%) rename python/rmm/rmm/cpp/{cpp_logger.pyx => _logger.pxd} (100%) create mode 100644 python/rmm/rmm/cpp/_logger.pyx delete mode 100644 python/rmm/rmm/cpp/cpp_memory_resource.pxd rename python/rmm/rmm/cpp/{cpp_cuda_stream.pxd => cuda_stream.pxd} (94%) rename python/rmm/rmm/cpp/{cpp_cuda_stream_pool.pxd => cuda_stream_pool.pxd} (93%) rename python/rmm/rmm/cpp/{cpp_cuda_stream_view.pxd => cuda_stream_view.pxd} (100%) rename python/rmm/rmm/cpp/{cpp_device_buffer.pxd => device_buffer.pxd} (94%) rename python/rmm/rmm/cpp/{cpp_device_uvector.pxd => device_uvector.pxd} (100%) rename python/rmm/rmm/{python => cpp}/lib.pxd (93%) create mode 100644 python/rmm/rmm/cpp/lib.pyx rename python/rmm/rmm/cpp/{cpp_memory_resource.pyx => memory_resource.pxd} (90%) rename python/rmm/rmm/cpp/{cpp_per_device_resource.pxd => per_device_resource.pxd} (95%) create mode 100644 python/rmm/rmm/python/cuda_stream.pxd create mode 100644 python/rmm/rmm/python/tests/__init__.py create mode 100644 python/rmm/rmm/python/tests/test_device_buffer.pyx diff --git a/python/rmm/rmm/__init__.py b/python/rmm/rmm/__init__.py index 1e3b5c8b1..61c5e4561 100644 --- a/python/rmm/rmm/__init__.py +++ b/python/rmm/rmm/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2021, NVIDIA CORPORATION. +# Copyright (c) 2018-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -13,8 +13,10 @@ # limitations under the License. from rmm import mr -from rmm._lib.device_buffer import DeviceBuffer -from rmm._lib.logger import ( +from rmm._version import __git_commit__, __version__ +from rmm.mr import disable_logging, enable_logging, get_log_filenames +from rmm.python.device_buffer import DeviceBuffer +from rmm.python.logger import ( flush_logger, get_flush_level, get_logging_level, @@ -23,8 +25,6 @@ set_logging_level, should_log, ) -from rmm._version import __git_commit__, __version__ -from rmm.mr import disable_logging, enable_logging, get_log_filenames from rmm.rmm import ( RMMError, is_initialized, diff --git a/python/rmm/rmm/_cuda/stream.pxd b/python/rmm/rmm/_cuda/stream.pxd index 3c3d3aa6f..c978074fc 100644 --- a/python/rmm/rmm/_cuda/stream.pxd +++ b/python/rmm/rmm/_cuda/stream.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,7 +16,7 @@ from cuda.ccudart cimport cudaStream_t from libc.stdint cimport uintptr_t from libcpp cimport bool -from rmm._lib.cuda_stream_view cimport cuda_stream_view +from rmm.cpp.cuda_stream_view cimport cuda_stream_view cdef class Stream: diff --git a/python/rmm/rmm/_cuda/stream.pyx b/python/rmm/rmm/_cuda/stream.pyx index 4d5ff5232..3c6c10c18 100644 --- a/python/rmm/rmm/_cuda/stream.pyx +++ b/python/rmm/rmm/_cuda/stream.pyx @@ -16,13 +16,13 @@ from cuda.ccudart cimport cudaStream_t from libc.stdint cimport uintptr_t from libcpp cimport bool -from rmm._lib.cuda_stream cimport CudaStream -from rmm._lib.cuda_stream_view cimport ( +from rmm.cpp.cuda_stream_view cimport ( cuda_stream_default, cuda_stream_legacy, cuda_stream_per_thread, cuda_stream_view, ) +from rmm.python.cuda_stream cimport CudaStream cdef class Stream: diff --git a/python/rmm/rmm/_lib/__init__.py b/python/rmm/rmm/_lib/__init__.py index 0b8672ef6..f9462af65 100644 --- a/python/rmm/rmm/_lib/__init__.py +++ b/python/rmm/rmm/_lib/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2021, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .device_buffer import DeviceBuffer +# from .device_buffer import DeviceBuffer diff --git a/python/rmm/rmm/_lib/tests/test_device_buffer.pyx b/python/rmm/rmm/_lib/tests/test_device_buffer.pyx index 733383827..0783dd9c6 100644 --- a/python/rmm/rmm/_lib/tests/test_device_buffer.pyx +++ b/python/rmm/rmm/_lib/tests/test_device_buffer.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -16,8 +16,9 @@ import numpy as np from libcpp.memory cimport make_unique -from rmm._lib.cuda_stream_view cimport cuda_stream_default -from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer +from rmm.cpp.cuda_stream_view cimport cuda_stream_default +from rmm.cpp.device_buffer cimport device_buffer +from rmm.python.device_buffer cimport DeviceBuffer def test_release(): diff --git a/python/rmm/rmm/allocators/cupy.py b/python/rmm/rmm/allocators/cupy.py index 89947c46b..8cd41b3b3 100644 --- a/python/rmm/rmm/allocators/cupy.py +++ b/python/rmm/rmm/allocators/cupy.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from rmm import _lib as librmm +from rmm import python as librmm from rmm._cuda.stream import Stream try: diff --git a/python/rmm/rmm/allocators/numba.py b/python/rmm/rmm/allocators/numba.py index 5e87b87b6..9838b8b59 100644 --- a/python/rmm/rmm/allocators/numba.py +++ b/python/rmm/rmm/allocators/numba.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -19,7 +19,7 @@ from numba import config, cuda from numba.cuda import HostOnlyCUDAMemoryManager, IpcHandle, MemoryPointer -from rmm import _lib as librmm +from rmm import python as librmm def _make_emm_plugin_finalizer(handle, allocations): diff --git a/python/rmm/rmm/allocators/torch.py b/python/rmm/rmm/allocators/torch.py index 753da66da..8df921ad8 100644 --- a/python/rmm/rmm/allocators/torch.py +++ b/python/rmm/rmm/allocators/torch.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -29,7 +29,7 @@ # is pure Python and will therefore be in the source directory. # Instead, we search relative to an arbitrary file in the compiled # package. We use the _lib.lib module because it is small. - from rmm._lib import lib + from rmm.cpp import lib sofile = pathlib.Path(lib.__file__).parent / "_torch_allocator.so" rmm_torch_allocator = CUDAPluggableAllocator( diff --git a/python/rmm/rmm/cpp/CMakeLists.txt b/python/rmm/rmm/cpp/CMakeLists.txt index 8ac924df4..a201187b8 100644 --- a/python/rmm/rmm/cpp/CMakeLists.txt +++ b/python/rmm/rmm/cpp/CMakeLists.txt @@ -12,7 +12,7 @@ # the License. 
# ============================================================================= -set(cython_sources cpp_logger.pyx cpp_memory_resource.pyx) +set(cython_sources lib.pyx _logger.pyx) set(linked_libraries rmm::rmm) # Build all of the Cython targets diff --git a/python/rmm/rmm/python/lib.pyx b/python/rmm/rmm/cpp/__init__.py similarity index 100% rename from python/rmm/rmm/python/lib.pyx rename to python/rmm/rmm/cpp/__init__.py diff --git a/python/rmm/rmm/cpp/cpp_logger.pyx b/python/rmm/rmm/cpp/_logger.pxd similarity index 100% rename from python/rmm/rmm/cpp/cpp_logger.pyx rename to python/rmm/rmm/cpp/_logger.pxd diff --git a/python/rmm/rmm/cpp/_logger.pyx b/python/rmm/rmm/cpp/_logger.pyx new file mode 100644 index 000000000..e848c6dda --- /dev/null +++ b/python/rmm/rmm/cpp/_logger.pyx @@ -0,0 +1,15 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ._logger cimport logging_level # no-cython-lint diff --git a/python/rmm/rmm/cpp/cpp_memory_resource.pxd b/python/rmm/rmm/cpp/cpp_memory_resource.pxd deleted file mode 100644 index cef5d4737..000000000 --- a/python/rmm/rmm/cpp/cpp_memory_resource.pxd +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from libcpp.pair cimport pair - -from rmm.cpp.cpp_cuda_stream_view cimport cuda_stream_view - - -cdef extern from "rmm/mr/device/device_memory_resource.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass device_memory_resource: - void* allocate(size_t bytes) except + - void* allocate(size_t bytes, cuda_stream_view stream) except + - void deallocate(void* ptr, size_t bytes) except + - void deallocate( - void* ptr, - size_t bytes, - cuda_stream_view stream - ) except + - -cdef extern from "rmm/cuda_device.hpp" namespace "rmm" nogil: - size_t percent_of_free_device_memory(int percent) except + - pair[size_t, size_t] available_device_memory() except + diff --git a/python/rmm/rmm/cpp/cpp_cuda_stream.pxd b/python/rmm/rmm/cpp/cuda_stream.pxd similarity index 94% rename from python/rmm/rmm/cpp/cpp_cuda_stream.pxd rename to python/rmm/rmm/cpp/cuda_stream.pxd index 16b66ee2b..fcd84198c 100644 --- a/python/rmm/rmm/cpp/cpp_cuda_stream.pxd +++ b/python/rmm/rmm/cpp/cuda_stream.pxd @@ -15,7 +15,7 @@ from cuda.ccudart cimport cudaStream_t from libcpp cimport bool -from rmm.cpp.cpp_cuda_stream_view cimport cuda_stream_view +from rmm.cpp.cuda_stream_view cimport cuda_stream_view cdef extern from "rmm/cuda_stream.hpp" namespace "rmm" nogil: diff --git a/python/rmm/rmm/cpp/cpp_cuda_stream_pool.pxd b/python/rmm/rmm/cpp/cuda_stream_pool.pxd similarity index 93% rename from python/rmm/rmm/cpp/cpp_cuda_stream_pool.pxd rename to python/rmm/rmm/cpp/cuda_stream_pool.pxd index 553b38514..f72cfd7f6 100644 --- a/python/rmm/rmm/cpp/cpp_cuda_stream_pool.pxd +++ b/python/rmm/rmm/cpp/cuda_stream_pool.pxd @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from rmm.cpp.cpp_cuda_stream_view cimport cuda_stream_view +from rmm.cpp.cuda_stream_view cimport cuda_stream_view cdef extern from "rmm/cuda_stream_pool.hpp" namespace "rmm" nogil: diff --git a/python/rmm/rmm/cpp/cpp_cuda_stream_view.pxd b/python/rmm/rmm/cpp/cuda_stream_view.pxd similarity index 100% rename from python/rmm/rmm/cpp/cpp_cuda_stream_view.pxd rename to python/rmm/rmm/cpp/cuda_stream_view.pxd diff --git a/python/rmm/rmm/cpp/cpp_device_buffer.pxd b/python/rmm/rmm/cpp/device_buffer.pxd similarity index 94% rename from python/rmm/rmm/cpp/cpp_device_buffer.pxd rename to python/rmm/rmm/cpp/device_buffer.pxd index 1aa7634cf..a3801cf05 100644 --- a/python/rmm/rmm/cpp/cpp_device_buffer.pxd +++ b/python/rmm/rmm/cpp/device_buffer.pxd @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from rmm.cpp.cpp_cuda_stream_view cimport cuda_stream_view -from rmm.cpp.cpp_memory_resource cimport device_memory_resource +from rmm.cpp.cuda_stream_view cimport cuda_stream_view +from rmm.cpp.memory_resource cimport device_memory_resource cdef extern from "rmm/mr/device/per_device_resource.hpp" namespace "rmm" nogil: diff --git a/python/rmm/rmm/cpp/cpp_device_uvector.pxd b/python/rmm/rmm/cpp/device_uvector.pxd similarity index 100% rename from python/rmm/rmm/cpp/cpp_device_uvector.pxd rename to python/rmm/rmm/cpp/device_uvector.pxd diff --git a/python/rmm/rmm/python/lib.pxd b/python/rmm/rmm/cpp/lib.pxd similarity index 93% rename from python/rmm/rmm/python/lib.pxd rename to python/rmm/rmm/cpp/lib.pxd index b61e0d569..592574862 100644 --- a/python/rmm/rmm/python/lib.pxd +++ b/python/rmm/rmm/cpp/lib.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/python/rmm/rmm/cpp/lib.pyx b/python/rmm/rmm/cpp/lib.pyx new file mode 100644 index 000000000..46753baa3 --- /dev/null +++ b/python/rmm/rmm/cpp/lib.pyx @@ -0,0 +1,13 @@ +# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/python/rmm/rmm/cpp/cpp_memory_resource.pyx b/python/rmm/rmm/cpp/memory_resource.pxd similarity index 90% rename from python/rmm/rmm/cpp/cpp_memory_resource.pyx rename to python/rmm/rmm/cpp/memory_resource.pxd index 50e201517..5e457b653 100644 --- a/python/rmm/rmm/cpp/cpp_memory_resource.pyx +++ b/python/rmm/rmm/cpp/memory_resource.pxd @@ -23,13 +23,30 @@ from libcpp.optional cimport optional from libcpp.pair cimport pair from libcpp.string cimport string -from rmm.cpp.cpp_cuda_stream_view cimport cuda_stream_view -from rmm.cpp.cpp_memory_resource cimport device_memory_resource +from rmm.cpp.cuda_stream_view cimport cuda_stream_view +from rmm.cpp.memory_resource cimport device_memory_resource + + +cdef extern from "rmm/mr/device/device_memory_resource.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass device_memory_resource: + void* allocate(size_t bytes) except + + void* allocate(size_t bytes, cuda_stream_view stream) except + + void deallocate(void* ptr, size_t bytes) except + + void deallocate( + void* ptr, + size_t bytes, + cuda_stream_view stream + ) except + + +cdef extern from "rmm/cuda_device.hpp" namespace "rmm" nogil: + size_t percent_of_free_device_memory(int percent) except + + pair[size_t, size_t] available_device_memory() except + # Transparent handle of a C++ exception ctypedef pair[int, string] CppExcept -cdef CppExcept translate_python_except_to_cpp(err: BaseException) noexcept: +cdef inline CppExcept translate_python_except_to_cpp(err: BaseException) noexcept: """Translate a Python exception into a C++ exception handle The returned exception handle can then be thrown by `throw_cpp_except()`, diff --git a/python/rmm/rmm/cpp/cpp_per_device_resource.pxd b/python/rmm/rmm/cpp/per_device_resource.pxd similarity index 95% rename from python/rmm/rmm/cpp/cpp_per_device_resource.pxd rename to python/rmm/rmm/cpp/per_device_resource.pxd index 59f651687..ec43d5f99 100644 --- a/python/rmm/rmm/cpp/cpp_per_device_resource.pxd +++ b/python/rmm/rmm/cpp/per_device_resource.pxd @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from rmm.cpp.cpp_memory_resource cimport device_memory_resource +from rmm.cpp.memory_resource cimport device_memory_resource cdef extern from "rmm/mr/device/per_device_resource.hpp" namespace "rmm" nogil: diff --git a/python/rmm/rmm/mr.py b/python/rmm/rmm/mr.py index 6eb94da0f..f820cf805 100644 --- a/python/rmm/rmm/mr.py +++ b/python/rmm/rmm/mr.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from rmm._lib.memory_resource import ( +from rmm.python.memory_resource import ( BinningMemoryResource, CallbackMemoryResource, CudaAsyncMemoryResource, diff --git a/python/rmm/rmm/python/CMakeLists.txt b/python/rmm/rmm/python/CMakeLists.txt index 558c0ce9a..0e88f01bb 100644 --- a/python/rmm/rmm/python/CMakeLists.txt +++ b/python/rmm/rmm/python/CMakeLists.txt @@ -12,8 +12,7 @@ # the License. # ============================================================================= -set(cython_sources device_buffer.pyx lib.pyx logger.pyx memory_resource.pyx cuda_stream.pyx - helper.pyx) +set(cython_sources device_buffer.pyx logger.pyx memory_resource.pyx cuda_stream.pyx helper.pyx) set(linked_libraries rmm::rmm) # Build all of the Cython targets diff --git a/python/rmm/rmm/python/cuda_stream.pxd b/python/rmm/rmm/python/cuda_stream.pxd new file mode 100644 index 000000000..169e560a4 --- /dev/null +++ b/python/rmm/rmm/python/cuda_stream.pxd @@ -0,0 +1,27 @@ +# Copyright (c) 2019-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
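# Illustrative sketch of the stream-ordered usage that the declarations below
# support (editorial, not part of the patch): a Stream wraps or owns a CUDA
# stream and can be passed to DeviceBuffer and the copy helpers. Module paths
# follow the rmm.python layout introduced by this series.
import rmm
from rmm._cuda.stream import Stream
from rmm.python import device_buffer

stream = Stream()                                     # new, non-default stream
src = rmm.DeviceBuffer.to_device(b"abc", stream=stream)
dst = rmm.DeviceBuffer(size=src.size, stream=stream)
device_buffer.copy_device_to_ptr(src.ptr, dst.ptr, src.size, stream=stream)
stream.synchronize()
print(bytes(dst))                                     # b'abc'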
+ +cimport cython +from cuda.ccudart cimport cudaStream_t +from libcpp cimport bool +from libcpp.memory cimport unique_ptr + +from rmm.cpp.cuda_stream cimport cuda_stream + + +@cython.final +cdef class CudaStream: + cdef unique_ptr[cuda_stream] c_obj + cdef cudaStream_t value(self) except * nogil + cdef bool is_valid(self) except * nogil diff --git a/python/rmm/rmm/python/cuda_stream.pyx b/python/rmm/rmm/python/cuda_stream.pyx index f1529111d..de02291d4 100644 --- a/python/rmm/rmm/python/cuda_stream.pyx +++ b/python/rmm/rmm/python/cuda_stream.pyx @@ -16,7 +16,7 @@ cimport cython from cuda.ccudart cimport cudaStream_t from libcpp cimport bool -from rmm.cpp.cpp_cuda_stream cimport cuda_stream +from rmm.cpp.cuda_stream cimport cuda_stream @cython.final diff --git a/python/rmm/rmm/python/device_buffer.pxd b/python/rmm/rmm/python/device_buffer.pxd index d8b56d2fb..2d4d932af 100644 --- a/python/rmm/rmm/python/device_buffer.pxd +++ b/python/rmm/rmm/python/device_buffer.pxd @@ -16,7 +16,7 @@ from libc.stdint cimport uintptr_t from libcpp.memory cimport unique_ptr from rmm._cuda.stream cimport Stream -from rmm.cpp.cpp_device_buffer cimport device_buffer +from rmm.cpp.device_buffer cimport device_buffer from rmm.python.memory_resource cimport DeviceMemoryResource diff --git a/python/rmm/rmm/python/device_buffer.pyx b/python/rmm/rmm/python/device_buffer.pyx index 1d564ff33..73b0d32ec 100644 --- a/python/rmm/rmm/python/device_buffer.pyx +++ b/python/rmm/rmm/python/device_buffer.pyx @@ -32,16 +32,16 @@ from cuda.ccudart cimport ( cudaStream_t, ) -from rmm.cpp.cpp_cuda_stream_view cimport cuda_stream_view -from rmm.cpp.cpp_device_buffer cimport ( +from rmm.cpp.cuda_stream_view cimport cuda_stream_view +from rmm.cpp.device_buffer cimport ( cuda_device_id, device_buffer, get_current_cuda_device, prefetch, ) +from rmm.cpp.memory_resource cimport device_memory_resource from rmm.python.memory_resource cimport ( DeviceMemoryResource, - device_memory_resource, get_current_device_resource, ) @@ -401,7 +401,7 @@ cpdef DeviceBuffer to_device(const unsigned char[::1] b, Examples -------- >>> import rmm - >>> db = rmm._lib.device_buffer.to_device(b"abc") + >>> db = rmm.python.device_buffer.to_device(b"abc") >>> print(bytes(db)) b'abc' """ @@ -467,7 +467,7 @@ cpdef void copy_ptr_to_host(uintptr_t db, >>> import rmm >>> db = rmm.DeviceBuffer.to_device(b"abc") >>> hb = bytearray(db.nbytes) - >>> rmm._lib.device_buffer.copy_ptr_to_host(db.ptr, hb) + >>> rmm.python.device_buffer.copy_ptr_to_host(db.ptr, hb) >>> print(hb) bytearray(b'abc') """ @@ -509,7 +509,7 @@ cpdef void copy_host_to_ptr(const unsigned char[::1] hb, >>> import rmm >>> db = rmm.DeviceBuffer(size=10) >>> hb = b"abc" - >>> rmm._lib.device_buffer.copy_host_to_ptr(hb, db.ptr) + >>> rmm.python.device_buffer.copy_host_to_ptr(hb, db.ptr) >>> hb = db.copy_to_host() >>> print(hb) array([97, 98, 99, 0, 0, 0, 0, 0, 0, 0], dtype=uint8) @@ -548,7 +548,7 @@ cpdef void copy_device_to_ptr(uintptr_t d_src, >>> import rmm >>> db = rmm.DeviceBuffer(size=5) >>> db2 = rmm.DeviceBuffer.to_device(b"abc") - >>> rmm._lib.device_buffer.copy_device_to_ptr(db2.ptr, db.ptr, db2.size) + >>> rmm.python.device_buffer.copy_device_to_ptr(db2.ptr, db.ptr, db2.size) >>> hb = db.copy_to_host() >>> hb array([97, 98, 99, 0, 0], dtype=uint8) diff --git a/python/rmm/rmm/python/logger.pyx b/python/rmm/rmm/python/logger.pyx index 2997ff633..15ba47308 100644 --- a/python/rmm/rmm/python/logger.pyx +++ b/python/rmm/rmm/python/logger.pyx @@ -13,7 +13,10 @@ # limitations under the License. 
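The docstring updates to device_buffer.pyx above track the rename of the Cython package from rmm._lib to rmm.python; the public rmm.DeviceBuffer entry point is untouched. A short sketch of the relocated helpers as exercised by the updated examples (the rmm.python.* paths follow this patch and may be renamed again later in the series):

    import rmm
    from rmm.python.device_buffer import copy_ptr_to_host, to_device

    db = to_device(b"abc")        # previously rmm._lib.device_buffer.to_device
    hb = bytearray(db.nbytes)
    copy_ptr_to_host(db.ptr, hb)  # previously rmm._lib.device_buffer.copy_ptr_to_host
    assert bytes(hb) == b"abc"
    assert isinstance(db, rmm.DeviceBuffer)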
import warnings -from rmm.cpp.cpp_logging cimport logging_level, logger + +from rmm.cpp._logger cimport logger + +from rmm.cpp._logger import logging_level def _validate_level_type(level): diff --git a/python/rmm/rmm/python/memory_resource.pxd b/python/rmm/rmm/python/memory_resource.pxd index 0adda4eaf..4970d5230 100644 --- a/python/rmm/rmm/python/memory_resource.pxd +++ b/python/rmm/rmm/python/memory_resource.pxd @@ -14,7 +14,7 @@ from libcpp.memory cimport shared_ptr -from rmm.cpp.cpp_memory_resource cimport device_memory_resource +from rmm.cpp.memory_resource cimport device_memory_resource cdef class DeviceMemoryResource: diff --git a/python/rmm/rmm/python/memory_resource.pyx b/python/rmm/rmm/python/memory_resource.pyx index 46613667c..14585f36b 100644 --- a/python/rmm/rmm/python/memory_resource.pyx +++ b/python/rmm/rmm/python/memory_resource.pyx @@ -36,23 +36,20 @@ from rmm._cuda.stream cimport Stream from rmm._cuda.stream import DEFAULT_STREAM -from rmm.cpp.cpp_cuda_stream_view cimport cuda_stream_view -from rmm.cpp.cpp_per_device_resource cimport ( +from rmm.cpp.cuda_stream_view cimport cuda_stream_view +from rmm.cpp.per_device_resource cimport ( cuda_device_id, set_per_device_resource as cpp_set_per_device_resource, ) from rmm.python.helper cimport parse_bytes -from rmm.python.memory_resource cimport ( - available_device_memory as c_available_device_memory, - percent_of_free_device_memory as c_percent_of_free_device_memory, -) from rmm.statistics import Statistics -from rmm.cpp.cpp_memory_resource cimport ( +from rmm.cpp.memory_resource cimport ( CppExcept, allocate_callback_t, allocation_handle_type, + available_device_memory as c_available_device_memory, binning_memory_resource, callback_memory_resource, cuda_async_memory_resource, @@ -65,6 +62,7 @@ from rmm.cpp.cpp_memory_resource cimport ( limiting_resource_adaptor, logging_resource_adaptor, managed_memory_resource, + percent_of_free_device_memory as c_percent_of_free_device_memory, pool_memory_resource, posix_file_descriptor, prefetch_resource_adaptor, diff --git a/python/rmm/rmm/python/tests/__init__.py b/python/rmm/rmm/python/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/python/rmm/rmm/python/tests/test_device_buffer.pyx b/python/rmm/rmm/python/tests/test_device_buffer.pyx new file mode 100644 index 000000000..0783dd9c6 --- /dev/null +++ b/python/rmm/rmm/python/tests/test_device_buffer.pyx @@ -0,0 +1,38 @@ +# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import numpy as np + +from libcpp.memory cimport make_unique + +from rmm.cpp.cuda_stream_view cimport cuda_stream_default +from rmm.cpp.device_buffer cimport device_buffer +from rmm.python.device_buffer cimport DeviceBuffer + + +def test_release(): + expect = DeviceBuffer.to_device(b'abc') + cdef DeviceBuffer buf = DeviceBuffer.to_device(b'abc') + + got = DeviceBuffer.c_from_unique_ptr( + make_unique[device_buffer](buf.c_release(), + cuda_stream_default.value()) + ) + np.testing.assert_equal(expect.copy_to_host(), got.copy_to_host()) + + +def test_size_after_release(): + cdef DeviceBuffer buf = DeviceBuffer.to_device(b'abc') + buf.c_release() + assert buf.size == 0 diff --git a/python/rmm/rmm/tests/test_cython.py b/python/rmm/rmm/tests/test_cython.py index 82eba2451..a0364942c 100644 --- a/python/rmm/rmm/tests/test_cython.py +++ b/python/rmm/rmm/tests/test_cython.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -29,7 +29,7 @@ def wrapped(*args, **kwargs): return wrapped -cython_test_modules = ["rmm._lib.tests.test_device_buffer"] +cython_test_modules = ["rmm.python.tests.test_device_buffer"] for mod in cython_test_modules: diff --git a/python/rmm/rmm/tests/test_rmm.py b/python/rmm/rmm/tests/test_rmm.py index c88d21b38..4d7b6c646 100644 --- a/python/rmm/rmm/tests/test_rmm.py +++ b/python/rmm/rmm/tests/test_rmm.py @@ -354,7 +354,7 @@ def test_rmm_pool_numba_stream(stream): rmm.reinitialize(pool_allocator=True) stream = rmm._cuda.stream.Stream(stream) - a = rmm._lib.device_buffer.DeviceBuffer(size=3, stream=stream) + a = rmm.python.device_buffer.DeviceBuffer(size=3, stream=stream) assert a.size == 3 assert a.ptr != 0 From afef5391c23512bb04f35c04cbf7bc1436f9ca1e Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Mon, 16 Sep 2024 10:10:41 -0700 Subject: [PATCH 03/13] remove _lib --- python/rmm/CMakeLists.txt | 1 - python/rmm/rmm/_lib/CMakeLists.txt | 36 - python/rmm/rmm/_lib/__init__.pxd | 13 - python/rmm/rmm/_lib/__init__.py | 15 - python/rmm/rmm/_lib/_torch_allocator.cpp | 64 - python/rmm/rmm/_lib/cuda_stream.pxd | 37 - python/rmm/rmm/_lib/cuda_stream.pyx | 34 - python/rmm/rmm/_lib/cuda_stream_pool.pxd | 25 - python/rmm/rmm/_lib/cuda_stream_view.pxd | 32 - python/rmm/rmm/_lib/device_buffer.pxd | 116 -- python/rmm/rmm/_lib/device_buffer.pyx | 552 ------- python/rmm/rmm/_lib/device_uvector.pxd | 39 - python/rmm/rmm/_lib/helper.pxd | 16 - python/rmm/rmm/_lib/helper.pyx | 78 - python/rmm/rmm/_lib/lib.pxd | 20 - python/rmm/rmm/_lib/lib.pyx | 13 - python/rmm/rmm/_lib/logger.pyx | 260 ---- python/rmm/rmm/_lib/memory_resource.pxd | 103 -- python/rmm/rmm/_lib/memory_resource.pyx | 1332 ----------------- python/rmm/rmm/_lib/per_device_resource.pxd | 23 - python/rmm/rmm/_lib/tests/__init__.py | 0 .../rmm/rmm/_lib/tests/test_device_buffer.pyx | 38 - 22 files changed, 2847 deletions(-) delete mode 100644 python/rmm/rmm/_lib/CMakeLists.txt delete mode 100644 python/rmm/rmm/_lib/__init__.pxd delete mode 100644 python/rmm/rmm/_lib/__init__.py delete mode 100644 python/rmm/rmm/_lib/_torch_allocator.cpp delete mode 100644 python/rmm/rmm/_lib/cuda_stream.pxd delete mode 100644 python/rmm/rmm/_lib/cuda_stream.pyx delete mode 100644 python/rmm/rmm/_lib/cuda_stream_pool.pxd delete mode 100644 python/rmm/rmm/_lib/cuda_stream_view.pxd delete mode 100644 python/rmm/rmm/_lib/device_buffer.pxd delete mode 100644 
python/rmm/rmm/_lib/device_buffer.pyx delete mode 100644 python/rmm/rmm/_lib/device_uvector.pxd delete mode 100644 python/rmm/rmm/_lib/helper.pxd delete mode 100644 python/rmm/rmm/_lib/helper.pyx delete mode 100644 python/rmm/rmm/_lib/lib.pxd delete mode 100644 python/rmm/rmm/_lib/lib.pyx delete mode 100644 python/rmm/rmm/_lib/logger.pyx delete mode 100644 python/rmm/rmm/_lib/memory_resource.pxd delete mode 100644 python/rmm/rmm/_lib/memory_resource.pyx delete mode 100644 python/rmm/rmm/_lib/per_device_resource.pxd delete mode 100644 python/rmm/rmm/_lib/tests/__init__.py delete mode 100644 python/rmm/rmm/_lib/tests/test_device_buffer.pyx diff --git a/python/rmm/CMakeLists.txt b/python/rmm/CMakeLists.txt index 4c4e603ef..119888a4f 100644 --- a/python/rmm/CMakeLists.txt +++ b/python/rmm/CMakeLists.txt @@ -30,6 +30,5 @@ rapids_cython_init() add_compile_definitions("SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${RMM_LOGGING_LEVEL}") add_subdirectory(rmm/_cuda) -# add_subdirectory(rmm/_lib) add_subdirectory(rmm/cpp) add_subdirectory(rmm/python) diff --git a/python/rmm/rmm/_lib/CMakeLists.txt b/python/rmm/rmm/_lib/CMakeLists.txt deleted file mode 100644 index 7cdfed971..000000000 --- a/python/rmm/rmm/_lib/CMakeLists.txt +++ /dev/null @@ -1,36 +0,0 @@ -# ============================================================================= -# Copyright (c) 2022-2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. -# ============================================================================= - -set(cython_sources device_buffer.pyx lib.pyx logger.pyx memory_resource.pyx cuda_stream.pyx - helper.pyx) -set(linked_libraries rmm::rmm) - -# Build all of the Cython targets -rapids_cython_create_modules(SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" - CXX) - -# mark all symbols in these Cython targets "hidden" by default, so they won't collide with symbols -# loaded from other DSOs -foreach(_cython_target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) - set_target_properties(${_cython_target} PROPERTIES C_VISIBILITY_PRESET hidden - CXX_VISIBILITY_PRESET hidden) -endforeach() - -add_library(_torch_allocator SHARED _torch_allocator.cpp) -# Want the output to be called _torch_allocator.so -set_target_properties(_torch_allocator PROPERTIES PREFIX "" SUFFIX ".so") -target_link_libraries(_torch_allocator PRIVATE rmm::rmm) -cmake_path(RELATIVE_PATH CMAKE_CURRENT_SOURCE_DIR BASE_DIRECTORY "${PROJECT_SOURCE_DIR}" - OUTPUT_VARIABLE _torch_allocator_location) -install(TARGETS _torch_allocator DESTINATION "${_torch_allocator_location}") diff --git a/python/rmm/rmm/_lib/__init__.pxd b/python/rmm/rmm/_lib/__init__.pxd deleted file mode 100644 index 46753baa3..000000000 --- a/python/rmm/rmm/_lib/__init__.pxd +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/python/rmm/rmm/_lib/__init__.py b/python/rmm/rmm/_lib/__init__.py deleted file mode 100644 index f9462af65..000000000 --- a/python/rmm/rmm/_lib/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (c) 2019-2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# from .device_buffer import DeviceBuffer diff --git a/python/rmm/rmm/_lib/_torch_allocator.cpp b/python/rmm/rmm/_lib/_torch_allocator.cpp deleted file mode 100644 index bfe94c2d0..000000000 --- a/python/rmm/rmm/_lib/_torch_allocator.cpp +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include - -#include - -// These signatures must match those required by CUDAPluggableAllocator in -// github.com/pytorch/pytorch/blob/main/torch/csrc/cuda/CUDAPluggableAllocator.h -// Since the loading is done at runtime via dlopen, no error checking -// can be performed for mismatching signatures. - -/** - * @brief Allocate memory of at least \p size bytes. - * - * @throws rmm::bad_alloc When the requested allocation cannot be satisfied. - * - * @param size The number of bytes to allocate - * @param device The device whose memory resource one should use - * @param stream CUDA stream to perform allocation on - * @return Pointer to the newly allocated memory - */ -extern "C" void* allocate(std::size_t size, int device, void* stream) -{ - rmm::cuda_device_id const device_id{device}; - rmm::cuda_set_device_raii with_device{device_id}; - auto mr = rmm::mr::get_per_device_resource_ref(device_id); - return mr.allocate_async( - size, rmm::CUDA_ALLOCATION_ALIGNMENT, rmm::cuda_stream_view{static_cast(stream)}); -} - -/** - * @brief Deallocate memory pointed to by \p ptr. 
- * - * @param ptr Pointer to be deallocated - * @param size The number of bytes in the allocation - * @param device The device whose memory resource one should use - * @param stream CUDA stream to perform deallocation on - */ -extern "C" void deallocate(void* ptr, std::size_t size, int device, void* stream) -{ - rmm::cuda_device_id const device_id{device}; - rmm::cuda_set_device_raii with_device{device_id}; - auto mr = rmm::mr::get_per_device_resource_ref(device_id); - mr.deallocate_async(ptr, - size, - rmm::CUDA_ALLOCATION_ALIGNMENT, - rmm::cuda_stream_view{static_cast(stream)}); -} diff --git a/python/rmm/rmm/_lib/cuda_stream.pxd b/python/rmm/rmm/_lib/cuda_stream.pxd deleted file mode 100644 index e224cf9af..000000000 --- a/python/rmm/rmm/_lib/cuda_stream.pxd +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -cimport cython -from cuda.ccudart cimport cudaStream_t -from libcpp cimport bool -from libcpp.memory cimport unique_ptr - -from rmm._lib.cuda_stream_view cimport cuda_stream_view - - -cdef extern from "rmm/cuda_stream.hpp" namespace "rmm" nogil: - cdef cppclass cuda_stream: - cuda_stream() except + - bool is_valid() except + - cudaStream_t value() except + - cuda_stream_view view() except + - void synchronize() except + - void synchronize_no_throw() - - -@cython.final -cdef class CudaStream: - cdef unique_ptr[cuda_stream] c_obj - cdef cudaStream_t value(self) except * nogil - cdef bool is_valid(self) except * nogil diff --git a/python/rmm/rmm/_lib/cuda_stream.pyx b/python/rmm/rmm/_lib/cuda_stream.pyx deleted file mode 100644 index 0861f0663..000000000 --- a/python/rmm/rmm/_lib/cuda_stream.pyx +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -cimport cython -from cuda.ccudart cimport cudaStream_t -from libcpp cimport bool - - -@cython.final -cdef class CudaStream: - """ - Wrapper around a CUDA stream with RAII semantics. - When a CudaStream instance is GC'd, the underlying - CUDA stream is destroyed. 
- """ - def __cinit__(self): - self.c_obj.reset(new cuda_stream()) - - cdef cudaStream_t value(self) except * nogil: - return self.c_obj.get()[0].value() - - cdef bool is_valid(self) except * nogil: - return self.c_obj.get()[0].is_valid() diff --git a/python/rmm/rmm/_lib/cuda_stream_pool.pxd b/python/rmm/rmm/_lib/cuda_stream_pool.pxd deleted file mode 100644 index 0286a9377..000000000 --- a/python/rmm/rmm/_lib/cuda_stream_pool.pxd +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -cimport cython - -from rmm._lib.cuda_stream_view cimport cuda_stream_view - - -cdef extern from "rmm/cuda_stream_pool.hpp" namespace "rmm" nogil: - cdef cppclass cuda_stream_pool: - cuda_stream_pool(size_t pool_size) - cuda_stream_view get_stream() - cuda_stream_view get_stream(size_t stream_id) except + - size_t get_pool_size() diff --git a/python/rmm/rmm/_lib/cuda_stream_view.pxd b/python/rmm/rmm/_lib/cuda_stream_view.pxd deleted file mode 100644 index bf0d33c24..000000000 --- a/python/rmm/rmm/_lib/cuda_stream_view.pxd +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from cuda.ccudart cimport cudaStream_t -from libcpp cimport bool - - -cdef extern from "rmm/cuda_stream_view.hpp" namespace "rmm" nogil: - cdef cppclass cuda_stream_view: - cuda_stream_view() - cuda_stream_view(cudaStream_t) - cudaStream_t value() - bool is_default() - bool is_per_thread_default() - void synchronize() except + - - cdef bool operator==(cuda_stream_view const, cuda_stream_view const) - - const cuda_stream_view cuda_stream_default - const cuda_stream_view cuda_stream_legacy - const cuda_stream_view cuda_stream_per_thread diff --git a/python/rmm/rmm/_lib/device_buffer.pxd b/python/rmm/rmm/_lib/device_buffer.pxd deleted file mode 100644 index 0da9ace0c..000000000 --- a/python/rmm/rmm/_lib/device_buffer.pxd +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (c) 2019-2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -from libc.stdint cimport uintptr_t -from libcpp.memory cimport unique_ptr - -from rmm._cuda.stream cimport Stream -from rmm._lib.cuda_stream_view cimport cuda_stream_view -from rmm._lib.memory_resource cimport ( - DeviceMemoryResource, - device_memory_resource, -) - - -cdef extern from "rmm/mr/device/per_device_resource.hpp" namespace "rmm" nogil: - cdef cppclass cuda_device_id: - ctypedef int value_type - cuda_device_id() - cuda_device_id(value_type id) - value_type value() - - cdef cuda_device_id get_current_cuda_device() - -cdef extern from "rmm/prefetch.hpp" namespace "rmm" nogil: - cdef void prefetch(const void* ptr, - size_t bytes, - cuda_device_id device, - cuda_stream_view stream) except + - -cdef extern from "rmm/device_buffer.hpp" namespace "rmm" nogil: - cdef cppclass device_buffer: - device_buffer() - device_buffer( - size_t size, - cuda_stream_view stream, - device_memory_resource * - ) except + - device_buffer( - const void* source_data, - size_t size, - cuda_stream_view stream, - device_memory_resource * - ) except + - device_buffer( - const device_buffer buf, - cuda_stream_view stream, - device_memory_resource * - ) except + - void reserve(size_t new_capacity, cuda_stream_view stream) except + - void resize(size_t new_size, cuda_stream_view stream) except + - void shrink_to_fit(cuda_stream_view stream) except + - void* data() - size_t size() - size_t capacity() - - -cdef class DeviceBuffer: - cdef unique_ptr[device_buffer] c_obj - - # Holds a reference to the DeviceMemoryResource used for allocation. - # Ensures the MR does not get destroyed before this DeviceBuffer. `mr` is - # needed for deallocation - cdef DeviceMemoryResource mr - - # Holds a reference to the stream used by the underlying `device_buffer`. - # Ensures the stream does not get destroyed before this DeviceBuffer - cdef Stream stream - - @staticmethod - cdef DeviceBuffer c_from_unique_ptr( - unique_ptr[device_buffer] ptr, - Stream stream=*, - DeviceMemoryResource mr=*, - ) - - @staticmethod - cdef DeviceBuffer c_to_device(const unsigned char[::1] b, - Stream stream=*) except * - cpdef copy_to_host(self, ary=*, Stream stream=*) - cpdef copy_from_host(self, ary, Stream stream=*) - cpdef copy_from_device(self, cuda_ary, Stream stream=*) - cpdef bytes tobytes(self, Stream stream=*) - - cdef size_t c_size(self) except * - cpdef void reserve(self, size_t new_capacity, Stream stream=*) except * - cpdef void resize(self, size_t new_size, Stream stream=*) except * - cpdef size_t capacity(self) except * - cdef void* c_data(self) except * - - cdef device_buffer c_release(self) except * - -cpdef DeviceBuffer to_device(const unsigned char[::1] b, - Stream stream=*) -cpdef void copy_ptr_to_host(uintptr_t db, - unsigned char[::1] hb, - Stream stream=*) except * - -cpdef void copy_host_to_ptr(const unsigned char[::1] hb, - uintptr_t db, - Stream stream=*) except * - -cpdef void copy_device_to_ptr(uintptr_t d_src, - uintptr_t d_dst, - size_t count, - Stream stream=*) except * diff --git a/python/rmm/rmm/_lib/device_buffer.pyx b/python/rmm/rmm/_lib/device_buffer.pyx deleted file mode 100644 index 94a4dc771..000000000 --- a/python/rmm/rmm/_lib/device_buffer.pyx +++ /dev/null @@ -1,552 +0,0 @@ -# Copyright (c) 2019-2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import numpy as np - -cimport cython -from cpython.bytes cimport PyBytes_FromStringAndSize -from libc.stdint cimport uintptr_t -from libcpp.memory cimport unique_ptr -from libcpp.utility cimport move - -from rmm._cuda.stream cimport Stream - -from rmm._cuda.stream import DEFAULT_STREAM - -cimport cuda.ccudart as ccudart -from cuda.ccudart cimport ( - cudaError, - cudaError_t, - cudaMemcpyAsync, - cudaMemcpyKind, - cudaStream_t, -) - -from rmm._lib.memory_resource cimport ( - DeviceMemoryResource, - device_memory_resource, - get_current_device_resource, -) - - -# The DeviceMemoryResource attribute could be released prematurely -# by the gc if the DeviceBuffer is in a reference cycle. Removing -# the tp_clear function with the no_gc_clear decoration prevents that. -# See https://github.com/rapidsai/rmm/pull/931 for details. -@cython.no_gc_clear -cdef class DeviceBuffer: - - def __cinit__(self, *, - uintptr_t ptr=0, - size_t size=0, - Stream stream=DEFAULT_STREAM, - DeviceMemoryResource mr=None): - """Construct a ``DeviceBuffer`` with optional size and data pointer - - Parameters - ---------- - ptr : int - pointer to some data on host or device to copy over - size : int - size of the buffer to allocate - (and possibly size of data to copy) - stream : optional - CUDA stream to use for construction and/or copying, - defaults to the CUDA default stream. A reference to the - stream is stored internally to ensure it doesn't go out of - scope while the DeviceBuffer is in use. Destroying the - underlying stream while the DeviceBuffer is in use will - result in undefined behavior. - mr : optional - DeviceMemoryResource for the allocation, if not provided - defaults to the current device resource. - - Note - ---- - If the pointer passed is non-null and ``stream`` is the default stream, - it is synchronized after the copy. However if a non-default ``stream`` - is provided, this function is fully asynchronous. 
- - Examples - -------- - >>> import rmm - >>> db = rmm.DeviceBuffer(size=5) - """ - cdef const void* c_ptr - cdef device_memory_resource * mr_ptr - # Save a reference to the MR and stream used for allocation - self.mr = get_current_device_resource() if mr is None else mr - self.stream = stream - - mr_ptr = self.mr.get_mr() - with nogil: - c_ptr = ptr - - if c_ptr == NULL or size == 0: - self.c_obj.reset(new device_buffer(size, stream.view(), mr_ptr)) - else: - self.c_obj.reset(new device_buffer(c_ptr, size, stream.view(), mr_ptr)) - - if stream.c_is_default(): - stream.c_synchronize() - - def __len__(self): - return self.size - - def __sizeof__(self): - return self.size - - def __bytes__(self): - return self.tobytes() - - @property - def nbytes(self): - """Gets the size of the buffer in bytes.""" - return self.size - - @property - def ptr(self): - """Gets a pointer to the underlying data.""" - return int(self.c_data()) - - @property - def size(self): - """Gets the size of the buffer in bytes.""" - return int(self.c_size()) - - def __reduce__(self): - return to_device, (self.copy_to_host(),) - - @property - def __cuda_array_interface__(self): - cdef dict intf = { - "data": (self.ptr, False), - "shape": (self.size,), - "strides": None, - "typestr": "|u1", - "version": 0 - } - return intf - - def prefetch(self, device=None, stream=None): - """Prefetch buffer data to the specified device on the specified stream. - - Assumes the storage for this DeviceBuffer is CUDA managed memory - (unified memory). If it is not, this function is a no-op. - - Parameters - ---------- - device : optional - The CUDA device to which to prefetch the memory for this buffer. - Defaults to the current CUDA device. To prefetch to the CPU, pass - :py:attr:`~cuda.cudart.cudaCpuDeviceId` as the device. - stream : optional - CUDA stream to use for prefetching. Defaults to self.stream - """ - cdef cuda_device_id dev = (get_current_cuda_device() - if device is None - else cuda_device_id(device)) - cdef Stream strm = self.stream if stream is None else stream - with nogil: - prefetch(self.c_obj.get()[0].data(), - self.c_obj.get()[0].size(), - dev, - strm.view()) - - def copy(self): - """Returns a copy of DeviceBuffer. 
- - Returns - ------- - A deep copy of existing ``DeviceBuffer`` - - Examples - -------- - >>> import rmm - >>> db = rmm.DeviceBuffer.to_device(b"abc") - >>> db_copy = db.copy() - >>> db.copy_to_host() - array([97, 98, 99], dtype=uint8) - >>> db_copy.copy_to_host() - array([97, 98, 99], dtype=uint8) - >>> assert db is not db_copy - >>> assert db.ptr != db_copy.ptr - """ - ret = DeviceBuffer(ptr=self.ptr, size=self.size, stream=self.stream) - ret.mr = self.mr - return ret - - def __copy__(self): - return self.copy() - - @staticmethod - cdef DeviceBuffer c_from_unique_ptr( - unique_ptr[device_buffer] ptr, - Stream stream=DEFAULT_STREAM, - DeviceMemoryResource mr=None, - ): - cdef DeviceBuffer buf = DeviceBuffer.__new__(DeviceBuffer) - if stream.c_is_default(): - stream.c_synchronize() - buf.c_obj = move(ptr) - buf.mr = get_current_device_resource() if mr is None else mr - buf.stream = stream - return buf - - @staticmethod - cdef DeviceBuffer c_to_device(const unsigned char[::1] b, - Stream stream=DEFAULT_STREAM) except *: - """Calls ``to_device`` function on arguments provided""" - return to_device(b, stream) - - @staticmethod - def to_device(const unsigned char[::1] b, - Stream stream=DEFAULT_STREAM): - """Calls ``to_device`` function on arguments provided.""" - return to_device(b, stream) - - cpdef copy_to_host(self, ary=None, Stream stream=DEFAULT_STREAM): - """Copy from a ``DeviceBuffer`` to a buffer on host. - - Parameters - ---------- - ary : ``bytes``-like buffer to write into - stream : CUDA stream to use for copying, default the default stream - - Examples - -------- - >>> import rmm - >>> db = rmm.DeviceBuffer.to_device(b"abc") - >>> hb = bytearray(db.nbytes) - >>> db.copy_to_host(hb) - >>> print(hb) - bytearray(b'abc') - >>> hb = db.copy_to_host() - >>> print(hb) - bytearray(b'abc') - """ - cdef const device_buffer* dbp = self.c_obj.get() - cdef size_t s = dbp.size() - - cdef unsigned char[::1] hb = ary - if hb is None: - # NumPy leverages huge pages under-the-hood, - # which speeds up the copy from device to host. - hb = ary = np.empty((s,), dtype="u1") - elif len(hb) < s: - raise ValueError( - "Argument `ary` is too small. Need space for %i bytes." % s - ) - - copy_ptr_to_host(dbp.data(), hb[:s], stream) - - return ary - - cpdef copy_from_host(self, ary, Stream stream=DEFAULT_STREAM): - """Copy from a buffer on host to ``self`` - - Parameters - ---------- - ary : ``bytes``-like buffer to copy from - stream : CUDA stream to use for copying, default the default stream - - Examples - -------- - >>> import rmm - >>> db = rmm.DeviceBuffer(size=10) - >>> hb = b"abcdef" - >>> db.copy_from_host(hb) - >>> hb = db.copy_to_host() - >>> print(hb) - array([97, 98, 99, 0, 0, 0, 0, 0, 0, 0], dtype=uint8) - """ - cdef device_buffer* dbp = self.c_obj.get() - - cdef const unsigned char[::1] hb = ary - cdef size_t s = len(hb) - if s > self.size: - raise ValueError( - "Argument `ary` is too large. Need space for %i bytes." 
% s - ) - - copy_host_to_ptr(hb[:s], dbp.data(), stream) - - cpdef copy_from_device(self, cuda_ary, - Stream stream=DEFAULT_STREAM): - """Copy from a buffer on host to ``self`` - - Parameters - ---------- - cuda_ary : object to copy from that has ``__cuda_array_interface__`` - stream : CUDA stream to use for copying, default the default stream - - Examples - -------- - >>> import rmm - >>> db = rmm.DeviceBuffer(size=5) - >>> db2 = rmm.DeviceBuffer.to_device(b"abc") - >>> db.copy_from_device(db2) - >>> hb = db.copy_to_host() - >>> print(hb) - array([97, 98, 99, 0, 0], dtype=uint8) - """ - if not hasattr(cuda_ary, "__cuda_array_interface__"): - raise ValueError( - "Expected object to support `__cuda_array_interface__` " - "protocol" - ) - - cuda_ary_interface = cuda_ary.__cuda_array_interface__ - shape = cuda_ary_interface["shape"] - strides = cuda_ary_interface.get("strides") - dtype = np.dtype(cuda_ary_interface["typestr"]) - - if len(shape) > 1: - raise ValueError( - "Only 1-D contiguous arrays are supported, got {}-D " - "array".format(str(len(shape))) - ) - - if strides is not None: - if strides[0] != dtype.itemsize: - raise ValueError( - "Only 1-D contiguous arrays are supported, got a " - "non-contiguous array" - ) - - cdef uintptr_t src_ptr = cuda_ary_interface["data"][0] - cdef size_t s = shape[0] * dtype.itemsize - if s > self.size: - raise ValueError( - "Argument `hb` is too large. Need space for %i bytes." % s - ) - - cdef device_buffer* dbp = self.c_obj.get() - - copy_device_to_ptr( - src_ptr, - dbp.data(), - s, - stream - ) - - cpdef bytes tobytes(self, Stream stream=DEFAULT_STREAM): - cdef const device_buffer* dbp = self.c_obj.get() - cdef size_t s = dbp.size() - - cdef bytes b = PyBytes_FromStringAndSize(NULL, s) - cdef unsigned char* p = b - cdef unsigned char[::1] mv = (p)[:s] - self.copy_to_host(mv, stream) - - return b - - cdef size_t c_size(self) except *: - return self.c_obj.get()[0].size() - - cpdef void reserve(self, - size_t new_capacity, - Stream stream=DEFAULT_STREAM) except *: - self.c_obj.get()[0].reserve(new_capacity, stream.view()) - - cpdef void resize(self, - size_t new_size, - Stream stream=DEFAULT_STREAM) except *: - self.c_obj.get()[0].resize(new_size, stream.view()) - - cpdef size_t capacity(self) except *: - return self.c_obj.get()[0].capacity() - - cdef void* c_data(self) except *: - return self.c_obj.get()[0].data() - - cdef device_buffer c_release(self) except *: - """ - Releases ownership of the data held by this DeviceBuffer. - """ - return move(cython.operator.dereference(self.c_obj)) - - -@cython.boundscheck(False) -cpdef DeviceBuffer to_device(const unsigned char[::1] b, - Stream stream=DEFAULT_STREAM): - """Return a new ``DeviceBuffer`` with a copy of the data. 
- - Parameters - ---------- - b : ``bytes``-like data on host to copy to device - stream : CUDA stream to use for copying, default the default stream - - Returns - ------- - ``DeviceBuffer`` with copy of data from host - - Examples - -------- - >>> import rmm - >>> db = rmm._lib.device_buffer.to_device(b"abc") - >>> print(bytes(db)) - b'abc' - """ - - if b is None: - raise TypeError( - "Argument 'b' has incorrect type" - " (expected bytes-like, got NoneType)" - ) - - cdef uintptr_t p = &b[0] - cdef size_t s = len(b) - return DeviceBuffer(ptr=p, size=s, stream=stream) - - -@cython.boundscheck(False) -cdef void _copy_async(const void* src, - void* dst, - size_t count, - ccudart.cudaMemcpyKind kind, - cuda_stream_view stream) except * nogil: - """ - Asynchronously copy data between host and/or device pointers. - - This is a convenience wrapper around cudaMemcpyAsync that - checks for errors. Only used for internal implementation. - - Parameters - ---------- - src : pointer to ``bytes``-like host buffer or device data to copy from - dst : pointer to ``bytes``-like host buffer or device data to copy into - count : the size in bytes to copy - kind : the kind of copy to perform - stream : CUDA stream to use for copying, default the default stream - """ - cdef cudaError_t err = cudaMemcpyAsync(dst, src, count, kind, - stream) - - if err != cudaError.cudaSuccess: - raise RuntimeError(f"Memcpy failed with error: {err}") - - -@cython.boundscheck(False) -cpdef void copy_ptr_to_host(uintptr_t db, - unsigned char[::1] hb, - Stream stream=DEFAULT_STREAM) except *: - """Copy from a device pointer to a buffer on host - - Parameters - ---------- - db : pointer to data on device to copy - hb : ``bytes``-like buffer to write into - stream : CUDA stream to use for copying, default the default stream - - Note - ---- - If ``stream`` is the default stream, it is synchronized after the copy. - However if a non-default ``stream`` is provided, this function is fully - asynchronous. - - Examples - -------- - >>> import rmm - >>> db = rmm.DeviceBuffer.to_device(b"abc") - >>> hb = bytearray(db.nbytes) - >>> rmm._lib.device_buffer.copy_ptr_to_host(db.ptr, hb) - >>> print(hb) - bytearray(b'abc') - """ - - if hb is None: - raise TypeError( - "Argument `hb` has incorrect type" - " (expected bytes-like, got NoneType)" - ) - - with nogil: - _copy_async(db, &hb[0], len(hb), - cudaMemcpyKind.cudaMemcpyDeviceToHost, stream.view()) - - if stream.c_is_default(): - stream.c_synchronize() - - -@cython.boundscheck(False) -cpdef void copy_host_to_ptr(const unsigned char[::1] hb, - uintptr_t db, - Stream stream=DEFAULT_STREAM) except *: - """Copy from a host pointer to a device pointer - - Parameters - ---------- - hb : ``bytes``-like host buffer to copy - db : pointer to data on device to write into - stream : CUDA stream to use for copying, default the default stream - - Note - ---- - If ``stream`` is the default stream, it is synchronized after the copy. - However if a non-default ``stream`` is provided, this function is fully - asynchronous. 
- - Examples - -------- - >>> import rmm - >>> db = rmm.DeviceBuffer(size=10) - >>> hb = b"abc" - >>> rmm._lib.device_buffer.copy_host_to_ptr(hb, db.ptr) - >>> hb = db.copy_to_host() - >>> print(hb) - array([97, 98, 99, 0, 0, 0, 0, 0, 0, 0], dtype=uint8) - """ - - if hb is None: - raise TypeError( - "Argument `hb` has incorrect type" - " (expected bytes-like, got NoneType)" - ) - - with nogil: - _copy_async(&hb[0], db, len(hb), - cudaMemcpyKind.cudaMemcpyHostToDevice, stream.view()) - - if stream.c_is_default(): - stream.c_synchronize() - - -@cython.boundscheck(False) -cpdef void copy_device_to_ptr(uintptr_t d_src, - uintptr_t d_dst, - size_t count, - Stream stream=DEFAULT_STREAM) except *: - """Copy from a device pointer to a device pointer - - Parameters - ---------- - d_src : pointer to data on device to copy from - d_dst : pointer to data on device to write into - count : the size in bytes to copy - stream : CUDA stream to use for copying, default the default stream - - Examples - -------- - >>> import rmm - >>> db = rmm.DeviceBuffer(size=5) - >>> db2 = rmm.DeviceBuffer.to_device(b"abc") - >>> rmm._lib.device_buffer.copy_device_to_ptr(db2.ptr, db.ptr, db2.size) - >>> hb = db.copy_to_host() - >>> hb - array([97, 98, 99, 0, 0], dtype=uint8) - """ - - with nogil: - _copy_async(d_src, d_dst, count, - cudaMemcpyKind.cudaMemcpyDeviceToDevice, stream.view()) diff --git a/python/rmm/rmm/_lib/device_uvector.pxd b/python/rmm/rmm/_lib/device_uvector.pxd deleted file mode 100644 index 29e122bbf..000000000 --- a/python/rmm/rmm/_lib/device_uvector.pxd +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from rmm._lib.cuda_stream_view cimport cuda_stream_view -from rmm._lib.device_buffer cimport device_buffer -from rmm._lib.memory_resource cimport device_memory_resource - - -cdef extern from "rmm/device_buffer.hpp" namespace "rmm" nogil: - cdef cppclass device_uvector[T]: - device_uvector(size_t size, cuda_stream_view stream) except + - T* element_ptr(size_t index) - void set_element(size_t element_index, const T& v, cuda_stream_view s) - void set_element_async( - size_t element_index, - const T& v, - cuda_stream_view s - ) except + - T front_element(cuda_stream_view s) except + - T back_element(cuda_stream_view s) except + - void reserve(size_t new_capacity, cuda_stream_view stream) except + - void resize(size_t new_size, cuda_stream_view stream) except + - void shrink_to_fit(cuda_stream_view stream) except + - device_buffer release() - size_t capacity() - T* data() - size_t size() - device_memory_resource* memory_resource() diff --git a/python/rmm/rmm/_lib/helper.pxd b/python/rmm/rmm/_lib/helper.pxd deleted file mode 100644 index 8ca151c00..000000000 --- a/python/rmm/rmm/_lib/helper.pxd +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -cdef object parse_bytes(object s) except * diff --git a/python/rmm/rmm/_lib/helper.pyx b/python/rmm/rmm/_lib/helper.pyx deleted file mode 100644 index d442ee341..000000000 --- a/python/rmm/rmm/_lib/helper.pyx +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Helper functions for rmm""" - -import re - - -cdef dict BYTE_SIZES = { - 'b': 1, - '': 1, - 'kb': 1000, - 'mb': 1000**2, - 'gb': 1000**3, - 'tb': 1000**4, - 'pb': 1000**5, - 'kib': 1024, - 'mib': 1024**2, - 'gib': 1024**3, - 'tib': 1024**4, - 'pib': 1024**5, -} - - -pattern = re.compile(r"^([0-9]+(?:\.[0-9]*)?)[\t ]*((?i:(?:[kmgtp]i?)?b))?$") - -cdef object parse_bytes(object s): - """Parse a string or integer into a number of bytes. - - Parameters - ---------- - s : int | str - Size in bytes. If an integer is provided, it is returned as-is. - A string is parsed as a floating point number with an (optional, - case-insensitive) byte-specifier, both SI prefixes (kb, mb, ..., pb) - and binary prefixes (kib, mib, ..., pib) are supported. - - Returns - ------- - Requested size in bytes as an integer. - - Raises - ------ - ValueError - If it is not possible to parse the input as a byte specification. - """ - cdef str suffix - cdef double n - cdef int multiplier - - if isinstance(s, int): - return s - - match = pattern.match(s) - - if match is None: - raise ValueError(f"Could not parse {s} as a byte specification") - - n = float(match.group(1)) - - suffix = match.group(2) - if suffix is None: - suffix = "" - - multiplier = BYTE_SIZES[suffix.lower()] - - return int(n*multiplier) diff --git a/python/rmm/rmm/_lib/lib.pxd b/python/rmm/rmm/_lib/lib.pxd deleted file mode 100644 index e35b672e4..000000000 --- a/python/rmm/rmm/_lib/lib.pxd +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
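The _lib/helper.pyx removed above was relocated to rmm/python/helper.pyx earlier in the series; it supplies the parse_bytes routine that the memory-resource constructors use to accept human-readable sizes. A quick illustration of the rules quoted above, assuming the relocated module keeps the same behaviour (parse_bytes is a cdef function, so it is cimported from Cython rather than imported from Python):

    from rmm.python.helper cimport parse_bytes

    assert parse_bytes(512) == 512                 # ints pass through unchanged
    assert parse_bytes("2kb") == 2_000             # SI prefixes are powers of 1000
    assert parse_bytes("1.5GiB") == 1_610_612_736  # binary prefixes are powers of 1024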
- -from libc.stdint cimport uintptr_t -from libcpp cimport bool -from libcpp.utility cimport pair -from libcpp.vector cimport vector - -ctypedef pair[const char*, unsigned int] caller_pair diff --git a/python/rmm/rmm/_lib/lib.pyx b/python/rmm/rmm/_lib/lib.pyx deleted file mode 100644 index 46753baa3..000000000 --- a/python/rmm/rmm/_lib/lib.pyx +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/python/rmm/rmm/_lib/logger.pyx b/python/rmm/rmm/_lib/logger.pyx deleted file mode 100644 index 029bbdd79..000000000 --- a/python/rmm/rmm/_lib/logger.pyx +++ /dev/null @@ -1,260 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import warnings - -from libcpp cimport bool - - -cdef extern from "spdlog/common.h" namespace "spdlog::level" nogil: - cpdef enum logging_level "spdlog::level::level_enum": - """ - The debug logging level for RMM. - - Debug logging prints messages to a log file. See - `Debug Logging `_ - for more information. - - Valid levels, in decreasing order of verbosity, are TRACE, DEBUG, - INFO, WARN, ERR, CRITICAL, and OFF. Default is INFO. - - Examples - -------- - >>> import rmm - >>> rmm.logging_level.DEBUG - - >>> rmm.logging_level.DEBUG.value - 1 - >>> rmm.logging_level.DEBUG.name - 'DEBUG' - - See Also - -------- - set_logging_level : Set the debug logging level - get_logging_level : Get the current debug logging level - """ - TRACE "spdlog::level::trace" - DEBUG "spdlog::level::debug" - INFO "spdlog::level::info" - WARN "spdlog::level::warn" - ERR "spdlog::level::err" - CRITICAL "spdlog::level::critical" - OFF "spdlog::level::off" - - -cdef extern from "spdlog/spdlog.h" namespace "spdlog" nogil: - cdef cppclass spdlog_logger "spdlog::logger": - spdlog_logger() except + - void set_level(logging_level level) - logging_level level() - void flush() except + - void flush_on(logging_level level) - logging_level flush_level() - bool should_log(logging_level msg_level) - - -cdef extern from "rmm/logger.hpp" namespace "rmm" nogil: - cdef spdlog_logger& logger() except + - - -def _validate_level_type(level): - if not isinstance(level, logging_level): - raise TypeError("level must be an instance of the logging_level enum") - - -def should_log(level): - """ - Check if a message at the given level would be logged. 
- - A message at the given level would be logged if the current debug logging - level is set to a level that is at least as verbose than the given level, - *and* the RMM module is compiled for a logging level at least as verbose. - If these conditions are not both met, this function will return false. - - Debug logging prints messages to a log file. See - `Debug Logging `_ - for more information. - - Parameters - ---------- - level : logging_level - The debug logging level. Valid values are instances of the - ``logging_level`` enum. - - Returns - ------- - should_log : bool - True if a message at the given level would be logged, False otherwise. - - Raises - ------ - TypeError - If the logging level is not an instance of the ``logging_level`` enum. - """ - _validate_level_type(level) - return logger().should_log(level) - - -def set_logging_level(level): - """ - Set the debug logging level. - - Debug logging prints messages to a log file. See - `Debug Logging `_ - for more information. - - Parameters - ---------- - level : logging_level - The debug logging level. Valid values are instances of the - ``logging_level`` enum. - - Raises - ------ - TypeError - If the logging level is not an instance of the ``logging_level`` enum. - - See Also - -------- - get_logging_level : Get the current debug logging level. - - Examples - -------- - >>> import rmm - >>> rmm.set_logging_level(rmm.logging_level.WARN) # set logging level to warn - """ - _validate_level_type(level) - logger().set_level(level) - - if not should_log(level): - warnings.warn(f"RMM will not log logging_level.{level.name}. This " - "may be because the C++ library is compiled for a " - "less-verbose logging level.") - - -def get_logging_level(): - """ - Get the current debug logging level. - - Debug logging prints messages to a log file. See - `Debug Logging `_ - for more information. - - Returns - ------- - level : logging_level - The current debug logging level, an instance of the ``logging_level`` - enum. - - See Also - -------- - set_logging_level : Set the debug logging level. - - Examples - -------- - >>> import rmm - >>> rmm.get_logging_level() # get current logging level - - """ - return logging_level(logger().level()) - - -def flush_logger(): - """ - Flush the debug logger. This will cause any buffered log messages to - be written to the log file. - - Debug logging prints messages to a log file. See - `Debug Logging `_ - for more information. - - See Also - -------- - set_flush_level : Set the flush level for the debug logger. - get_flush_level : Get the current debug logging flush level. - - Examples - -------- - >>> import rmm - >>> rmm.flush_logger() # flush the logger - """ - logger().flush() - - -def set_flush_level(level): - """ - Set the flush level for the debug logger. Messages of this level or higher - will automatically flush to the file. - - Debug logging prints messages to a log file. See - `Debug Logging `_ - for more information. - - Parameters - ---------- - level : logging_level - The debug logging level. Valid values are instances of the - ``logging_level`` enum. - - Raises - ------ - TypeError - If the logging level is not an instance of the ``logging_level`` enum. - - See Also - -------- - get_flush_level : Get the current debug logging flush level. - flush_logger : Flush the logger. 
- - Examples - -------- - >>> import rmm - >>> rmm.flush_on(rmm.logging_level.WARN) # set flush level to warn - """ - _validate_level_type(level) - logger().flush_on(level) - - if not should_log(level): - warnings.warn(f"RMM will not log logging_level.{level.name}. This " - "may be because the C++ library is compiled for a " - "less-verbose logging level.") - - -def get_flush_level(): - """ - Get the current debug logging flush level for the RMM logger. Messages of - this level or higher will automatically flush to the file. - - Debug logging prints messages to a log file. See - `Debug Logging `_ - for more information. - - Returns - ------- - logging_level - The current flush level, an instance of the ``logging_level`` - enum. - - See Also - -------- - set_flush_level : Set the flush level for the logger. - flush_logger : Flush the logger. - - Examples - -------- - >>> import rmm - >>> rmm.flush_level() # get current flush level - - """ - return logging_level(logger().flush_level()) diff --git a/python/rmm/rmm/_lib/memory_resource.pxd b/python/rmm/rmm/_lib/memory_resource.pxd deleted file mode 100644 index 000a3fe1e..000000000 --- a/python/rmm/rmm/_lib/memory_resource.pxd +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
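The logger bindings removed above were relocated to rmm/python/logger.pyx earlier in the series; as their docstrings show, the entry points remain exposed from the top-level rmm namespace. A brief recap of the API those docstrings describe:

    import rmm

    rmm.set_logging_level(rmm.logging_level.WARN)  # raise the threshold to WARN
    assert rmm.get_logging_level() == rmm.logging_level.WARN
    if rmm.should_log(rmm.logging_level.ERR):      # ERR is less verbose than WARN
        rmm.flush_logger()                         # write out any buffered records
    rmm.set_flush_level(rmm.logging_level.ERR)     # auto-flush on ERR and above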
- -from libc.stdint cimport int8_t -from libcpp.memory cimport shared_ptr -from libcpp.pair cimport pair -from libcpp.string cimport string -from libcpp.vector cimport vector - -from rmm._lib.cuda_stream_view cimport cuda_stream_view - - -cdef extern from "rmm/mr/device/device_memory_resource.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass device_memory_resource: - void* allocate(size_t bytes) except + - void* allocate(size_t bytes, cuda_stream_view stream) except + - void deallocate(void* ptr, size_t bytes) except + - void deallocate( - void* ptr, - size_t bytes, - cuda_stream_view stream - ) except + - -cdef extern from "rmm/cuda_device.hpp" namespace "rmm" nogil: - size_t percent_of_free_device_memory(int percent) except + - pair[size_t, size_t] available_device_memory() except + - -cdef class DeviceMemoryResource: - cdef shared_ptr[device_memory_resource] c_obj - cdef device_memory_resource* get_mr(self) noexcept nogil - -cdef class UpstreamResourceAdaptor(DeviceMemoryResource): - cdef readonly DeviceMemoryResource upstream_mr - - cpdef DeviceMemoryResource get_upstream(self) - -cdef class CudaMemoryResource(DeviceMemoryResource): - pass - -cdef class ManagedMemoryResource(DeviceMemoryResource): - pass - -cdef class SystemMemoryResource(DeviceMemoryResource): - pass - -cdef class SamHeadroomMemoryResource(DeviceMemoryResource): - pass - -cdef class CudaAsyncMemoryResource(DeviceMemoryResource): - pass - -cdef class PoolMemoryResource(UpstreamResourceAdaptor): - pass - -cdef class FixedSizeMemoryResource(UpstreamResourceAdaptor): - pass - -cdef class BinningMemoryResource(UpstreamResourceAdaptor): - - cdef readonly list _bin_mrs - - cpdef add_bin( - self, - size_t allocation_size, - DeviceMemoryResource bin_resource=*) - -cdef class CallbackMemoryResource(DeviceMemoryResource): - cdef object _allocate_func - cdef object _deallocate_func - -cdef class LimitingResourceAdaptor(UpstreamResourceAdaptor): - pass - -cdef class LoggingResourceAdaptor(UpstreamResourceAdaptor): - cdef object _log_file_name - cpdef get_file_name(self) - cpdef flush(self) - -cdef class StatisticsResourceAdaptor(UpstreamResourceAdaptor): - pass - -cdef class TrackingResourceAdaptor(UpstreamResourceAdaptor): - pass - -cdef class FailureCallbackResourceAdaptor(UpstreamResourceAdaptor): - cdef object _callback - -cdef class PrefetchResourceAdaptor(UpstreamResourceAdaptor): - pass - -cpdef DeviceMemoryResource get_current_device_resource() diff --git a/python/rmm/rmm/_lib/memory_resource.pyx b/python/rmm/rmm/_lib/memory_resource.pyx deleted file mode 100644 index 231253e3f..000000000 --- a/python/rmm/rmm/_lib/memory_resource.pyx +++ /dev/null @@ -1,1332 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
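The declarations above only change location; the user-facing classes continue to be re-exported through rmm.mr (see the rmm/mr.py change earlier in this series). A small sketch of the usual pattern they support, which the reorganization does not change:

    import rmm
    import rmm.mr

    # Carve a suballocating pool out of plain cudaMalloc memory and make it
    # the default resource for the current device.
    pool = rmm.mr.PoolMemoryResource(
        rmm.mr.CudaMemoryResource(),
        initial_pool_size=2**30,  # 1 GiB up front
    )
    rmm.mr.set_current_device_resource(pool)

    buf = rmm.DeviceBuffer(size=4096)  # served from the pool
    assert rmm.mr.get_current_device_resource() is pool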
- -import os -import warnings -# This import is needed for Cython typing in translate_python_except_to_cpp -# See https://github.com/cython/cython/issues/5589 -from builtins import BaseException -from collections import defaultdict - -cimport cython -from cython.operator cimport dereference as deref -from libc.stddef cimport size_t -from libc.stdint cimport int8_t, int64_t, uintptr_t -from libcpp cimport bool -from libcpp.memory cimport make_unique, unique_ptr -from libcpp.optional cimport optional -from libcpp.pair cimport pair -from libcpp.string cimport string - -from cuda.cudart import cudaError_t - -from rmm._cuda.gpu import CUDARuntimeError, getDevice, setDevice - -from rmm._cuda.stream cimport Stream - -from rmm._cuda.stream import DEFAULT_STREAM - -from rmm._lib.cuda_stream_view cimport cuda_stream_view -from rmm._lib.helper cimport parse_bytes -from rmm._lib.memory_resource cimport ( - available_device_memory as c_available_device_memory, - percent_of_free_device_memory as c_percent_of_free_device_memory, -) -from rmm._lib.per_device_resource cimport ( - cuda_device_id, - set_per_device_resource as cpp_set_per_device_resource, -) - -from rmm.statistics import Statistics - -# Transparent handle of a C++ exception -ctypedef pair[int, string] CppExcept - -cdef CppExcept translate_python_except_to_cpp(err: BaseException) noexcept: - """Translate a Python exception into a C++ exception handle - - The returned exception handle can then be thrown by `throw_cpp_except()`, - which MUST be done without holding the GIL. - - This is useful when C++ calls a Python function and needs to catch or - propagate exceptions. - """ - if isinstance(err, MemoryError): - return CppExcept(0, str.encode(str(err))) - return CppExcept(-1, str.encode(str(err))) - -# Implementation of `throw_cpp_except()`, which throws a given `CppExcept`. -# This function MUST be called without the GIL otherwise the thrown C++ -# exception are translated back into a Python exception. 
-cdef extern from *: - """ - #include - #include - - void throw_cpp_except(std::pair res) { - switch(res.first) { - case 0: - throw rmm::out_of_memory(res.second); - default: - throw std::runtime_error(res.second); - } - } - """ - void throw_cpp_except(CppExcept) nogil - - -# NOTE: Keep extern declarations in .pyx file as much as possible to avoid -# leaking dependencies when importing RMM Cython .pxd files -cdef extern from "rmm/mr/device/cuda_memory_resource.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass cuda_memory_resource(device_memory_resource): - cuda_memory_resource() except + - -cdef extern from "rmm/mr/device/managed_memory_resource.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass managed_memory_resource(device_memory_resource): - managed_memory_resource() except + - -cdef extern from "rmm/mr/device/system_memory_resource.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass system_memory_resource(device_memory_resource): - system_memory_resource() except + - -cdef extern from "rmm/mr/device/sam_headroom_memory_resource.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass sam_headroom_memory_resource(device_memory_resource): - sam_headroom_memory_resource(size_t headroom) except + - -cdef extern from "rmm/mr/device/cuda_async_memory_resource.hpp" \ - namespace "rmm::mr" nogil: - - cdef cppclass cuda_async_memory_resource(device_memory_resource): - cuda_async_memory_resource( - optional[size_t] initial_pool_size, - optional[size_t] release_threshold, - optional[allocation_handle_type] export_handle_type) except + - -# TODO: when we adopt Cython 3.0 use enum class -cdef extern from "rmm/mr/device/cuda_async_memory_resource.hpp" \ - namespace \ - "rmm::mr::cuda_async_memory_resource::allocation_handle_type" \ - nogil: - enum allocation_handle_type \ - "rmm::mr::cuda_async_memory_resource::allocation_handle_type": - none - posix_file_descriptor - win32 - win32_kmt - - -cdef extern from "rmm/mr/device/pool_memory_resource.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass pool_memory_resource[Upstream](device_memory_resource): - pool_memory_resource( - Upstream* upstream_mr, - size_t initial_pool_size, - optional[size_t] maximum_pool_size) except + - size_t pool_size() - -cdef extern from "rmm/mr/device/fixed_size_memory_resource.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass fixed_size_memory_resource[Upstream](device_memory_resource): - fixed_size_memory_resource( - Upstream* upstream_mr, - size_t block_size, - size_t block_to_preallocate) except + - -cdef extern from "rmm/mr/device/callback_memory_resource.hpp" \ - namespace "rmm::mr" nogil: - ctypedef void* (*allocate_callback_t)(size_t, cuda_stream_view, void*) - ctypedef void (*deallocate_callback_t)(void*, size_t, cuda_stream_view, void*) - - cdef cppclass callback_memory_resource(device_memory_resource): - callback_memory_resource( - allocate_callback_t allocate_callback, - deallocate_callback_t deallocate_callback, - void* allocate_callback_arg, - void* deallocate_callback_arg - ) except + - -cdef extern from "rmm/mr/device/binning_memory_resource.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass binning_memory_resource[Upstream](device_memory_resource): - binning_memory_resource(Upstream* upstream_mr) except + - binning_memory_resource( - Upstream* upstream_mr, - int8_t min_size_exponent, - int8_t max_size_exponent) except + - - void add_bin(size_t allocation_size) except + - void add_bin( - size_t allocation_size, - device_memory_resource* bin_resource) except + - -cdef extern from 
"rmm/mr/device/limiting_resource_adaptor.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass limiting_resource_adaptor[Upstream](device_memory_resource): - limiting_resource_adaptor( - Upstream* upstream_mr, - size_t allocation_limit) except + - - size_t get_allocated_bytes() except + - size_t get_allocation_limit() except + - -cdef extern from "rmm/mr/device/logging_resource_adaptor.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass logging_resource_adaptor[Upstream](device_memory_resource): - logging_resource_adaptor( - Upstream* upstream_mr, - string filename) except + - - void flush() except + - -cdef extern from "rmm/mr/device/statistics_resource_adaptor.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass statistics_resource_adaptor[Upstream](device_memory_resource): - struct counter: - counter() - - int64_t value - int64_t peak - int64_t total - - statistics_resource_adaptor(Upstream* upstream_mr) except + - - counter get_bytes_counter() except + - counter get_allocations_counter() except + - pair[counter, counter] pop_counters() except + - pair[counter, counter] push_counters() except + - -cdef extern from "rmm/mr/device/tracking_resource_adaptor.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass tracking_resource_adaptor[Upstream](device_memory_resource): - tracking_resource_adaptor( - Upstream* upstream_mr, - bool capture_stacks) except + - - size_t get_allocated_bytes() except + - string get_outstanding_allocations_str() except + - void log_outstanding_allocations() except + - -cdef extern from "rmm/mr/device/failure_callback_resource_adaptor.hpp" \ - namespace "rmm::mr" nogil: - ctypedef bool (*failure_callback_t)(size_t, void*) - cdef cppclass failure_callback_resource_adaptor[Upstream]( - device_memory_resource - ): - failure_callback_resource_adaptor( - Upstream* upstream_mr, - failure_callback_t callback, - void* callback_arg - ) except + - -cdef extern from "rmm/mr/device/prefetch_resource_adaptor.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass prefetch_resource_adaptor[Upstream](device_memory_resource): - prefetch_resource_adaptor(Upstream* upstream_mr) except + - - -cdef class DeviceMemoryResource: - - cdef device_memory_resource* get_mr(self) noexcept nogil: - """Get the underlying C++ memory resource object.""" - return self.c_obj.get() - - def allocate(self, size_t nbytes, Stream stream=DEFAULT_STREAM): - """Allocate ``nbytes`` bytes of memory. - - Parameters - ---------- - nbytes : size_t - The size of the allocation in bytes - stream : Stream - Optional stream for the allocation - """ - return self.c_obj.get().allocate(nbytes, stream.view()) - - def deallocate(self, uintptr_t ptr, size_t nbytes, Stream stream=DEFAULT_STREAM): - """Deallocate memory pointed to by ``ptr`` of size ``nbytes``. - - Parameters - ---------- - ptr : uintptr_t - Pointer to be deallocated - nbytes : size_t - Size of the allocation in bytes - stream : Stream - Optional stream for the deallocation - """ - self.c_obj.get().deallocate((ptr), nbytes, stream.view()) - - -# See the note about `no_gc_clear` in `device_buffer.pyx`. -@cython.no_gc_clear -cdef class UpstreamResourceAdaptor(DeviceMemoryResource): - """Parent class for all memory resources that track an upstream. - - Upstream resource tracking requires maintaining a reference to the upstream - mr so that it is kept alive and may be accessed by any downstream resource - adaptors. 
- """ - - def __cinit__(self, DeviceMemoryResource upstream_mr, *args, **kwargs): - - if (upstream_mr is None): - raise Exception("Argument `upstream_mr` must not be None") - - self.upstream_mr = upstream_mr - - def __dealloc__(self): - # Must cleanup the base MR before any upstream MR - self.c_obj.reset() - - cpdef DeviceMemoryResource get_upstream(self): - return self.upstream_mr - - -cdef class CudaMemoryResource(DeviceMemoryResource): - def __cinit__(self): - self.c_obj.reset( - new cuda_memory_resource() - ) - - def __init__(self): - """ - Memory resource that uses ``cudaMalloc``/``cudaFree`` for - allocation/deallocation. - """ - pass - - -cdef class CudaAsyncMemoryResource(DeviceMemoryResource): - """ - Memory resource that uses ``cudaMallocAsync``/``cudaFreeAsync`` for - allocation/deallocation. - - Parameters - ---------- - initial_pool_size : int | str, optional - Initial pool size in bytes. By default, half the available memory - on the device is used. A string argument is parsed using `parse_bytes`. - release_threshold: int, optional - Release threshold in bytes. If the pool size grows beyond this - value, unused memory held by the pool will be released at the - next synchronization point. - enable_ipc: bool, optional - If True, enables export of POSIX file descriptor handles for the memory - allocated by this resource so that it can be used with CUDA IPC. - """ - def __cinit__( - self, - initial_pool_size=None, - release_threshold=None, - enable_ipc=False - ): - cdef optional[size_t] c_initial_pool_size = ( - optional[size_t]() - if initial_pool_size is None - else optional[size_t]( parse_bytes(initial_pool_size)) - ) - - cdef optional[size_t] c_release_threshold = ( - optional[size_t]() - if release_threshold is None - else optional[size_t]( release_threshold) - ) - - # If IPC memory handles are not supported, the constructor below will - # raise an error from C++. - cdef optional[allocation_handle_type] c_export_handle_type = ( - optional[allocation_handle_type]( - posix_file_descriptor - ) - if enable_ipc - else optional[allocation_handle_type]() - ) - - self.c_obj.reset( - new cuda_async_memory_resource( - c_initial_pool_size, - c_release_threshold, - c_export_handle_type - ) - ) - - -cdef class ManagedMemoryResource(DeviceMemoryResource): - def __cinit__(self): - self.c_obj.reset( - new managed_memory_resource() - ) - - def __init__(self): - """ - Memory resource that uses ``cudaMallocManaged``/``cudaFree`` for - allocation/deallocation. - """ - pass - - -cdef class SystemMemoryResource(DeviceMemoryResource): - def __cinit__(self): - self.c_obj.reset( - new system_memory_resource() - ) - - def __init__(self): - """ - Memory resource that uses ``malloc``/``free`` for - allocation/deallocation. - """ - pass - - -cdef class SamHeadroomMemoryResource(DeviceMemoryResource): - def __cinit__( - self, - size_t headroom - ): - self.c_obj.reset(new sam_headroom_memory_resource(headroom)) - - def __init__( - self, - size_t headroom - ): - """ - Memory resource that uses ``malloc``/``free`` for - allocation/deallocation. 
- - Parameters - ---------- - headroom : size_t - Size of the reserved GPU memory as headroom - """ - pass - - -cdef class PoolMemoryResource(UpstreamResourceAdaptor): - - def __cinit__( - self, - DeviceMemoryResource upstream_mr, - initial_pool_size=None, - maximum_pool_size=None - ): - cdef size_t c_initial_pool_size - cdef optional[size_t] c_maximum_pool_size - c_initial_pool_size = ( - c_percent_of_free_device_memory(50) if - initial_pool_size is None - else parse_bytes(initial_pool_size) - ) - c_maximum_pool_size = ( - optional[size_t]() if - maximum_pool_size is None - else optional[size_t]( parse_bytes(maximum_pool_size)) - ) - self.c_obj.reset( - new pool_memory_resource[device_memory_resource]( - upstream_mr.get_mr(), - c_initial_pool_size, - c_maximum_pool_size - ) - ) - - def __init__( - self, - DeviceMemoryResource upstream_mr, - object initial_pool_size=None, - object maximum_pool_size=None - ): - """ - Coalescing best-fit suballocator which uses a pool of memory allocated - from an upstream memory resource. - - Parameters - ---------- - upstream_mr : DeviceMemoryResource - The DeviceMemoryResource from which to allocate blocks for the - pool. - initial_pool_size : int | str, optional - Initial pool size in bytes. By default, half the available memory - on the device is used. - maximum_pool_size : int | str, optional - Maximum size in bytes, that the pool can grow to. - """ - pass - - def pool_size(self): - cdef pool_memory_resource[device_memory_resource]* c_mr = ( - (self.get_mr()) - ) - return c_mr.pool_size() - -cdef class FixedSizeMemoryResource(UpstreamResourceAdaptor): - def __cinit__( - self, - DeviceMemoryResource upstream_mr, - size_t block_size=1<<20, - size_t blocks_to_preallocate=128 - ): - self.c_obj.reset( - new fixed_size_memory_resource[device_memory_resource]( - upstream_mr.get_mr(), - block_size, - blocks_to_preallocate - ) - ) - - def __init__( - self, - DeviceMemoryResource upstream_mr, - size_t block_size=1<<20, - size_t blocks_to_preallocate=128 - ): - """ - Memory resource which allocates memory blocks of a single fixed size. - - Parameters - ---------- - upstream_mr : DeviceMemoryResource - The DeviceMemoryResource from which to allocate blocks for the - pool. - block_size : int, optional - The size of blocks to allocate (default is 1MiB). - blocks_to_preallocate : int, optional - The number of blocks to allocate to initialize the pool. - - Notes - ----- - Supports only allocations of size smaller than the configured - block_size. - """ - pass - - -cdef class BinningMemoryResource(UpstreamResourceAdaptor): - def __cinit__( - self, - DeviceMemoryResource upstream_mr, - int8_t min_size_exponent=-1, - int8_t max_size_exponent=-1, - ): - - self._bin_mrs = [] - - if (min_size_exponent == -1 or max_size_exponent == -1): - self.c_obj.reset( - new binning_memory_resource[device_memory_resource]( - upstream_mr.get_mr() - ) - ) - else: - self.c_obj.reset( - new binning_memory_resource[device_memory_resource]( - upstream_mr.get_mr(), - min_size_exponent, - max_size_exponent - ) - ) - - def __dealloc__(self): - - # Must cleanup the base MR before any upstream or referenced Bins - self.c_obj.reset() - - def __init__( - self, - DeviceMemoryResource upstream_mr, - int8_t min_size_exponent=-1, - int8_t max_size_exponent=-1, - ): - """ - Allocates memory from a set of specified "bin" sizes based on a - specified allocation size. 
- - If min_size_exponent and max_size_exponent are specified, initializes - with one or more FixedSizeMemoryResource bins in the range - ``[2**min_size_exponent, 2**max_size_exponent]``. - - Call :py:meth:`~.add_bin` to add additional bin allocators. - - Parameters - ---------- - upstream_mr : DeviceMemoryResource - The memory resource to use for allocations larger than any of the - bins. - min_size_exponent : size_t - The base-2 exponent of the minimum size FixedSizeMemoryResource - bin to create. - max_size_exponent : size_t - The base-2 exponent of the maximum size FixedSizeMemoryResource - bin to create. - """ - pass - - cpdef add_bin( - self, - size_t allocation_size, - DeviceMemoryResource bin_resource=None - ): - """ - Adds a bin of the specified maximum allocation size to this memory - resource. If specified, uses bin_resource for allocation for this bin. - If not specified, creates and uses a FixedSizeMemoryResource for - allocation for this bin. - - Allocations smaller than allocation_size and larger than the next - smaller bin size will use this fixed-size memory resource. - - Parameters - ---------- - allocation_size : size_t - The maximum allocation size in bytes for the created bin - bin_resource : DeviceMemoryResource - The resource to use for this bin (optional) - """ - if bin_resource is None: - (( - self.c_obj.get()))[0].add_bin(allocation_size) - else: - # Save the ref to the new bin resource to ensure its lifetime - self._bin_mrs.append(bin_resource) - - (( - self.c_obj.get()))[0].add_bin( - allocation_size, - bin_resource.get_mr()) - - @property - def bin_mrs(self) -> list: - """Get the list of binned memory resources.""" - return self._bin_mrs - - -cdef void* _allocate_callback_wrapper( - size_t nbytes, - cuda_stream_view stream, - void* ctx - # Note that this function is specifically designed to rethrow Python - # exceptions as C++ exceptions when called as a callback from C++, so it is - # noexcept from Cython's perspective. -) noexcept nogil: - cdef CppExcept err - with gil: - try: - return ((ctx)( - nbytes, - Stream._from_cudaStream_t(stream.value()) - )) - except BaseException as e: - err = translate_python_except_to_cpp(e) - throw_cpp_except(err) - -cdef void _deallocate_callback_wrapper( - void* ptr, - size_t nbytes, - cuda_stream_view stream, - void* ctx -) except * with gil: - (ctx)((ptr), nbytes, Stream._from_cudaStream_t(stream.value())) - - -cdef class CallbackMemoryResource(DeviceMemoryResource): - """ - A memory resource that uses the user-provided callables to do - memory allocation and deallocation. - - ``CallbackMemoryResource`` should really only be used for - debugging memory issues, as there is a significant performance - penalty associated with using a Python function for each memory - allocation and deallocation. - - Parameters - ---------- - allocate_func: callable - The allocation function must accept two arguments. An integer - representing the number of bytes to allocate and a Stream on - which to perform the allocation, and return an integer - representing the pointer to the allocated memory. - deallocate_func: callable - The deallocation function must accept three arguments. an integer - representing the pointer to the memory to free, a second - integer representing the number of bytes to free, and a Stream - on which to perform the deallocation. - - Examples - -------- - >>> import rmm - >>> base_mr = rmm.mr.CudaMemoryResource() - >>> def allocate_func(size, stream): - ... print(f"Allocating {size} bytes") - ... 
return base_mr.allocate(size, stream) - ... - >>> def deallocate_func(ptr, size, stream): - ... print(f"Deallocating {size} bytes") - ... return base_mr.deallocate(ptr, size, stream) - ... - >>> rmm.mr.set_current_device_resource( - rmm.mr.CallbackMemoryResource(allocate_func, deallocate_func) - ) - >>> dbuf = rmm.DeviceBuffer(size=256) - Allocating 256 bytes - >>> del dbuf - Deallocating 256 bytes - """ - def __init__( - self, - allocate_func, - deallocate_func, - ): - self._allocate_func = allocate_func - self._deallocate_func = deallocate_func - self.c_obj.reset( - new callback_memory_resource( - (_allocate_callback_wrapper), - (_deallocate_callback_wrapper), - (allocate_func), - (deallocate_func) - ) - ) - - -def _append_id(filename, id): - """ - Append ".dev" onto a filename before the extension - - Example: _append_id("hello.txt", 1) returns "hello.dev1.txt" - - Parameters - ---------- - filename : string - The filename, possibly with extension - id : int - The ID to append - """ - name, ext = os.path.splitext(filename) - return f"{name}.dev{id}{ext}" - - -cdef class LimitingResourceAdaptor(UpstreamResourceAdaptor): - - def __cinit__( - self, - DeviceMemoryResource upstream_mr, - size_t allocation_limit - ): - self.c_obj.reset( - new limiting_resource_adaptor[device_memory_resource]( - upstream_mr.get_mr(), - allocation_limit - ) - ) - - def __init__( - self, - DeviceMemoryResource upstream_mr, - size_t allocation_limit - ): - """ - Memory resource that limits the total allocation amount possible - performed by an upstream memory resource. - - Parameters - ---------- - upstream_mr : DeviceMemoryResource - The upstream memory resource. - allocation_limit : size_t - Maximum memory allowed for this allocator. - """ - pass - - def get_allocated_bytes(self) -> size_t: - """ - Query the number of bytes that have been allocated. Note that this can - not be used to know how large of an allocation is possible due to both - possible fragmentation and also internal page sizes and alignment that - is not tracked by this allocator. - """ - return (( - self.c_obj.get()) - )[0].get_allocated_bytes() - - def get_allocation_limit(self) -> size_t: - """ - Query the maximum number of bytes that this allocator is allowed to - allocate. This is the limit on the allocator and not a representation - of the underlying device. The device may not be able to support this - limit. - """ - return (( - self.c_obj.get()) - )[0].get_allocation_limit() - - -cdef class LoggingResourceAdaptor(UpstreamResourceAdaptor): - def __cinit__( - self, - DeviceMemoryResource upstream_mr, - object log_file_name=None - ): - if log_file_name is None: - log_file_name = os.getenv("RMM_LOG_FILE") - if not log_file_name: - raise ValueError( - "RMM log file must be specified either using " - "log_file_name= argument or RMM_LOG_FILE " - "environment variable" - ) - - # Append the device ID before the file extension - log_file_name = _append_id( - log_file_name, getDevice() - ) - log_file_name = os.path.abspath(log_file_name) - self._log_file_name = log_file_name - - self.c_obj.reset( - new logging_resource_adaptor[device_memory_resource]( - upstream_mr.get_mr(), - log_file_name.encode() - ) - ) - - def __init__( - self, - DeviceMemoryResource upstream_mr, - object log_file_name=None - ): - """ - Memory resource that logs information about allocations/deallocations - performed by an upstream memory resource. - - Parameters - ---------- - upstream : DeviceMemoryResource - The upstream memory resource. 
- log_file_name : str - Path to the file to which logs are written. - """ - pass - - cpdef flush(self): - (( - self.get_mr()))[0].flush() - - cpdef get_file_name(self): - return self._log_file_name - - def __dealloc__(self): - self.c_obj.reset() - -cdef class StatisticsResourceAdaptor(UpstreamResourceAdaptor): - - def __cinit__( - self, - DeviceMemoryResource upstream_mr - ): - self.c_obj.reset( - new statistics_resource_adaptor[device_memory_resource]( - upstream_mr.get_mr() - ) - ) - - def __init__( - self, - DeviceMemoryResource upstream_mr - ): - """ - Memory resource that tracks the current, peak and total - allocations/deallocations performed by an upstream memory resource. - Includes the ability to query these statistics at any time. - - A stack of counters is maintained. Use :meth:`push_counters` and - :meth:`pop_counters` to track statistics at different nesting levels. - - Parameters - ---------- - upstream : DeviceMemoryResource - The upstream memory resource. - """ - pass - - @property - def allocation_counts(self) -> Statistics: - """ - Gets the current, peak, and total allocated bytes and number of - allocations. - - The dictionary keys are ``current_bytes``, ``current_count``, - ``peak_bytes``, ``peak_count``, ``total_bytes``, and ``total_count``. - - Returns: - dict: Dictionary containing allocation counts and bytes. - """ - cdef statistics_resource_adaptor[device_memory_resource]* mr = \ - self.c_obj.get() - - counts = deref(mr).get_allocations_counter() - byte_counts = deref(mr).get_bytes_counter() - return Statistics( - current_bytes=byte_counts.value, - current_count=counts.value, - peak_bytes=byte_counts.peak, - peak_count=counts.peak, - total_bytes=byte_counts.total, - total_count=counts.total, - ) - - def pop_counters(self) -> Statistics: - """ - Pop a counter pair (bytes and allocations) from the stack - - Returns - ------- - The popped statistics - """ - cdef statistics_resource_adaptor[device_memory_resource]* mr = \ - self.c_obj.get() - - bytes_and_allocs = deref(mr).pop_counters() - return Statistics( - current_bytes=bytes_and_allocs.first.value, - current_count=bytes_and_allocs.second.value, - peak_bytes=bytes_and_allocs.first.peak, - peak_count=bytes_and_allocs.second.peak, - total_bytes=bytes_and_allocs.first.total, - total_count=bytes_and_allocs.second.total, - ) - - def push_counters(self) -> Statistics: - """ - Push a new counter pair (bytes and allocations) on the stack - - Returns - ------- - The statistics _before_ the push - """ - - cdef statistics_resource_adaptor[device_memory_resource]* mr = \ - self.c_obj.get() - - bytes_and_allocs = deref(mr).push_counters() - return Statistics( - current_bytes=bytes_and_allocs.first.value, - current_count=bytes_and_allocs.second.value, - peak_bytes=bytes_and_allocs.first.peak, - peak_count=bytes_and_allocs.second.peak, - total_bytes=bytes_and_allocs.first.total, - total_count=bytes_and_allocs.second.total, - ) - -cdef class TrackingResourceAdaptor(UpstreamResourceAdaptor): - - def __cinit__( - self, - DeviceMemoryResource upstream_mr, - bool capture_stacks=False - ): - self.c_obj.reset( - new tracking_resource_adaptor[device_memory_resource]( - upstream_mr.get_mr(), - capture_stacks - ) - ) - - def __init__( - self, - DeviceMemoryResource upstream_mr, - bool capture_stacks=False - ): - """ - Memory resource that logs tracks allocations/deallocations - performed by an upstream memory resource. Includes the ability to - query all outstanding allocations with the stack trace, if desired. 
- - Parameters - ---------- - upstream : DeviceMemoryResource - The upstream memory resource. - capture_stacks : bool - Whether or not to capture the stack trace with each allocation. - """ - pass - - def get_allocated_bytes(self) -> size_t: - """ - Query the number of bytes that have been allocated. Note that this can - not be used to know how large of an allocation is possible due to both - possible fragmentation and also internal page sizes and alignment that - is not tracked by this allocator. - """ - return (( - self.c_obj.get()) - )[0].get_allocated_bytes() - - def get_outstanding_allocations_str(self) -> str: - """ - Returns a string containing information about the current outstanding - allocations. For each allocation, the address, size and optional - stack trace are shown. - """ - - return (( - self.c_obj.get()) - )[0].get_outstanding_allocations_str().decode('UTF-8') - - def log_outstanding_allocations(self): - """ - Logs the output of `get_outstanding_allocations_str` to the current - RMM log file if enabled. - """ - - (( - self.c_obj.get()))[0].log_outstanding_allocations() - - -# Note that this function is specifically designed to rethrow Python exceptions -# as C++ exceptions when called as a callback from C++, so it is noexcept from -# Cython's perspective. -cdef bool _oom_callback_function(size_t bytes, void *callback_arg) noexcept nogil: - cdef CppExcept err - with gil: - try: - return (callback_arg)(bytes) - except BaseException as e: - err = translate_python_except_to_cpp(e) - throw_cpp_except(err) - - -cdef class FailureCallbackResourceAdaptor(UpstreamResourceAdaptor): - - def __cinit__( - self, - DeviceMemoryResource upstream_mr, - object callback, - ): - self._callback = callback - self.c_obj.reset( - new failure_callback_resource_adaptor[device_memory_resource]( - upstream_mr.get_mr(), - _oom_callback_function, - callback - ) - ) - - def __init__( - self, - DeviceMemoryResource upstream_mr, - object callback, - ): - """ - Memory resource that call callback when memory allocation fails. - - Parameters - ---------- - upstream : DeviceMemoryResource - The upstream memory resource. - callback : callable - Function called when memory allocation fails. - """ - pass - -cdef class PrefetchResourceAdaptor(UpstreamResourceAdaptor): - - def __cinit__( - self, - DeviceMemoryResource upstream_mr - ): - self.c_obj.reset( - new prefetch_resource_adaptor[device_memory_resource]( - upstream_mr.get_mr() - ) - ) - - def __init__( - self, - DeviceMemoryResource upstream_mr - ): - """ - Memory resource that prefetches all allocations. - - Parameters - ---------- - upstream : DeviceMemoryResource - The upstream memory resource. 
- """ - pass - - -# Global per-device memory resources; dict of int:DeviceMemoryResource -cdef _per_device_mrs = defaultdict(CudaMemoryResource) - - -cpdef void _initialize( - bool pool_allocator=False, - bool managed_memory=False, - object initial_pool_size=None, - object maximum_pool_size=None, - object devices=0, - bool logging=False, - object log_file_name=None, -) except *: - """ - Initializes RMM library using the options passed - """ - if managed_memory: - upstream = ManagedMemoryResource - else: - upstream = CudaMemoryResource - - if pool_allocator: - typ = PoolMemoryResource - args = (upstream(),) - kwargs = dict( - initial_pool_size=None if initial_pool_size is None - else parse_bytes(initial_pool_size), - maximum_pool_size=None if maximum_pool_size is None - else parse_bytes(maximum_pool_size) - ) - else: - typ = upstream - args = () - kwargs = {} - - cdef DeviceMemoryResource mr - cdef int original_device - - # Save the current device so we can reset it - try: - original_device = getDevice() - except CUDARuntimeError as e: - if e.status == cudaError_t.cudaErrorNoDevice: - warnings.warn(e.msg) - else: - raise e - else: - # reset any previously specified per device resources - global _per_device_mrs - _per_device_mrs.clear() - - if devices is None: - devices = [0] - elif isinstance(devices, int): - devices = [devices] - - # create a memory resource per specified device - for device in devices: - setDevice(device) - - if logging: - mr = LoggingResourceAdaptor( - typ(*args, **kwargs), - log_file_name - ) - else: - mr = typ(*args, **kwargs) - - set_per_device_resource(device, mr) - - # reset CUDA device to original - setDevice(original_device) - - -cpdef get_per_device_resource(int device): - """ - Get the default memory resource for the specified device. - - If the returned memory resource is used when a different device is the - active CUDA device, behavior is undefined. - - Parameters - ---------- - device : int - The ID of the device for which to get the memory resource. - """ - global _per_device_mrs - return _per_device_mrs[device] - - -cpdef set_per_device_resource(int device, DeviceMemoryResource mr): - """ - Set the default memory resource for the specified device. - - Parameters - ---------- - device : int - The ID of the device for which to get the memory resource. - mr : DeviceMemoryResource - The memory resource to set. Must have been created while device was - the active CUDA device. - """ - global _per_device_mrs - _per_device_mrs[device] = mr - - # Since cuda_device_id does not have a default constructor, it must be heap - # allocated - cdef unique_ptr[cuda_device_id] device_id = \ - make_unique[cuda_device_id](device) - - cpp_set_per_device_resource(deref(device_id), mr.get_mr()) - - -cpdef set_current_device_resource(DeviceMemoryResource mr): - """ - Set the default memory resource for the current device. - - Parameters - ---------- - mr : DeviceMemoryResource - The memory resource to set. Must have been created while the current - device is the active CUDA device. - """ - set_per_device_resource(getDevice(), mr) - - -cpdef get_per_device_resource_type(int device): - """ - Get the memory resource type used for RMM device allocations on the - specified device. - - Parameters - ---------- - device : int - The device ID - """ - return type(get_per_device_resource(device)) - - -cpdef DeviceMemoryResource get_current_device_resource(): - """ - Get the memory resource used for RMM device allocations on the current - device. 
- - If the returned memory resource is used when a different device is the - active CUDA device, behavior is undefined. - """ - return get_per_device_resource(getDevice()) - - -cpdef get_current_device_resource_type(): - """ - Get the memory resource type used for RMM device allocations on the - current device. - """ - return type(get_current_device_resource()) - - -cpdef is_initialized(): - """ - Check whether RMM is initialized - """ - global _per_device_mrs - cdef DeviceMemoryResource each_mr - return all( - [each_mr.get_mr() is not NULL - for each_mr in _per_device_mrs.values()] - ) - - -cpdef _flush_logs(): - """ - Flush the logs of all currently initialized LoggingResourceAdaptor - memory resources - """ - global _per_device_mrs - cdef DeviceMemoryResource each_mr - for each_mr in _per_device_mrs.values(): - if isinstance(each_mr, LoggingResourceAdaptor): - each_mr.flush() - - -def enable_logging(log_file_name=None): - """ - Enable logging of run-time events for all devices. - - Parameters - ---------- - log_file_name: str, optional - Name of the log file. If not specified, the environment variable - RMM_LOG_FILE is used. A ValueError is thrown if neither is available. - A separate log file is produced for each device, - and the suffix `".dev{id}"` is automatically added to the log file - name. - - Notes - ----- - Note that if you use the environment variable CUDA_VISIBLE_DEVICES - with logging enabled, the suffix may not be what you expect. For - example, if you set CUDA_VISIBLE_DEVICES=1, the log file produced - will still have suffix `0`. Similarly, if you set - CUDA_VISIBLE_DEVICES=1,0 and use devices 0 and 1, the log file - with suffix `0` will correspond to the GPU with device ID `1`. - Use `rmm.get_log_filenames()` to get the log file names - corresponding to each device. - """ - global _per_device_mrs - - devices = [0] if not _per_device_mrs.keys() else _per_device_mrs.keys() - - for device in devices: - each_mr = _per_device_mrs[device] - if not isinstance(each_mr, LoggingResourceAdaptor): - set_per_device_resource( - device, - LoggingResourceAdaptor(each_mr, log_file_name) - ) - - -def disable_logging(): - """ - Disable logging if it was enabled previously using - `rmm.initialize()` or `rmm.enable_logging()`. - """ - global _per_device_mrs - for i, each_mr in _per_device_mrs.items(): - if isinstance(each_mr, LoggingResourceAdaptor): - set_per_device_resource(i, each_mr.get_upstream()) - - -def get_log_filenames(): - """ - Returns the log filename (or `None` if not writing logs) - for each device in use. - - Examples - -------- - >>> import rmm - >>> rmm.reinitialize(devices=[0, 1], logging=True, log_file_name="rmm.log") - >>> rmm.get_log_filenames() - {0: '/home/user/workspace/rapids/rmm/python/rmm.dev0.log', - 1: '/home/user/workspace/rapids/rmm/python/rmm.dev1.log'} - """ - global _per_device_mrs - - return { - i: each_mr.get_file_name() - if isinstance(each_mr, LoggingResourceAdaptor) - else None - for i, each_mr in _per_device_mrs.items() - } - - -def available_device_memory(): - """ - Returns a tuple of free and total device memory memory. 
-    """
-    cdef pair[size_t, size_t] res
-    res = c_available_device_memory()
-    return (res.first, res.second)
diff --git a/python/rmm/rmm/_lib/per_device_resource.pxd b/python/rmm/rmm/_lib/per_device_resource.pxd
deleted file mode 100644
index c33217622..000000000
--- a/python/rmm/rmm/_lib/per_device_resource.pxd
+++ /dev/null
@@ -1,23 +0,0 @@
-from rmm._lib.memory_resource cimport device_memory_resource
-
-
-cdef extern from "rmm/mr/device/per_device_resource.hpp" namespace "rmm" nogil:
-    cdef cppclass cuda_device_id:
-        ctypedef int value_type
-
-        cuda_device_id(value_type id)
-
-        value_type value()
-
-cdef extern from "rmm/mr/device/per_device_resource.hpp" \
-        namespace "rmm::mr" nogil:
-    cdef device_memory_resource* set_current_device_resource(
-        device_memory_resource* new_mr
-    )
-    cdef device_memory_resource* get_current_device_resource()
-    cdef device_memory_resource* set_per_device_resource(
-        cuda_device_id id, device_memory_resource* new_mr
-    )
-    cdef device_memory_resource* get_per_device_resource (
-        cuda_device_id id
-    )
diff --git a/python/rmm/rmm/_lib/tests/__init__.py b/python/rmm/rmm/_lib/tests/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/python/rmm/rmm/_lib/tests/test_device_buffer.pyx b/python/rmm/rmm/_lib/tests/test_device_buffer.pyx
deleted file mode 100644
index 0783dd9c6..000000000
--- a/python/rmm/rmm/_lib/tests/test_device_buffer.pyx
+++ /dev/null
@@ -1,38 +0,0 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import numpy as np
-
-from libcpp.memory cimport make_unique
-
-from rmm.cpp.cuda_stream_view cimport cuda_stream_default
-from rmm.cpp.device_buffer cimport device_buffer
-from rmm.python.device_buffer cimport DeviceBuffer
-
-
-def test_release():
-    expect = DeviceBuffer.to_device(b'abc')
-    cdef DeviceBuffer buf = DeviceBuffer.to_device(b'abc')
-
-    got = DeviceBuffer.c_from_unique_ptr(
-        make_unique[device_buffer](buf.c_release(),
-                                   cuda_stream_default.value())
-    )
-    np.testing.assert_equal(expect.copy_to_host(), got.copy_to_host())
-
-
-def test_size_after_release():
-    cdef DeviceBuffer buf = DeviceBuffer.to_device(b'abc')
-    buf.c_release()
-    assert buf.size == 0
From 15b1ae113b5d04f4e8724c41397393dd5dd0871a Mon Sep 17 00:00:00 2001
From: Matthew Murray
Date: Mon, 16 Sep 2024 11:07:37 -0700
Subject: [PATCH 04/13] deprecate rmm._lib

---
 python/rmm/rmm/__init__.py       | 15 +++++++++++++++
 python/rmm/rmm/_lib/__init__.py  | 22 ++++++++++++++++++++++
 python/rmm/rmm/tests/test_rmm.py |  5 +++++
 3 files changed, 42 insertions(+)
 create mode 100644 python/rmm/rmm/_lib/__init__.py

diff --git a/python/rmm/rmm/__init__.py b/python/rmm/rmm/__init__.py
index 61c5e4561..3c55a5cbd 100644
--- a/python/rmm/rmm/__init__.py
+++ b/python/rmm/rmm/__init__.py
@@ -52,3 +52,18 @@
     "should_log",
     "unregister_reinitialize_hook",
 ]
+
+
+def __getattr__(name):
+    if name == "_lib":
+        import importlib
+        import warnings
+
+        warnings.warn(
+            "The `rmm._lib` module is deprecated and will be removed in a future release. 
Use `rmm.python` instead.",
+            FutureWarning,
+        )
+        module = importlib.import_module("rmm.python")
+        return module
+    else:
+        raise AttributeError(f"Module '{__name__}' has no attribute '{name}'")
diff --git a/python/rmm/rmm/_lib/__init__.py b/python/rmm/rmm/_lib/__init__.py
new file mode 100644
index 000000000..9b6d1daf4
--- /dev/null
+++ b/python/rmm/rmm/_lib/__init__.py
@@ -0,0 +1,22 @@
+# Copyright (c) 2018-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import warnings
+
+warnings.warn(
+    "The `rmm._lib` module is deprecated and will be removed in a future release. Use `rmm.python` instead.",
+    FutureWarning,
+)
+
+from rmm.python import *
diff --git a/python/rmm/rmm/tests/test_rmm.py b/python/rmm/rmm/tests/test_rmm.py
index 4d7b6c646..a1a973950 100644
--- a/python/rmm/rmm/tests/test_rmm.py
+++ b/python/rmm/rmm/tests/test_rmm.py
@@ -1076,3 +1076,8 @@ def test_available_device_memory():
     assert initial_memory[1] == final_memory[1]
     assert initial_memory[0] > 0
     assert final_memory[0] > 0
+
+
+def test_deprecate_rmm_lib():
+    with pytest.warns(FutureWarning):
+        rmm._lib.device_buffer.DeviceBuffer(size=100)
From a6a831608282300e0fa5b86fc8f4d5e3bfa46a63 Mon Sep 17 00:00:00 2001
From: Matthew Murray
Date: Tue, 17 Sep 2024 07:40:15 -0700
Subject: [PATCH 05/13] address review

---
 python/rmm/rmm/allocators/cupy.py  | 4 ++--
 python/rmm/rmm/allocators/numba.py | 4 ++--
 python/rmm/rmm/tests/test_rmm.py   | 1 +
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/python/rmm/rmm/allocators/cupy.py b/python/rmm/rmm/allocators/cupy.py
index 8cd41b3b3..94003541f 100644
--- a/python/rmm/rmm/allocators/cupy.py
+++ b/python/rmm/rmm/allocators/cupy.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from rmm import python as librmm
+from rmm import python as pylibrmm
 from rmm._cuda.stream import Stream

 try:
@@ -34,7 +34,7 @@ def rmm_cupy_allocator(nbytes):
         raise ModuleNotFoundError("No module named 'cupy'")

     stream = Stream(obj=cupy.cuda.get_current_stream())
-    buf = librmm.device_buffer.DeviceBuffer(size=nbytes, stream=stream)
+    buf = pylibrmm.device_buffer.DeviceBuffer(size=nbytes, stream=stream)
     dev_id = -1 if buf.ptr else cupy.cuda.device.get_device_id()
     mem = cupy.cuda.UnownedMemory(
         ptr=buf.ptr, size=buf.size, owner=buf, device_id=dev_id
diff --git a/python/rmm/rmm/allocators/numba.py b/python/rmm/rmm/allocators/numba.py
index 9838b8b59..d5bfdcf9f 100644
--- a/python/rmm/rmm/allocators/numba.py
+++ b/python/rmm/rmm/allocators/numba.py
@@ -19,7 +19,7 @@
 from numba import config, cuda
 from numba.cuda import HostOnlyCUDAMemoryManager, IpcHandle, MemoryPointer

-from rmm import python as librmm
+from rmm import python as pylibrmm


 def _make_emm_plugin_finalizer(handle, allocations):
@@ -70,7 +70,7 @@ def memalloc(self, size):
         """
         Allocate an on-device array from the RMM pool.
""" - buf = librmm.DeviceBuffer(size=size) + buf = pylibrmm.DeviceBuffer(size=size) ctx = self.context if config.CUDA_USE_NVIDIA_BINDING: diff --git a/python/rmm/rmm/tests/test_rmm.py b/python/rmm/rmm/tests/test_rmm.py index a1a973950..826071fce 100644 --- a/python/rmm/rmm/tests/test_rmm.py +++ b/python/rmm/rmm/tests/test_rmm.py @@ -1078,6 +1078,7 @@ def test_available_device_memory(): assert final_memory[0] > 0 +# TODO: Remove test when rmm._lib is removed in 24.12 def test_deprecate_rmm_lib(): with pytest.warns(FutureWarning): rmm._lib.device_buffer.DeviceBuffer(size=100) From 724b96237a660e4ee67f3195d2cbdb1d002a4381 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Wed, 18 Sep 2024 09:52:11 -0700 Subject: [PATCH 06/13] replace lib with _logger --- python/rmm/rmm/allocators/torch.py | 6 +++--- python/rmm/rmm/cpp/CMakeLists.txt | 2 +- python/rmm/rmm/cpp/lib.pxd | 17 ----------------- python/rmm/rmm/cpp/lib.pyx | 13 ------------- 4 files changed, 4 insertions(+), 34 deletions(-) delete mode 100644 python/rmm/rmm/cpp/lib.pxd delete mode 100644 python/rmm/rmm/cpp/lib.pyx diff --git a/python/rmm/rmm/allocators/torch.py b/python/rmm/rmm/allocators/torch.py index 8df921ad8..af675d8c8 100644 --- a/python/rmm/rmm/allocators/torch.py +++ b/python/rmm/rmm/allocators/torch.py @@ -28,10 +28,10 @@ # allocator .so relative to the current file because the current file # is pure Python and will therefore be in the source directory. # Instead, we search relative to an arbitrary file in the compiled - # package. We use the _lib.lib module because it is small. - from rmm.cpp import lib + # package. We use the cpp._logger module because it is small. + from rmm.cpp import _logger - sofile = pathlib.Path(lib.__file__).parent / "_torch_allocator.so" + sofile = pathlib.Path(_logger.__file__).parent / "_torch_allocator.so" rmm_torch_allocator = CUDAPluggableAllocator( str(sofile.absolute()), alloc_fn_name="allocate", diff --git a/python/rmm/rmm/cpp/CMakeLists.txt b/python/rmm/rmm/cpp/CMakeLists.txt index a201187b8..5da2a1a01 100644 --- a/python/rmm/rmm/cpp/CMakeLists.txt +++ b/python/rmm/rmm/cpp/CMakeLists.txt @@ -12,7 +12,7 @@ # the License. # ============================================================================= -set(cython_sources lib.pyx _logger.pyx) +set(cython_sources _logger.pyx) set(linked_libraries rmm::rmm) # Build all of the Cython targets diff --git a/python/rmm/rmm/cpp/lib.pxd b/python/rmm/rmm/cpp/lib.pxd deleted file mode 100644 index 592574862..000000000 --- a/python/rmm/rmm/cpp/lib.pxd +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (c) 2019-2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from libcpp.utility cimport pair - -ctypedef pair[const char*, unsigned int] caller_pair diff --git a/python/rmm/rmm/cpp/lib.pyx b/python/rmm/rmm/cpp/lib.pyx deleted file mode 100644 index 46753baa3..000000000 --- a/python/rmm/rmm/cpp/lib.pyx +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. From 10dfc8e0f2f7c3ed195b9aa794155a1610059887 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Thu, 19 Sep 2024 08:37:59 -0700 Subject: [PATCH 07/13] remove __init__.pxd --- python/rmm/rmm/cpp/__init__.pxd | 13 ------------- python/rmm/rmm/python/__init__.pxd | 13 ------------- 2 files changed, 26 deletions(-) delete mode 100644 python/rmm/rmm/cpp/__init__.pxd delete mode 100644 python/rmm/rmm/python/__init__.pxd diff --git a/python/rmm/rmm/cpp/__init__.pxd b/python/rmm/rmm/cpp/__init__.pxd deleted file mode 100644 index 46753baa3..000000000 --- a/python/rmm/rmm/cpp/__init__.pxd +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/python/rmm/rmm/python/__init__.pxd b/python/rmm/rmm/python/__init__.pxd deleted file mode 100644 index 46753baa3..000000000 --- a/python/rmm/rmm/python/__init__.pxd +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
From 153cb3b4719f0be226335aeb5557d168a308e998 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Mon, 23 Sep 2024 18:44:23 -0700 Subject: [PATCH 08/13] Address review --- python/rmm/CMakeLists.txt | 4 +-- python/rmm/rmm/__init__.py | 8 ++--- python/rmm/rmm/_cuda/stream.pxd | 2 +- python/rmm/rmm/_cuda/stream.pyx | 4 +-- python/rmm/rmm/_lib/__init__.py | 4 +-- python/rmm/rmm/_lib/cuda_stream.pxd | 15 ++++++++ python/rmm/rmm/_lib/device_buffer.pxd | 21 ++++++++++++ python/rmm/rmm/_lib/helper.pxd | 15 ++++++++ python/rmm/rmm/_lib/memory_resource.pxd | 34 +++++++++++++++++++ python/rmm/rmm/allocators/cupy.py | 2 +- python/rmm/rmm/allocators/numba.py | 2 +- python/rmm/rmm/allocators/torch.py | 2 +- python/rmm/rmm/{cpp => librmm}/CMakeLists.txt | 0 python/rmm/rmm/{cpp => librmm}/__init__.py | 0 python/rmm/rmm/{cpp => librmm}/_logger.pxd | 0 python/rmm/rmm/{cpp => librmm}/_logger.pyx | 0 .../rmm/{cpp => librmm}/_torch_allocator.cpp | 0 .../rmm/rmm/{cpp => librmm}/cuda_stream.pxd | 2 +- .../rmm/{cpp => librmm}/cuda_stream_pool.pxd | 2 +- .../rmm/{cpp => librmm}/cuda_stream_view.pxd | 0 .../rmm/rmm/{cpp => librmm}/device_buffer.pxd | 4 +-- .../rmm/{cpp => librmm}/device_uvector.pxd | 6 ++-- .../rmm/{cpp => librmm}/memory_resource.pxd | 4 +-- .../{cpp => librmm}/per_device_resource.pxd | 2 +- python/rmm/rmm/mr.py | 2 +- .../rmm/{python => pylibrmm}/CMakeLists.txt | 0 .../rmm/rmm/{python => pylibrmm}/__init__.py | 0 .../rmm/{python => pylibrmm}/cuda_stream.pxd | 2 +- .../rmm/{python => pylibrmm}/cuda_stream.pyx | 2 +- .../{python => pylibrmm}/device_buffer.pxd | 4 +-- .../{python => pylibrmm}/device_buffer.pyx | 16 ++++----- .../rmm/rmm/{python => pylibrmm}/helper.pxd | 0 .../rmm/rmm/{python => pylibrmm}/helper.pyx | 0 .../rmm/rmm/{python => pylibrmm}/logger.pyx | 4 +-- .../{python => pylibrmm}/memory_resource.pxd | 2 +- .../{python => pylibrmm}/memory_resource.pyx | 8 ++--- .../{python => pylibrmm}/tests/__init__.py | 0 .../tests/test_device_buffer.pyx | 6 ++-- python/rmm/rmm/tests/test_cython.py | 2 +- python/rmm/rmm/tests/test_rmm.py | 2 +- 40 files changed, 134 insertions(+), 49 deletions(-) create mode 100644 python/rmm/rmm/_lib/cuda_stream.pxd create mode 100644 python/rmm/rmm/_lib/device_buffer.pxd create mode 100644 python/rmm/rmm/_lib/helper.pxd create mode 100644 python/rmm/rmm/_lib/memory_resource.pxd rename python/rmm/rmm/{cpp => librmm}/CMakeLists.txt (100%) rename python/rmm/rmm/{cpp => librmm}/__init__.py (100%) rename python/rmm/rmm/{cpp => librmm}/_logger.pxd (100%) rename python/rmm/rmm/{cpp => librmm}/_logger.pyx (100%) rename python/rmm/rmm/{cpp => librmm}/_torch_allocator.cpp (100%) rename python/rmm/rmm/{cpp => librmm}/cuda_stream.pxd (94%) rename python/rmm/rmm/{cpp => librmm}/cuda_stream_pool.pxd (93%) rename python/rmm/rmm/{cpp => librmm}/cuda_stream_view.pxd (100%) rename python/rmm/rmm/{cpp => librmm}/device_buffer.pxd (94%) rename python/rmm/rmm/{cpp => librmm}/device_uvector.pxd (89%) rename python/rmm/rmm/{cpp => librmm}/memory_resource.pxd (98%) rename python/rmm/rmm/{cpp => librmm}/per_device_resource.pxd (95%) rename python/rmm/rmm/{python => pylibrmm}/CMakeLists.txt (100%) rename python/rmm/rmm/{python => pylibrmm}/__init__.py (100%) rename python/rmm/rmm/{python => pylibrmm}/cuda_stream.pxd (94%) rename python/rmm/rmm/{python => pylibrmm}/cuda_stream.pyx (95%) rename python/rmm/rmm/{python => pylibrmm}/device_buffer.pxd (95%) rename python/rmm/rmm/{python => pylibrmm}/device_buffer.pyx (97%) rename python/rmm/rmm/{python => pylibrmm}/helper.pxd (100%) rename 
python/rmm/rmm/{python => pylibrmm}/helper.pyx (100%) rename python/rmm/rmm/{python => pylibrmm}/logger.pyx (98%) rename python/rmm/rmm/{python => pylibrmm}/memory_resource.pxd (97%) rename python/rmm/rmm/{python => pylibrmm}/memory_resource.pyx (99%) rename python/rmm/rmm/{python => pylibrmm}/tests/__init__.py (100%) rename python/rmm/rmm/{python => pylibrmm}/tests/test_device_buffer.pyx (87%) diff --git a/python/rmm/CMakeLists.txt b/python/rmm/CMakeLists.txt index 119888a4f..ac8495e14 100644 --- a/python/rmm/CMakeLists.txt +++ b/python/rmm/CMakeLists.txt @@ -30,5 +30,5 @@ rapids_cython_init() add_compile_definitions("SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${RMM_LOGGING_LEVEL}") add_subdirectory(rmm/_cuda) -add_subdirectory(rmm/cpp) -add_subdirectory(rmm/python) +add_subdirectory(rmm/librmm) +add_subdirectory(rmm/pylibrmm) diff --git a/python/rmm/rmm/__init__.py b/python/rmm/rmm/__init__.py index 3c55a5cbd..c52818a75 100644 --- a/python/rmm/rmm/__init__.py +++ b/python/rmm/rmm/__init__.py @@ -15,8 +15,8 @@ from rmm import mr from rmm._version import __git_commit__, __version__ from rmm.mr import disable_logging, enable_logging, get_log_filenames -from rmm.python.device_buffer import DeviceBuffer -from rmm.python.logger import ( +from rmm.pylibrmm.device_buffer import DeviceBuffer +from rmm.pylibrmm.logger import ( flush_logger, get_flush_level, get_logging_level, @@ -60,10 +60,10 @@ def __getattr__(name): import warnings warnings.warn( - "The `rmm._lib` module is deprecated in will be removed in a future release. Use `rmm.python` instead.", + "The `rmm._lib` module is deprecated in will be removed in a future release. Use `rmm.pylibrmm` instead.", FutureWarning, ) - module = importlib.import_module("rmm.python") + module = importlib.import_module("rmm.pylibrmm") return module else: raise AttributeError(f"Module '{__name__}' has no attribute '{name}'") diff --git a/python/rmm/rmm/_cuda/stream.pxd b/python/rmm/rmm/_cuda/stream.pxd index c978074fc..e91e2ce58 100644 --- a/python/rmm/rmm/_cuda/stream.pxd +++ b/python/rmm/rmm/_cuda/stream.pxd @@ -16,7 +16,7 @@ from cuda.ccudart cimport cudaStream_t from libc.stdint cimport uintptr_t from libcpp cimport bool -from rmm.cpp.cuda_stream_view cimport cuda_stream_view +from rmm.librmm.cuda_stream_view cimport cuda_stream_view cdef class Stream: diff --git a/python/rmm/rmm/_cuda/stream.pyx b/python/rmm/rmm/_cuda/stream.pyx index 3c6c10c18..37dcbd610 100644 --- a/python/rmm/rmm/_cuda/stream.pyx +++ b/python/rmm/rmm/_cuda/stream.pyx @@ -16,13 +16,13 @@ from cuda.ccudart cimport cudaStream_t from libc.stdint cimport uintptr_t from libcpp cimport bool -from rmm.cpp.cuda_stream_view cimport ( +from rmm.librmm.cuda_stream_view cimport ( cuda_stream_default, cuda_stream_legacy, cuda_stream_per_thread, cuda_stream_view, ) -from rmm.python.cuda_stream cimport CudaStream +from rmm.pylibrmm.cuda_stream cimport CudaStream cdef class Stream: diff --git a/python/rmm/rmm/_lib/__init__.py b/python/rmm/rmm/_lib/__init__.py index 9b6d1daf4..965d8e917 100644 --- a/python/rmm/rmm/_lib/__init__.py +++ b/python/rmm/rmm/_lib/__init__.py @@ -15,8 +15,8 @@ import warnings warnings.warn( - "The `rmm._lib` module is deprecated in will be removed in a future release. Use `rmm.python` instead.", + "The `rmm._lib` module is deprecated in will be removed in a future release. 
Use `rmm.pylibrmm` instead.", FutureWarning, ) -from rmm.python import * +from rmm.pylibrmm import * diff --git a/python/rmm/rmm/_lib/cuda_stream.pxd b/python/rmm/rmm/_lib/cuda_stream.pxd new file mode 100644 index 000000000..f7b5759bd --- /dev/null +++ b/python/rmm/rmm/_lib/cuda_stream.pxd @@ -0,0 +1,15 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from rmm.pylibrmm.cuda_stream cimport CudaStream diff --git a/python/rmm/rmm/_lib/device_buffer.pxd b/python/rmm/rmm/_lib/device_buffer.pxd new file mode 100644 index 000000000..66c19d990 --- /dev/null +++ b/python/rmm/rmm/_lib/device_buffer.pxd @@ -0,0 +1,21 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from rmm.pylibrmm.device_buffer cimport ( + DeviceBuffer, + copy_device_to_ptr, + copy_host_to_ptr, + copy_ptr_to_host, + to_device, +) diff --git a/python/rmm/rmm/_lib/helper.pxd b/python/rmm/rmm/_lib/helper.pxd new file mode 100644 index 000000000..4a5159435 --- /dev/null +++ b/python/rmm/rmm/_lib/helper.pxd @@ -0,0 +1,15 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from rmm.pylibrmm.helper cimport parse_bytes diff --git a/python/rmm/rmm/_lib/memory_resource.pxd b/python/rmm/rmm/_lib/memory_resource.pxd new file mode 100644 index 000000000..79a28cac2 --- /dev/null +++ b/python/rmm/rmm/_lib/memory_resource.pxd @@ -0,0 +1,34 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from rmm.pylibrmm.memory_resource cimport ( + BinningMemoryResource, + CallbackMemoryResource, + CudaAsyncMemoryResource, + CudaMemoryResource, + DeviceMemoryResource, + FailureCallbackResourceAdaptor, + FixedSizeMemoryResource, + LimitingResourceAdaptor, + LoggingResourceAdaptor, + ManagedMemoryResource, + PoolMemoryResource, + PrefetchResourceAdaptor, + SamHeadroomMemoryResource, + StatisticsResourceAdaptor, + SystemMemoryResource, + TrackingResourceAdaptor, + UpstreamResourceAdaptor, + get_current_device_resource, +) diff --git a/python/rmm/rmm/allocators/cupy.py b/python/rmm/rmm/allocators/cupy.py index 94003541f..780ff2abf 100644 --- a/python/rmm/rmm/allocators/cupy.py +++ b/python/rmm/rmm/allocators/cupy.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from rmm import python as pylibrmm +from rmm import pylibrmm from rmm._cuda.stream import Stream try: diff --git a/python/rmm/rmm/allocators/numba.py b/python/rmm/rmm/allocators/numba.py index d5bfdcf9f..fd9bacb5a 100644 --- a/python/rmm/rmm/allocators/numba.py +++ b/python/rmm/rmm/allocators/numba.py @@ -19,7 +19,7 @@ from numba import config, cuda from numba.cuda import HostOnlyCUDAMemoryManager, IpcHandle, MemoryPointer -from rmm import python as pylibrmm +from rmm import pylibrmm def _make_emm_plugin_finalizer(handle, allocations): diff --git a/python/rmm/rmm/allocators/torch.py b/python/rmm/rmm/allocators/torch.py index af675d8c8..462a06e80 100644 --- a/python/rmm/rmm/allocators/torch.py +++ b/python/rmm/rmm/allocators/torch.py @@ -29,7 +29,7 @@ # is pure Python and will therefore be in the source directory. # Instead, we search relative to an arbitrary file in the compiled # package. We use the cpp._logger module because it is small. 
- from rmm.cpp import _logger + from rmm.librmm import _logger sofile = pathlib.Path(_logger.__file__).parent / "_torch_allocator.so" rmm_torch_allocator = CUDAPluggableAllocator( diff --git a/python/rmm/rmm/cpp/CMakeLists.txt b/python/rmm/rmm/librmm/CMakeLists.txt similarity index 100% rename from python/rmm/rmm/cpp/CMakeLists.txt rename to python/rmm/rmm/librmm/CMakeLists.txt diff --git a/python/rmm/rmm/cpp/__init__.py b/python/rmm/rmm/librmm/__init__.py similarity index 100% rename from python/rmm/rmm/cpp/__init__.py rename to python/rmm/rmm/librmm/__init__.py diff --git a/python/rmm/rmm/cpp/_logger.pxd b/python/rmm/rmm/librmm/_logger.pxd similarity index 100% rename from python/rmm/rmm/cpp/_logger.pxd rename to python/rmm/rmm/librmm/_logger.pxd diff --git a/python/rmm/rmm/cpp/_logger.pyx b/python/rmm/rmm/librmm/_logger.pyx similarity index 100% rename from python/rmm/rmm/cpp/_logger.pyx rename to python/rmm/rmm/librmm/_logger.pyx diff --git a/python/rmm/rmm/cpp/_torch_allocator.cpp b/python/rmm/rmm/librmm/_torch_allocator.cpp similarity index 100% rename from python/rmm/rmm/cpp/_torch_allocator.cpp rename to python/rmm/rmm/librmm/_torch_allocator.cpp diff --git a/python/rmm/rmm/cpp/cuda_stream.pxd b/python/rmm/rmm/librmm/cuda_stream.pxd similarity index 94% rename from python/rmm/rmm/cpp/cuda_stream.pxd rename to python/rmm/rmm/librmm/cuda_stream.pxd index fcd84198c..3f2ac3361 100644 --- a/python/rmm/rmm/cpp/cuda_stream.pxd +++ b/python/rmm/rmm/librmm/cuda_stream.pxd @@ -15,7 +15,7 @@ from cuda.ccudart cimport cudaStream_t from libcpp cimport bool -from rmm.cpp.cuda_stream_view cimport cuda_stream_view +from rmm.librmm.cuda_stream_view cimport cuda_stream_view cdef extern from "rmm/cuda_stream.hpp" namespace "rmm" nogil: diff --git a/python/rmm/rmm/cpp/cuda_stream_pool.pxd b/python/rmm/rmm/librmm/cuda_stream_pool.pxd similarity index 93% rename from python/rmm/rmm/cpp/cuda_stream_pool.pxd rename to python/rmm/rmm/librmm/cuda_stream_pool.pxd index f72cfd7f6..4f2cbb36d 100644 --- a/python/rmm/rmm/cpp/cuda_stream_pool.pxd +++ b/python/rmm/rmm/librmm/cuda_stream_pool.pxd @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from rmm.cpp.cuda_stream_view cimport cuda_stream_view +from rmm.librmm.cuda_stream_view cimport cuda_stream_view cdef extern from "rmm/cuda_stream_pool.hpp" namespace "rmm" nogil: diff --git a/python/rmm/rmm/cpp/cuda_stream_view.pxd b/python/rmm/rmm/librmm/cuda_stream_view.pxd similarity index 100% rename from python/rmm/rmm/cpp/cuda_stream_view.pxd rename to python/rmm/rmm/librmm/cuda_stream_view.pxd diff --git a/python/rmm/rmm/cpp/device_buffer.pxd b/python/rmm/rmm/librmm/device_buffer.pxd similarity index 94% rename from python/rmm/rmm/cpp/device_buffer.pxd rename to python/rmm/rmm/librmm/device_buffer.pxd index a3801cf05..1c503ac9a 100644 --- a/python/rmm/rmm/cpp/device_buffer.pxd +++ b/python/rmm/rmm/librmm/device_buffer.pxd @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from rmm.cpp.cuda_stream_view cimport cuda_stream_view -from rmm.cpp.memory_resource cimport device_memory_resource +from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from rmm.librmm.memory_resource cimport device_memory_resource cdef extern from "rmm/mr/device/per_device_resource.hpp" namespace "rmm" nogil: diff --git a/python/rmm/rmm/cpp/device_uvector.pxd b/python/rmm/rmm/librmm/device_uvector.pxd similarity index 89% rename from python/rmm/rmm/cpp/device_uvector.pxd rename to python/rmm/rmm/librmm/device_uvector.pxd index 2cb647e3c..f560a9e38 100644 --- a/python/rmm/rmm/cpp/device_uvector.pxd +++ b/python/rmm/rmm/librmm/device_uvector.pxd @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -from rmm.cpp.cuda_stream_view cimport cuda_stream_view -from rmm.cpp.device_buffer cimport device_buffer -from rmm.cpp.memory_resource cimport device_memory_resource +from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from rmm.librmm.device_buffer cimport device_buffer +from rmm.librmm.memory_resource cimport device_memory_resource cdef extern from "rmm/device_buffer.hpp" namespace "rmm" nogil: diff --git a/python/rmm/rmm/cpp/memory_resource.pxd b/python/rmm/rmm/librmm/memory_resource.pxd similarity index 98% rename from python/rmm/rmm/cpp/memory_resource.pxd rename to python/rmm/rmm/librmm/memory_resource.pxd index 5e457b653..a8e932e6c 100644 --- a/python/rmm/rmm/cpp/memory_resource.pxd +++ b/python/rmm/rmm/librmm/memory_resource.pxd @@ -23,8 +23,8 @@ from libcpp.optional cimport optional from libcpp.pair cimport pair from libcpp.string cimport string -from rmm.cpp.cuda_stream_view cimport cuda_stream_view -from rmm.cpp.memory_resource cimport device_memory_resource +from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from rmm.librmm.memory_resource cimport device_memory_resource cdef extern from "rmm/mr/device/device_memory_resource.hpp" \ diff --git a/python/rmm/rmm/cpp/per_device_resource.pxd b/python/rmm/rmm/librmm/per_device_resource.pxd similarity index 95% rename from python/rmm/rmm/cpp/per_device_resource.pxd rename to python/rmm/rmm/librmm/per_device_resource.pxd index ec43d5f99..63ee29056 100644 --- a/python/rmm/rmm/cpp/per_device_resource.pxd +++ b/python/rmm/rmm/librmm/per_device_resource.pxd @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from rmm.cpp.memory_resource cimport device_memory_resource +from rmm.librmm.memory_resource cimport device_memory_resource cdef extern from "rmm/mr/device/per_device_resource.hpp" namespace "rmm" nogil: diff --git a/python/rmm/rmm/mr.py b/python/rmm/rmm/mr.py index f820cf805..3f0c3fce3 100644 --- a/python/rmm/rmm/mr.py +++ b/python/rmm/rmm/mr.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from rmm.python.memory_resource import ( +from rmm.pylibrmm.memory_resource import ( BinningMemoryResource, CallbackMemoryResource, CudaAsyncMemoryResource, diff --git a/python/rmm/rmm/python/CMakeLists.txt b/python/rmm/rmm/pylibrmm/CMakeLists.txt similarity index 100% rename from python/rmm/rmm/python/CMakeLists.txt rename to python/rmm/rmm/pylibrmm/CMakeLists.txt diff --git a/python/rmm/rmm/python/__init__.py b/python/rmm/rmm/pylibrmm/__init__.py similarity index 100% rename from python/rmm/rmm/python/__init__.py rename to python/rmm/rmm/pylibrmm/__init__.py diff --git a/python/rmm/rmm/python/cuda_stream.pxd b/python/rmm/rmm/pylibrmm/cuda_stream.pxd similarity index 94% rename from python/rmm/rmm/python/cuda_stream.pxd rename to python/rmm/rmm/pylibrmm/cuda_stream.pxd index 169e560a4..dd38387c2 100644 --- a/python/rmm/rmm/python/cuda_stream.pxd +++ b/python/rmm/rmm/pylibrmm/cuda_stream.pxd @@ -17,7 +17,7 @@ from cuda.ccudart cimport cudaStream_t from libcpp cimport bool from libcpp.memory cimport unique_ptr -from rmm.cpp.cuda_stream cimport cuda_stream +from rmm.librmm.cuda_stream cimport cuda_stream @cython.final diff --git a/python/rmm/rmm/python/cuda_stream.pyx b/python/rmm/rmm/pylibrmm/cuda_stream.pyx similarity index 95% rename from python/rmm/rmm/python/cuda_stream.pyx rename to python/rmm/rmm/pylibrmm/cuda_stream.pyx index de02291d4..d6aa4edc7 100644 --- a/python/rmm/rmm/python/cuda_stream.pyx +++ b/python/rmm/rmm/pylibrmm/cuda_stream.pyx @@ -16,7 +16,7 @@ cimport cython from cuda.ccudart cimport cudaStream_t from libcpp cimport bool -from rmm.cpp.cuda_stream cimport cuda_stream +from rmm.librmm.cuda_stream cimport cuda_stream @cython.final diff --git a/python/rmm/rmm/python/device_buffer.pxd b/python/rmm/rmm/pylibrmm/device_buffer.pxd similarity index 95% rename from python/rmm/rmm/python/device_buffer.pxd rename to python/rmm/rmm/pylibrmm/device_buffer.pxd index 2d4d932af..a0d287423 100644 --- a/python/rmm/rmm/python/device_buffer.pxd +++ b/python/rmm/rmm/pylibrmm/device_buffer.pxd @@ -16,8 +16,8 @@ from libc.stdint cimport uintptr_t from libcpp.memory cimport unique_ptr from rmm._cuda.stream cimport Stream -from rmm.cpp.device_buffer cimport device_buffer -from rmm.python.memory_resource cimport DeviceMemoryResource +from rmm.librmm.device_buffer cimport device_buffer +from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource cdef class DeviceBuffer: diff --git a/python/rmm/rmm/python/device_buffer.pyx b/python/rmm/rmm/pylibrmm/device_buffer.pyx similarity index 97% rename from python/rmm/rmm/python/device_buffer.pyx rename to python/rmm/rmm/pylibrmm/device_buffer.pyx index 73b0d32ec..76fbceef8 100644 --- a/python/rmm/rmm/python/device_buffer.pyx +++ b/python/rmm/rmm/pylibrmm/device_buffer.pyx @@ -32,15 +32,15 @@ from cuda.ccudart cimport ( cudaStream_t, ) -from rmm.cpp.cuda_stream_view cimport cuda_stream_view -from rmm.cpp.device_buffer cimport ( +from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from rmm.librmm.device_buffer cimport ( cuda_device_id, device_buffer, get_current_cuda_device, prefetch, ) -from rmm.cpp.memory_resource cimport device_memory_resource -from rmm.python.memory_resource cimport ( +from rmm.librmm.memory_resource cimport device_memory_resource +from rmm.pylibrmm.memory_resource cimport ( DeviceMemoryResource, get_current_device_resource, ) @@ -401,7 +401,7 @@ cpdef DeviceBuffer to_device(const unsigned char[::1] b, Examples -------- >>> import rmm - >>> db = rmm.python.device_buffer.to_device(b"abc") + >>> db = 
rmm.pylibrmm.device_buffer.to_device(b"abc") >>> print(bytes(db)) b'abc' """ @@ -467,7 +467,7 @@ cpdef void copy_ptr_to_host(uintptr_t db, >>> import rmm >>> db = rmm.DeviceBuffer.to_device(b"abc") >>> hb = bytearray(db.nbytes) - >>> rmm.python.device_buffer.copy_ptr_to_host(db.ptr, hb) + >>> rmm.pylibrmm.device_buffer.copy_ptr_to_host(db.ptr, hb) >>> print(hb) bytearray(b'abc') """ @@ -509,7 +509,7 @@ cpdef void copy_host_to_ptr(const unsigned char[::1] hb, >>> import rmm >>> db = rmm.DeviceBuffer(size=10) >>> hb = b"abc" - >>> rmm.python.device_buffer.copy_host_to_ptr(hb, db.ptr) + >>> rmm.pylibrmm.device_buffer.copy_host_to_ptr(hb, db.ptr) >>> hb = db.copy_to_host() >>> print(hb) array([97, 98, 99, 0, 0, 0, 0, 0, 0, 0], dtype=uint8) @@ -548,7 +548,7 @@ cpdef void copy_device_to_ptr(uintptr_t d_src, >>> import rmm >>> db = rmm.DeviceBuffer(size=5) >>> db2 = rmm.DeviceBuffer.to_device(b"abc") - >>> rmm.python.device_buffer.copy_device_to_ptr(db2.ptr, db.ptr, db2.size) + >>> rmm.pylibrmm.device_buffer.copy_device_to_ptr(db2.ptr, db.ptr, db2.size) >>> hb = db.copy_to_host() >>> hb array([97, 98, 99, 0, 0], dtype=uint8) diff --git a/python/rmm/rmm/python/helper.pxd b/python/rmm/rmm/pylibrmm/helper.pxd similarity index 100% rename from python/rmm/rmm/python/helper.pxd rename to python/rmm/rmm/pylibrmm/helper.pxd diff --git a/python/rmm/rmm/python/helper.pyx b/python/rmm/rmm/pylibrmm/helper.pyx similarity index 100% rename from python/rmm/rmm/python/helper.pyx rename to python/rmm/rmm/pylibrmm/helper.pyx diff --git a/python/rmm/rmm/python/logger.pyx b/python/rmm/rmm/pylibrmm/logger.pyx similarity index 98% rename from python/rmm/rmm/python/logger.pyx rename to python/rmm/rmm/pylibrmm/logger.pyx index 15ba47308..119e1c92f 100644 --- a/python/rmm/rmm/python/logger.pyx +++ b/python/rmm/rmm/pylibrmm/logger.pyx @@ -14,9 +14,9 @@ import warnings -from rmm.cpp._logger cimport logger +from rmm.librmm._logger cimport logger -from rmm.cpp._logger import logging_level +from rmm.librmm._logger import logging_level def _validate_level_type(level): diff --git a/python/rmm/rmm/python/memory_resource.pxd b/python/rmm/rmm/pylibrmm/memory_resource.pxd similarity index 97% rename from python/rmm/rmm/python/memory_resource.pxd rename to python/rmm/rmm/pylibrmm/memory_resource.pxd index 4970d5230..985d5d31b 100644 --- a/python/rmm/rmm/python/memory_resource.pxd +++ b/python/rmm/rmm/pylibrmm/memory_resource.pxd @@ -14,7 +14,7 @@ from libcpp.memory cimport shared_ptr -from rmm.cpp.memory_resource cimport device_memory_resource +from rmm.librmm.memory_resource cimport device_memory_resource cdef class DeviceMemoryResource: diff --git a/python/rmm/rmm/python/memory_resource.pyx b/python/rmm/rmm/pylibrmm/memory_resource.pyx similarity index 99% rename from python/rmm/rmm/python/memory_resource.pyx rename to python/rmm/rmm/pylibrmm/memory_resource.pyx index 14585f36b..021125567 100644 --- a/python/rmm/rmm/python/memory_resource.pyx +++ b/python/rmm/rmm/pylibrmm/memory_resource.pyx @@ -36,16 +36,16 @@ from rmm._cuda.stream cimport Stream from rmm._cuda.stream import DEFAULT_STREAM -from rmm.cpp.cuda_stream_view cimport cuda_stream_view -from rmm.cpp.per_device_resource cimport ( +from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from rmm.librmm.per_device_resource cimport ( cuda_device_id, set_per_device_resource as cpp_set_per_device_resource, ) -from rmm.python.helper cimport parse_bytes +from rmm.pylibrmm.helper cimport parse_bytes from rmm.statistics import Statistics -from rmm.cpp.memory_resource cimport ( 
+from rmm.librmm.memory_resource cimport ( CppExcept, allocate_callback_t, allocation_handle_type, diff --git a/python/rmm/rmm/python/tests/__init__.py b/python/rmm/rmm/pylibrmm/tests/__init__.py similarity index 100% rename from python/rmm/rmm/python/tests/__init__.py rename to python/rmm/rmm/pylibrmm/tests/__init__.py diff --git a/python/rmm/rmm/python/tests/test_device_buffer.pyx b/python/rmm/rmm/pylibrmm/tests/test_device_buffer.pyx similarity index 87% rename from python/rmm/rmm/python/tests/test_device_buffer.pyx rename to python/rmm/rmm/pylibrmm/tests/test_device_buffer.pyx index 0783dd9c6..ec2ff4def 100644 --- a/python/rmm/rmm/python/tests/test_device_buffer.pyx +++ b/python/rmm/rmm/pylibrmm/tests/test_device_buffer.pyx @@ -16,9 +16,9 @@ import numpy as np from libcpp.memory cimport make_unique -from rmm.cpp.cuda_stream_view cimport cuda_stream_default -from rmm.cpp.device_buffer cimport device_buffer -from rmm.python.device_buffer cimport DeviceBuffer +from rmm.librmm.cuda_stream_view cimport cuda_stream_default +from rmm.librmm.device_buffer cimport device_buffer +from rmm.pylibrmm.device_buffer cimport DeviceBuffer def test_release(): diff --git a/python/rmm/rmm/tests/test_cython.py b/python/rmm/rmm/tests/test_cython.py index a0364942c..5df933435 100644 --- a/python/rmm/rmm/tests/test_cython.py +++ b/python/rmm/rmm/tests/test_cython.py @@ -29,7 +29,7 @@ def wrapped(*args, **kwargs): return wrapped -cython_test_modules = ["rmm.python.tests.test_device_buffer"] +cython_test_modules = ["rmm.pylibrmm.tests.test_device_buffer"] for mod in cython_test_modules: diff --git a/python/rmm/rmm/tests/test_rmm.py b/python/rmm/rmm/tests/test_rmm.py index 826071fce..7704cac25 100644 --- a/python/rmm/rmm/tests/test_rmm.py +++ b/python/rmm/rmm/tests/test_rmm.py @@ -354,7 +354,7 @@ def test_rmm_pool_numba_stream(stream): rmm.reinitialize(pool_allocator=True) stream = rmm._cuda.stream.Stream(stream) - a = rmm.python.device_buffer.DeviceBuffer(size=3, stream=stream) + a = rmm.pylibrmm.device_buffer.DeviceBuffer(size=3, stream=stream) assert a.size == 3 assert a.ptr != 0 From 0d0ea02d1ad98150606d710586841498d4017fc9 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Tue, 24 Sep 2024 12:47:54 -0700 Subject: [PATCH 09/13] address reviews --- python/rmm/rmm/_lib/cuda_stream.pxd | 1 + python/rmm/rmm/_lib/cuda_stream_pool.pxd | 15 +++++ python/rmm/rmm/_lib/cuda_stream_view.pxd | 15 +++++ python/rmm/rmm/_lib/device_buffer.pxd | 6 ++ python/rmm/rmm/_lib/device_uvector.pxd | 15 +++++ python/rmm/rmm/_lib/logger.pxd | 75 +++++++++++++++++++++ python/rmm/rmm/_lib/memory_resource.pxd | 27 ++++++++ python/rmm/rmm/_lib/per_device_resource.pxd | 21 ++++++ python/rmm/rmm/allocators/torch.py | 2 +- python/rmm/rmm/librmm/_logger.pyx | 2 +- python/rmm/rmm/librmm/memory_resource.pxd | 2 - 11 files changed, 177 insertions(+), 4 deletions(-) create mode 100644 python/rmm/rmm/_lib/cuda_stream_pool.pxd create mode 100644 python/rmm/rmm/_lib/cuda_stream_view.pxd create mode 100644 python/rmm/rmm/_lib/device_uvector.pxd create mode 100644 python/rmm/rmm/_lib/logger.pxd create mode 100644 python/rmm/rmm/_lib/per_device_resource.pxd diff --git a/python/rmm/rmm/_lib/cuda_stream.pxd b/python/rmm/rmm/_lib/cuda_stream.pxd index f7b5759bd..afc365fbb 100644 --- a/python/rmm/rmm/_lib/cuda_stream.pxd +++ b/python/rmm/rmm/_lib/cuda_stream.pxd @@ -12,4 +12,5 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from rmm.librmm.cuda_stream cimport cuda_stream from rmm.pylibrmm.cuda_stream cimport CudaStream diff --git a/python/rmm/rmm/_lib/cuda_stream_pool.pxd b/python/rmm/rmm/_lib/cuda_stream_pool.pxd new file mode 100644 index 000000000..4da59cc68 --- /dev/null +++ b/python/rmm/rmm/_lib/cuda_stream_pool.pxd @@ -0,0 +1,15 @@ +# Copyright (c) 2021-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from rmm.librmm.cuda_stream_pool cimport cuda_stream_pool diff --git a/python/rmm/rmm/_lib/cuda_stream_view.pxd b/python/rmm/rmm/_lib/cuda_stream_view.pxd new file mode 100644 index 000000000..78480420f --- /dev/null +++ b/python/rmm/rmm/_lib/cuda_stream_view.pxd @@ -0,0 +1,15 @@ +# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from rmm.librmm.cuda_stream_view cimport cuda_stream_view diff --git a/python/rmm/rmm/_lib/device_buffer.pxd b/python/rmm/rmm/_lib/device_buffer.pxd index 66c19d990..22833b1b8 100644 --- a/python/rmm/rmm/_lib/device_buffer.pxd +++ b/python/rmm/rmm/_lib/device_buffer.pxd @@ -12,6 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +from rmm.librmm.device_buffer cimport ( + cuda_device_id, + device_buffer, + get_current_cuda_device, + prefetch, +) from rmm.pylibrmm.device_buffer cimport ( DeviceBuffer, copy_device_to_ptr, diff --git a/python/rmm/rmm/_lib/device_uvector.pxd b/python/rmm/rmm/_lib/device_uvector.pxd new file mode 100644 index 000000000..230b0afb3 --- /dev/null +++ b/python/rmm/rmm/_lib/device_uvector.pxd @@ -0,0 +1,15 @@ +# Copyright (c) 2021-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from rmm.librmm.device_uvector cimport device_uvector diff --git a/python/rmm/rmm/_lib/logger.pxd b/python/rmm/rmm/_lib/logger.pxd new file mode 100644 index 000000000..27936d09c --- /dev/null +++ b/python/rmm/rmm/_lib/logger.pxd @@ -0,0 +1,75 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from rmm.librmm.logger cimport logger, logging_level, spdlog_logger +from rmm.pylibrmm.logger cimport ( + _validate_level_type, + flush_logger, + get_flush_level, + get_logging_level, + set_flush_level, + set_logging_level, + should_log, +) + + +cdef extern from "spdlog/common.h" namespace "spdlog::level" nogil: + cpdef enum logging_level "spdlog::level::level_enum": + """ + The debug logging level for RMM. + + Debug logging prints messages to a log file. See + `Debug Logging `_ + for more information. + + Valid levels, in decreasing order of verbosity, are TRACE, DEBUG, + INFO, WARN, ERR, CRITICAL, and OFF. Default is INFO. + + Examples + -------- + >>> import rmm + >>> rmm.logging_level.DEBUG + + >>> rmm.logging_level.DEBUG.value + 1 + >>> rmm.logging_level.DEBUG.name + 'DEBUG' + + See Also + -------- + set_logging_level : Set the debug logging level + get_logging_level : Get the current debug logging level + """ + TRACE "spdlog::level::trace" + DEBUG "spdlog::level::debug" + INFO "spdlog::level::info" + WARN "spdlog::level::warn" + ERR "spdlog::level::err" + CRITICAL "spdlog::level::critical" + OFF "spdlog::level::off" + + +cdef extern from "spdlog/spdlog.h" namespace "spdlog" nogil: + cdef cppclass spdlog_logger "spdlog::logger": + spdlog_logger() except + + void set_level(logging_level level) + logging_level level() + void flush() except + + void flush_on(logging_level level) + logging_level flush_level() + bool should_log(logging_level msg_level) + + +cdef extern from "rmm/logger.hpp" namespace "rmm" nogil: + cdef spdlog_logger& logger() except + diff --git a/python/rmm/rmm/_lib/memory_resource.pxd b/python/rmm/rmm/_lib/memory_resource.pxd index 79a28cac2..983063914 100644 --- a/python/rmm/rmm/_lib/memory_resource.pxd +++ b/python/rmm/rmm/_lib/memory_resource.pxd @@ -12,6 +12,33 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from rmm.librmm.memory_resource cimport ( + CppExcept, + allocate_callback_t, + allocation_handle_type, + available_device_memory, + binning_memory_resource, + callback_memory_resource, + cuda_async_memory_resource, + cuda_memory_resource, + deallocate_callback_t, + device_memory_resource, + failure_callback_resource_adaptor, + failure_callback_t, + fixed_size_memory_resource, + limiting_resource_adaptor, + logging_resource_adaptor, + managed_memory_resource, + percent_of_free_device_memory, + pool_memory_resource, + prefetch_resource_adaptor, + sam_headroom_memory_resource, + statistics_resource_adaptor, + system_memory_resource, + throw_cpp_except, + tracking_resource_adaptor, + translate_python_except_to_cpp, +) from rmm.pylibrmm.memory_resource cimport ( BinningMemoryResource, CallbackMemoryResource, diff --git a/python/rmm/rmm/_lib/per_device_resource.pxd b/python/rmm/rmm/_lib/per_device_resource.pxd new file mode 100644 index 000000000..29487f503 --- /dev/null +++ b/python/rmm/rmm/_lib/per_device_resource.pxd @@ -0,0 +1,21 @@ +# Copyright (c) 2019-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from rmm.librmm.per_device_resource cimport ( + cuda_device_id, + get_current_device_resource, + get_per_device_resource, + set_current_device_resource, + set_per_device_resource, +) diff --git a/python/rmm/rmm/allocators/torch.py b/python/rmm/rmm/allocators/torch.py index 462a06e80..eee0e9df9 100644 --- a/python/rmm/rmm/allocators/torch.py +++ b/python/rmm/rmm/allocators/torch.py @@ -28,7 +28,7 @@ # allocator .so relative to the current file because the current file # is pure Python and will therefore be in the source directory. # Instead, we search relative to an arbitrary file in the compiled - # package. We use the cpp._logger module because it is small. + # package. We use the librmm._logger module because it is small. from rmm.librmm import _logger sofile = pathlib.Path(_logger.__file__).parent / "_torch_allocator.so" diff --git a/python/rmm/rmm/librmm/_logger.pyx b/python/rmm/rmm/librmm/_logger.pyx index e848c6dda..4392cb106 100644 --- a/python/rmm/rmm/librmm/_logger.pyx +++ b/python/rmm/rmm/librmm/_logger.pyx @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from ._logger cimport logging_level # no-cython-lint +from rmm.librmm._logger cimport logging_level # no-cython-lint diff --git a/python/rmm/rmm/librmm/memory_resource.pxd b/python/rmm/rmm/librmm/memory_resource.pxd index a8e932e6c..9ddaf04b9 100644 --- a/python/rmm/rmm/librmm/memory_resource.pxd +++ b/python/rmm/rmm/librmm/memory_resource.pxd @@ -79,8 +79,6 @@ cdef extern from *: void throw_cpp_except(CppExcept) nogil -# NOTE: Keep extern declarations in .pyx file as much as possible to avoid -# leaking dependencies when importing RMM Cython .pxd files cdef extern from "rmm/mr/device/cuda_memory_resource.hpp" \ namespace "rmm::mr" nogil: cdef cppclass cuda_memory_resource(device_memory_resource): From bfe271ce86e4aabbc7e111cab4d8e84b03d4ba23 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Tue, 24 Sep 2024 20:48:16 -0700 Subject: [PATCH 10/13] add missing imports, clean up --- python/rmm/rmm/_lib/cuda_stream_view.pxd | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/python/rmm/rmm/_lib/cuda_stream_view.pxd b/python/rmm/rmm/_lib/cuda_stream_view.pxd index 78480420f..c336b0fe8 100644 --- a/python/rmm/rmm/_lib/cuda_stream_view.pxd +++ b/python/rmm/rmm/_lib/cuda_stream_view.pxd @@ -12,4 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from rmm.librmm.cuda_stream_view cimport ( + cuda_stream_default, + cuda_stream_legacy, + cuda_stream_per_thread, + cuda_stream_view, +) From efe3a48a5e456123c7ef82140dfef5e9197d30e8 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Wed, 25 Sep 2024 07:57:19 -0700 Subject: [PATCH 11/13] address review --- python/rmm/rmm/__init__.py | 5 ----- python/rmm/rmm/_lib/__init__.py | 7 ------- python/rmm/rmm/tests/test_rmm.py | 6 ------ 3 files changed, 18 deletions(-) diff --git a/python/rmm/rmm/__init__.py b/python/rmm/rmm/__init__.py index c52818a75..b23ad68f9 100644 --- a/python/rmm/rmm/__init__.py +++ b/python/rmm/rmm/__init__.py @@ -57,12 +57,7 @@ def __getattr__(name): if name == "_lib": import importlib - import warnings - warnings.warn( - "The `rmm._lib` module is deprecated and will be removed in a future release. Use `rmm.pylibrmm` instead.", - FutureWarning, - ) module = importlib.import_module("rmm.pylibrmm") return module else: raise AttributeError(f"Module '{__name__}' has no attribute '{name}'") diff --git a/python/rmm/rmm/_lib/__init__.py b/python/rmm/rmm/_lib/__init__.py index 965d8e917..7cfddab60 100644 --- a/python/rmm/rmm/_lib/__init__.py +++ b/python/rmm/rmm/_lib/__init__.py @@ -12,11 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -import warnings - -warnings.warn( - "The `rmm._lib` module is deprecated and will be removed in a future release. 
Use `rmm.pylibrmm` instead.", - FutureWarning, -) - from rmm.pylibrmm import * diff --git a/python/rmm/rmm/tests/test_rmm.py b/python/rmm/rmm/tests/test_rmm.py index 7704cac25..c03b9e501 100644 --- a/python/rmm/rmm/tests/test_rmm.py +++ b/python/rmm/rmm/tests/test_rmm.py @@ -1076,9 +1076,3 @@ def test_available_device_memory(): assert initial_memory[1] == final_memory[1] assert initial_memory[0] > 0 assert final_memory[0] > 0 - - -# TODO: Remove test when rmm._lib is removed in 24.12 -def test_deprecate_rmm_lib(): - with pytest.warns(FutureWarning): - rmm._lib.device_buffer.DeviceBuffer(size=100) From 30b4ebceab7128c218fddea3c5aa6adff942046a Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Tue, 1 Oct 2024 12:02:58 -0700 Subject: [PATCH 12/13] fix docs --- python/rmm/docs/guide.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/rmm/docs/guide.md b/python/rmm/docs/guide.md index 22c0dc023..c7e940497 100644 --- a/python/rmm/docs/guide.md +++ b/python/rmm/docs/guide.md @@ -236,17 +236,17 @@ Common to both usages is that they modify the currently active RMM memory resour >>> # We start with the default cuda memory resource >>> rmm.mr.get_current_device_resource() - + >>> # When using statistics, we get a StatisticsResourceAdaptor with the context >>> with rmm.statistics.statistics(): ... rmm.mr.get_current_device_resource() - + >>> # We can also enable statistics globally >>> rmm.statistics.enable_statistics() >>> print(rmm.mr.get_current_device_resource()) - + ``` With statistics enabled, you can query statistics of the current and peak bytes and number of allocations performed by the current RMM memory resource: From 6ee07a855481864bc8c1f15e4feb987adef690bc Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Wed, 2 Oct 2024 16:38:46 -0700 Subject: [PATCH 13/13] address review --- .gitignore | 11 ++++--- python/rmm/rmm/_lib/logger.pxd | 53 +--------------------------------- 2 files changed, 8 insertions(+), 56 deletions(-) diff --git a/.gitignore b/.gitignore index 2d0b150e1..36aafe643 100644 --- a/.gitignore +++ b/.gitignore @@ -22,10 +22,13 @@ rmm.egg-info/ python/build python/*/build python/rmm/docs/_build -python/rmm/**/_lib/**/*.cpp -!python/rmm/_lib/_torch_allocator.cpp -python/rmm/**/_lib/**/*.h -python/rmm/**/_lib/.nfs* +python/rmm/**/librmmm/**/*.cpp +!python/rmm/librmmm/_torch_allocator.cpp +python/rmm/**/librmm/**/*.h +python/rmm/**/librmm/.nfs* +python/rmm/**/pylibrmmm/**/*.cpp +python/rmm/**/pylibrmmm/**/*.h +python/rmm/**/pylibrmmm/.nfs* python/rmm/_cuda/*.cpp python/rmm/tests/*.cpp python/rmm/*.ipynb diff --git a/python/rmm/rmm/_lib/logger.pxd b/python/rmm/rmm/_lib/logger.pxd index 27936d09c..bef05c903 100644 --- a/python/rmm/rmm/_lib/logger.pxd +++ b/python/rmm/rmm/_lib/logger.pxd @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from rmm.librmm.logger cimport logger, logging_level, spdlog_logger +from rmm.librmm._logger cimport logger, logging_level, spdlog_logger from rmm.pylibrmm.logger cimport ( _validate_level_type, flush_logger, @@ -22,54 +22,3 @@ from rmm.pylibrmm.logger cimport ( set_logging_level, should_log, ) - - -cdef extern from "spdlog/common.h" namespace "spdlog::level" nogil: - cpdef enum logging_level "spdlog::level::level_enum": - """ - The debug logging level for RMM. - - Debug logging prints messages to a log file. See - `Debug Logging `_ - for more information. 
- - Valid levels, in decreasing order of verbosity, are TRACE, DEBUG, - INFO, WARN, ERR, CRITICAL, and OFF. Default is INFO. - - Examples - -------- - >>> import rmm - >>> rmm.logging_level.DEBUG - - >>> rmm.logging_level.DEBUG.value - 1 - >>> rmm.logging_level.DEBUG.name - 'DEBUG' - - See Also - -------- - set_logging_level : Set the debug logging level - get_logging_level : Get the current debug logging level - """ - TRACE "spdlog::level::trace" - DEBUG "spdlog::level::debug" - INFO "spdlog::level::info" - WARN "spdlog::level::warn" - ERR "spdlog::level::err" - CRITICAL "spdlog::level::critical" - OFF "spdlog::level::off" - - -cdef extern from "spdlog/spdlog.h" namespace "spdlog" nogil: - cdef cppclass spdlog_logger "spdlog::logger": - spdlog_logger() except + - void set_level(logging_level level) - logging_level level() - void flush() except + - void flush_on(logging_level level) - logging_level flush_level() - bool should_log(logging_level msg_level) - - -cdef extern from "rmm/logger.hpp" namespace "rmm" nogil: - cdef spdlog_logger& logger() except +
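After this series, `rmm.pylibrmm` is the canonical home of the Cython-backed Python classes, `rmm.librmm` carries the raw C++ declarations, and `rmm._lib` survives only as a compatibility alias: the module-level `__getattr__` in `rmm/__init__.py` resolves `rmm._lib` to `rmm.pylibrmm`, and the `rmm/_lib/*.pxd` stubs simply re-export the relocated declarations for downstream cimports. A minimal usage sketch of the resulting layout (not part of the patches above; it assumes an RMM build with this series applied and a visible CUDA device):

    import rmm

    # New canonical import path for the Cython-backed classes.
    db = rmm.pylibrmm.device_buffer.DeviceBuffer(size=16)

    # Legacy path still resolves: rmm.__getattr__("_lib") returns the
    # rmm.pylibrmm module, so this is the same class under the old name.
    legacy = rmm._lib.device_buffer.DeviceBuffer(size=16)

    assert type(db) is type(legacy)
    print(db.size, legacy.size)  # 16 16

Cython consumers can likewise keep cimporting from `rmm._lib.device_buffer` or `rmm._lib.memory_resource` for now, since those `.pxd` files forward to `rmm.pylibrmm` and `rmm.librmm`, but new code should target the new package names directly.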